kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d2ab4e9a9375513bde35c2c4ecb85ed3b33cf6d7
parent fa5bef9f094356eb6e7054031eff88aa0a4c98b9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 15:15:00 -0700

refactor: split six >2KLOC files into focused modules

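Each split is code motion only — no semantic changes. The only edits
beyond moving code are the per-arch helper renames and the dead-helper
removal noted below. Public headers unchanged; new private internal
headers (arch/<arch>/internal.h, parse_priv.h, pp_priv.h) host
cross-file forward decls.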

- src/arch/{x64,rv64,aarch64}/ each split into emit.c (encoding +
  prologue/epilogue), alloc.c (regpool + labels + control flow), and
  ops.c (load/store/binop/call/atomics/intrinsics + vtable). Helpers
  promoted across files are prefixed per arch (x64_*, rv64_*, aa64_*)
  to avoid link-time collisions.
- src/parse/parse.c into parse_type.c, parse_expr.c, parse_init.c,
  parse_stmt.c + residual TU driver/lex/scope.
- src/link/link_layout.c into link_resolve.c (archives + symbols + GC),
  link_reloc_layout.c (vaddr binding + GOT/iPLT/stubs/relocs), and
  residual link_layout.c (section placement).
- src/pp/pp.c into pp_expand.c (hideset + macros + expansion) and
  pp_directive.c (#if eval + #include/#line/#pragma/#error/#embed) +
  residual source stack + lifecycle.

Dead aa_panic() helper removed (all callers were replaced with real
implementations in prior commits).

Diffstat:
Dsrc/arch/aarch64.c | 3457-------------------------------------------------------------------------------
Asrc/arch/aarch64/alloc.c | 318+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/aarch64/emit.c | 546+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/aarch64/internal.h | 312+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/aarch64/ops.c | 1895+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/arch/rv64.c | 2765-------------------------------------------------------------------------------
Asrc/arch/rv64/alloc.c | 394+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/rv64/emit.c | 332+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/rv64/internal.h | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/rv64/ops.c | 1840+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/arch/x64.c | 3075-------------------------------------------------------------------------------
Asrc/arch/x64/alloc.c | 378+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/x64/emit.c | 647+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/x64/internal.h | 257+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/x64/ops.c | 1916+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/link/link_internal.h | 81++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/link/link_layout.c | 2528++++---------------------------------------------------------------------------
Asrc/link/link_reloc_layout.c | 1236+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/link/link_resolve.c | 597+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/parse/parse.c | 5946++-----------------------------------------------------------------------------
Asrc/parse/parse_expr.c | 1795+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/parse_init.c | 808+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/parse_priv.h | 431+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/parse_stmt.c | 689+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/parse_type.c | 1121+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/pp/pp.c | 2460+------------------------------------------------------------------------------
Asrc/pp/pp_directive.c | 1252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/pp/pp_expand.c | 1008+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/pp/pp_priv.h | 278+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
29 files changed, 18546 insertions(+), 20038 deletions(-)

diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c @@ -1,3457 +0,0 @@ -/* Minimal AArch64 CGTarget. - * - * Single-pass codegen for the cg test corpus (Groups A, B, C). Frame - * layout uses a fixed-size prologue placeholder patched at func_end so - * frame_size and the callee-save register count are knowable when the - * prologue is finally written. FP-relative (x29) addressing is used for - * local slots and incoming stack args so that per-slot offsets can be - * assigned at frame_slot() time without depending on the eventual - * frame_size or callee-save count. SP-relative addressing is used for - * outgoing stack args. - * - * Frame layout (low SP -> high): - * outgoing args (max_outgoing bytes, 16-aligned) - * int reg saves (n_int_pairs * 16) -- x19/x20, x21/x22, ... - * fp reg saves (n_fp_pairs * 16) -- d8/d9, d10/d11, ... - * local slots (cum_off bytes) - * x29, x30 save (16 bytes) -- x29 = sp + frame_size - 16 - * - * Single-pass register allocator: a free-mask pool per class hands out - * the lowest free index. INT pool covers x19..x28 (10 callee-saves); - * FP pool covers v8..v23, with v8..v15 callee-saved and v16..v23 - * caller-saved scratch — lowest-bit-first allocation prefers callee- - * saves. Only the prefix actually used (high-water mark) is saved by - * the prologue. Width derives from Operand.type via type_is_64. CG - * drives spill/reload through alloc_reg returning REG_NONE on - * exhaustion plus the spill_reg/reload_reg vtable entries. - * - * Multi-function: each func_begin/func_end pair owns its own frame state - * via the AAImpl fields, so the harness can build several functions in - * one TU. 
*/ - -#include <string.h> - -#include "arch/aa64_asm.h" -#include "arch/aa64_isa.h" -#include "arch/aa64_regs.h" -#include "arch/arch.h" -#include "core/arena.h" -#include "obj/obj.h" -#include "type/type.h" - -/* ============================================================ - * Local encoding helpers (kept here, not in aa64_isa.h, while the - * disassembler-shared table only needs the Group A/C subset). - * ============================================================ */ - -#define AA64_NOP 0xD503201Fu - -/* ADD/SUB immediate (aa64_add_imm / aa64_sub_imm) live in - * arch/aa64_isa.h alongside the rest of the immediate-encoding family. - * Rd/Rn = 31 means SP for these encodings (not ZR). */ - -/* STP/LDP signed offset, X registers. Offset is byte offset, must be a - * multiple of 8; encoded value = byte_offset / 8 in a signed 7-bit field - * (range -512..504). */ -static inline u32 aa64_stp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { - i32 sc = byte_off >> 3; - return 0xA9000000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { - i32 sc = byte_off >> 3; - return 0xA9400000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -/* STP/LDP signed offset, D registers (64-bit FP, scale 8). */ -static inline u32 aa64_stp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { - i32 sc = byte_off >> 3; - return 0x6D000000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { - i32 sc = byte_off >> 3; - return 0x6D400000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} - -/* LDUR / STUR (general regs, unscaled simm9 in -256..255). - * size: 0=B, 1=H, 2=W, 3=X. 
*/ -static inline u32 aa64_stur(u32 size, u32 Rt, u32 Rn, i32 simm9) { - return 0x38000000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldur(u32 size, u32 Rt, u32 Rn, i32 simm9) { - return 0x38400000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -/* LDUR/STUR for SIMD & FP registers (V=1). size: 2=S (32-bit), 3=D (64-bit). */ -static inline u32 aa64_stur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) { - return 0x3C000000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) { - return 0x3C400000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} - -/* STR/LDR scaled (unsigned imm12). byte_off must be a multiple of (1<<size). */ -static inline u32 aa64_str_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { - u32 sc = byte_off >> size; - return 0x39000000u | (size << 30) | ((sc & 0xfffu) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldr_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { - u32 sc = byte_off >> size; - return 0x39400000u | (size << 30) | ((sc & 0xfffu) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -/* STR (SIMD & FP, unsigned offset). size: 2=S (32), 3=D (64). */ -static inline u32 aa64_str_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { - u32 sc = byte_off >> size; - return 0x3D000000u | (size << 30) | ((sc & 0xfffu) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} - -/* MRS Xt, TPIDR_EL0 — read AArch64 user thread pointer. */ -static inline u32 aa64_mrs_tpidr_el0(u32 Rt) { - return 0xD53BD040u | (Rt & 0x1fu); -} -/* Branch (unconditional, 26-bit imm). Emitted with imm26=0 when paired - * with a JUMP26/CALL26 relocation; the patcher fills in imm26. 
*/ -static inline u32 aa64_b_base(void) { return 0x14000000u; } -static inline u32 aa64_bl_base(void) { return 0x94000000u; } - -/* ADRP base (Rd in low 5 bits). imm bits filled by relocation. */ -static inline u32 aa64_adrp_base(u32 Rd) { return 0x90000000u | (Rd & 0x1f); } - -/* LDR (unsigned offset) for SIMD & FP, used after ADRP for FP literals. - * size 2 => S (32-bit). imm12 patched by linker. */ -static inline u32 aa64_ldr_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { - u32 sc = byte_off >> size; - return 0x3D400000u | (size << 30) | ((sc & 0xfffu) << 10) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} - -/* FMOV (scalar register). type: 0=single, 1=double. */ -static inline u32 aa64_fmov_reg(u32 type, u32 Rd, u32 Rn) { - return 0x1E204000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* SUBS immediate (used to encode CMP Xn, #imm via SUBS ZR, Xn, #imm). */ -static inline u32 aa64_subs_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12) { - return 0x71000000u | (sf << 31) | ((imm12 & 0xfff) << 10) | - ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* CSET Wd/Xd, EQ — alias of CSINC Rd, ZR, ZR, NE (inverted EQ). */ -static inline u32 aa64_cset_eq(u32 sf, u32 Rd) { - return 0x1A800400u | (sf << 31) | (31u << 16) | (0x1u << 12) | (31u << 5) | - (Rd & 0x1f); -} - -/* FCVTZS (scalar fp -> integer, round toward zero, signed). - * sf: 0=W, 1=X. type: 0=S, 1=D. 
*/ -static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn) { - return 0x1E380000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} -static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn) { - return 0x1E390000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} -static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn) { - return 0x1E220000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} -static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn) { - return 0x1E230000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} - -/* FCVT — between FP precisions. S→D widens; D→S narrows. */ -static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn) { - return 0x1E22C000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn) { - return 0x1E624000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* FMOV between FP and GPR (BITCAST). */ -static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn) /* GPR→FP, single */ -{ - return 0x1E270000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn) /* FP→GPR, single */ -{ - return 0x1E260000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) /* GPR→FP, double */ -{ - return 0x9E670000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) /* FP→GPR, double */ -{ - return 0x9E660000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* SUB (extended register), 64-bit, UXTX, shift 0. Unlike SUB shifted-reg - * (where Rd=31 means ZR), this form treats Rd/Rn=31 as SP — needed to - * decrement SP by a register amount during alloca. */ -static inline u32 aa64_sub_extreg_x_uxtx(u32 Rd, u32 Rn, u32 Rm) { - return 0xCB206000u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* SUBS shifted register (Rd=ZR encodes CMP). 
*/ -static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) { - return 0x6B000000u | (sf << 31) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} - -/* B.cond — imm19 at bits 5..23 left as zero; patched by linker / MCEmitter. */ -static inline u32 aa64_b_cond(u32 cond) { return 0x54000000u | (cond & 0xfu); } - -/* CSINC Rd, Rn, Rm, cond (CSEL family with op2=01). CSET Rd, cond - * is CSINC Rd, ZR, ZR, !cond. */ -static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { - return 0x1A800400u | (sf << 31) | ((Rm & 0x1f) << 16) | - ((cond & 0xfu) << 12) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond) { - return aa64_csinc(sf, Rd, 31u, 31u, cond ^ 1u); -} - -/* FADD / FSUB / FMUL / FDIV (scalar). type: 0=S (float), 1=D (double). */ -static inline u32 aa64_fadd(u32 type, u32 Rd, u32 Rn, u32 Rm) { - return 0x1E202800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fsub(u32 type, u32 Rd, u32 Rn, u32 Rm) { - return 0x1E203800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fmul(u32 type, u32 Rd, u32 Rn, u32 Rm) { - return 0x1E200800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) { - return 0x1E201800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* SBFM / UBFM / BFM (bitfield move family). - * sf opc(2) 100110 N immr(6) imms(6) Rn(5) Rd(5) - * opc: 00=SBFM, 01=BFM, 10=UBFM. N must equal sf. 
*/ -static inline u32 aa64_sbfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) { - return 0x13000000u | (sf << 31) | (sf << 22) | ((immr & 0x3fu) << 16) | - ((imms & 0x3fu) << 10) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) { - return 0x53000000u | (sf << 31) | (sf << 22) | ((immr & 0x3fu) << 16) | - ((imms & 0x3fu) << 10) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) { - return 0x33000000u | (sf << 31) | (sf << 22) | ((immr & 0x3fu) << 16) | - ((imms & 0x3fu) << 10) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* ============================================================ - * Per-class register pool (free-mask + high-water mark). - * - * The mask uses bit i for the i-th register in the class's contiguous - * range, so allocation is `__builtin_ctz` over the free mask and - * deallocation is bit-set. `hwm` records the highest-index-+1 ever - * allocated, which the prologue/epilogue uses to size the callee-save - * area. 32-bit masks suffice for every aarch64/x86_64/RISC-V class. - * ============================================================ */ - -typedef struct RegPool { - u32 free; /* bit i set ⇔ regs[base + i] is free */ - u32 hwm; /* highest-index-+1 ever allocated */ - u8 base; /* first physical reg in the class */ - u8 nregs; /* count; bits [nregs..32) are always 0 */ - u8 pad[2]; -} RegPool; - -static void regpool_init(RegPool* p, u8 base, u8 nregs) { - p->base = base; - p->nregs = nregs; - p->hwm = 0; - p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u); -} - -static Reg regpool_alloc(RegPool* p) { - if (p->free == 0) return (Reg)REG_NONE; - u32 idx = (u32)__builtin_ctz(p->free); - p->free &= ~(1u << idx); - if (idx + 1u > p->hwm) p->hwm = idx + 1u; - return (Reg)(p->base + idx); -} - -/* Returns 1 on successful free, 0 if `r` is outside this pool's range, - * -1 on double-free (caller is expected to panic). 
*/ -static int regpool_free(RegPool* p, Reg r) { - u32 rn = (u32)r; - if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0; - u32 idx = rn - p->base; - u32 bit = 1u << idx; - if (p->free & bit) return -1; - p->free |= bit; - return 1; -} - -/* ============================================================ - * AAImpl - * ============================================================ */ - -#define AA_PROLOGUE_WORDS \ - 12u /* worst case: sub sp + stp/add fp + 5 int + 4 fp = 11 */ - -typedef struct AASlot { - u32 off; /* bytes below fp; address = x29 - off */ - u32 size; - u32 align; - u8 kind; /* FrameSlotKind */ - u8 pad[3]; -} AASlot; - -typedef struct AAScope { - u8 kind; /* ScopeKind */ - u8 has_else; - u8 pad[2]; - MCLabel else_label; /* SCOPE_IF: false branch target / end-of-then */ - MCLabel end_label; /* SCOPE_IF: join point past the whole if/else */ - Label break_label; /* SCOPE_LOOP/BLOCK: explicit break target */ - Label continue_label; /* SCOPE_LOOP: explicit continue target */ -} AAScope; - -typedef struct AAImpl { - CGTarget base; - SrcLoc loc; - const CGFuncDesc* fd; - - /* Function emission. */ - u32 func_start; - u32 prologue_pos; - MCLabel epilogue_label; - - /* Frame layout (in bytes; final frame_size computed at func_end). */ - AASlot* slots; - u32 nslots; - u32 slots_cap; - u32 cum_off; /* total bytes consumed by local slots */ - u32 max_outgoing; /* max stack arg bytes for any call */ - - /* Param incoming tracking — set by func_begin from ABIFuncInfo. */ - u32 next_param_int; /* x0..x7 consumed so far */ - u32 next_param_fp; /* v0..v7 consumed so far */ - u32 next_param_stack; /* offset into caller's stack arg area */ - u8 has_sret; /* sret pointer arrived in x8 */ - FrameSlot sret_ptr_slot; /* hidden slot holding incoming x8 */ - - /* Reg allocator pools. Bit i set in `free` means the i-th register in - * the class's contiguous range (base..base+nregs-1) is available. 
The - * high-water mark `hwm` is the largest index+1 ever allocated for the - * class — used by the prologue to decide how many callee-saves to push. - * - * INT pool: base = 19, nregs = 10 (x19..x28). - * FP pool : base = 8, nregs = 16 (v8..v23). The first 8 (v8..v15) are - * AAPCS64 callee-saves; v16..v23 are caller-saved scratch handed out - * after the callee-saved range fills. Allocation is lowest-bit-first - * so callee-saves are still preferred. */ - RegPool int_pool; - RegPool fp_pool; - - /* Structured-scope stack. Entries are not popped — IDs returned to - * the caller are stable indices into this array for the lifetime - * of the function. nscopes is reset at func_begin. */ - AAScope* scopes; - u32 nscopes; - u32 scopes_cap; - - /* alloca: each call emits an `ADD result, SP, #0` placeholder; at - * func_end the imm12 is patched with the final max_outgoing. Tracks - * (instruction pos, dst reg) for each placeholder. has_alloca also - * triggers SP-from-FP restoration in the epilogue. */ - u8 has_alloca; - struct AAAllocaPatch { - u32 pos; - u32 dst_reg; - }* add_patches; - u32 nadd_patches; - u32 add_patches_cap; - - /* Variadic — AAPCS64 register save areas reserved at function entry. - * gp_save_slot holds 8*8=64 bytes (x0..x7); fp_save_slot holds 8*16=128 - * bytes (v0..v7 with 16-byte stride). Saves are emitted in func_begin - * after the prologue placeholder so FP is already valid when they run. */ - u8 is_variadic; - FrameSlot gp_save_slot; - FrameSlot fp_save_slot; -} AAImpl; - -static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; } - -/* Forward decls used before definition. 
*/ -static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d); -static AASlot* slot_get(AAImpl* a, FrameSlot fs); -static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch); -static void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); -static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); -static void aa_free_reg(CGTarget* t, Reg r, RegClass cls); - -/* ---- helpers ---- */ - -static int type_is_64(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 1; - default: - return 0; - } -} - -static int type_is_fp_double(const Type* t) { - return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); -} - -static int type_is_signed(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_SHORT: - case TY_INT: - case TY_LONG: - case TY_LLONG: - return 1; - default: - return 0; - } -} - -static u32 type_byte_size(const Type* t) { - if (!t) return 4; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_UCHAR: - case TY_BOOL: - return 1; - case TY_SHORT: - case TY_USHORT: - return 2; - case TY_INT: - case TY_UINT: - case TY_FLOAT: - return 4; - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 8; - default: - return 8; - } -} - -/* Encode size index for STUR/LDUR (0=B,1=H,2=W,3=X). */ -static u32 size_idx_for_bytes(u32 nbytes) { - switch (nbytes) { - case 1: - return 0; - case 2: - return 1; - case 4: - return 2; - case 8: - return 3; - default: - return 3; - } -} - -static u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } - -/* Single new producer-side dependency from the backend on Debug. Per - * doc/DWARF.md §3.2 the only Debug call the aarch64 backend makes is - * debug_emit_row, fed (text_section, offset_at_emit_start, pending_loc). 
- * The forward decl of `Debug` lives in arch/arch.h; we declare the - * function here so the backend doesn't need to include debug/debug.h. */ -extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); - -static void emit32(MCEmitter* mc, u32 word) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 b[4]; - b[0] = (u8)(word & 0xff); - b[1] = (u8)((word >> 8) & 0xff); - b[2] = (u8)((word >> 16) & 0xff); - b[3] = (u8)((word >> 24) & 0xff); - mc->emit_bytes(mc, b, 4); - if (mc->debug) { - /* (section, offset, pending_loc) row. Per §3.1 Class 2: granularity is - * per-instruction; Debug deduplicates identical consecutive rows so a - * multi-instruction CG op with a single set_loc is cheap. The pending - * loc lives on MCEmitter (set by m_set_loc) so emit32 can read it - * without reaching into the per-arch impl. */ - debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); - } -} - -static void patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) { - u8 b[4]; - b[0] = (u8)(word & 0xff); - b[1] = (u8)((word >> 8) & 0xff); - b[2] = (u8)((word >> 16) & 0xff); - b[3] = (u8)((word >> 24) & 0xff); - obj_patch(obj, sec_id, ofs, b, 4); -} - -static _Noreturn void aa_panic(CGTarget* t, const char* what) { - SrcLoc loc = impl_of(t)->loc; - compiler_panic(t->c, loc, "aarch64: %s not implemented", what); -} - -/* ---- AArch64 immediate encoding helpers ---- */ - -/* Materialize a u64 into a register using MOVZ/MOVN/MOVK. Used both for - * the public load_imm() and internally for synthesizing immediates. */ -static void emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm) { - const u32 nslots = sf ? 4u : 2u; - u64 v = sf ? (u64)imm : ((u64)imm & 0xffffffffu); - - for (u32 i = 0; i < nslots; ++i) { - u32 slot = (u32)((v >> (i * 16)) & 0xffffu); - u64 cleared = v & ~((u64)0xffffu << (i * 16)); - if (slot != 0 && cleared == 0) { - emit32(mc, aa64_movz(sf, Rd, slot, i)); - return; - } - } - - { - u64 inv = sf ? 
~v : ((~v) & 0xffffffffu); - for (u32 i = 0; i < nslots; ++i) { - u32 slot = (u32)((inv >> (i * 16)) & 0xffffu); - u64 cleared = inv & ~((u64)0xffffu << (i * 16)); - if (cleared == 0) { - emit32(mc, aa64_movn(sf, Rd, slot, i)); - return; - } - } - } - - int placed = 0; - for (u32 i = 0; i < nslots; ++i) { - u32 slot = (u32)((v >> (i * 16)) & 0xffffu); - if (!placed) { - if (slot == 0) continue; - emit32(mc, aa64_movz(sf, Rd, slot, i)); - placed = 1; - } else if (slot != 0) { - emit32(mc, aa64_movk(sf, Rd, slot, i)); - } - } - if (!placed) emit32(mc, aa64_movz(sf, Rd, 0, 0)); -} - -static void emit_sp_add(MCEmitter* mc, u32 imm) { - if (imm <= 0xfff) { - emit32(mc, aa64_add_imm(1, 31, 31, imm, 0)); - } else if ((imm & 0xfff) == 0 && (imm >> 12) <= 0xfff) { - emit32(mc, aa64_add_imm(1, 31, 31, imm >> 12, 1)); - } else { - emit32(mc, aa64_add_imm(1, 31, 31, (imm >> 12) & 0xfff, 1)); - emit32(mc, aa64_add_imm(1, 31, 31, imm & 0xfff, 0)); - } -} - -/* ---- function lifecycle ---- */ - -static void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - mc->set_section(mc, fd->text_section_id); - mc->emit_align(mc, 4, 0); - - a->fd = fd; - a->func_start = mc->pos(mc); - a->next_param_int = 0; - a->next_param_fp = 0; - a->next_param_stack = 0; - a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; - a->cum_off = 0; - a->max_outgoing = 0; - regpool_init(&a->int_pool, /*base=*/19u, /*nregs=*/10u); /* x19..x28 */ - regpool_init(&a->fp_pool, /*base=*/8u, /*nregs=*/16u); /* v8..v23 */ - a->nslots = 0; - a->nscopes = 0; - a->has_alloca = 0; - a->nadd_patches = 0; - a->sret_ptr_slot = FRAME_SLOT_NONE; - a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; - a->gp_save_slot = FRAME_SLOT_NONE; - a->fp_save_slot = FRAME_SLOT_NONE; - a->epilogue_label = mc->label_new(mc); - - mc->cfi_startproc(mc); - - /* Reserve a fixed-size prologue placeholder, NOP-filled. 
We patch the - * prefix at func_end with the real prologue once frame_size and the - * callee-save count are known. */ - a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) emit32(mc, AA64_NOP); - - /* If the function returns indirect (sret), x8 holds the destination - * pointer on entry. Reserve a hidden slot to spill it into so the - * body can use x8 as scratch and ret can recover the dest pointer. */ - if (a->has_sret) { - FrameSlotDesc fsd = { - .type = NULL, - .name = 0, - .loc = (SrcLoc){0, 0, 0}, - .size = 8, - .align = 8, - .kind = FS_SPILL, - .flags = 0, - }; - a->sret_ptr_slot = aa_frame_slot(t, &fsd); - } - - /* Variadic: reserve GP and FP register save areas and emit saves of - * x0..x7 / d0..d7 here, after the prologue placeholder, so FP is set - * up. Param stores below run after these saves but before any user - * code clobbers x0..x7. */ - if (a->is_variadic) { - FrameSlotDesc gpd = { - .type = NULL, - .name = 0, - .loc = (SrcLoc){0, 0, 0}, - .size = 64, - .align = 8, - .kind = FS_SPILL, - .flags = 0, - }; - a->gp_save_slot = aa_frame_slot(t, &gpd); - FrameSlotDesc fpd = { - .type = NULL, - .name = 0, - .loc = (SrcLoc){0, 0, 0}, - .size = 128, - .align = 16, - .kind = FS_SPILL, - .flags = 0, - }; - a->fp_save_slot = aa_frame_slot(t, &fpd); - AASlot* gs = slot_get(a, a->gp_save_slot); - AASlot* fs = slot_get(a, a->fp_save_slot); - for (u32 i = 0; i < 8; ++i) { - emit32(mc, aa64_stur(3, i, 29, -(i32)gs->off + (i32)i * 8)); - } - for (u32 i = 0; i < 8; ++i) { - emit32(mc, aa64_stur_fp(3, i, 29, -(i32)fs->off + (i32)i * 16)); - } - } -} - -static void aa_func_end(CGTarget* t) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - /* Compute callee-save layout. Only v8..v15 are callee-saved; the - * caller-saved v16..v23 are handed out by alloc_reg too but never - * appear in prologue saves. */ - u32 n_int_pairs = (a->int_pool.hwm + 1) / 2; /* round up */ - u32 used_fp_cs = a->fp_pool.hwm > 8 ? 
8u : a->fp_pool.hwm; - u32 n_fp_pairs = (used_fp_cs + 1) / 2; - - u32 outgoing_off = 0; - u32 int_save_off = a->max_outgoing; - u32 fp_save_off = int_save_off + n_int_pairs * 16; - u32 locals_off = fp_save_off + n_fp_pairs * 16; - u32 fp_lr_off = locals_off + a->cum_off; - u32 frame_size = fp_lr_off + 16; - /* round to 16. */ - frame_size = (frame_size + 15u) & ~15u; - fp_lr_off = frame_size - 16; - - (void)outgoing_off; - - /* Emit epilogue at current pos, then place label. The label we emit - * must point at the first instruction of the epilogue so `b epilogue` - * branches land here. */ - mc->label_place(mc, a->epilogue_label); - - /* If the body called alloca, SP may sit below the locals area. - * Restore SP from FP before reloading callee-saves, since those use - * SP-relative offsets. */ - if (a->has_alloca) { - if (fp_lr_off <= 0xfff) { - emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=*/29, fp_lr_off, 0)); - } else { - compiler_panic(t->c, a->loc, - "aarch64: has_alloca + fp_lr_off %u out of imm12 range", - fp_lr_off); - } - } - - /* Restore FP saves, then INT saves, then fp/lr, then add sp + ret. */ - for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) { - u32 r0 = 8u + (u32)i * 2u; - u32 r1 = r0 + 1u; - emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u))); - } - for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) { - u32 r0 = 19u + (u32)i * 2u; - u32 r1 = r0 + 1u; - emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u))); - } - emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off)); - emit_sp_add(mc, frame_size); - emit32(mc, aa64_ret(AA64_LR)); - - /* Now patch prologue placeholder. */ - u32 pos = a->prologue_pos; - ObjBuilder* obj = t->obj; - u32 sec = a->fd->text_section_id; - - u32 words[AA_PROLOGUE_WORDS]; - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) words[i] = AA64_NOP; - u32 wi = 0; - - /* sub sp, sp, #frame_size — may take 2 insns if > 4095. 
*/ - if (frame_size <= 0xfff) { - words[wi++] = aa64_sub_imm(1, 31, 31, frame_size, 0); - } else if ((frame_size & 0xfff) == 0 && (frame_size >> 12) <= 0xfff) { - words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1); - } else { - if (wi + 2 > AA_PROLOGUE_WORDS) { - compiler_panic(t->c, a->loc, - "aarch64: prologue overflow for frame_size %u", - frame_size); - } - words[wi++] = aa64_sub_imm(1, 31, 31, (frame_size >> 12) & 0xfff, 1); - words[wi++] = aa64_sub_imm(1, 31, 31, frame_size & 0xfff, 0); - } - /* stp x29, x30, [sp, #fp_lr_off]; add x29, sp, #fp_lr_off */ - words[wi++] = aa64_stp_x(29, 30, 31, (i32)fp_lr_off); - words[wi++] = aa64_add_imm(1, 29, 31, fp_lr_off, 0); - /* If sret, save incoming x8 (caller's destination pointer). */ - if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { - AASlot* s = slot_get(a, a->sret_ptr_slot); - if (s) { - if (wi >= AA_PROLOGUE_WORDS) goto overflow; - words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off); - } - } - /* INT pair saves. */ - for (u32 i = 0; i < n_int_pairs; ++i) { - u32 r0 = 19u + i * 2u; - u32 r1 = r0 + 1u; - if (wi >= AA_PROLOGUE_WORDS) goto overflow; - words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u)); - } - for (u32 i = 0; i < n_fp_pairs; ++i) { - u32 r0 = 8u + i * 2u; - u32 r1 = r0 + 1u; - if (wi >= AA_PROLOGUE_WORDS) goto overflow; - words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u)); - } - if (0) { - overflow: - compiler_panic( - t->c, a->loc, - "aarch64: prologue placeholder too small (used %u of %u words)", wi, - AA_PROLOGUE_WORDS); - } - - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) { - patch32(obj, sec, pos + i * 4u, words[i]); - } - - /* Patch each alloca's `ADD dst, SP, #0` placeholder with the final - * max_outgoing offset, now that the high-water mark is known. 
*/ - if (a->max_outgoing > 0xfff) { - compiler_panic( - t->c, a->loc, - "aarch64: max_outgoing %u out of imm12 range for alloca patch", - a->max_outgoing); - } - for (u32 i = 0; i < a->nadd_patches; ++i) { - u32 dr = a->add_patches[i].dst_reg; - u32 word = aa64_add_imm(1, dr, /*Rn=SP*/ 31, a->max_outgoing, 0); - patch32(obj, sec, a->add_patches[i].pos, word); - } - - /* Define the function symbol. */ - u32 end = mc->pos(mc); - obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start, - (u64)(end - a->func_start)); - - mc->cfi_endproc(mc); - a->fd = NULL; -} - -/* ---- registers / frame ---- */ - -static Reg aa_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { - AAImpl* a = impl_of(t); - (void)ty; - /* Lowest-bit-first allocation hands out callee-saves before caller- - * saves on the FP side (v8..v15 then v16..v23) — short-lived - * materializations (e.g. j06 building 9 FP arg regs with no - * intervening call) thus reach into the caller-saved range. */ - if (cls == RC_INT) return regpool_alloc(&a->int_pool); - if (cls == RC_FP) return regpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "aarch64 alloc_reg: class %d unimpl", (int)cls); -} - -static void aa_free_reg(CGTarget* t, Reg r, RegClass cls) { - AAImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: - compiler_panic(t->c, a->loc, "aarch64 free_reg: class %d unimpl", - (int)cls); - } - int rc = regpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, - "aarch64 free_reg: reg %u already free in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); - } - compiler_panic(t->c, a->loc, "aarch64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); -} - -static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d) { - AAImpl* a = impl_of(t); - if (a->nslots == a->slots_cap) { - u32 ncap = a->slots_cap ? 
a->slots_cap * 2 : 8; - AASlot* nbuf = arena_array(t->c->tu, AASlot, ncap); - if (a->slots) memcpy(nbuf, a->slots, sizeof(AASlot) * a->nslots); - a->slots = nbuf; - a->slots_cap = ncap; - } - u32 size = d->size ? d->size : 8; - u32 align = d->align ? d->align : 1; - u32 next = a->cum_off + size; - /* Round up so that slot start (= fp - off) is align-aligned. fp is - * 16-aligned, so requiring off aligned to `align` suffices. */ - u32 mask = align - 1; - next = (next + mask) & ~mask; - - AASlot* s = &a->slots[a->nslots]; - s->off = next; - s->size = size; - s->align = align; - s->kind = d->kind; - - a->cum_off = next; - a->nslots++; - return (FrameSlot)(a->nslots); /* 1-based; FRAME_SLOT_NONE == 0 */ -} - -static AASlot* slot_get(AAImpl* a, FrameSlot fs) { - if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; - return &a->slots[fs - 1]; -} - -/* ---- param: store incoming arg(s) into the home slot ---- */ - -static void aa_param(CGTarget* t, const CGParamDesc* p) { - AAImpl* a = impl_of(t); - AASlot* s = slot_get(a, p->slot); - if (!s) { - compiler_panic(t->c, a->loc, "aarch64 param: bad slot"); - } - const ABIArgInfo* ai = p->abi; - - if (ai->kind == ABI_ARG_IGNORE) return; - if (ai->kind == ABI_ARG_INDIRECT) { - /* Caller passes a pointer to a copy. Materialize that pointer - * into a scratch reg, then memcpy `s->size` bytes from there - * into the slot — so subsequent LOCAL_op(slot) reads/writes the - * struct contents directly, not the pointer. 
*/ - u32 ptr_reg; - if (a->next_param_int < 8) { - ptr_reg = a->next_param_int++; - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off))); - ptr_reg = 9; - } - u32 nbytes = s->size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(t->mc, aa64_ldur(3, 10, ptr_reg, (i32)i)); - emit32(t->mc, aa64_stur(3, 10, 29, -(i32)s->off + (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(t->mc, aa64_ldur(2, 10, ptr_reg, (i32)i)); - emit32(t->mc, aa64_stur(2, 10, 29, -(i32)s->off + (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(t->mc, aa64_ldur(1, 10, ptr_reg, (i32)i)); - emit32(t->mc, aa64_stur(1, 10, 29, -(i32)s->off + (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(t->mc, aa64_ldur(0, 10, ptr_reg, (i32)i)); - emit32(t->mc, aa64_stur(0, 10, 29, -(i32)s->off + (i32)i)); - i += 1; - } - return; - } - /* DIRECT: place each part. */ - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 part_off = pt->src_offset; - u32 sz = pt->size; - u32 sidx = size_idx_for_bytes(sz); - - if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 8) { - u32 reg = a->next_param_int++; - emit32(t->mc, aa64_stur(sidx, reg, 29, -(i32)s->off + (i32)part_off)); - } else { - /* Each stack-passed slot is 8 bytes regardless of part size. 
*/ - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit32(t->mc, aa64_ldur(sidx, 9, 29, (i32)(16 + caller_off))); - emit32(t->mc, aa64_stur(sidx, 9, 29, -(i32)s->off + (i32)part_off)); - } - } else if (pt->cls == ABI_CLASS_FP) { - if (a->next_param_fp < 8) { - u32 reg = a->next_param_fp++; - emit32(t->mc, - aa64_stur_fp(sidx, reg, 29, -(i32)s->off + (i32)part_off)); - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit32(t->mc, aa64_ldur_fp(sidx, 0, 29, (i32)(16 + caller_off))); - emit32(t->mc, aa64_stur_fp(sidx, 0, 29, -(i32)s->off + (i32)part_off)); - } - } else { - compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - aa_panic(t, "clobbers"); -} - -static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out, - RegClass* cls_out) { - (void)t; - size_t len = 0; - const char* s = pool_str(t->c->global, name, &len); - if (!s || !len) return 1; - /* pool_str does not guarantee NUL-termination; copy into a small buffer. */ - char buf[8]; - if (len >= sizeof buf) return 1; - memcpy(buf, s, len); - buf[len] = '\0'; - u32 dwarf; - if (aa64_register_index(buf, &dwarf) != 0) return 1; - if (dwarf <= 30u) { /* x0..x30 */ - if (out) *out = (Reg)dwarf; - if (cls_out) *cls_out = RC_INT; - return 0; - } - if (dwarf >= 64u && dwarf <= 95u) { /* v0..v31 */ - if (out) *out = (Reg)(dwarf - 64u); - if (cls_out) *cls_out = RC_FP; - return 0; - } - /* sp/pc and others — not allocatable, treat as unresolvable. 
*/ - return 1; -} -static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot, - MemAccess ma) { - AAImpl* a = impl_of(t); - if (src.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "aarch64 spill_reg: src is not OPK_REG"); - } - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - aa_store(t, addr, src, ma); - aa_free_reg(t, src.v.reg, src.cls); -} - -static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, - MemAccess ma) { - AAImpl* a = impl_of(t); - if (dst.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "aarch64 reload_reg: dst is not OPK_REG"); - } - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - aa_load(t, dst, addr, ma); -} - -/* ---- labels / control flow ---- - * - * Label is a transparent wrapper around MCLabel — the MCEmitter already - * tracks placement and applies pending fixups. Jumps emit a B with - * imm26=0 paired with R_AARCH64_JUMP26; conditional branches emit a - * B.cond with imm19=0 paired with R_AARCH64_CONDBR19. */ - -static Label aa_label_new(CGTarget* t) { - return (Label)t->mc->label_new(t->mc); -} - -static void aa_label_place(CGTarget* t, Label l) { - t->mc->label_place(t->mc, (MCLabel)l); -} - -static void aa_jump(CGTarget* t, Label l) { - MCEmitter* mc = t->mc; - emit32(mc, aa64_b_base()); - mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0); -} - -/* Map CmpOp → AArch64 condition code. Boolean (i1) "true" means take the - * branch / set 1. 
*/ -static u32 cmp_to_cond(CmpOp op) { - switch (op) { - case CMP_EQ: - return 0x0u; /* EQ */ - case CMP_NE: - return 0x1u; /* NE */ - case CMP_LT_U: - return 0x3u; /* CC/LO */ - case CMP_LE_U: - return 0x9u; /* LS */ - case CMP_GT_U: - return 0x8u; /* HI */ - case CMP_GE_U: - return 0x2u; /* CS/HS */ - case CMP_LT_S: - return 0xbu; /* LT */ - case CMP_LE_S: - return 0xdu; /* LE */ - case CMP_GT_S: - return 0xcu; /* GT */ - case CMP_GE_S: - return 0xau; /* GE */ - /* FP compares route through FCMP, not yet exercised here. */ - default: - return 0x0u; - } -} - -/* Emit CMP a, b (= SUBS ZR, a, b). Uses the 12-bit-imm form when `b` is - * an OPK_IMM that fits; otherwise materializes through scratch x9/x10 - * and uses the shifted-register form. CMP is not commutative across the - * condition codes, so an IMM-on-LHS still materializes (the caller has - * to swap the cond if it wants to swap the operands). Width comes from - * `a`; signedness lives in the cond. */ -static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { - MCEmitter* mc = t->mc; - u32 sf = type_is_64(a_op.type) ? 1u : 0u; - if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { - u32 imm12, sh; - if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) { - u32 rn = force_reg_int(t, a_op, sf, 9); - emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh)); - return; - } - } - u32 rn = force_reg_int(t, a_op, sf, 9); - u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u); - emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, rn, rm)); -} - -static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, - Label l) { - MCEmitter* mc = t->mc; - emit_cmp_ab(t, a, b); - emit32(mc, aa64_b_cond(cmp_to_cond(op))); - mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0); -} - -static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { - emit_cmp_ab(t, a, b); - u32 sf_dst = type_is_64(dst.type) ? 
1u : 0u; - emit32(t->mc, aa64_cset(sf_dst, reg_num(dst), cmp_to_cond(op))); -} - -/* ---- structured scopes (SCOPE_IF only for v1) ---- */ - -static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) { - AAImpl* a = impl_of(t); - if (a->nscopes == a->scopes_cap) { - u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; - AAScope* nb = arena_array(t->c->tu, AAScope, ncap); - if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes); - a->scopes = nb; - a->scopes_cap = ncap; - } - AAScope* sc = &a->scopes[a->nscopes]; - sc->kind = (u8)d->kind; - sc->has_else = 0; - sc->else_label = 0; - sc->end_label = 0; - sc->break_label = d->break_label; - sc->continue_label = d->continue_label; - - if (d->kind == SCOPE_IF) { - sc->else_label = t->mc->label_new(t->mc); - sc->end_label = t->mc->label_new(t->mc); - /* Test cond against zero, branch to else_label on EQ (false). */ - u32 sf = type_is_64(d->cond.type) ? 1u : 0u; - u32 rn = force_reg_int(t, d->cond, sf, 9); - emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, rn, 0)); - emit32(t->mc, aa64_b_cond(0x0u /*EQ*/)); - t->mc->emit_label_ref(t->mc, sc->else_label, R_AARCH64_CONDBR19, 4, 0); - } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { - /* Structured loop/block: bookkeep only. The caller drives - * label_place + jump itself; break_to/continue_to forward to the - * recorded labels. No instructions emitted here. */ - } else { - compiler_panic(t->c, a->loc, - "aarch64 scope_begin: kind %d not yet implemented", - (int)d->kind); - } - - a->nscopes++; - return (CGScope)a->nscopes; /* 1-based */ -} - -static void aa_scope_else(CGTarget* t, CGScope s) { - AAImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "aarch64 scope_else: bad scope %u", - (unsigned)s); - } - AAScope* sc = &a->scopes[s - 1]; - /* End of the then-arm: jump past the else body. 
*/ - emit32(t->mc, aa64_b_base()); - t->mc->emit_label_ref(t->mc, sc->end_label, R_AARCH64_JUMP26, 4, 0); - /* Begin of the else-arm. */ - t->mc->label_place(t->mc, sc->else_label); - sc->has_else = 1; -} - -static void aa_scope_end(CGTarget* t, CGScope s) { - AAImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "aarch64 scope_end: bad scope %u", - (unsigned)s); - } - AAScope* sc = &a->scopes[s - 1]; - if (sc->kind == SCOPE_IF) { - if (!sc->has_else) { - /* No else body — false-branch lands at scope_end. */ - t->mc->label_place(t->mc, sc->else_label); - } - t->mc->label_place(t->mc, sc->end_label); - } - /* SCOPE_LOOP / SCOPE_BLOCK: caller has already placed the break_label. */ -} - -static void aa_break_to(CGTarget* t, CGScope s) { - AAImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "aarch64 break_to: bad scope %u", (unsigned)s); - } - AAScope* sc = &a->scopes[s - 1]; - aa_jump(t, sc->break_label); -} - -static void aa_continue_to(CGTarget* t, CGScope s) { - AAImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "aarch64 continue_to: bad scope %u", - (unsigned)s); - } - AAScope* sc = &a->scopes[s - 1]; - aa_jump(t, sc->continue_label); -} - -/* ---- data movement ---- */ - -static void aa_load_imm(CGTarget* t, Operand dst, i64 imm) { - u32 sf = type_is_64(dst.type) ? 1u : 0u; - emit_load_imm(t->mc, sf, reg_num(dst), imm); -} - -/* load_const: emit ADRP + LDR Sd, [Xt, #:lo12:sym] against a fresh - * symbol in .rodata. Used by b08 to materialize a float bit pattern. */ -static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) { - AAImpl* a = impl_of(t); - if (dst.cls != RC_FP) { - compiler_panic(t->c, a->loc, "aarch64 load_const: only FP supported in v1"); - } - - /* Find or create .rodata. obj_align_to bumps the section's recorded - * align as a side effect of placement, so we pass 1 here. 
*/ - Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); - ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); - - u32 cur_section = t->mc->section_id; - t->mc->set_section(t->mc, ro); - u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4); - t->mc->emit_bytes(t->mc, cb.bytes, cb.size); - - /* Local symbol pointing at the literal. */ - char namebuf[64]; - static u32 lit_seq = 0; - int len = 0; - { - const char* prefix = ".LCFP"; - for (; prefix[len]; ++len) namebuf[len] = prefix[len]; - u32 v = lit_seq++; - char tmp[16]; - int tn = 0; - if (v == 0) - tmp[tn++] = '0'; - else { - while (v) { - tmp[tn++] = '0' + (char)(v % 10); - v /= 10; - } - } - for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; - namebuf[len] = 0; - } - Sym sname = pool_intern_cstr(t->c->global, namebuf); - ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, - (u64)cb.size); - - t->mc->set_section(t->mc, cur_section); - - /* ADRP X9, sym ; LDR Sd, [X9, #:lo12:sym] */ - u32 adrp_pos = t->mc->pos(t->mc); - emit32(t->mc, aa64_adrp_base(9)); - t->mc->emit_reloc_at(t->mc, cur_section, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, - sym, 0, 0, 0); - - u32 ldr_pos = t->mc->pos(t->mc); - u32 sidx = (cb.size == 8) ? 3u : 2u; - emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0)); - RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC - : R_AARCH64_LDST32_ABS_LO12_NC; - t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0); -} - -static void aa_copy(CGTarget* t, Operand dst, Operand src) { - if (dst.cls == RC_FP || src.cls == RC_FP) { - u32 type = type_is_fp_double(dst.type) ? 1u : 0u; - emit32(t->mc, aa64_fmov_reg(type, reg_num(dst), reg_num(src))); - return; - } - u32 sf = type_is_64(dst.type) ? 1u : 0u; - emit32(t->mc, aa64_mov_reg(sf, reg_num(dst), reg_num(src))); -} - -/* ---- load / store / addr_of ---- */ - -/* Reloc kind for an LDR/STR (immediate, unsigned offset) of `nbytes`. 
*/ -static RelocKind ldst_lo12_reloc_for(u32 nbytes) { - switch (nbytes) { - case 1: - return R_AARCH64_LDST8_ABS_LO12_NC; - case 2: - return R_AARCH64_LDST16_ABS_LO12_NC; - case 4: - return R_AARCH64_LDST32_ABS_LO12_NC; - case 8: - return R_AARCH64_LDST64_ABS_LO12_NC; - default: - return R_AARCH64_LDST64_ABS_LO12_NC; - } -} - -/* Forward decl: addend fixup after a GOT load lands here when the - * addend doesn't fit in a single imm12. Defined just below. */ -static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off); - -/* True when the symbol must be reached via a GOT indirection slot at - * this site: an undefined external on a target format that binds extern - * data through __got / non-lazy pointers (Mach-O today). The policy - * lives behind obj_format_extern_via_got so the backend never names a - * specific OS/format. - * - * The "is undefined" test keys on section_id == OBJ_SEC_NONE — the - * canonical marker per obj.h. SK_UNDEF as a kind is reserved for - * symbols whose kind isn't known yet; the decl pass mints externs - * with their intended SK_OBJ / SK_FUNC kind plus OBJ_SEC_NONE. */ -static int use_got_for_sym(CGTarget* t, ObjSymId sym) { - return obj_symbol_extern_via_got(t->c, t->obj, sym); -} - -/* Emit `ADRP dst, sym@GOTPAGE ; LDR Xdst, [dst, #sym@GOTPAGEOFF]`, - * leaving the runtime address of `sym` in `dst_reg`. Addends are - * deliberately omitted from the GOT relocs — most loaders disallow - * nonzero addends on GOT-load fixups — so callers add any displacement - * with a follow-on ADD/LDUR/STUR. 
*/ -static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - u32 adrp_pos = mc->pos(mc); - emit32(mc, aa64_adrp_base(dst_reg)); - mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_GOT_PAGE, sym, 0, 0, 0); - u32 ldr_pos = mc->pos(mc); - emit32(mc, aa64_ldr_uimm(/*size=*/3, dst_reg, dst_reg, 0)); - mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LD64_GOT_LO12_NC, sym, 0, 0, 0); -} - -/* Materialize &sym+addend into `dst_reg` via ADRP + ADD (LO12_NC), or - * ADRP + LDR-from-GOT + (optional) ADD when the symbol must route - * through an indirection slot. */ -static void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym, - i64 addend) { - MCEmitter* mc = t->mc; - if (use_got_for_sym(t, sym)) { - emit_got_load_addr(t, dst_reg, sym); - if (addend) emit_addr_adjust(mc, dst_reg, dst_reg, (i32)addend); - return; - } - u32 sec = mc->section_id; - u32 adrp_pos = mc->pos(mc); - emit32(mc, aa64_adrp_base(dst_reg)); - mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, addend, - 0, 0); - u32 add_pos = mc->pos(mc); - emit32(mc, aa64_add_imm(1, dst_reg, dst_reg, 0, 0)); - mc->emit_reloc_at(mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym, addend, 0, - 0); -} - -/* Materialize a SUB/ADD imm sequence that puts (base ± abs_off) into Rd. - * abs_off must be representable as imm12 or imm12<<12 (or the sum). For - * larger offsets, falls back to MOV+ADD via emit_load_imm. */ -static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off) { - if (off == 0) { - emit32(mc, aa64_mov_reg(1, Rd, base)); - return; - } - u32 abs_off = (off < 0) ? (u32)(-off) : (u32)off; - /* Single imm12. */ - if (abs_off <= 0xfff) { - if (off < 0) - emit32(mc, aa64_sub_imm(1, Rd, base, abs_off, 0)); - else - emit32(mc, aa64_add_imm(1, Rd, base, abs_off, 0)); - return; - } - /* Two-shift form: hi12 + lo12 (when low is zero, hi only). 
*/ - if ((abs_off >> 24) == 0) { - u32 hi = (abs_off >> 12) & 0xfff; - u32 lo = abs_off & 0xfff; - if (off < 0) { - if (hi) emit32(mc, aa64_sub_imm(1, Rd, base, hi, 1)); - if (lo) emit32(mc, aa64_sub_imm(1, Rd, hi ? Rd : base, lo, 0)); - } else { - if (hi) emit32(mc, aa64_add_imm(1, Rd, base, hi, 1)); - if (lo) emit32(mc, aa64_add_imm(1, Rd, hi ? Rd : base, lo, 0)); - } - return; - } - /* Generic: load constant into Rd, then add. */ - emit_load_imm(mc, 1, Rd, off); - emit32(mc, aa64_add(1, Rd, base, Rd)); -} - -/* Resolve an address operand (LOCAL or INDIRECT) into (base_reg, signed - * offset) via a possibly-temporary base register. Returns the base reg. - * Frames larger than the STUR/LDUR ±256 window land here via tmp_reg — - * the caller passes 0 as offset and uses the returned register directly. */ -static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { - AAImpl* a = impl_of(t); - if (addr.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot"); - i32 off = -(i32)s->off; - if (off >= -256 && off <= 255) { - *out_off = off; - return 29; /* x29 = fp */ - } - /* Out of STUR range — synthesize the address into tmp_reg. */ - emit_addr_adjust(t->mc, tmp_reg, 29, off); - *out_off = 0; - return tmp_reg; - } - if (addr.kind == OPK_INDIRECT) { - i32 off = addr.v.ind.ofs; - u32 base = addr.v.ind.base & 0x1f; - if (off >= -256 && off <= 255) { - *out_off = off; - return base; - } - emit_addr_adjust(t->mc, tmp_reg, base, off); - *out_off = 0; - return tmp_reg; - } - if (addr.kind == OPK_GLOBAL) { - emit_global_addr(t, tmp_reg, addr.v.global.sym, addr.v.global.addend); - *out_off = 0; - return tmp_reg; - } - compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d", - (int)addr.kind); -} - -static void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { - u32 sz = ma.size ? 
ma.size : type_byte_size(addr.type); - u32 sidx = size_idx_for_bytes(sz); - - /* OPK_GLOBAL: ADRP scratch, sym ; LDR Wd, [scratch, #:lo12:sym]. - * The LO12_NC reloc requires the scaled-offset LDR encoding, not LDUR. - * - * Extern-via-GOT path: ADRP scratch, sym@GOTPAGE ; - * LDR Xscratch, [scratch, #:gotoff:sym] ; LDUR Wd, [scratch, #addend] - * The GOT load returns the symbol's runtime address; we then read the - * value at +addend with a plain LDUR (no reloc, addend baked in). */ - if (addr.kind == OPK_GLOBAL) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - ObjSymId sym = addr.v.global.sym; - i64 add = addr.v.global.addend; - if (use_got_for_sym(t, sym)) { - emit_got_load_addr(t, /*dst=*/9, sym); - if (dst.cls == RC_FP) { - emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, (i32)add)); - } else { - emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, (i32)add)); - } - return; - } - u32 adrp_pos = mc->pos(mc); - emit32(mc, aa64_adrp_base(/*Rd=*/9)); - mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add, - 0, 0); - u32 ld_pos = mc->pos(mc); - if (dst.cls == RC_FP) { - emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0)); - } else { - emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), 9, 0)); - } - mc->emit_reloc_at(mc, sec, ld_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0); - return; - } - - i32 off; - u32 base = addr_base(t, addr, &off, 9); - if (dst.cls == RC_FP) { - emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off)); - } else { - emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off)); - } -} - -static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { - u32 sz = ma.size ? ma.size : type_byte_size(addr.type); - u32 sidx = size_idx_for_bytes(sz); - - /* OPK_GLOBAL: ADRP scratch, sym ; STR Wt, [scratch, #:lo12:sym]. - * For OPK_IMM source, materialize the value first into x9, then use - * x10 for the global base so the two scratches don't collide. 
- * - * Extern-via-GOT path: load the symbol's runtime address into the - * base scratch via emit_got_load_addr, then STUR with addend baked - * into the imm9 (no reloc on the store). */ - if (addr.kind == OPK_GLOBAL) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - ObjSymId sym = addr.v.global.sym; - i64 add = addr.v.global.addend; - - u32 src_reg; - u32 src_is_fp = 0; - if (src.kind == OPK_IMM) { - u32 sf = (sz == 8) ? 1u : 0u; - emit_load_imm(mc, sf, /*Rd=*/9, src.v.imm); - src_reg = 9; - } else if (src.cls == RC_FP) { - src_reg = reg_num(src); - src_is_fp = 1; - } else { - src_reg = reg_num(src); - } - u32 base = (src.kind == OPK_IMM) ? 10u : 9u; - if (use_got_for_sym(t, sym)) { - emit_got_load_addr(t, base, sym); - if (src_is_fp) { - emit32(mc, aa64_stur_fp(sidx, src_reg, base, (i32)add)); - } else { - emit32(mc, aa64_stur(sidx, src_reg, base, (i32)add)); - } - return; - } - u32 adrp_pos = mc->pos(mc); - emit32(mc, aa64_adrp_base(base)); - mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add, - 0, 0); - u32 st_pos = mc->pos(mc); - if (src_is_fp) { - emit32(mc, aa64_str_fp_uimm(sidx, src_reg, base, 0)); - } else { - emit32(mc, aa64_str_uimm(sidx, src_reg, base, 0)); - } - mc->emit_reloc_at(mc, sec, st_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0); - return; - } - - i32 off; - /* For OPK_IMM source we need x9 to materialize the value, so the - * address synthesis (addr_base fallback) lands in x10. Otherwise x9 - * is free. */ - u32 addr_tmp = (src.kind == OPK_IMM) ? 10u : 9u; - u32 base = addr_base(t, addr, &off, addr_tmp); - - if (src.kind == OPK_IMM) { - /* Materialize through a scratch register. Use x9 (caller-saved). */ - u32 sf = (sz == 8) ? 
1u : 0u; - emit_load_imm(t->mc, sf, 9, src.v.imm); - emit32(t->mc, aa64_stur(sidx, 9, base, off)); - return; - } - if (src.cls == RC_FP) { - emit32(t->mc, aa64_stur_fp(sidx, reg_num(src), base, off)); - } else { - emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off)); - } -} - -static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) { - AAImpl* a = impl_of(t); - if (lv.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, lv.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot"); - /* dst = x29 - off */ - emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0)); - return; - } - if (lv.kind == OPK_INDIRECT) { - i32 ofs = lv.v.ind.ofs; - u32 base = lv.v.ind.base & 0x1f; - if (ofs == 0) { - emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base)); - } else if (ofs > 0 && ofs <= 0xfff) { - emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0)); - } else if (ofs < 0 && -ofs <= 0xfff) { - emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0)); - } else { - compiler_panic(t->c, a->loc, - "aarch64 addr_of: indirect offset %d unsupported", ofs); - } - return; - } - if (lv.kind == OPK_GLOBAL) { - /* ADRP Xd, sym ; ADD Xd, Xd, #:lo12:sym (with addend baked into both - * relocations). Used to materialize a function or data pointer. - * - * Extern-via-GOT path: load the address from the GOT slot and then - * apply the addend with a plain ADD/SUB (GOT relocs disallow addends). 
*/ - u32 rd = reg_num(dst); - ObjSymId sym = lv.v.global.sym; - i64 addend = lv.v.global.addend; - if (use_got_for_sym(t, sym)) { - emit_got_load_addr(t, rd, sym); - if (addend) emit_addr_adjust(t->mc, rd, rd, (i32)addend); - return; - } - u32 sec = t->mc->section_id; - u32 adrp_pos = t->mc->pos(t->mc); - emit32(t->mc, aa64_adrp_base(rd)); - t->mc->emit_reloc_at(t->mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, - addend, 0, 0); - u32 add_pos = t->mc->pos(t->mc); - emit32(t->mc, aa64_add_imm(1, rd, rd, 0, 0)); - t->mc->emit_reloc_at(t->mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym, - addend, 0, 0); - return; - } - aa_panic(t, "addr_of"); -} - -/* AArch64 TLS Local-Exec materialization. - * mrs xtmp, tpidr_el0 - * add xdst, xtmp, #:tprel_hi12:sym, lsl #12 - * add xdst, xdst, #:tprel_lo12_nc:sym - * The two ADDs carry HI12 / LO12_NC TLSLE relocations; the linker fills in - * the per-target TP-relative offset (image offset + AARCH64_TCB_SIZE). */ -static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - u32 rd = reg_num(dst); - - /* Read thread pointer into x9 (scratch). */ - emit32(mc, aa64_mrs_tpidr_el0(/*Rt=*/9)); - - /* add xdst, x9, #:tprel_hi12:sym, lsl #12 */ - u32 hi_pos = mc->pos(mc); - emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/9, /*imm12=*/0, /*sh=*/1)); - mc->emit_reloc_at(mc, sec, hi_pos, R_AARCH64_TLSLE_ADD_TPREL_HI12, sym, - addend, 0, 0); - - /* add xdst, xdst, #:tprel_lo12_nc:sym */ - u32 lo_pos = mc->pos(mc); - emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/rd, /*imm12=*/0, /*sh=*/0)); - mc->emit_reloc_at(mc, sec, lo_pos, R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, sym, - addend, 0, 0); -} - -/* Resolve a dst/src address operand for the aggregate ops below. - * Accepts OPK_REG (already a pointer) and OPK_LOCAL (= fp - off); - * for OPK_LOCAL we materialize the address into a scratch register. 
*/ -static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { - if (op.kind == OPK_REG) return reg_num(op); - if (op.kind == OPK_LOCAL) { - AAImpl* a = impl_of(t); - AASlot* s = slot_get(a, op.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot"); - emit32(t->mc, aa64_sub_imm(1, scratch, 29, s->off, 0)); - return scratch; - } - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 agg: address kind %d unsupported", (int)op.kind); -} - -static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr, - AggregateAccess agg) { - MCEmitter* mc = t->mc; - u32 dr = agg_addr_reg(t, dst_addr, 9); - u32 sr = agg_addr_reg(t, src_addr, (dr == 10) ? 11u : 10u); - u32 nbytes = agg.size; - u32 i = 0; - /* Unrolled per-element copy through scratch x12. We use unscaled - * LDUR/STUR so we don't depend on `agg.align` for legality. */ - while (i + 8 <= nbytes) { - emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); - emit32(mc, aa64_stur(3, 12, dr, (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); - emit32(mc, aa64_stur(2, 12, dr, (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); - emit32(mc, aa64_stur(1, 12, dr, (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); - emit32(mc, aa64_stur(0, 12, dr, (i32)i)); - i += 1; - } -} - -static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value, - AggregateAccess agg) { - MCEmitter* mc = t->mc; - u32 dr = agg_addr_reg(t, dst_addr, 9); - - u32 byte; - if (byte_value.kind == OPK_IMM) { - byte = (u32)(byte_value.v.imm & 0xffu); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 set_bytes: REG byte not yet supported"); - } - u32 nbytes = agg.size; - - if (byte == 0) { - /* Use XZR/WZR directly — no broadcast register needed. 
*/ - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, aa64_stur(3, 31, dr, (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, aa64_stur(2, 31, dr, (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, aa64_stur(1, 31, dr, (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, aa64_stur(0, 31, dr, (i32)i)); - i += 1; - } - return; - } - - /* Broadcast byte into x12 then strided-store. */ - u64 b64 = byte; - b64 |= b64 << 8; - b64 |= b64 << 16; - b64 |= b64 << 32; - emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)b64); - - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, aa64_stur(3, 12, dr, (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, aa64_stur(2, 12, dr, (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, aa64_stur(1, 12, dr, (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, aa64_stur(0, 12, dr, (i32)i)); - i += 1; - } -} - -static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, - BitFieldAccess bf) { - MCEmitter* mc = t->mc; - u32 base = agg_addr_reg(t, record_addr, 9); - u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; - u32 sf = (storage_bytes == 8u) ? 1u : 0u; - u32 sidx = size_idx_for_bytes(storage_bytes); - u32 rd = reg_num(dst); - - /* Load the entire storage unit, then extract bf.bit_width bits at - * bf.bit_offset. UBFX (zero-extend) or SBFX (sign-extend) per the - * field's signedness. */ - emit32(mc, aa64_ldur(sidx, rd, base, (i32)bf.storage_offset)); - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? bf.bit_width : 1u; - u32 imms = lsb + width - 1u; - if (bf.signed_) { - emit32(mc, aa64_sbfm(sf, rd, rd, lsb, imms)); - } else { - emit32(mc, aa64_ubfm(sf, rd, rd, lsb, imms)); - } -} - -static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src, - BitFieldAccess bf) { - MCEmitter* mc = t->mc; - u32 base = agg_addr_reg(t, record_addr, 9); - u32 storage_bytes = bf.storage.size ? 
bf.storage.size : 4u; - u32 sf = (storage_bytes == 8u) ? 1u : 0u; - u32 sidx = size_idx_for_bytes(storage_bytes); - - /* Read-modify-write through scratch registers x10 (storage) and x11 - * (the source value). */ - emit32(mc, aa64_ldur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); - - u32 src_reg; - if (src.kind == OPK_IMM) { - emit_load_imm(mc, sf, /*Rd=*/11u, src.v.imm); - src_reg = 11u; - } else if (src.kind == OPK_REG) { - src_reg = reg_num(src); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 bitfield_store: src kind %d unsupported", - (int)src.kind); - } - - /* BFI Rd, Rn, #lsb, #width — insert width bits of Rn[0..width-1] - * starting at bit lsb of Rd. Encoded as BFM with - * immr = (RegSize - lsb) mod RegSize, imms = width - 1. */ - u32 reg_size = sf ? 64u : 32u; - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? bf.bit_width : 1u; - u32 immr = (reg_size - lsb) % reg_size; - u32 imms = width - 1u; - emit32(mc, aa64_bfm(sf, /*Rd=*/10u, src_reg, immr, imms)); - - emit32(mc, aa64_stur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); -} - -/* ---- arithmetic ---- */ - -/* Force an Operand into a register, materializing immediates via x9. - * Returns the register number to use as Rn/Rm. */ -static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch) { - if (op.kind == OPK_REG) return reg_num(op); - if (op.kind == OPK_IMM) { - emit_load_imm(t->mc, sf, scratch, op.v.imm); - return scratch; - } - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 binop: operand kind %d unsupported", (int)op.kind); -} - -static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, - Operand b_op) { - MCEmitter* mc = t->mc; - - /* FP binops route through scalar FADD/FSUB/FMUL/FDIV. 
*/ - if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { - if (a_op.kind != OPK_REG || b_op.kind != OPK_REG || dst.cls != RC_FP) { - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 binop: FP op requires REG operands"); - } - u32 type = type_is_fp_double(dst.type) ? 1u : 0u; - u32 rd = reg_num(dst); - u32 rn = reg_num(a_op); - u32 rm = reg_num(b_op); - u32 w; - switch (op) { - case BO_FADD: - w = aa64_fadd(type, rd, rn, rm); - break; - case BO_FSUB: - w = aa64_fsub(type, rd, rn, rm); - break; - case BO_FMUL: - w = aa64_fmul(type, rd, rn, rm); - break; - case BO_FDIV: - w = aa64_fdiv(type, rd, rn, rm); - break; - default: - w = 0; - break; /* unreachable */ - } - emit32(mc, w); - return; - } - - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 rd = reg_num(dst); - - /* Imm-form fast paths. For commutative ops (ADD/AND/OR/XOR), if the - * LHS is the IMM swap to canonicalize (REG, IMM) and try to encode. - * For SUB we don't swap — `SUB imm, reg` has no encoding without - * materializing. Shifts take the imm as the count and require RHS-IMM - * by definition. Anything that doesn't fit the encoding falls through - * to force_reg_int + the shifted-register form, preserving the old - * behavior. */ - u32 word; - switch (op) { - case BO_IADD: - case BO_AND: - case BO_OR: - case BO_XOR: { - if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { - Operand t_op = a_op; a_op = b_op; b_op = t_op; - } - break; - } - default: break; - } - - /* Try the imm-form before materializing. Each case sets `word` and - * jumps to emit; misses fall through to the reg path below. 
*/ - if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { - u32 rn_reg = reg_num(a_op); - i64 imm = b_op.v.imm; - u32 imm12, sh, N, immr, imms; - switch (op) { - case BO_IADD: - if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { - emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh)); - return; - } - break; - case BO_ISUB: - if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { - emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh)); - return; - } - break; - case BO_AND: - if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { - emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms)); - return; - } - break; - case BO_OR: - if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { - emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms)); - return; - } - break; - case BO_XOR: - if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { - emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms)); - return; - } - break; - case BO_SHL: { - /* C shifts by ≥ width are UB but we don't exploit it; mask the - * count to width-1 to match the variable-shift behavior. */ - u32 width = sf ? 64u : 32u; - u32 sh_amt = (u32)((u64)imm & (width - 1u)); - if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) { - emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); - return; - } - break; - } - case BO_SHR_U: { - u32 width = sf ? 64u : 32u; - u32 sh_amt = (u32)((u64)imm & (width - 1u)); - if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) { - emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); - return; - } - break; - } - case BO_SHR_S: { - u32 width = sf ? 64u : 32u; - u32 sh_amt = (u32)((u64)imm & (width - 1u)); - if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) { - emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms)); - return; - } - break; - } - default: break; - } - } - - u32 rn = force_reg_int(t, a_op, sf, 9); - u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 
10 : 9); - - switch (op) { - case BO_IADD: - word = aa64_add(sf, rd, rn, rm); - break; - case BO_ISUB: - word = aa64_sub(sf, rd, rn, rm); - break; - case BO_IMUL: - word = aa64_mul(sf, rd, rn, rm); - break; - case BO_AND: - word = aa64_and(sf, rd, rn, rm); - break; - case BO_OR: - word = aa64_orr(sf, rd, rn, rm); - break; - case BO_XOR: - word = aa64_eor(sf, rd, rn, rm); - break; - case BO_SHL: - word = aa64_lslv(sf, rd, rn, rm); - break; - case BO_SHR_U: - word = aa64_lsrv(sf, rd, rn, rm); - break; - case BO_SHR_S: - word = aa64_asrv(sf, rd, rn, rm); - break; - case BO_UDIV: - word = aa64_udiv(sf, rd, rn, rm); - break; - case BO_SDIV: - word = aa64_sdiv(sf, rd, rn, rm); - break; - /* rem = a - (a/b)*b → SDIV/UDIV into x11, then MSUB rd, x11, b, a. */ - case BO_SREM: - emit32(mc, aa64_sdiv(sf, 11, rn, rm)); - word = aa64_msub(sf, rd, 11, rm, rn); - break; - case BO_UREM: - emit32(mc, aa64_udiv(sf, 11, rn, rm)); - word = aa64_msub(sf, rd, 11, rm, rn); - break; - case BO_FADD: - case BO_FSUB: - case BO_FMUL: - case BO_FDIV: - default: - compiler_panic(t->c, impl_of(t)->loc, "aarch64 binop: op %d unimpl", - (int)op); - } - emit32(mc, word); -} - -static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { - MCEmitter* mc = t->mc; - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 rd = reg_num(dst); - /* OPK_IMM is legal per the CGTarget contract (arch.h); force_reg_int - * materializes into x9 when the operand isn't already a register. - * cg folds literal unops upstream (cg_fold_unop), so the IMM path - * here is only reached from opt's emit when the IR carries an - * unfolded literal — still a contract case we must honor. 
*/ - u32 rn = force_reg_int(t, a_op, sf, 9); - u32 word; - - switch (op) { - case UO_NEG: - word = aa64_neg(sf, rd, rn); - break; - case UO_BNOT: - word = aa64_mvn(sf, rd, rn); - break; - case UO_NOT: - /* !x → cmp Xn, #0 ; cset Xd, EQ */ - emit32(mc, aa64_subs_imm(sf, /*ZR=*/31, rn, 0)); - word = aa64_cset_eq(sf, rd); - break; - default: - compiler_panic(t->c, impl_of(t)->loc, "aarch64 unop: op %d unimpl", - (int)op); - } - emit32(mc, word); -} - -static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 rd = reg_num(dst); - u32 rn = reg_num(src); - - switch (k) { - case CV_SEXT: { - if (src.cls != RC_INT || dst.cls != RC_INT) { - compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes"); - } - u32 src_bits = type_byte_size(src.type) * 8u; - u32 sf_dst = type_is_64(dst.type) ? 1u : 0u; - emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); - return; - } - case CV_ZEXT: { - if (src.cls != RC_INT || dst.cls != RC_INT) { - compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes"); - } - u32 src_bits = type_byte_size(src.type) * 8u; - if (src_bits == 32u) { - /* MOV Wd, Wn auto-zero-extends into the X register. */ - emit32(mc, aa64_mov_reg(0, rd, rn)); - } else { - emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); - } - return; - } - case CV_TRUNC: { - /* Reading the W view of any X register zeros the upper 32 bits. - * For narrower truncations the consumer (store / ret) selects - * the byte width — leaving extra high bits is harmless. */ - emit32(mc, aa64_mov_reg(0, rd, rn)); - return; - } - case CV_ITOF_S: { - u32 sf_src = type_is_64(src.type) ? 1u : 0u; - u32 type = type_is_fp_double(dst.type) ? 1u : 0u; - emit32(mc, aa64_scvtf(sf_src, type, rd, rn)); - return; - } - case CV_ITOF_U: { - u32 sf_src = type_is_64(src.type) ? 1u : 0u; - u32 type = type_is_fp_double(dst.type) ? 
1u : 0u; - emit32(mc, aa64_ucvtf(sf_src, type, rd, rn)); - return; - } - case CV_FTOI_S: { - if (src.cls != RC_FP || dst.cls != RC_INT) { - compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes"); - } - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 type = type_is_fp_double(src.type) ? 1u : 0u; - emit32(mc, aa64_fcvtzs(sf, type, rd, rn)); - return; - } - case CV_FTOI_U: { - if (src.cls != RC_FP || dst.cls != RC_INT) { - compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes"); - } - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 type = type_is_fp_double(src.type) ? 1u : 0u; - emit32(mc, aa64_fcvtzu(sf, type, rd, rn)); - return; - } - case CV_FEXT: { - /* float (S) → double (D). */ - emit32(mc, aa64_fcvt_d_s(rd, rn)); - return; - } - case CV_FTRUNC: { - /* double (D) → float (S). */ - emit32(mc, aa64_fcvt_s_d(rd, rn)); - return; - } - case CV_BITCAST: { - /* Same-size cross-class reinterpret (i32↔f32, i64↔f64). */ - if (src.cls == RC_INT && dst.cls == RC_FP) { - u32 sz = type_byte_size(dst.type); - emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn) : aa64_fmov_s_w(rd, rn)); - } else if (src.cls == RC_FP && dst.cls == RC_INT) { - u32 sz = type_byte_size(src.type); - emit32(mc, sz == 8 ? aa64_fmov_x_d(rd, rn) : aa64_fmov_w_s(rd, rn)); - } else { - compiler_panic(t->c, a->loc, - "aarch64 convert BITCAST: same-class not yet supported"); - } - return; - } - default: - compiler_panic(t->c, a->loc, "aarch64 convert kind %d unimpl", (int)k); - } -} - -/* ---- calls / return ---- */ - -/* Materialize a CGABIValue into the outgoing argument slots: register - * arguments go to x0..x7 / v0..v7; overflow goes to [sp, #stack_off]. - * For BYVAL/INDIRECT the caller's `storage` is the address of the source - * data; we either load chunks into the next register pair (DIRECT - * aggregate) or pass the address itself (INDIRECT). 
*/ -static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, - const CGABIValue* av, u32* next_int, u32* next_fp, - u32* stack_off) { - AAImpl* a = impl_of(t); - /* Synthesize a one-part DIRECT ABIArgInfo for var args (av->abi is NULL - * past the fixed-param count). AAPCS64 routes var args through the same - * register/stack rules as fixed scalars, so this matches what - * abi_func_info would have produced. - * - * Apple ARM64 diverges: variadic args go on the stack only. The - * ABIFuncInfo.vararg_on_stack trait carries that policy out of the - * backend — we bump the next-int / next-fp cursors past the register - * pool so the part below falls through to stack placement. */ - ABIArgInfo va_ai; - ABIArgPart va_pt; - const ABIArgInfo* ai = av->abi; - if (!ai) { - u32 sz = type_byte_size(av->type); - memset(&va_ai, 0, sizeof va_ai); - memset(&va_pt, 0, sizeof va_pt); - va_ai.kind = ABI_ARG_DIRECT; - va_ai.parts = &va_pt; - va_ai.nparts = 1; - va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT; - va_pt.size = sz; - va_pt.align = sz; - va_pt.src_offset = 0; - ai = &va_ai; - if (fi && fi->vararg_on_stack) { - *next_int = 8; - *next_fp = 8; - } - } - if (ai->kind == ABI_ARG_IGNORE) return; - - if (ai->kind == ABI_ARG_INDIRECT) { - /* Pass the address of the storage. storage is OPK_LOCAL holding - * the byval source. */ - u32 dst_reg; - int to_stack = (*next_int >= 8); - if (!to_stack) - dst_reg = (*next_int)++; - else - dst_reg = 9; - if (av->storage.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot"); - emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0)); - } else if (av->storage.kind == OPK_INDIRECT) { - /* BYVAL from an indirect lvalue: pass the address `base + ind.ofs` - * itself in the arg register. 
*/ - emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f, - av->storage.v.ind.ofs); - } else { - compiler_panic(t->c, a->loc, - "aarch64 call: INDIRECT arg storage kind %d unsupported", - (int)av->storage.kind); - } - if (to_stack) { - emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); - *stack_off += 8; - } - return; - } - - /* DIRECT — possibly multiple parts. */ - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 sz = pt->size; - u32 sidx = size_idx_for_bytes(sz); - - if (pt->cls == ABI_CLASS_INT) { - int to_stack = (*next_int >= 8); - u32 dst_reg = to_stack ? 9u : (*next_int)++; - /* Source bits for this part. */ - switch (av->storage.kind) { - case OPK_IMM: { - u32 sf = (sz == 8) ? 1u : 0u; - emit_load_imm(t->mc, sf, dst_reg, av->storage.v.imm); - break; - } - case OPK_REG: { - u32 sf = (sz == 8) ? 1u : 0u; - emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage))); - break; - } - case OPK_LOCAL: { - /* BYVAL aggregate carried in registers: load chunks from - * the source local's address + part->src_offset. */ - AASlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad arg slot"); - i32 off = -(i32)s->off + (i32)pt->src_offset; - emit32(t->mc, aa64_ldur(sidx, dst_reg, 29, off)); - break; - } - case OPK_INDIRECT: { - /* Source is `[base + ind.ofs]`. Load each part from - * `[base, ind.ofs + part->src_offset]`. The cg layer hands out - * INDIRECT base regs from the callee-saved pool (x19..x28), - * which is disjoint from arg regs (x0..x7) and the x9 scratch, - * so the base survives every iteration of the part loop. 
*/ - Operand src; - memset(&src, 0, sizeof src); - src.kind = OPK_INDIRECT; - src.v.ind.base = av->storage.v.ind.base; - src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, /*tmp=*/9); - emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off)); - break; - } - default: - compiler_panic(t->c, a->loc, - "aarch64 call: arg storage kind %d unsupported", - (int)av->storage.kind); - } - if (to_stack) { - emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); - *stack_off += 8; - } - } else if (pt->cls == ABI_CLASS_FP) { - int to_stack = (*next_fp >= 8); - if (!to_stack) { - u32 dst_reg = (*next_fp)++; - switch (av->storage.kind) { - case OPK_REG: { - u32 type = (sz == 8) ? 1u : 0u; - emit32(t->mc, aa64_fmov_reg(type, dst_reg, reg_num(av->storage))); - break; - } - case OPK_INDIRECT: { - Operand src; - memset(&src, 0, sizeof src); - src.kind = OPK_INDIRECT; - src.v.ind.base = av->storage.v.ind.base; - src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, /*tmp=*/9); - emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off)); - break; - } - default: - compiler_panic(t->c, a->loc, - "aarch64 call: FP arg storage kind %d unsupported", - (int)av->storage.kind); - } - } else { - /* Store source FP reg directly into the stack slot — going - * through v0/v1 would corrupt args already placed in the - * register save area. */ - switch (av->storage.kind) { - case OPK_REG: - emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31, - (i32)*stack_off)); - break; - case OPK_INDIRECT: { - /* No direct mem-to-mem on aa64: route through a caller-saved - * scratch FP reg (v16) to avoid clobbering v0..v7 already - * loaded with earlier FP args. 
*/ - Operand src; - memset(&src, 0, sizeof src); - src.kind = OPK_INDIRECT; - src.v.ind.base = av->storage.v.ind.base; - src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; - i32 off; - u32 base = addr_base(t, src, &off, /*tmp=*/9); - emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off)); - emit32(t->mc, aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off)); - break; - } - default: - compiler_panic( - t->c, a->loc, - "aarch64 call: FP stack-arg storage kind %d unsupported", - (int)av->storage.kind); - } - *stack_off += 8; - } - } else { - compiler_panic(t->c, a->loc, "aarch64 call: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static void aa_call(CGTarget* t, const CGCallDesc* d) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - /* Pass 1: classify args, set up argument registers/stack. */ - u32 next_int = 0, next_fp = 0, stack_off = 0; - - /* sret: caller passes destination pointer in x8. */ - if (d->abi && d->abi->has_sret) { - if (d->ret.storage.kind != OPK_LOCAL) { - compiler_panic(t->c, a->loc, - "aarch64 call: sret destination must be LOCAL"); - } - AASlot* s = slot_get(a, d->ret.storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad sret slot"); - emit32(mc, aa64_sub_imm(1, 8, 29, s->off, 0)); - } - - for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off); - } - - /* Track outgoing-arg high-water mark, 16-aligned. */ - u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) a->max_outgoing = needed; - - /* Direct (BL <sym>) vs. indirect (BLR Xn). 
*/ - if (d->callee.kind == OPK_GLOBAL) { - u32 bl_pos = mc->pos(mc); - emit32(mc, aa64_bl_base()); - mc->emit_reloc_at(mc, mc->section_id, bl_pos, R_AARCH64_CALL26, - d->callee.v.global.sym, d->callee.v.global.addend, 0, 0); - } else if (d->callee.kind == OPK_REG) { - emit32(mc, aa64_blr(reg_num(d->callee))); - } else { - compiler_panic(t->c, a->loc, "aarch64 call: callee kind %d unsupported", - (int)d->callee.kind); - } - - /* Receive return value. */ - const ABIArgInfo* ri = &d->abi->ret; - if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) { - /* Nothing to copy — sret was placed directly into the dst slot. */ - return; - } - if (ri->nparts == 0) return; - - Operand rs = d->ret.storage; - /* Walk parts; INT parts come from x0, x1, ...; FP parts from v0, v1, .... */ - u32 next_int_ret = 0, next_fp_ret = 0; - for (u16 i = 0; i < ri->nparts; ++i) { - const ABIArgPart* p = &ri->parts[i]; - u32 src_reg; - if (p->cls == ABI_CLASS_INT) { - src_reg = next_int_ret++; - } else if (p->cls == ABI_CLASS_FP) { - src_reg = next_fp_ret++; - } else { - compiler_panic(t->c, a->loc, "aarch64 call: ret part cls %d unimpl", - (int)p->cls); - } - - if (rs.kind == OPK_REG) { - if (ri->nparts != 1) { - compiler_panic(t->c, a->loc, - "aarch64 call: REG ret_storage with %u parts", - (unsigned)ri->nparts); - } - if (p->cls == ABI_CLASS_INT) { - u32 sf = (p->size == 8) ? 1u : 0u; - emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg)); - } else { - u32 type = (p->size == 8) ? 
1u : 0u; - emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg)); - } - } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { - u32 base_reg; - i32 base_off; - if (rs.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot"); - base_reg = 29; - base_off = -(i32)s->off; - } else { - base_reg = rs.v.ind.base & 0x1f; - base_off = rs.v.ind.ofs; - } - u32 sidx = size_idx_for_bytes(p->size); - i32 off = base_off + (i32)p->src_offset; - if (p->cls == ABI_CLASS_INT) { - emit32(mc, aa64_stur(sidx, src_reg, base_reg, off)); - } else { - emit32(mc, aa64_stur_fp(sidx, src_reg, base_reg, off)); - } - } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { - /* Void return placeholder — nothing to do. */ - } else { - compiler_panic(t->c, a->loc, - "aarch64 call: ret_storage kind %d unsupported", - (int)rs.kind); - } - } -} - -/* Materialize the return value, then branch to the function epilogue. */ -static void aa_ret(CGTarget* t, const CGABIValue* val) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - if (val) { - const ABIArgInfo* ri = val->abi; - if (ri && ri->kind == ABI_ARG_INDIRECT) { - /* sret: caller passed the destination pointer in x8 at entry, - * which we spilled into sret_ptr_slot. Reload x8 from there, - * then memcpy the source storage into [x8]. 
*/ - if (val->storage.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad sret slot"); - if (a->sret_ptr_slot != FRAME_SLOT_NONE) { - AASlot* sp = slot_get(a, a->sret_ptr_slot); - if (sp) emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off)); - } - u32 nbytes = s->size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, aa64_ldur(3, 9, 29, -(i32)s->off + (i32)i)); - emit32(mc, aa64_str_uimm(3, 9, 8, i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, aa64_ldur(2, 9, 29, -(i32)s->off + (i32)i)); - emit32(mc, aa64_str_uimm(2, 9, 8, i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, aa64_ldur(1, 9, 29, -(i32)s->off + (i32)i)); - emit32(mc, aa64_str_uimm(1, 9, 8, i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, aa64_ldur(0, 9, 29, -(i32)s->off + (i32)i)); - emit32(mc, aa64_str_uimm(0, 9, 8, i)); - i += 1; - } - } else if (val->storage.kind == OPK_INDIRECT) { - /* sret memcpy from `[base + ind.ofs]` into [x8]. cg populates - * `val->size` with the aggregate byte count. 
*/ - u32 nbytes = val->size; - if (!nbytes) { - compiler_panic(t->c, a->loc, - "aarch64 ret indirect: missing aggregate size"); - } - if (a->sret_ptr_slot != FRAME_SLOT_NONE) { - AASlot* sp = slot_get(a, a->sret_ptr_slot); - if (sp) emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off)); - } - u32 base_reg = val->storage.v.ind.base & 0x1f; - i32 base_off = val->storage.v.ind.ofs; - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, aa64_ldur(3, 9, base_reg, base_off + (i32)i)); - emit32(mc, aa64_str_uimm(3, 9, 8, i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, aa64_ldur(2, 9, base_reg, base_off + (i32)i)); - emit32(mc, aa64_str_uimm(2, 9, 8, i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, aa64_ldur(1, 9, base_reg, base_off + (i32)i)); - emit32(mc, aa64_str_uimm(1, 9, 8, i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, aa64_ldur(0, 9, base_reg, base_off + (i32)i)); - emit32(mc, aa64_str_uimm(0, 9, 8, i)); - i += 1; - } - } else { - compiler_panic(t->c, a->loc, - "aarch64 ret indirect: storage kind %d unsupported", - (int)val->storage.kind); - } - } else if (val->storage.kind == OPK_REG) { - if (val->storage.cls == RC_FP) { - u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u; - emit32(mc, aa64_fmov_reg(type, /*Rd=*/0, reg_num(val->storage))); - } else { - u32 sf = type_is_64(val->storage.type) ? 1u : 0u; - emit32(mc, aa64_mov_reg(sf, /*Rd=*/0, reg_num(val->storage))); - } - } else if (val->storage.kind == OPK_IMM) { - u32 sf = type_is_64(val->storage.type) ? 1u : 0u; - emit_load_imm(mc, sf, /*Rd=*/0, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL || - val->storage.kind == OPK_INDIRECT) { - /* DIRECT return whose source is a local or an indirect lvalue: - * load each part into x0/x1 (or v0/v1) per the ABI classification. - * cg hands out INDIRECT base regs from x19..x28, disjoint from the - * x0/x1 (v0/v1) return regs, so the base survives the part loop. 
*/ - u32 base_reg; - i32 base_off; - if (val->storage.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot"); - base_reg = 29; /* fp */ - base_off = -(i32)s->off; - } else { - base_reg = val->storage.v.ind.base & 0x1f; - base_off = val->storage.v.ind.ofs; - } - const ABIArgInfo* ri = val->abi; - for (u16 i = 0; i < (ri ? ri->nparts : 0); ++i) { - const ABIArgPart* pt = &ri->parts[i]; - u32 sidx = size_idx_for_bytes(pt->size); - i32 off = base_off + (i32)pt->src_offset; - if (pt->cls == ABI_CLASS_INT) { - emit32(mc, aa64_ldur(sidx, /*Rt=*/i, base_reg, off)); - } else if (pt->cls == ABI_CLASS_FP) { - emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/i, base_reg, off)); - } else { - compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl", - (int)pt->cls); - } - } - } - } - /* Branch to the epilogue. mc->emit_label_ref records a fixup that - * resolves to a JUMP26-encoded displacement when the label is placed. */ - u32 bpos = mc->pos(mc); - emit32(mc, aa64_b_base()); - mc->emit_label_ref(mc, a->epilogue_label, R_AARCH64_JUMP26, 4, 0); - (void)bpos; -} - -/* Dynamic stack allocation. Layout: outgoing-args (max_outgoing bytes, - * 16-aligned) sit at the bottom of SP; the alloca block goes immediately - * above. After lowering SP by an aligned size, the new block's address is - * (SP + max_outgoing). max_outgoing is only known at func_end, so each - * alloca emits a placeholder `ADD dst, SP, #0` and registers a patch site; - * func_end rewrites the imm12 with the final max_outgoing. */ -static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - if (d.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "aarch64 alloca: dst must be REG"); - } - /* SP is 16-aligned and we lower it by an aligned amount, so result - * inherits 16-byte alignment. 
Larger requests would need an - * additional mask on the result; reject so we notice. */ - if (align > 16) { - compiler_panic(t->c, a->loc, - "aarch64 alloca: align %u > 16 not yet supported", align); - } - - if (sz.kind == OPK_IMM) { - i64 v = sz.v.imm; - if (v < 0) { - compiler_panic(t->c, a->loc, "aarch64 alloca: negative size"); - } - u64 aligned = ((u64)v + 15u) & ~(u64)15u; - if (aligned == 0) aligned = 16; /* keep SP changing */ - if (aligned > 0xfffu) { - compiler_panic(t->c, a->loc, - "aarch64 alloca: const size %llu too large for v1", - (unsigned long long)aligned); - } - emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=SP*/ 31, (u32)aligned, 0)); - } else if (sz.kind == OPK_REG) { - /* Round size up to a 16-byte multiple, then `sub sp, sp, x9` - * (extended-register form so Rd/Rn=SP work). */ - u32 sz_reg = reg_num(sz); - emit32(mc, aa64_add_imm(1, 9, sz_reg, 15u, 0)); /* x9 = size+15 */ - emit32(mc, aa64_ubfm(1, 9, 9, 4, 63)); /* lsr x9, x9, #4 */ - emit32(mc, aa64_ubfm(1, 9, 9, 60, 59)); /* lsl x9, x9, #4 */ - emit32(mc, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, 9)); - } else { - compiler_panic(t->c, a->loc, "aarch64 alloca: size kind %d unsupported", - (int)sz.kind); - } - - /* Placeholder ADD dst, SP, #<max_outgoing>. Patched at func_end. */ - if (a->nadd_patches == a->add_patches_cap) { - u32 ncap = a->add_patches_cap ? 
a->add_patches_cap * 2 : 4; - struct AAAllocaPatch* nb = - arena_array(t->c->tu, struct AAAllocaPatch, ncap); - if (a->add_patches) - memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches); - a->add_patches = nb; - a->add_patches_cap = ncap; - } - u32 dst_reg = reg_num(d); - a->add_patches[a->nadd_patches].pos = mc->pos(mc); - a->add_patches[a->nadd_patches].dst_reg = dst_reg; - a->nadd_patches++; - emit32(mc, aa64_add_imm(1, dst_reg, /*Rn=SP*/ 31, 0, 0)); - a->has_alloca = 1; -} -/* AAPCS64 va_list (32 bytes): - * off 0 void* __stack next stack-passed var arg - * off 8 void* __gr_top one past end of GP save area - * off 16 void* __vr_top one past end of FP save area - * off 24 int __gr_offs current GP offset (negative; >= 0 → use stack) - * off 28 int __vr_offs current FP offset (negative; >= 0 → use stack) - * - * va_start populates the struct from the function's reg-save areas and - * the named-param consumption already tracked on AAImpl. va_arg dispatches - * by RegClass: int args walk the GP save area at 8-byte stride; FP args - * walk the FP save area at 16-byte stride (q-register-sized slots). When - * the offset reaches 0, fall through to the stack at 8-byte stride. 
*/ -static void emit_fp_off(MCEmitter* mc, u32 dst, i32 ofs) { - if (ofs == 0) - emit32(mc, aa64_mov_reg(1, dst, 29)); - else if (ofs > 0 && (u32)ofs <= 0xfff) - emit32(mc, aa64_add_imm(1, dst, 29, (u32)ofs, 0)); - else if (ofs < 0 && (u32)(-ofs) <= 0xfff) - emit32(mc, aa64_sub_imm(1, dst, 29, (u32)(-ofs), 0)); - else { - emit_load_imm(mc, 1, dst, ofs); - emit32(mc, aa64_add(1, dst, 29, dst)); - } -} - -static void aa_va_start_(CGTarget* t, Operand ap_op) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - if (!a->is_variadic) { - compiler_panic(t->c, a->loc, "aarch64 va_start: function not variadic"); - } - u32 ap = reg_num(ap_op); - AASlot* gs = slot_get(a, a->gp_save_slot); - AASlot* fs = slot_get(a, a->fp_save_slot); - - /* __stack = fp + 16 + named-stack-args-bytes */ - { - u32 ofs = 16u + a->next_param_stack; - if (ofs <= 0xfff) - emit32(mc, aa64_add_imm(1, 9, 29, ofs, 0)); - else { - emit_load_imm(mc, 1, 9, (i64)ofs); - emit32(mc, aa64_add(1, 9, 29, 9)); - } - emit32(mc, aa64_str_uimm(3, 9, ap, 0)); - } - /* __gr_top = fp - gs->off + gs->size */ - emit_fp_off(mc, 9, -(i32)gs->off + (i32)gs->size); - emit32(mc, aa64_str_uimm(3, 9, ap, 8)); - /* __vr_top = fp - fs->off + fs->size */ - emit_fp_off(mc, 9, -(i32)fs->off + (i32)fs->size); - emit32(mc, aa64_str_uimm(3, 9, ap, 16)); - /* __gr_offs = named_int*8 - 64 */ - emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_int * 8u) - 64)); - emit32(mc, aa64_str_uimm(2, 9, ap, 24)); - /* __vr_offs = named_fp*16 - 128 */ - emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_fp * 16u) - 128)); - emit32(mc, aa64_str_uimm(2, 9, ap, 28)); -} - -static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op, - const Type* ty) { - MCEmitter* mc = t->mc; - u32 ap = reg_num(ap_op); - int is_fp = (dst.cls == RC_FP); - u32 offs_field = is_fp ? 28u : 24u; - u32 top_field = is_fp ? 16u : 8u; - u32 stride_reg = is_fp ? 
16u : 8u; - u32 sz = type_byte_size(ty); - u32 sidx = size_idx_for_bytes(sz); - - MCLabel L_stack = mc->label_new(mc); - MCLabel L_done = mc->label_new(mc); - - /* w9 = ap.offs ; cmp; b.ge L_stack (>=0 means save area exhausted) */ - emit32(mc, aa64_ldur(2, 9, ap, (i32)offs_field)); - emit32(mc, aa64_subs_imm(0, 31, 9, 0)); - emit32(mc, aa64_b_cond(0xa /*GE*/)); - mc->emit_label_ref(mc, L_stack, R_AARCH64_CONDBR19, 4, 0); - - /* save-area path: - * x10 = ap.top - * x12 = sxtw(w9) - * x11 = x10 + x12 - * load dst, [x11] - * w9 += stride_reg ; ap.offs = w9 ; b L_done */ - emit32(mc, aa64_ldur(3, 10, ap, (i32)top_field)); - emit32(mc, aa64_sbfm(1, 12, 9, 0, 31)); - emit32(mc, aa64_add(1, 11, 10, 12)); - if (is_fp) - emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 11, 0)); - else - emit32(mc, aa64_ldur(sidx, reg_num(dst), 11, 0)); - emit32(mc, aa64_add_imm(0, 9, 9, stride_reg, 0)); - emit32(mc, aa64_stur(2, 9, ap, (i32)offs_field)); - emit32(mc, aa64_b_base()); - mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0); - - /* L_stack: x10 = ap.stack ; load dst,[x10] ; x10+=8 ; ap.stack=x10 */ - mc->label_place(mc, L_stack); - emit32(mc, aa64_ldur(3, 10, ap, 0)); - if (is_fp) - emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 10, 0)); - else - emit32(mc, aa64_ldur(sidx, reg_num(dst), 10, 0)); - emit32(mc, aa64_add_imm(1, 10, 10, 8u, 0)); - emit32(mc, aa64_stur(3, 10, ap, 0)); - - mc->label_place(mc, L_done); -} - -static void aa_va_end_(CGTarget* t, Operand a) { - (void)t; - (void)a; -} - -static void aa_va_copy_(CGTarget* t, Operand d, Operand s) { - MCEmitter* mc = t->mc; - u32 dr = reg_num(d); - u32 sr = reg_num(s); - /* va_list is 32 bytes — 4 x 8-byte LDR/STR pairs. */ - for (u32 i = 0; i < 32u; i += 8u) { - emit32(mc, aa64_ldur(3, 9, sr, (i32)i)); - emit32(mc, aa64_stur(3, 9, dr, (i32)i)); - } -} - -/* ---- atomics ---- - * - * Lowering uses ARMv8.0 LL/SC (LDXR/STXR family) — no FEAT_LSE assumption. 
- * Acquire/Release semantics ride the load/store form chosen by MemOrder - * (LDAR/STLR for plain accesses; LDAXR/STLXR inside the LL/SC loop). - * fence() emits DMB ISH (data memory barrier, inner shareable). */ - -/* Encoder helpers — inline since only used here. */ -static inline u32 aa64_ldar(u32 sf64, u32 Rt, u32 Rn) { - return (sf64 ? 0xC8DFFC00u : 0x88DFFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_stlr(u32 sf64, u32 Rt, u32 Rn) { - return (sf64 ? 0xC89FFC00u : 0x889FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldxr(u32 sf64, u32 Rt, u32 Rn) { - return (sf64 ? 0xC85F7C00u : 0x885F7C00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_ldaxr(u32 sf64, u32 Rt, u32 Rn) { - return (sf64 ? 0xC85FFC00u : 0x885FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_stxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) { - return (sf64 ? 0xC8007C00u : 0x88007C00u) | ((Rs & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -static inline u32 aa64_stlxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) { - return (sf64 ? 0xC800FC00u : 0x8800FC00u) | ((Rs & 0x1f) << 16) | - ((Rn & 0x1f) << 5) | (Rt & 0x1f); -} -/* CBNZ Rt, imm19 */ -static inline u32 aa64_cbnz(u32 sf64, u32 Rt) { - return 0x35000000u | (sf64 << 31) | (Rt & 0x1f); -} - -static int mem_order_is_acquire(MemOrder o) { - return o == MO_ACQUIRE || o == MO_ACQ_REL || o == MO_SEQ_CST || - o == MO_CONSUME; -} -static int mem_order_is_release(MemOrder o) { - return o == MO_RELEASE || o == MO_ACQ_REL || o == MO_SEQ_CST; -} - -static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, - MemOrder ord) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 1u : 0u; - - /* Resolve addr to a base register; LDAR/LDR-exclusive both want a - * pointer in a GPR, no offset form. 
*/ - u32 base; - if (addr.kind == OPK_REG) { - base = reg_num(addr); - } else if (addr.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot"); - base = 9u; - emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); - } else { - compiler_panic(t->c, a->loc, - "aarch64 atomic_load: addr kind %d unsupported", - (int)addr.kind); - } - if (mem_order_is_acquire(ord)) { - emit32(mc, aa64_ldar(sf, reg_num(dst), base)); - } else { - u32 sidx = size_idx_for_bytes(ma.size); - emit32(mc, aa64_ldur(sidx, reg_num(dst), base, 0)); - } -} - -static void aa_atomic_store(CGTarget* t, Operand addr, Operand src, - MemAccess ma, MemOrder ord) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 1u : 0u; - - /* Materialize src into a register if needed. */ - u32 src_reg; - if (src.kind == OPK_IMM) { - src_reg = 10u; - emit_load_imm(mc, sf, src_reg, src.v.imm); - } else if (src.kind == OPK_REG) { - src_reg = reg_num(src); - } else { - compiler_panic(t->c, a->loc, - "aarch64 atomic_store: src kind %d unsupported", - (int)src.kind); - } - /* Base reg. */ - u32 base; - if (addr.kind == OPK_REG) { - base = reg_num(addr); - } else if (addr.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot"); - base = 9u; - emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); - } else { - compiler_panic(t->c, a->loc, - "aarch64 atomic_store: addr kind %d unsupported", - (int)addr.kind); - } - if (mem_order_is_release(ord)) { - emit32(mc, aa64_stlr(sf, src_reg, base)); - } else { - u32 sidx = size_idx_for_bytes(ma.size); - emit32(mc, aa64_stur(sidx, src_reg, base, 0)); - } -} - -/* Apply rmw op: new = f(prior, val). prior, val, dst are W/X based on sf. - * Uses scratch x12 if a temporary is needed (e.g. NAND). 
*/ -static void emit_rmw_combine(MCEmitter* mc, AtomicOp op, u32 sf, u32 dst_new, - u32 prior, u32 val) { - switch (op) { - case AO_XCHG: - emit32(mc, aa64_mov_reg(sf, dst_new, val)); - break; - case AO_ADD: - emit32(mc, aa64_add(sf, dst_new, prior, val)); - break; - case AO_SUB: - emit32(mc, aa64_sub(sf, dst_new, prior, val)); - break; - case AO_AND: - emit32(mc, aa64_and(sf, dst_new, prior, val)); - break; - case AO_OR: - emit32(mc, aa64_orr(sf, dst_new, prior, val)); - break; - case AO_XOR: - emit32(mc, aa64_eor(sf, dst_new, prior, val)); - break; - case AO_NAND: - /* NAND: new = ~(prior & val). AArch64 has no NAND; use AND then MVN. */ - emit32(mc, aa64_and(sf, dst_new, prior, val)); - emit32(mc, aa64_mvn(sf, dst_new, dst_new)); - break; - default: - emit32(mc, aa64_mov_reg(sf, dst_new, val)); - break; - } -} - -static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, - Operand val, MemAccess ma, MemOrder ord) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 1u : 0u; - - /* Pin operands into scratch regs: - * x9 = base (atomic addr) - * x10 = val - * x11 = new (computed) - * w12 = stxr status flag - * dst (prior) is the user-provided destination reg. 
*/ - u32 base = 9u; - if (addr.kind == OPK_REG) { - emit32(mc, aa64_mov_reg(1, 9, reg_num(addr))); - } else if (addr.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot"); - emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0)); - } else { - compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported", - (int)addr.kind); - } - u32 vreg = 10u; - if (val.kind == OPK_IMM) { - emit_load_imm(mc, sf, vreg, val.v.imm); - } else if (val.kind == OPK_REG) { - emit32(mc, aa64_mov_reg(sf, vreg, reg_num(val))); - } else { - compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: val kind %d unsupported", - (int)val.kind); - } - - int do_acq = mem_order_is_acquire(ord); - int do_rel = mem_order_is_release(ord); - - MCLabel L_retry = mc->label_new(mc); - mc->label_place(mc, L_retry); - - /* prior <- ldxr/ldaxr [base] */ - if (do_acq) - emit32(mc, aa64_ldaxr(sf, reg_num(dst), base)); - else - emit32(mc, aa64_ldxr(sf, reg_num(dst), base)); - - /* new = combine(prior, val) into x11 */ - emit_rmw_combine(mc, op, sf, /*new=*/11u, /*prior=*/reg_num(dst), vreg); - - /* status <- stxr/stlxr [base], new ; cbnz status, retry */ - if (do_rel) - emit32(mc, aa64_stlxr(sf, /*Rs=*/12u, /*Rt=*/11u, base)); - else - emit32(mc, aa64_stxr(sf, /*Rs=*/12u, /*Rt=*/11u, base)); - - u32 cbnz_pos = mc->pos(mc); - emit32(mc, aa64_cbnz(0, /*Rt=*/12u)); - mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0); - (void)cbnz_pos; -} - -static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, - Operand expected, Operand desired, MemAccess ma, - MemOrder succ, MemOrder fail) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 
1u : 0u; - (void)fail; - - /* Pin operands: - * x9 = base - * x10 = expected (compare against prior) - * x11 = desired (store on match) - * w12 = stxr status flag */ - u32 base = 9u; - if (addr.kind == OPK_REG) - emit32(mc, aa64_mov_reg(1, 9, reg_num(addr))); - else if (addr.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot"); - emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0)); - } else { - compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported", - (int)addr.kind); - } - if (expected.kind == OPK_IMM) - emit_load_imm(mc, sf, 10, expected.v.imm); - else if (expected.kind == OPK_REG) - emit32(mc, aa64_mov_reg(sf, 10, reg_num(expected))); - else - compiler_panic(t->c, a->loc, "aarch64 atomic_cas: exp kind %d unsupported", - (int)expected.kind); - if (desired.kind == OPK_IMM) - emit_load_imm(mc, sf, 11, desired.v.imm); - else if (desired.kind == OPK_REG) - emit32(mc, aa64_mov_reg(sf, 11, reg_num(desired))); - else - compiler_panic(t->c, a->loc, "aarch64 atomic_cas: des kind %d unsupported", - (int)desired.kind); - - int do_acq = mem_order_is_acquire(succ); - int do_rel = mem_order_is_release(succ); - - MCLabel L_retry = mc->label_new(mc); - MCLabel L_fail = mc->label_new(mc); - MCLabel L_done = mc->label_new(mc); - - mc->label_place(mc, L_retry); - if (do_acq) - emit32(mc, aa64_ldaxr(sf, reg_num(prior), base)); - else - emit32(mc, aa64_ldxr(sf, reg_num(prior), base)); - - /* if (prior != expected) -> fail (clrex + ok=0) */ - emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), 10u)); - emit32(mc, aa64_b_cond(0x1u /*NE*/)); - mc->emit_label_ref(mc, L_fail, R_AARCH64_CONDBR19, 4, 0); - - /* try store; retry on stxr failure */ - if (do_rel) - emit32(mc, aa64_stlxr(sf, 12u, 11u, base)); - else - emit32(mc, aa64_stxr(sf, 12u, 11u, base)); - emit32(mc, aa64_cbnz(0, 12u)); - mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0); - - /* ok = 1 ; jump done */ - 
emit_load_imm(mc, 0, reg_num(ok), 1); - emit32(mc, aa64_b_base()); - mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0); - - /* L_fail: clear monitor; ok = 0 */ - mc->label_place(mc, L_fail); - emit32(mc, aa64_clrex(AA64_BARRIER_OPT_SY)); - emit_load_imm(mc, 0, reg_num(ok), 0); - - mc->label_place(mc, L_done); -} - -static void aa_fence(CGTarget* t, MemOrder o) { - (void)o; - /* Conservative: full-system DMB ISH for any release/acquire/seq_cst. - * RELAXED fence is a no-op. */ - if (o == MO_RELAXED) return; - emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); -} - -/* ---- intrinsics ---- */ - -/* Data-processing (1 source) — REV16 / REV / REV32 / RBIT / CLZ. - * Family base 0x5AC00000 (sf=0); set sf<<31 for 64-bit forms. */ -static inline u32 aa64_rev16_w(u32 Rd, u32 Rn) { - return 0x5AC00400u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_rev_w(u32 Rd, u32 Rn) { - return 0x5AC00800u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_rev_x(u32 Rd, u32 Rn) { - return 0xDAC00C00u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_rbit(u32 sf64, u32 Rd, u32 Rn) { - return (sf64 ? 0xDAC00000u : 0x5AC00000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} -static inline u32 aa64_clz(u32 sf64, u32 Rd, u32 Rn) { - return (sf64 ? 0xDAC01000u : 0x5AC01000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); -} - -/* SIMD CNT (Vd.<T>, Vn.<T>) and ADDV (Bd, Vn.8B). 8B form, Q=0. */ -static inline u32 aa64_cnt_8b(u32 Vd, u32 Vn) { - return 0x0E205800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f); -} -static inline u32 aa64_addv_b_8b(u32 Vd, u32 Vn) { - return 0x0E31B800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f); -} - -/* ADDS / SUBS shifted register (S=1; sets NZCV including V for signed ovf). */ -static inline u32 aa64_adds_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) { - return 0x2B000000u | (sf << 31) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} - -/* SMADDL / UMADDL → SMULL / UMULL with Ra = ZR. 64-bit dst, 32-bit srcs. 
*/ -static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) { - return aa64_dp3_pack((AA64DP3){ - .sf = 1, .op31 = 1, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd}); -} -static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) { - return aa64_smaddl(Rd, Rn, Rm, AA64_ZR); -} - -/* SUBS Xd, Xn, Wm, SXTW — extended-register form, used for the - * mul_overflow check (compare full 64-bit product to sign-extended low 32). */ -static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) { - return 0xEB200000u | ((Rm & 0x1f) << 16) | (6u << 13) | ((Rn & 0x1f) << 5) | - (Rd & 0x1f); -} - -/* BRK #imm16 (TRAP/UNREACHABLE landing pads) lives in arch/aa64_isa.h - * alongside the rest of the exception-generation family. */ - -static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, - const Operand* args, u32 na) { - AAImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - (void)nd; - - switch (kind) { - case INTRIN_POPCOUNT: { - /* fmov v0, src ; cnt v0.8b, v0.8b ; addv b0, v0.8b ; fmov w_dst, s0 */ - Operand src = args[0]; - Operand dst = dsts[0]; - u32 sz_in = type_byte_size(src.type); - if (sz_in == 8) - emit32(mc, aa64_fmov_d_x(0, reg_num(src))); - else - emit32(mc, aa64_fmov_s_w(0, reg_num(src))); - emit32(mc, aa64_cnt_8b(0, 0)); - emit32(mc, aa64_addv_b_8b(0, 0)); - emit32(mc, aa64_fmov_w_s(reg_num(dst), 0)); - return; - } - case INTRIN_CLZ: { - Operand src = args[0]; - Operand dst = dsts[0]; - u32 sf = type_is_64(src.type) ? 1u : 0u; - emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(src))); - return; - } - case INTRIN_CTZ: { - /* ctz(x) = clz(rbit(x)) */ - Operand src = args[0]; - Operand dst = dsts[0]; - u32 sf = type_is_64(src.type) ? 
1u : 0u; - emit32(mc, aa64_rbit(sf, reg_num(dst), reg_num(src))); - emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(dst))); - return; - } - case INTRIN_BSWAP16: { - emit32(mc, aa64_rev16_w(reg_num(dsts[0]), reg_num(args[0]))); - return; - } - case INTRIN_BSWAP32: { - emit32(mc, aa64_rev_w(reg_num(dsts[0]), reg_num(args[0]))); - return; - } - case INTRIN_BSWAP64: { - emit32(mc, aa64_rev_x(reg_num(dsts[0]), reg_num(args[0]))); - return; - } - case INTRIN_MEMCPY: - case INTRIN_MEMMOVE: { - /* args = (dst_addr, src_addr, n_bytes). v1 only handles a constant - * n: unroll forward (memcpy) or backward (memmove). */ - Operand da = args[0], sa = args[1], nb = args[2]; - if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic(t->c, a->loc, - "aarch64 intrinsic: %s with non-const n or non-REG ptr", - kind == INTRIN_MEMCPY ? "memcpy" : "memmove"); - } - u32 dr = reg_num(da); - u32 sr = reg_num(sa); - u32 n = (u32)nb.v.imm; - if (kind == INTRIN_MEMCPY) { - u32 i = 0; - while (i + 8 <= n) { - emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); - emit32(mc, aa64_stur(3, 12, dr, (i32)i)); - i += 8; - } - while (i + 4 <= n) { - emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); - emit32(mc, aa64_stur(2, 12, dr, (i32)i)); - i += 4; - } - while (i + 2 <= n) { - emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); - emit32(mc, aa64_stur(1, 12, dr, (i32)i)); - i += 2; - } - while (i < n) { - emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); - emit32(mc, aa64_stur(0, 12, dr, (i32)i)); - i += 1; - } - } else { - /* memmove: copy backward to handle dst > src overlap. 
*/ - u32 i = n; - while (i >= 8) { - i -= 8; - emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); - emit32(mc, aa64_stur(3, 12, dr, (i32)i)); - } - while (i >= 4) { - i -= 4; - emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); - emit32(mc, aa64_stur(2, 12, dr, (i32)i)); - } - while (i >= 2) { - i -= 2; - emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); - emit32(mc, aa64_stur(1, 12, dr, (i32)i)); - } - while (i >= 1) { - i -= 1; - emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); - emit32(mc, aa64_stur(0, 12, dr, (i32)i)); - } - } - return; - } - case INTRIN_MEMSET: { - /* args = (dst_addr, byte, n) */ - Operand da = args[0], bv = args[1], nb = args[2]; - if (da.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic( - t->c, a->loc, - "aarch64 intrinsic: memset with non-const n / non-REG ptr"); - } - u32 dr = reg_num(da); - u32 n = (u32)nb.v.imm; - u32 byte; - u32 src_reg; - if (bv.kind == OPK_IMM) { - byte = (u32)(bv.v.imm & 0xffu); - if (byte == 0) { - src_reg = 31u; /* XZR / WZR */ - } else { - u64 b64 = byte; - b64 |= b64 << 8; - b64 |= b64 << 16; - b64 |= b64 << 32; - emit_load_imm(mc, 1, 12, (i64)b64); - src_reg = 12u; - } - } else if (bv.kind == OPK_REG) { - /* Broadcast: dup low byte across all 8 bytes via ORR-immediate - * trickery is awkward; use mul-by-0x0101010101010101. */ - emit_load_imm(mc, 1, 12, (i64)0x0101010101010101ll); - emit32(mc, aa64_madd(1, 12, reg_num(bv), 12, AA64_ZR)); - src_reg = 12u; - } else { - compiler_panic(t->c, a->loc, - "aarch64 intrinsic: memset byte kind %d unsupported", - (int)bv.kind); - } - u32 i = 0; - while (i + 8 <= n) { - emit32(mc, aa64_stur(3, src_reg, dr, (i32)i)); - i += 8; - } - while (i + 4 <= n) { - emit32(mc, aa64_stur(2, src_reg, dr, (i32)i)); - i += 4; - } - while (i + 2 <= n) { - emit32(mc, aa64_stur(1, src_reg, dr, (i32)i)); - i += 2; - } - while (i < n) { - emit32(mc, aa64_stur(0, src_reg, dr, (i32)i)); - i += 1; - } - return; - } - case INTRIN_PREFETCH: - /* No-op hint. 
*/ - (void)args; - (void)na; - return; - case INTRIN_ASSUME_ALIGNED: { - /* dst = src (alignment is a hint only). */ - Operand src = args[0]; - Operand dst = dsts[0]; - if (reg_num(src) != reg_num(dst)) { - emit32(mc, aa64_mov_reg(1, reg_num(dst), reg_num(src))); - } - return; - } - case INTRIN_EXPECT: { - /* dst = val (the "expected" hint is dropped). */ - Operand val = args[0]; - Operand dst = dsts[0]; - u32 sf = type_is_64(dst.type) ? 1u : 0u; - if (val.kind == OPK_REG) { - if (reg_num(val) != reg_num(dst)) { - emit32(mc, aa64_mov_reg(sf, reg_num(dst), reg_num(val))); - } - } else if (val.kind == OPK_IMM) { - emit_load_imm(mc, sf, reg_num(dst), val.v.imm); - } else { - compiler_panic(t->c, a->loc, - "aarch64 intrinsic: expect val kind %d unsupported", - (int)val.kind); - } - return; - } - case INTRIN_UNREACHABLE: - case INTRIN_TRAP: - emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u)); - return; - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { - /* dsts: [val, ovf]. ADDS/SUBS sets V on signed overflow; CSET VS. */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - u32 sf = type_is_64(dval.type) ? 1u : 0u; - u32 ra = force_reg_int(t, a_op, sf, 9); - u32 rb = force_reg_int(t, b_op, sf, (ra == 9) ? 10u : 9u); - u32 word = (kind == INTRIN_ADD_OVERFLOW) - ? aa64_adds_reg(sf, reg_num(dval), ra, rb) - : aa64_subs_reg(sf, reg_num(dval), ra, rb); - emit32(mc, word); - emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/)); - return; - } - case INTRIN_MUL_OVERFLOW: { - /* SMULL Xtmp, Wn, Wm gives full 64-bit signed product. - * ovf = (Xtmp != sxtw(Wtmp)) — i.e. upper 32 bits ≠ sign-ext of low. - * dval gets the truncated low 32 bits. */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - u32 sf = type_is_64(dval.type) ? 
1u : 0u; - if (sf) { - compiler_panic( - t->c, a->loc, - "aarch64 intrinsic: mul_overflow on i64 not yet supported"); - } - u32 ra = force_reg_int(t, a_op, 0, 9); - u32 rb = force_reg_int(t, b_op, 0, (ra == 9) ? 10u : 9u); - emit32(mc, aa64_smull(/*X*/ 11u, ra, rb)); - emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, /*Xn=*/11u, /*Wm=*/11u)); - emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/)); - emit32(mc, aa64_mov_reg(0, reg_num(dval), 11u)); - return; - } - default: - compiler_panic(t->c, a->loc, "aarch64 intrinsic: kind %d unsupported", - (int)kind); - } -} - -static void aa_asm_block(CGTarget* t, const char* tmpl, - const AsmConstraint* outs, u32 no, Operand* oo, - const AsmConstraint* ins, u32 ni, const Operand* io, - const Sym* clobs, u32 nc) { - /* Per doc/INLINEASM.md §6: open a per-block AA64Asm, bind operands + - * clobbers, walk the template substituting placeholders into asm - * source text and dispatching each line through the standalone - * aa64_asm_insn parser. - * - * cg_inline_asm has already spilled any live SValues bound to - * physical regs named in `clobs` (via target->resolve_reg_name). Here - * we additionally bump the callee-save high-water marks so the - * prologue saves/restores any callee-saved reg the asm body trashes - * even when no SValue ever used it. 
*/ - AAImpl* a_impl = impl_of(t); - for (u32 i = 0; i < nc; ++i) { - Reg phys; - RegClass cls; - if (aa_resolve_reg_name(t, clobs[i], &phys, &cls) != 0) continue; - if (cls == RC_INT) { - u32 idx = (u32)phys; - RegPool* p = &a_impl->int_pool; - if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { - u32 off = idx - p->base + 1u; - if (off > p->hwm) p->hwm = off; - } - } else if (cls == RC_FP) { - u32 idx = (u32)phys; - RegPool* p = &a_impl->fp_pool; - if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { - u32 off = idx - p->base + 1u; - if (off > p->hwm) p->hwm = off; - } - } - } - AA64Asm* a = aa64_asm_open(t->c); - aa64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc); - aa64_asm_run_template(a, t->mc, tmpl); - aa64_asm_close(a); -} - -static void aa_set_loc(CGTarget* t, SrcLoc loc) { - impl_of(t)->loc = loc; - t->mc->set_loc(t->mc, loc); -} - -static void aa_finalize(CGTarget* t) { (void)t; } - -static void aa_destroy(CGTarget* t) { (void)t; /* arena-backed */ } - -/* ---- construction ---- */ - -static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } - -CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { - AAImpl* a = arena_new(c->tu, AAImpl); - memset(a, 0, sizeof *a); - - CGTarget* t = &a->base; - t->c = c; - t->obj = o; - t->mc = m; - - t->func_begin = aa_func_begin; - t->func_end = aa_func_end; - - t->alloc_reg = aa_alloc_reg; - t->free_reg = aa_free_reg; - t->frame_slot = aa_frame_slot; - t->param = aa_param; - t->clobbers = aa_clobbers; - t->spill_reg = aa_spill_reg; - t->reload_reg = aa_reload_reg; - - t->label_new = aa_label_new; - t->label_place = aa_label_place; - t->jump = aa_jump; - t->cmp_branch = aa_cmp_branch; - - t->scope_begin = aa_scope_begin; - t->scope_else = aa_scope_else; - t->scope_end = aa_scope_end; - t->break_to = aa_break_to; - t->continue_to = aa_continue_to; - - t->load_imm = aa_load_imm; - t->load_const = aa_load_const; - t->copy = aa_copy; - t->load = aa_load; - t->store = aa_store; - 
t->addr_of = aa_addr_of; - t->tls_addr_of = aa_tls_addr_of; - t->copy_bytes = aa_copy_bytes; - t->set_bytes = aa_set_bytes; - t->bitfield_load = aa_bitfield_load; - t->bitfield_store = aa_bitfield_store; - - t->binop = aa_binop; - t->unop = aa_unop; - t->cmp = aa_cmp; - t->convert = aa_convert; - - t->call = aa_call; - t->ret = aa_ret; - - t->alloca_ = aa_alloca_; - t->va_start_ = aa_va_start_; - t->va_arg_ = aa_va_arg_; - t->va_end_ = aa_va_end_; - t->va_copy_ = aa_va_copy_; - - t->setjmp_ = NULL; - t->longjmp_ = NULL; - - t->atomic_load = aa_atomic_load; - t->atomic_store = aa_atomic_store; - t->atomic_rmw = aa_atomic_rmw; - t->atomic_cas = aa_atomic_cas; - t->fence = aa_fence; - - t->intrinsic = aa_intrinsic; - t->asm_block = aa_asm_block; - t->resolve_reg_name = aa_resolve_reg_name; - - t->set_loc = aa_set_loc; - t->finalize = aa_finalize; - t->destroy = aa_destroy; - - /* Avoid signed/unsigned warning. */ - (void)type_is_signed; - - compiler_defer(c, cgt_cleanup, t); - return t; -} diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c @@ -0,0 +1,318 @@ +/* aarch64/alloc.c — register pool, spill/reload, labels, control flow, + * structured scopes. */ + +#include "arch/aarch64/internal.h" + +/* ============================================================ + * AAImpl accessor + * ============================================================ */ + +AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; } + +/* ============================================================ + * RegPool + * ============================================================ */ + +void regpool_init(RegPool* p, u8 base, u8 nregs) { + p->base = base; + p->nregs = nregs; + p->hwm = 0; + p->free = (nregs >= 32u) ? 
0xFFFFFFFFu : ((1u << nregs) - 1u); +} + +Reg regpool_alloc(RegPool* p) { + if (p->free == 0) return (Reg)REG_NONE; + u32 idx = (u32)__builtin_ctz(p->free); + p->free &= ~(1u << idx); + if (idx + 1u > p->hwm) p->hwm = idx + 1u; + return (Reg)(p->base + idx); +} + +int regpool_free(RegPool* p, Reg r) { + u32 rn = (u32)r; + if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0; + u32 idx = rn - p->base; + u32 bit = 1u << idx; + if (p->free & bit) return -1; + p->free |= bit; + return 1; +} + +/* ============================================================ + * Slot accessor + * ============================================================ */ + +AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs) { + if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; + return &a->slots[fs - 1]; +} + +/* ============================================================ + * Register allocation / free + * ============================================================ */ + +static Reg aa_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { + AAImpl* a = impl_of(t); + (void)ty; + if (cls == RC_INT) return regpool_alloc(&a->int_pool); + if (cls == RC_FP) return regpool_alloc(&a->fp_pool); + compiler_panic(t->c, a->loc, "aarch64 alloc_reg: class %d unimpl", (int)cls); +} + +void aa_free_reg(CGTarget* t, Reg r, RegClass cls) { + AAImpl* a = impl_of(t); + RegPool* p; + switch (cls) { + case RC_INT: p = &a->int_pool; break; + case RC_FP: p = &a->fp_pool; break; + default: + compiler_panic(t->c, a->loc, "aarch64 free_reg: class %d unimpl", + (int)cls); + } + int rc = regpool_free(p, r); + if (rc == 1) return; + if (rc == -1) { + compiler_panic(t->c, a->loc, + "aarch64 free_reg: reg %u already free in %s pool", + (unsigned)r, cls == RC_FP ? "fp" : "int"); + } + compiler_panic(t->c, a->loc, "aarch64 free_reg: reg %u not in %s pool", + (unsigned)r, cls == RC_FP ? 
"fp" : "int"); +} + +static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) { + (void)c; + (void)n; + compiler_panic(t->c, impl_of(t)->loc, "aarch64: clobbers not implemented"); +} + +static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out, + RegClass* cls_out) { + (void)t; + size_t len = 0; + const char* s = pool_str(t->c->global, name, &len); + if (!s || !len) return 1; + char buf[8]; + if (len >= sizeof buf) return 1; + memcpy(buf, s, len); + buf[len] = '\0'; + u32 dwarf; + if (aa64_register_index(buf, &dwarf) != 0) return 1; + if (dwarf <= 30u) { + if (out) *out = (Reg)dwarf; + if (cls_out) *cls_out = RC_INT; + return 0; + } + if (dwarf >= 64u && dwarf <= 95u) { + if (out) *out = (Reg)(dwarf - 64u); + if (cls_out) *cls_out = RC_FP; + return 0; + } + return 1; +} + +static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot, + MemAccess ma) { + AAImpl* a = impl_of(t); + if (src.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "aarch64 spill_reg: src is not OPK_REG"); + } + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + aa_store(t, addr, src, ma); + aa_free_reg(t, src.v.reg, src.cls); +} + +static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, + MemAccess ma) { + AAImpl* a = impl_of(t); + if (dst.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "aarch64 reload_reg: dst is not OPK_REG"); + } + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + aa_load(t, dst, addr, ma); +} + +/* ============================================================ + * Labels / control flow + * ============================================================ */ + +static Label aa_label_new(CGTarget* t) { + return (Label)t->mc->label_new(t->mc); +} + +static void aa_label_place(CGTarget* t, Label l) { + t->mc->label_place(t->mc, (MCLabel)l); +} + +void 
aa_jump(CGTarget* t, Label l) { + MCEmitter* mc = t->mc; + aa64_emit32(mc, aa64_b_base()); + mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0); +} + +static u32 cmp_to_cond(CmpOp op) { + switch (op) { + case CMP_EQ: return 0x0u; + case CMP_NE: return 0x1u; + case CMP_LT_U: return 0x3u; + case CMP_LE_U: return 0x9u; + case CMP_GT_U: return 0x8u; + case CMP_GE_U: return 0x2u; + case CMP_LT_S: return 0xbu; + case CMP_LE_S: return 0xdu; + case CMP_GT_S: return 0xcu; + case CMP_GE_S: return 0xau; + default: return 0x0u; + } +} + +void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { + MCEmitter* mc = t->mc; + u32 sf = type_is_64(a_op.type) ? 1u : 0u; + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 imm12, sh; + if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) { + u32 rn = aa64_force_reg_int(t, a_op, sf, 9); + aa64_emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh)); + return; + } + } + u32 rn = aa64_force_reg_int(t, a_op, sf, 9); + u32 rm = aa64_force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u); + aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, rn, rm)); +} + +static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, + Label l) { + MCEmitter* mc = t->mc; + emit_cmp_ab(t, a, b); + aa64_emit32(mc, aa64_b_cond(cmp_to_cond(op))); + mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0); +} + +static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { + emit_cmp_ab(t, a, b); + u32 sf_dst = type_is_64(dst.type) ? 1u : 0u; + aa64_emit32(t->mc, aa64_cset(sf_dst, reg_num(dst), cmp_to_cond(op))); +} + +/* ============================================================ + * Structured scopes + * ============================================================ */ + +static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) { + AAImpl* a = impl_of(t); + if (a->nscopes == a->scopes_cap) { + u32 ncap = a->scopes_cap ? 
a->scopes_cap * 2u : 4u; + AAScope* nb = arena_array(t->c->tu, AAScope, ncap); + if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes); + a->scopes = nb; + a->scopes_cap = ncap; + } + AAScope* sc = &a->scopes[a->nscopes]; + sc->kind = (u8)d->kind; + sc->has_else = 0; + sc->else_label = 0; + sc->end_label = 0; + sc->break_label = d->break_label; + sc->continue_label = d->continue_label; + + if (d->kind == SCOPE_IF) { + sc->else_label = t->mc->label_new(t->mc); + sc->end_label = t->mc->label_new(t->mc); + u32 sf = type_is_64(d->cond.type) ? 1u : 0u; + u32 rn = aa64_force_reg_int(t, d->cond, sf, 9); + aa64_emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, rn, 0)); + aa64_emit32(t->mc, aa64_b_cond(0x0u /*EQ*/)); + t->mc->emit_label_ref(t->mc, sc->else_label, R_AARCH64_CONDBR19, 4, 0); + } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { + /* bookkeep only */ + } else { + compiler_panic(t->c, a->loc, + "aarch64 scope_begin: kind %d not yet implemented", + (int)d->kind); + } + + a->nscopes++; + return (CGScope)a->nscopes; +} + +static void aa_scope_else(CGTarget* t, CGScope s) { + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 scope_else: bad scope %u", + (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + aa64_emit32(t->mc, aa64_b_base()); + t->mc->emit_label_ref(t->mc, sc->end_label, R_AARCH64_JUMP26, 4, 0); + t->mc->label_place(t->mc, sc->else_label); + sc->has_else = 1; +} + +static void aa_scope_end(CGTarget* t, CGScope s) { + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 scope_end: bad scope %u", + (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + if (sc->kind == SCOPE_IF) { + if (!sc->has_else) { + t->mc->label_place(t->mc, sc->else_label); + } + t->mc->label_place(t->mc, sc->end_label); + } +} + +static void aa_break_to(CGTarget* t, CGScope s) { + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > 
a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 break_to: bad scope %u", (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + aa_jump(t, sc->break_label); +} + +static void aa_continue_to(CGTarget* t, CGScope s) { + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 continue_to: bad scope %u", + (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + aa_jump(t, sc->continue_label); +} + +/* Expose vtable entries to ops.c constructor via a registration helper. + * ops.c calls this after the basic ops vtable is populated. */ +void aa_alloc_vtable_init(CGTarget* t) { + t->alloc_reg = aa_alloc_reg; + t->free_reg = aa_free_reg; + t->clobbers = aa_clobbers; + t->spill_reg = aa_spill_reg; + t->reload_reg = aa_reload_reg; + t->resolve_reg_name = aa_resolve_reg_name; + + t->label_new = aa_label_new; + t->label_place = aa_label_place; + t->jump = aa_jump; + t->cmp_branch = aa_cmp_branch; + t->cmp = aa_cmp; + + t->scope_begin = aa_scope_begin; + t->scope_else = aa_scope_else; + t->scope_end = aa_scope_end; + t->break_to = aa_break_to; + t->continue_to = aa_continue_to; +} diff --git a/src/arch/aarch64/emit.c b/src/arch/aarch64/emit.c @@ -0,0 +1,546 @@ +/* aarch64/emit.c — instruction encoding helpers, function lifecycle, + * frame layout, parameter ABI, address materialization. 
*/ + +#include "arch/aarch64/internal.h" + +extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); + +/* ============================================================ + * Shared type / operand helpers + * ============================================================ */ + +int type_is_64(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 1; + default: + return 0; + } +} + +int type_is_fp_double(const Type* t) { + return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); +} + +int type_is_signed(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_LLONG: + return 1; + default: + return 0; + } +} + +u32 type_byte_size(const Type* t) { + if (!t) return 4; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + case TY_BOOL: + return 1; + case TY_SHORT: + case TY_USHORT: + return 2; + case TY_INT: + case TY_UINT: + case TY_FLOAT: + return 4; + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 8; + default: + return 8; + } +} + +u32 size_idx_for_bytes(u32 nbytes) { + switch (nbytes) { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + default: + return 3; + } +} + +u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } + +/* ============================================================ + * Low-level emission + * ============================================================ */ + +void aa64_emit32(MCEmitter* mc, u32 word) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 b[4]; + b[0] = (u8)(word & 0xff); + b[1] = (u8)((word >> 8) & 0xff); + b[2] = (u8)((word >> 16) & 0xff); + b[3] = (u8)((word >> 24) & 0xff); + mc->emit_bytes(mc, b, 4); + if (mc->debug) { + debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); + } +} + +void 
aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) { + u8 b[4]; + b[0] = (u8)(word & 0xff); + b[1] = (u8)((word >> 8) & 0xff); + b[2] = (u8)((word >> 16) & 0xff); + b[3] = (u8)((word >> 24) & 0xff); + obj_patch(obj, sec_id, ofs, b, 4); +} + +/* ============================================================ + * Immediate encoding helpers + * ============================================================ */ + +void aa64_emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm) { + const u32 nslots = sf ? 4u : 2u; + u64 v = sf ? (u64)imm : ((u64)imm & 0xffffffffu); + + for (u32 i = 0; i < nslots; ++i) { + u32 slot = (u32)((v >> (i * 16)) & 0xffffu); + u64 cleared = v & ~((u64)0xffffu << (i * 16)); + if (slot != 0 && cleared == 0) { + aa64_emit32(mc, aa64_movz(sf, Rd, slot, i)); + return; + } + } + + { + u64 inv = sf ? ~v : ((~v) & 0xffffffffu); + for (u32 i = 0; i < nslots; ++i) { + u32 slot = (u32)((inv >> (i * 16)) & 0xffffu); + u64 cleared = inv & ~((u64)0xffffu << (i * 16)); + if (cleared == 0) { + aa64_emit32(mc, aa64_movn(sf, Rd, slot, i)); + return; + } + } + } + + int placed = 0; + for (u32 i = 0; i < nslots; ++i) { + u32 slot = (u32)((v >> (i * 16)) & 0xffffu); + if (!placed) { + if (slot == 0) continue; + aa64_emit32(mc, aa64_movz(sf, Rd, slot, i)); + placed = 1; + } else if (slot != 0) { + aa64_emit32(mc, aa64_movk(sf, Rd, slot, i)); + } + } + if (!placed) aa64_emit32(mc, aa64_movz(sf, Rd, 0, 0)); +} + +void emit_sp_add(MCEmitter* mc, u32 imm) { + if (imm <= 0xfff) { + aa64_emit32(mc, aa64_add_imm(1, 31, 31, imm, 0)); + } else if ((imm & 0xfff) == 0 && (imm >> 12) <= 0xfff) { + aa64_emit32(mc, aa64_add_imm(1, 31, 31, imm >> 12, 1)); + } else { + aa64_emit32(mc, aa64_add_imm(1, 31, 31, (imm >> 12) & 0xfff, 1)); + aa64_emit32(mc, aa64_add_imm(1, 31, 31, imm & 0xfff, 0)); + } +} + +/* ============================================================ + * Function lifecycle + * ============================================================ */ + +void 
/* Closes the function opened by aa_func_begin: emits the shared epilogue,
 * overwrites the NOP placeholder at a->prologue_pos with the real prologue,
 * resolves the recorded `add <reg>, sp, #max_outgoing` patches and defines
 * the function symbol over [func_start, end).
 *
 * Frame layout, as byte offsets from the post-prologue SP:
 *   [0, max_outgoing)             outgoing stack arguments
 *   [int_save_off, fp_save_off)   callee-saved integer pairs, x19 upward
 *   [fp_save_off, locals_off)     callee-saved FP pairs, d8 upward
 *   [locals_off, fp_lr_off)       frame slots, addressed as x29 - slot.off
 *   [fp_lr_off, frame_size)       saved x29/x30; x29 is set to SP + fp_lr_off
 *
 * Encoding limits enforced here:
 *   - STP/LDP (signed offset) carry a 7-bit immediate scaled by 8, i.e.
 *     -512..504 bytes. The x29/x30 save falls back to two STR/LDR with a
 *     scaled unsigned offset when fp_lr_off exceeds that; callee-save pairs
 *     beyond it are rejected with a panic.
 *   - `add x29, sp, #fp_lr_off` is a single ADD, so fp_lr_off must fit
 *     imm12. */
void aa_func_end(CGTarget* t) {
  AAImpl* a = impl_of(t);
  MCEmitter* mc = t->mc;

  /* Largest byte offset an STP/LDP of 64-bit registers can encode. */
  const u32 pair_off_max = 504u;

  u32 n_int_pairs = (a->int_pool.hwm + 1) / 2;
  /* The FP pool hands out more registers than the 8 that are saved here. */
  u32 used_fp_cs = a->fp_pool.hwm > 8 ? 8u : a->fp_pool.hwm;
  u32 n_fp_pairs = (used_fp_cs + 1) / 2;

  u32 int_save_off = a->max_outgoing;
  u32 fp_save_off = int_save_off + n_int_pairs * 16;
  u32 locals_off = fp_save_off + n_fp_pairs * 16;
  u32 fp_lr_off = locals_off + a->cum_off;
  u32 frame_size = fp_lr_off + 16;
  frame_size = (frame_size + 15u) & ~15u;
  fp_lr_off = frame_size - 16;

  /* The last callee-save pair sits at locals_off - 16; if that one is
   * encodable, all of them are. */
  if (n_int_pairs + n_fp_pairs > 0 && locals_off - 16u > pair_off_max) {
    compiler_panic(t->c, a->loc,
                   "aarch64: callee-save offset %u out of STP/LDP range",
                   locals_off - 16u);
  }

  mc->label_place(mc, a->epilogue_label);

  if (a->has_alloca) {
    /* alloca moved SP; recompute it from the frame pointer. */
    if (fp_lr_off <= 0xfff) {
      aa64_emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=*/29, fp_lr_off, 0));
    } else {
      compiler_panic(t->c, a->loc,
                     "aarch64: has_alloca + fp_lr_off %u out of imm12 range",
                     fp_lr_off);
    }
  }

  for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) {
    u32 r0 = 8u + (u32)i * 2u;
    u32 r1 = r0 + 1u;
    aa64_emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u)));
  }
  for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) {
    u32 r0 = 19u + (u32)i * 2u;
    u32 r1 = r0 + 1u;
    aa64_emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u)));
  }
  if (fp_lr_off <= pair_off_max) {
    aa64_emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off));
  } else {
    /* Out of LDP range: reload x29/x30 separately (scaled unsigned offset). */
    aa64_emit32(mc, aa64_ldr_uimm(3, 29, 31, fp_lr_off));
    aa64_emit32(mc, aa64_ldr_uimm(3, 30, 31, fp_lr_off + 8u));
  }
  emit_sp_add(mc, frame_size);
  aa64_emit32(mc, aa64_ret(AA64_LR));

  u32 pos = a->prologue_pos;
  ObjBuilder* obj = t->obj;
  u32 sec = a->fd->text_section_id;

  /* Build the prologue in a scratch array; unused words stay NOP. */
  u32 words[AA_PROLOGUE_WORDS];
  for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) words[i] = AA64_NOP;
  u32 wi = 0;

  if (frame_size <= 0xfff) {
    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size, 0);
  } else if ((frame_size & 0xfff) == 0 && (frame_size >> 12) <= 0xfff) {
    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1);
  } else {
    if (wi + 2 > AA_PROLOGUE_WORDS) {
      compiler_panic(t->c, a->loc,
                     "aarch64: prologue overflow for frame_size %u",
                     frame_size);
    }
    words[wi++] = aa64_sub_imm(1, 31, 31, (frame_size >> 12) & 0xfff, 1);
    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size & 0xfff, 0);
  }

  if (fp_lr_off > 0xfff) {
    compiler_panic(t->c, a->loc, "aarch64: fp_lr_off %u out of imm12 range",
                   fp_lr_off);
  }
  /* At most 2 words are used so far, so the next 3 always fit. */
  if (fp_lr_off <= pair_off_max) {
    words[wi++] = aa64_stp_x(29, 30, 31, (i32)fp_lr_off);
  } else {
    words[wi++] = aa64_str_uimm(3, 29, 31, fp_lr_off);
    words[wi++] = aa64_str_uimm(3, 30, 31, fp_lr_off + 8u);
  }
  words[wi++] = aa64_add_imm(1, 29, 31, fp_lr_off, 0);
  if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
    AASlot* s = aa64_slot_get(a, a->sret_ptr_slot);
    if (s) {
      if (wi >= AA_PROLOGUE_WORDS) goto overflow;
      /* Spill the incoming x8 (indirect-result pointer) to its slot. */
      words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off);
    }
  }
  for (u32 i = 0; i < n_int_pairs; ++i) {
    u32 r0 = 19u + i * 2u;
    u32 r1 = r0 + 1u;
    if (wi >= AA_PROLOGUE_WORDS) goto overflow;
    words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u));
  }
  for (u32 i = 0; i < n_fp_pairs; ++i) {
    u32 r0 = 8u + i * 2u;
    u32 r1 = r0 + 1u;
    if (wi >= AA_PROLOGUE_WORDS) goto overflow;
    words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u));
  }
  if (0) {
  overflow:
    compiler_panic(
        t->c, a->loc,
        "aarch64: prologue placeholder too small (used %u of %u words)", wi,
        AA_PROLOGUE_WORDS);
  }

  for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) {
    aa64_patch32(obj, sec, pos + i * 4u, words[i]);
  }

  if (a->max_outgoing > 0xfff) {
    compiler_panic(
        t->c, a->loc,
        "aarch64: max_outgoing %u out of imm12 range for alloca patch",
        a->max_outgoing);
  }
  /* Each recorded patch becomes `add <dst>, sp, #max_outgoing`, now that the
   * final size of the outgoing-argument area is known. */
  for (u32 i = 0; i < a->nadd_patches; ++i) {
    u32 dr = a->add_patches[i].dst_reg;
    u32 word = aa64_add_imm(1, dr, /*Rn=SP*/ 31, a->max_outgoing, 0);
    aa64_patch32(obj, sec, a->add_patches[i].pos, word);
  }

  u32 end = mc->pos(mc);
  obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start,
                    (u64)(end - a->func_start));

  mc->cfi_endproc(mc);
  a->fd = NULL;
}
/* ============================================================
 * Parameters
 * ============================================================ */

/* Spills one incoming parameter into its frame slot (p->slot).
 *
 * Parameters are consumed through three cursors kept in AAImpl:
 * next_param_int (x0..x7), next_param_fp (FP registers 0..7) and
 * next_param_stack (byte offset into the caller's argument area, read at
 * x29 + 16 + offset). Each stack-passed piece advances the stack cursor by
 * 8 bytes.
 *
 *  - ABI_ARG_IGNORE:   nothing is emitted.
 *  - ABI_ARG_INDIRECT: the argument is a pointer (in the next integer
 *    register, or loaded from the stack into x9); s->size bytes are copied
 *    from it into the slot through x10 in 8/4/2/1-byte pieces.
 *  - otherwise:        every ABI part is stored to the slot at
 *    pt->src_offset, from its register or via x9 (integer) / FP register 0
 *    when it was passed on the stack.
 *
 * Panics on an invalid slot or an ABI class other than INT / FP.
 *
 * NOTE(review): aa64_ldur / aa64_stur keep only the low 9 bits of the byte
 * offset (-256..255). The offsets used below (-s->off + i, 16 + caller_off,
 * and the running copy offset i) are not range-checked, so slots further
 * than 256 bytes below x29, indirect aggregates longer than 256 bytes or
 * deep stack-argument offsets look like they would be mis-encoded --
 * confirm against the callers' limits. */
void aa_param(CGTarget* t, const CGParamDesc* p) {
  AAImpl* a = impl_of(t);
  AASlot* s = aa64_slot_get(a, p->slot);
  if (!s) {
    compiler_panic(t->c, a->loc, "aarch64 param: bad slot");
  }
  const ABIArgInfo* ai = p->abi;

  if (ai->kind == ABI_ARG_IGNORE) return;
  if (ai->kind == ABI_ARG_INDIRECT) {
    /* Locate the pointer to the caller's copy of the aggregate. */
    u32 ptr_reg;
    if (a->next_param_int < 8) {
      ptr_reg = a->next_param_int++;
    } else {
      u32 caller_off = a->next_param_stack;
      a->next_param_stack += 8;
      aa64_emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off)));
      ptr_reg = 9;
    }
    /* Copy widest-first; x10 carries each piece. */
    u32 nbytes = s->size;
    u32 i = 0;
    while (i + 8 <= nbytes) {
      aa64_emit32(t->mc, aa64_ldur(3, 10, ptr_reg, (i32)i));
      aa64_emit32(t->mc, aa64_stur(3, 10, 29, -(i32)s->off + (i32)i));
      i += 8;
    }
    while (i + 4 <= nbytes) {
      aa64_emit32(t->mc, aa64_ldur(2, 10, ptr_reg, (i32)i));
      aa64_emit32(t->mc, aa64_stur(2, 10, 29, -(i32)s->off + (i32)i));
      i += 4;
    }
    while (i + 2 <= nbytes) {
      aa64_emit32(t->mc, aa64_ldur(1, 10, ptr_reg, (i32)i));
      aa64_emit32(t->mc, aa64_stur(1, 10, 29, -(i32)s->off + (i32)i));
      i += 2;
    }
    while (i < nbytes) {
      aa64_emit32(t->mc, aa64_ldur(0, 10, ptr_reg, (i32)i));
      aa64_emit32(t->mc, aa64_stur(0, 10, 29, -(i32)s->off + (i32)i));
      i += 1;
    }
    return;
  }
  for (u16 i = 0; i < ai->nparts; ++i) {
    const ABIArgPart* pt = &ai->parts[i];
    u32 part_off = pt->src_offset;
    u32 sz = pt->size;
    u32 sidx = size_idx_for_bytes(sz);

    if (pt->cls == ABI_CLASS_INT) {
      if (a->next_param_int < 8) {
        /* Still in x0..x7: store the register straight to the slot. */
        u32 reg = a->next_param_int++;
        aa64_emit32(t->mc, aa64_stur(sidx, reg, 29, -(i32)s->off + (i32)part_off));
      } else {
        /* Passed on the stack: bounce through x9. */
        u32 caller_off = a->next_param_stack;
        a->next_param_stack += 8;
        aa64_emit32(t->mc, aa64_ldur(sidx, 9, 29, (i32)(16 + caller_off)));
        aa64_emit32(t->mc, aa64_stur(sidx, 9, 29, -(i32)s->off + (i32)part_off));
      }
    } else if (pt->cls == ABI_CLASS_FP) {
      if (a->next_param_fp < 8) {
        u32 reg = a->next_param_fp++;
        aa64_emit32(t->mc,
                    aa64_stur_fp(sidx, reg, 29, -(i32)s->off + (i32)part_off));
      } else {
        /* Passed on the stack: bounce through FP register 0. This branch is
         * only reached once next_param_fp has already advanced to 8. */
        u32 caller_off = a->next_param_stack;
        a->next_param_stack += 8;
        aa64_emit32(t->mc, aa64_ldur_fp(sidx, 0, 29, (i32)(16 + caller_off)));
        aa64_emit32(t->mc, aa64_stur_fp(sidx, 0, 29, -(i32)s->off + (i32)part_off));
      }
    } else {
      compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl",
                     (int)pt->cls);
    }
  }
}

/* ============================================================
 * Address materialization helpers
 * ============================================================ */

/* Nonzero when `sym` has to be addressed through the GOT; the decision is
 * delegated to obj_symbol_extern_via_got. */
static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
  return obj_symbol_extern_via_got(t->c, t->obj, sym);
}

/* Loads the address of `sym` from its GOT entry into dst_reg:
 *   adrp dst, <page>     (R_AARCH64_ADR_GOT_PAGE)
 *   ldr  dst, [dst, #0]  (R_AARCH64_LD64_GOT_LO12_NC)
 * Both relocations are emitted against `sym` with a zero addend, so callers
 * apply any addend themselves. */
void aa64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
  MCEmitter* mc = t->mc;
  u32 sec = mc->section_id;
  u32 adrp_pos = mc->pos(mc);
  aa64_emit32(mc, aa64_adrp_base(dst_reg));
  mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_GOT_PAGE, sym, 0, 0, 0);
  u32 ldr_pos = mc->pos(mc);
  aa64_emit32(mc, aa64_ldr_uimm(/*size=*/3, dst_reg, dst_reg, 0));
  mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LD64_GOT_LO12_NC, sym, 0, 0, 0);
}
(addend) aa64_emit_addr_adjust(mc, dst_reg, dst_reg, (i32)addend); + return; + } + u32 sec = mc->section_id; + u32 adrp_pos = mc->pos(mc); + aa64_emit32(mc, aa64_adrp_base(dst_reg)); + mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, addend, + 0, 0); + u32 add_pos = mc->pos(mc); + aa64_emit32(mc, aa64_add_imm(1, dst_reg, dst_reg, 0, 0)); + mc->emit_reloc_at(mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym, addend, 0, + 0); +} + +void aa64_emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off) { + if (off == 0) { + aa64_emit32(mc, aa64_mov_reg(1, Rd, base)); + return; + } + u32 abs_off = (off < 0) ? (u32)(-off) : (u32)off; + if (abs_off <= 0xfff) { + if (off < 0) + aa64_emit32(mc, aa64_sub_imm(1, Rd, base, abs_off, 0)); + else + aa64_emit32(mc, aa64_add_imm(1, Rd, base, abs_off, 0)); + return; + } + if ((abs_off >> 24) == 0) { + u32 hi = (abs_off >> 12) & 0xfff; + u32 lo = abs_off & 0xfff; + if (off < 0) { + if (hi) aa64_emit32(mc, aa64_sub_imm(1, Rd, base, hi, 1)); + if (lo) aa64_emit32(mc, aa64_sub_imm(1, Rd, hi ? Rd : base, lo, 0)); + } else { + if (hi) aa64_emit32(mc, aa64_add_imm(1, Rd, base, hi, 1)); + if (lo) aa64_emit32(mc, aa64_add_imm(1, Rd, hi ? Rd : base, lo, 0)); + } + return; + } + aa64_emit_load_imm(mc, 1, Rd, off); + aa64_emit32(mc, aa64_add(1, Rd, base, Rd)); +} + diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h @@ -0,0 +1,312 @@ +/* aarch64/internal.h — private types and forward decls shared across + * emit.c / alloc.c / ops.c. NOT part of the public API. */ +#pragma once + +#include <string.h> + +#include "arch/aa64_asm.h" +#include "arch/aa64_isa.h" +#include "arch/aa64_regs.h" +#include "arch/arch.h" +#include "core/arena.h" +#include "obj/obj.h" +#include "type/type.h" + +/* ============================================================ + * Local encoding helpers (kept here, not in aa64_isa.h). 
+ * ============================================================ */ + +#define AA64_NOP 0xD503201Fu + +static inline u32 aa64_stp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { + i32 sc = byte_off >> 3; + return 0xA9000000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { + i32 sc = byte_off >> 3; + return 0xA9400000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_stp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { + i32 sc = byte_off >> 3; + return 0x6D000000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) { + i32 sc = byte_off >> 3; + return 0x6D400000u | (((u32)sc & 0x7fu) << 15) | ((Rt2 & 0x1f) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} + +static inline u32 aa64_stur(u32 size, u32 Rt, u32 Rn, i32 simm9) { + return 0x38000000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldur(u32 size, u32 Rt, u32 Rn, i32 simm9) { + return 0x38400000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_stur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) { + return 0x3C000000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) { + return 0x3C400000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} + +static inline u32 aa64_str_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> size; + return 0x39000000u | (size << 30) | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldr_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> size; + return 0x39400000u | (size 
<< 30) | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_str_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> size; + return 0x3D000000u | (size << 30) | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} + +static inline u32 aa64_mrs_tpidr_el0(u32 Rt) { + return 0xD53BD040u | (Rt & 0x1fu); +} +static inline u32 aa64_b_base(void) { return 0x14000000u; } +static inline u32 aa64_bl_base(void) { return 0x94000000u; } + +static inline u32 aa64_adrp_base(u32 Rd) { return 0x90000000u | (Rd & 0x1f); } + +static inline u32 aa64_ldr_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> size; + return 0x3D400000u | (size << 30) | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} + +static inline u32 aa64_fmov_reg(u32 type, u32 Rd, u32 Rn) { + return 0x1E204000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_subs_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12) { + return 0x71000000u | (sf << 31) | ((imm12 & 0xfff) << 10) | + ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_cset_eq(u32 sf, u32 Rd) { + return 0x1A800400u | (sf << 31) | (31u << 16) | (0x1u << 12) | (31u << 5) | + (Rd & 0x1f); +} + +static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn) { + return 0x1E380000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} +static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn) { + return 0x1E390000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} +static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn) { + return 0x1E220000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} +static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn) { + return 0x1E230000u | (sf << 31) | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} + +static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn) { + return 0x1E22C000u | ((Rn & 0x1f) << 5) | 
(Rd & 0x1f); +} +static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn) { + return 0x1E624000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn) { + return 0x1E270000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn) { + return 0x1E260000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) { + return 0x9E670000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) { + return 0x9E660000u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_sub_extreg_x_uxtx(u32 Rd, u32 Rn, u32 Rm) { + return 0xCB206000u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) { + return 0x6B000000u | (sf << 31) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} + +static inline u32 aa64_b_cond(u32 cond) { return 0x54000000u | (cond & 0xfu); } + +static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { + return 0x1A800400u | (sf << 31) | ((Rm & 0x1f) << 16) | + ((cond & 0xfu) << 12) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond) { + return aa64_csinc(sf, Rd, 31u, 31u, cond ^ 1u); +} + +static inline u32 aa64_fadd(u32 type, u32 Rd, u32 Rn, u32 Rm) { + return 0x1E202800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fsub(u32 type, u32 Rd, u32 Rn, u32 Rm) { + return 0x1E203800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fmul(u32 type, u32 Rd, u32 Rn, u32 Rm) { + return 0x1E200800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) { + return 0x1E201800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} + +static inline u32 aa64_sbfm(u32 
/* Bitfield-move encoders. `sf` selects the 64-bit form and is also copied
 * into bit 22; immr and imms are 6-bit fields at bits 16 and 10. */
static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
  return 0x53000000u | (sf << 31) | (sf << 22) | ((immr & 0x3fu) << 16) |
         ((imms & 0x3fu) << 10) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
}
static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
  return 0x33000000u | (sf << 31) | (sf << 22) | ((immr & 0x3fu) << 16) |
         ((imms & 0x3fu) << 10) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
}

/* ============================================================
 * RegPool
 * ============================================================ */

/* Allocator state for one register class. aa_func_begin initializes the
 * integer pool with base 19 / 10 registers and the FP pool with base 8 /
 * 16 registers. */
typedef struct RegPool {
  u32 free; /* presumably a bitmask of free registers -- maintained in
               alloc.c, confirm there */
  u32 hwm;  /* high-water mark; aa_func_end derives the number of
               callee-saved register pairs to save/restore from it */
  u8 base;  /* first hardware register number of the pool */
  u8 nregs; /* number of registers in the pool */
  u8 pad[2];
} RegPool;

/* ============================================================
 * AAImpl types
 * ============================================================ */

/* Number of NOP words aa_func_begin reserves for the prologue that
 * aa_func_end patches in afterwards. The budget below does not include the
 * optional sret-pointer spill or the second `sub` needed for frame sizes
 * that do not fit one immediate; aa_func_end panics when the real prologue
 * does not fit. */
#define AA_PROLOGUE_WORDS \
  12u /* sub sp (1) + stp/add fp (2) + 5 int pairs + 4 fp pairs = 12 */

/* One frame slot, created by aa_frame_slot. */
typedef struct AASlot {
  u32 off;   /* the slot is addressed at x29 - off */
  u32 size;  /* size in bytes */
  u32 align; /* alignment in bytes */
  u8 kind;   /* copied from FrameSlotDesc.kind */
  u8 pad[3];
} AASlot;

/* One open control-flow scope; the scope stack itself is managed in
 * alloc.c. */
typedef struct AAScope {
  u8 kind;
  u8 has_else;
  u8 pad[2];
  MCLabel else_label;
  MCLabel end_label;
  Label break_label;
  Label continue_label;
} AAScope;

/* Per-target backend state; impl_of() recovers it from a CGTarget. */
typedef struct AAImpl {
  CGTarget base;
  SrcLoc loc;           /* location passed to compiler_panic */
  const CGFuncDesc* fd; /* function being emitted; set by aa_func_begin,
                           reset to NULL by aa_func_end */

  u32 func_start;         /* section position where the function begins */
  u32 prologue_pos;       /* position of the NOP prologue placeholder */
  MCLabel epilogue_label; /* placed at the start of the epilogue */

  AASlot* slots; /* growable array, capacity doubled in aa_frame_slot */
  u32 nslots;
  u32 slots_cap;
  u32 cum_off;      /* running total of slot bytes; the newest slot's off */
  u32 max_outgoing; /* size of the outgoing-argument area at the bottom of
                       the frame */

  /* Cursors over incoming parameters, advanced by aa_param. */
  u32 next_param_int;
  u32 next_param_fp;
  u32 next_param_stack;
  u8 has_sret;             /* taken from fd->abi->has_sret */
  FrameSlot sret_ptr_slot; /* slot holding the spilled x8, or
                              FRAME_SLOT_NONE */

  RegPool int_pool;
  RegPool fp_pool;

  AAScope* scopes;
  u32 nscopes;
  u32 scopes_cap;

  u8 has_alloca;
  /* Instructions to rewrite in aa_func_end as
   * `add dst_reg, sp, #max_outgoing`. */
  struct AAAllocaPatch {
    u32 pos;
    u32 dst_reg;
  }* add_patches;
  u32 nadd_patches;
  u32 add_patches_cap;

  u8 is_variadic;         /* taken from fd->abi->variadic */
  FrameSlot gp_save_slot; /* 64-byte area receiving x0..x7 in variadic
                             functions */
  FrameSlot fp_save_slot; /* 128-byte area receiving FP registers 0..7 in
                             variadic functions */
} AAImpl;

/* ============================================================
 * Cross-file forward declarations
 * ============================================================ */

/* regpool (alloc.c) */
void regpool_init(RegPool* p, u8 base, u8 nregs);
Reg regpool_alloc(RegPool* p);
int regpool_free(RegPool* p, Reg r);

/* emit.c helpers used in alloc.c / ops.c */
void aa64_emit32(MCEmitter* mc, u32 word);
void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word);
void aa64_emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm);
void emit_sp_add(MCEmitter* mc, u32 imm);
void aa64_emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off);
void aa64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym);
void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym, i64 addend);

/* emit.c public surface */
FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d);
void aa_func_begin(CGTarget* t, const CGFuncDesc* fd);
void aa_func_end(CGTarget* t);
void aa_param(CGTarget* t, const CGParamDesc* p);

/* alloc.c helpers used in emit.c / ops.c */
AAImpl* impl_of(CGTarget* t);
AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs);
void aa_free_reg(CGTarget* t, Reg r, RegClass cls);
void aa_jump(CGTarget* t, Label l);

/* ops.c helpers used in alloc.c */
void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
u32 aa64_force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch);

/* alloc.c helpers used in ops.c */
void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op);
void aa_alloc_vtable_init(CGTarget* t);

/* shared type helpers (defined in emit.c, used broadly) */
int type_is_64(const Type* t);
int type_is_fp_double(const Type* t);
int type_is_signed(const Type* t);
u32 type_byte_size(const Type* t);
u32 size_idx_for_bytes(u32 nbytes);
u32 reg_num(Operand op);
b/src/arch/aarch64/ops.c @@ -0,0 +1,1895 @@ +/* aarch64/ops.c — data movement, arithmetic, calls, varargs, atomics, + * intrinsics, asm_block, set_loc, finalize/destroy, vtable constructor. */ + +#include "arch/aarch64/internal.h" + +/* ============================================================ + * Data movement + * ============================================================ */ + +static void aa_load_imm(CGTarget* t, Operand dst, i64 imm) { + u32 sf = type_is_64(dst.type) ? 1u : 0u; + aa64_emit_load_imm(t->mc, sf, reg_num(dst), imm); +} + +static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) { + AAImpl* a = impl_of(t); + if (dst.cls != RC_FP) { + compiler_panic(t->c, a->loc, "aarch64 load_const: only FP supported in v1"); + } + + Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); + ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); + + u32 cur_section = t->mc->section_id; + t->mc->set_section(t->mc, ro); + u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4); + t->mc->emit_bytes(t->mc, cb.bytes, cb.size); + + char namebuf[64]; + static u32 lit_seq = 0; + int len = 0; + { + const char* prefix = ".LCFP"; + for (; prefix[len]; ++len) namebuf[len] = prefix[len]; + u32 v = lit_seq++; + char tmp[16]; + int tn = 0; + if (v == 0) + tmp[tn++] = '0'; + else { + while (v) { + tmp[tn++] = '0' + (char)(v % 10); + v /= 10; + } + } + for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; + namebuf[len] = 0; + } + Sym sname = pool_intern_cstr(t->c->global, namebuf); + ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, + (u64)cb.size); + + t->mc->set_section(t->mc, cur_section); + + u32 adrp_pos = t->mc->pos(t->mc); + aa64_emit32(t->mc, aa64_adrp_base(9)); + t->mc->emit_reloc_at(t->mc, cur_section, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, + sym, 0, 0, 0); + + u32 ldr_pos = t->mc->pos(t->mc); + u32 sidx = (cb.size == 8) ? 
3u : 2u; + aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0)); + RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC + : R_AARCH64_LDST32_ABS_LO12_NC; + t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0); +} + +static void aa_copy(CGTarget* t, Operand dst, Operand src) { + if (dst.cls == RC_FP || src.cls == RC_FP) { + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + aa64_emit32(t->mc, aa64_fmov_reg(type, reg_num(dst), reg_num(src))); + return; + } + u32 sf = type_is_64(dst.type) ? 1u : 0u; + aa64_emit32(t->mc, aa64_mov_reg(sf, reg_num(dst), reg_num(src))); +} + +/* ============================================================ + * Load / store + * ============================================================ */ + +static RelocKind ldst_lo12_reloc_for(u32 nbytes) { + switch (nbytes) { + case 1: return R_AARCH64_LDST8_ABS_LO12_NC; + case 2: return R_AARCH64_LDST16_ABS_LO12_NC; + case 4: return R_AARCH64_LDST32_ABS_LO12_NC; + case 8: return R_AARCH64_LDST64_ABS_LO12_NC; + default: return R_AARCH64_LDST64_ABS_LO12_NC; + } +} + +static int use_got_for_sym(CGTarget* t, ObjSymId sym) { + return obj_symbol_extern_via_got(t->c, t->obj, sym); +} + +static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { + AAImpl* a = impl_of(t); + if (addr.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot"); + i32 off = -(i32)s->off; + if (off >= -256 && off <= 255) { + *out_off = off; + return 29; + } + aa64_emit_addr_adjust(t->mc, tmp_reg, 29, off); + *out_off = 0; + return tmp_reg; + } + if (addr.kind == OPK_INDIRECT) { + i32 off = addr.v.ind.ofs; + u32 base = addr.v.ind.base & 0x1f; + if (off >= -256 && off <= 255) { + *out_off = off; + return base; + } + aa64_emit_addr_adjust(t->mc, tmp_reg, base, off); + *out_off = 0; + return tmp_reg; + } + if (addr.kind == OPK_GLOBAL) { + emit_global_addr(t, tmp_reg, addr.v.global.sym, 
addr.v.global.addend); + *out_off = 0; + return tmp_reg; + } + compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d", + (int)addr.kind); +} + +void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + u32 sidx = size_idx_for_bytes(sz); + + if (addr.kind == OPK_GLOBAL) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + ObjSymId sym = addr.v.global.sym; + i64 add = addr.v.global.addend; + if (use_got_for_sym(t, sym)) { + aa64_emit_got_load_addr(t, /*dst=*/9, sym); + if (dst.cls == RC_FP) { + aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, (i32)add)); + } else { + aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, (i32)add)); + } + return; + } + u32 adrp_pos = mc->pos(mc); + aa64_emit32(mc, aa64_adrp_base(/*Rd=*/9)); + mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add, + 0, 0); + u32 ld_pos = mc->pos(mc); + if (dst.cls == RC_FP) { + aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0)); + } else { + aa64_emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), 9, 0)); + } + mc->emit_reloc_at(mc, sec, ld_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0); + return; + } + + i32 off; + u32 base = addr_base(t, addr, &off, 9); + if (dst.cls == RC_FP) { + aa64_emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off)); + } else { + aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off)); + } +} + +void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + u32 sidx = size_idx_for_bytes(sz); + + if (addr.kind == OPK_GLOBAL) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + ObjSymId sym = addr.v.global.sym; + i64 add = addr.v.global.addend; + + u32 src_reg; + u32 src_is_fp = 0; + if (src.kind == OPK_IMM) { + u32 sf = (sz == 8) ? 
1u : 0u; + aa64_emit_load_imm(mc, sf, /*Rd=*/9, src.v.imm); + src_reg = 9; + } else if (src.cls == RC_FP) { + src_reg = reg_num(src); + src_is_fp = 1; + } else { + src_reg = reg_num(src); + } + u32 base = (src.kind == OPK_IMM) ? 10u : 9u; + if (use_got_for_sym(t, sym)) { + aa64_emit_got_load_addr(t, base, sym); + if (src_is_fp) { + aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, base, (i32)add)); + } else { + aa64_emit32(mc, aa64_stur(sidx, src_reg, base, (i32)add)); + } + return; + } + u32 adrp_pos = mc->pos(mc); + aa64_emit32(mc, aa64_adrp_base(base)); + mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add, + 0, 0); + u32 st_pos = mc->pos(mc); + if (src_is_fp) { + aa64_emit32(mc, aa64_str_fp_uimm(sidx, src_reg, base, 0)); + } else { + aa64_emit32(mc, aa64_str_uimm(sidx, src_reg, base, 0)); + } + mc->emit_reloc_at(mc, sec, st_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0); + return; + } + + i32 off; + u32 addr_tmp = (src.kind == OPK_IMM) ? 10u : 9u; + u32 base = addr_base(t, addr, &off, addr_tmp); + + if (src.kind == OPK_IMM) { + u32 sf = (sz == 8) ? 
1u : 0u; + aa64_emit_load_imm(t->mc, sf, 9, src.v.imm); + aa64_emit32(t->mc, aa64_stur(sidx, 9, base, off)); + return; + } + if (src.cls == RC_FP) { + aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(src), base, off)); + } else { + aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off)); + } +} + +static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) { + AAImpl* a = impl_of(t); + if (lv.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, lv.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot"); + aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0)); + return; + } + if (lv.kind == OPK_INDIRECT) { + i32 ofs = lv.v.ind.ofs; + u32 base = lv.v.ind.base & 0x1f; + if (ofs == 0) { + aa64_emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base)); + } else if (ofs > 0 && ofs <= 0xfff) { + aa64_emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0)); + } else if (ofs < 0 && -ofs <= 0xfff) { + aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 addr_of: indirect offset %d unsupported", ofs); + } + return; + } + if (lv.kind == OPK_GLOBAL) { + u32 rd = reg_num(dst); + ObjSymId sym = lv.v.global.sym; + i64 addend = lv.v.global.addend; + if (use_got_for_sym(t, sym)) { + aa64_emit_got_load_addr(t, rd, sym); + if (addend) aa64_emit_addr_adjust(t->mc, rd, rd, (i32)addend); + return; + } + u32 sec = t->mc->section_id; + u32 adrp_pos = t->mc->pos(t->mc); + aa64_emit32(t->mc, aa64_adrp_base(rd)); + t->mc->emit_reloc_at(t->mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, + addend, 0, 0); + u32 add_pos = t->mc->pos(t->mc); + aa64_emit32(t->mc, aa64_add_imm(1, rd, rd, 0, 0)); + t->mc->emit_reloc_at(t->mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym, + addend, 0, 0); + return; + } + compiler_panic(t->c, impl_of(t)->loc, "aarch64: addr_of not implemented"); +} + +static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { + MCEmitter* mc = 
t->mc; + u32 sec = mc->section_id; + u32 rd = reg_num(dst); + + aa64_emit32(mc, aa64_mrs_tpidr_el0(/*Rt=*/9)); + + u32 hi_pos = mc->pos(mc); + aa64_emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/9, /*imm12=*/0, /*sh=*/1)); + mc->emit_reloc_at(mc, sec, hi_pos, R_AARCH64_TLSLE_ADD_TPREL_HI12, sym, + addend, 0, 0); + + u32 lo_pos = mc->pos(mc); + aa64_emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/rd, /*imm12=*/0, /*sh=*/0)); + mc->emit_reloc_at(mc, sec, lo_pos, R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, sym, + addend, 0, 0); +} + +/* ============================================================ + * Aggregate helpers + * ============================================================ */ + +static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { + if (op.kind == OPK_REG) return reg_num(op); + if (op.kind == OPK_LOCAL) { + AAImpl* a = impl_of(t); + AASlot* s = aa64_slot_get(a, op.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot"); + aa64_emit32(t->mc, aa64_sub_imm(1, scratch, 29, s->off, 0)); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 agg: address kind %d unsupported", (int)op.kind); +} + +static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr, + AggregateAccess agg) { + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, 9); + u32 sr = agg_addr_reg(t, src_addr, (dr == 10) ? 
11u : 10u); + u32 nbytes = agg.size; + u32 i = 0; + while (i + 8 <= nbytes) { + aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i)); + i += 2; + } + while (i < nbytes) { + aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i)); + i += 1; + } +} + +static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value, + AggregateAccess agg) { + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, 9); + + u32 byte; + if (byte_value.kind == OPK_IMM) { + byte = (u32)(byte_value.v.imm & 0xffu); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 set_bytes: REG byte not yet supported"); + } + u32 nbytes = agg.size; + + if (byte == 0) { + u32 i = 0; + while (i + 8 <= nbytes) { + aa64_emit32(mc, aa64_stur(3, 31, dr, (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + aa64_emit32(mc, aa64_stur(2, 31, dr, (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + aa64_emit32(mc, aa64_stur(1, 31, dr, (i32)i)); + i += 2; + } + while (i < nbytes) { + aa64_emit32(mc, aa64_stur(0, 31, dr, (i32)i)); + i += 1; + } + return; + } + + u64 b64 = byte; + b64 |= b64 << 8; + b64 |= b64 << 16; + b64 |= b64 << 32; + aa64_emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)b64); + + u32 i = 0; + while (i + 8 <= nbytes) { + aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i)); + i += 2; + } + while (i < nbytes) { + aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i)); + i += 1; + } +} + +/* ============================================================ + * 
Bitfields + * ============================================================ */ + +static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, + BitFieldAccess bf) { + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, 9); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + u32 sf = (storage_bytes == 8u) ? 1u : 0u; + u32 sidx = size_idx_for_bytes(storage_bytes); + u32 rd = reg_num(dst); + + aa64_emit32(mc, aa64_ldur(sidx, rd, base, (i32)bf.storage_offset)); + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + u32 imms = lsb + width - 1u; + if (bf.signed_) { + aa64_emit32(mc, aa64_sbfm(sf, rd, rd, lsb, imms)); + } else { + aa64_emit32(mc, aa64_ubfm(sf, rd, rd, lsb, imms)); + } +} + +static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src, + BitFieldAccess bf) { + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, 9); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + u32 sf = (storage_bytes == 8u) ? 1u : 0u; + u32 sidx = size_idx_for_bytes(storage_bytes); + + aa64_emit32(mc, aa64_ldur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); + + u32 src_reg; + if (src.kind == OPK_IMM) { + aa64_emit_load_imm(mc, sf, /*Rd=*/11u, src.v.imm); + src_reg = 11u; + } else if (src.kind == OPK_REG) { + src_reg = reg_num(src); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 bitfield_store: src kind %d unsupported", + (int)src.kind); + } + + u32 reg_size = sf ? 64u : 32u; + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? 
bf.bit_width : 1u; + u32 immr = (reg_size - lsb) % reg_size; + u32 imms = width - 1u; + aa64_emit32(mc, aa64_bfm(sf, /*Rd=*/10u, src_reg, immr, imms)); + + aa64_emit32(mc, aa64_stur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); +} + +/* ============================================================ + * Arithmetic helpers + * ============================================================ */ + +u32 aa64_force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch) { + if (op.kind == OPK_REG) return reg_num(op); + if (op.kind == OPK_IMM) { + aa64_emit_load_imm(t->mc, sf, scratch, op.v.imm); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 binop: operand kind %d unsupported", (int)op.kind); +} + +static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, + Operand b_op) { + MCEmitter* mc = t->mc; + + if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { + if (a_op.kind != OPK_REG || b_op.kind != OPK_REG || dst.cls != RC_FP) { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 binop: FP op requires REG operands"); + } + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + u32 rd = reg_num(dst); + u32 rn = reg_num(a_op); + u32 rm = reg_num(b_op); + u32 w; + switch (op) { + case BO_FADD: w = aa64_fadd(type, rd, rn, rm); break; + case BO_FSUB: w = aa64_fsub(type, rd, rn, rm); break; + case BO_FMUL: w = aa64_fmul(type, rd, rn, rm); break; + case BO_FDIV: w = aa64_fdiv(type, rd, rn, rm); break; + default: w = 0; break; + } + aa64_emit32(mc, w); + return; + } + + u32 sf = type_is_64(dst.type) ? 
1u : 0u; + u32 rd = reg_num(dst); + + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 rn_reg = reg_num(a_op); + i64 imm = b_op.v.imm; + u32 imm12, sh, N, immr, imms; + switch (op) { + case BO_IADD: + if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { + aa64_emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh)); + return; + } + break; + case BO_ISUB: + if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { + aa64_emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh)); + return; + } + break; + case BO_AND: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + aa64_emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_OR: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + aa64_emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_XOR: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + aa64_emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_SHL: { + u32 width = sf ? 64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) { + aa64_emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + case BO_SHR_U: { + u32 width = sf ? 64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) { + aa64_emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + case BO_SHR_S: { + u32 width = sf ? 
64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) { + aa64_emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + default: break; + } + } + + u32 rn = aa64_force_reg_int(t, a_op, sf, 9); + u32 rm = aa64_force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9); + + u32 word; + switch (op) { + case BO_IADD: word = aa64_add(sf, rd, rn, rm); break; + case BO_ISUB: word = aa64_sub(sf, rd, rn, rm); break; + case BO_IMUL: word = aa64_mul(sf, rd, rn, rm); break; + case BO_AND: word = aa64_and(sf, rd, rn, rm); break; + case BO_OR: word = aa64_orr(sf, rd, rn, rm); break; + case BO_XOR: word = aa64_eor(sf, rd, rn, rm); break; + case BO_SHL: word = aa64_lslv(sf, rd, rn, rm); break; + case BO_SHR_U: word = aa64_lsrv(sf, rd, rn, rm); break; + case BO_SHR_S: word = aa64_asrv(sf, rd, rn, rm); break; + case BO_UDIV: word = aa64_udiv(sf, rd, rn, rm); break; + case BO_SDIV: word = aa64_sdiv(sf, rd, rn, rm); break; + case BO_SREM: + aa64_emit32(mc, aa64_sdiv(sf, 11, rn, rm)); + word = aa64_msub(sf, rd, 11, rm, rn); + break; + case BO_UREM: + aa64_emit32(mc, aa64_udiv(sf, 11, rn, rm)); + word = aa64_msub(sf, rd, 11, rm, rn); + break; + case BO_FADD: + case BO_FSUB: + case BO_FMUL: + case BO_FDIV: + default: + compiler_panic(t->c, impl_of(t)->loc, "aarch64 binop: op %d unimpl", + (int)op); + } + aa64_emit32(mc, word); +} + +static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { + MCEmitter* mc = t->mc; + u32 sf = type_is_64(dst.type) ? 
1u : 0u; + u32 rd = reg_num(dst); + u32 rn = aa64_force_reg_int(t, a_op, sf, 9); + u32 word; + + switch (op) { + case UO_NEG: + word = aa64_neg(sf, rd, rn); + break; + case UO_BNOT: + word = aa64_mvn(sf, rd, rn); + break; + case UO_NOT: + aa64_emit32(mc, aa64_subs_imm(sf, /*ZR=*/31, rn, 0)); + word = aa64_cset_eq(sf, rd); + break; + default: + compiler_panic(t->c, impl_of(t)->loc, "aarch64 unop: op %d unimpl", + (int)op); + } + aa64_emit32(mc, word); +} + +static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 rd = reg_num(dst); + u32 rn = reg_num(src); + + switch (k) { + case CV_SEXT: { + if (src.cls != RC_INT || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes"); + } + u32 src_bits = type_byte_size(src.type) * 8u; + u32 sf_dst = type_is_64(dst.type) ? 1u : 0u; + aa64_emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); + return; + } + case CV_ZEXT: { + if (src.cls != RC_INT || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes"); + } + u32 src_bits = type_byte_size(src.type) * 8u; + if (src_bits == 32u) { + aa64_emit32(mc, aa64_mov_reg(0, rd, rn)); + } else { + aa64_emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); + } + return; + } + case CV_TRUNC: { + aa64_emit32(mc, aa64_mov_reg(0, rd, rn)); + return; + } + case CV_ITOF_S: { + u32 sf_src = type_is_64(src.type) ? 1u : 0u; + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + aa64_emit32(mc, aa64_scvtf(sf_src, type, rd, rn)); + return; + } + case CV_ITOF_U: { + u32 sf_src = type_is_64(src.type) ? 1u : 0u; + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + aa64_emit32(mc, aa64_ucvtf(sf_src, type, rd, rn)); + return; + } + case CV_FTOI_S: { + if (src.cls != RC_FP || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes"); + } + u32 sf = type_is_64(dst.type) ? 
1u : 0u; + u32 type = type_is_fp_double(src.type) ? 1u : 0u; + aa64_emit32(mc, aa64_fcvtzs(sf, type, rd, rn)); + return; + } + case CV_FTOI_U: { + if (src.cls != RC_FP || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes"); + } + u32 sf = type_is_64(dst.type) ? 1u : 0u; + u32 type = type_is_fp_double(src.type) ? 1u : 0u; + aa64_emit32(mc, aa64_fcvtzu(sf, type, rd, rn)); + return; + } + case CV_FEXT: { + aa64_emit32(mc, aa64_fcvt_d_s(rd, rn)); + return; + } + case CV_FTRUNC: { + aa64_emit32(mc, aa64_fcvt_s_d(rd, rn)); + return; + } + case CV_BITCAST: { + if (src.cls == RC_INT && dst.cls == RC_FP) { + u32 sz = type_byte_size(dst.type); + aa64_emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn) : aa64_fmov_s_w(rd, rn)); + } else if (src.cls == RC_FP && dst.cls == RC_INT) { + u32 sz = type_byte_size(src.type); + aa64_emit32(mc, sz == 8 ? aa64_fmov_x_d(rd, rn) : aa64_fmov_w_s(rd, rn)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 convert BITCAST: same-class not yet supported"); + } + return; + } + default: + compiler_panic(t->c, a->loc, "aarch64 convert kind %d unimpl", (int)k); + } +} + +/* ============================================================ + * Calls + * ============================================================ */ + +static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, + const CGABIValue* av, u32* next_int, u32* next_fp, + u32* stack_off) { + AAImpl* a = impl_of(t); + ABIArgInfo va_ai; + ABIArgPart va_pt; + const ABIArgInfo* ai = av->abi; + if (!ai) { + u32 sz = type_byte_size(av->type); + memset(&va_ai, 0, sizeof va_ai); + memset(&va_pt, 0, sizeof va_pt); + va_ai.kind = ABI_ARG_DIRECT; + va_ai.parts = &va_pt; + va_ai.nparts = 1; + va_pt.cls = (av->storage.cls == RC_FP) ? 
ABI_CLASS_FP : ABI_CLASS_INT; + va_pt.size = sz; + va_pt.align = sz; + va_pt.src_offset = 0; + ai = &va_ai; + if (fi && fi->vararg_on_stack) { + *next_int = 8; + *next_fp = 8; + } + } + if (ai->kind == ABI_ARG_IGNORE) return; + + if (ai->kind == ABI_ARG_INDIRECT) { + u32 dst_reg; + int to_stack = (*next_int >= 8); + if (!to_stack) + dst_reg = (*next_int)++; + else + dst_reg = 9; + if (av->storage.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot"); + aa64_emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0)); + } else if (av->storage.kind == OPK_INDIRECT) { + aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f, + av->storage.v.ind.ofs); + } else { + compiler_panic(t->c, a->loc, + "aarch64 call: INDIRECT arg storage kind %d unsupported", + (int)av->storage.kind); + } + if (to_stack) { + aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); + *stack_off += 8; + } + return; + } + + for (u16 i = 0; i < ai->nparts; ++i) { + const ABIArgPart* pt = &ai->parts[i]; + u32 sz = pt->size; + u32 sidx = size_idx_for_bytes(sz); + + if (pt->cls == ABI_CLASS_INT) { + int to_stack = (*next_int >= 8); + u32 dst_reg = to_stack ? 9u : (*next_int)++; + switch (av->storage.kind) { + case OPK_IMM: { + u32 sf = (sz == 8) ? 1u : 0u; + aa64_emit_load_imm(t->mc, sf, dst_reg, av->storage.v.imm); + break; + } + case OPK_REG: { + u32 sf = (sz == 8) ? 
1u : 0u; + aa64_emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage))); + break; + } + case OPK_LOCAL: { + AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad arg slot"); + i32 off = -(i32)s->off + (i32)pt->src_offset; + aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, 29, off)); + break; + } + case OPK_INDIRECT: { + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off)); + break; + } + default: + compiler_panic(t->c, a->loc, + "aarch64 call: arg storage kind %d unsupported", + (int)av->storage.kind); + } + if (to_stack) { + aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); + *stack_off += 8; + } + } else if (pt->cls == ABI_CLASS_FP) { + int to_stack = (*next_fp >= 8); + if (!to_stack) { + u32 dst_reg = (*next_fp)++; + switch (av->storage.kind) { + case OPK_REG: { + u32 type = (sz == 8) ? 
1u : 0u; + aa64_emit32(t->mc, aa64_fmov_reg(type, dst_reg, reg_num(av->storage))); + break; + } + case OPK_INDIRECT: { + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + aa64_emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off)); + break; + } + default: + compiler_panic(t->c, a->loc, + "aarch64 call: FP arg storage kind %d unsupported", + (int)av->storage.kind); + } + } else { + switch (av->storage.kind) { + case OPK_REG: + aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31, + (i32)*stack_off)); + break; + case OPK_INDIRECT: { + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + aa64_emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off)); + aa64_emit32(t->mc, aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off)); + break; + } + default: + compiler_panic( + t->c, a->loc, + "aarch64 call: FP stack-arg storage kind %d unsupported", + (int)av->storage.kind); + } + *stack_off += 8; + } + } else { + compiler_panic(t->c, a->loc, "aarch64 call: ABI class %d unimpl", + (int)pt->cls); + } + } +} + +static void aa_call(CGTarget* t, const CGCallDesc* d) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + u32 next_int = 0, next_fp = 0, stack_off = 0; + + if (d->abi && d->abi->has_sret) { + if (d->ret.storage.kind != OPK_LOCAL) { + compiler_panic(t->c, a->loc, + "aarch64 call: sret destination must be LOCAL"); + } + AASlot* s = aa64_slot_get(a, d->ret.storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad sret slot"); + aa64_emit32(mc, aa64_sub_imm(1, 8, 29, s->off, 0)); + } + + for (u32 i = 0; i < d->nargs; ++i) { + emit_arg_value(t, d->abi, &d->args[i], 
&next_int, &next_fp, &stack_off); + } + + u32 needed = (stack_off + 15u) & ~15u; + if (needed > a->max_outgoing) a->max_outgoing = needed; + + if (d->callee.kind == OPK_GLOBAL) { + u32 bl_pos = mc->pos(mc); + aa64_emit32(mc, aa64_bl_base()); + mc->emit_reloc_at(mc, mc->section_id, bl_pos, R_AARCH64_CALL26, + d->callee.v.global.sym, d->callee.v.global.addend, 0, 0); + } else if (d->callee.kind == OPK_REG) { + aa64_emit32(mc, aa64_blr(reg_num(d->callee))); + } else { + compiler_panic(t->c, a->loc, "aarch64 call: callee kind %d unsupported", + (int)d->callee.kind); + } + + const ABIArgInfo* ri = &d->abi->ret; + if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) { + return; + } + if (ri->nparts == 0) return; + + Operand rs = d->ret.storage; + u32 next_int_ret = 0, next_fp_ret = 0; + for (u16 i = 0; i < ri->nparts; ++i) { + const ABIArgPart* p = &ri->parts[i]; + u32 src_reg; + if (p->cls == ABI_CLASS_INT) { + src_reg = next_int_ret++; + } else if (p->cls == ABI_CLASS_FP) { + src_reg = next_fp_ret++; + } else { + compiler_panic(t->c, a->loc, "aarch64 call: ret part cls %d unimpl", + (int)p->cls); + } + + if (rs.kind == OPK_REG) { + if (ri->nparts != 1) { + compiler_panic(t->c, a->loc, + "aarch64 call: REG ret_storage with %u parts", + (unsigned)ri->nparts); + } + if (p->cls == ABI_CLASS_INT) { + u32 sf = (p->size == 8) ? 1u : 0u; + aa64_emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg)); + } else { + u32 type = (p->size == 8) ? 
1u : 0u; + aa64_emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg)); + } + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot"); + base_reg = 29; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0x1f; + base_off = rs.v.ind.ofs; + } + u32 sidx = size_idx_for_bytes(p->size); + i32 off = base_off + (i32)p->src_offset; + if (p->cls == ABI_CLASS_INT) { + aa64_emit32(mc, aa64_stur(sidx, src_reg, base_reg, off)); + } else { + aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, base_reg, off)); + } + } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { + /* void return placeholder */ + } else { + compiler_panic(t->c, a->loc, + "aarch64 call: ret_storage kind %d unsupported", + (int)rs.kind); + } + } +} + +static void aa_ret(CGTarget* t, const CGABIValue* val) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + if (val) { + const ABIArgInfo* ri = val->abi; + if (ri && ri->kind == ABI_ARG_INDIRECT) { + if (val->storage.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad sret slot"); + if (a->sret_ptr_slot != FRAME_SLOT_NONE) { + AASlot* sp = aa64_slot_get(a, a->sret_ptr_slot); + if (sp) aa64_emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off)); + } + u32 nbytes = s->size; + u32 i = 0; + while (i + 8 <= nbytes) { + aa64_emit32(mc, aa64_ldur(3, 9, 29, -(i32)s->off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(3, 9, 8, i)); + i += 8; + } + while (i + 4 <= nbytes) { + aa64_emit32(mc, aa64_ldur(2, 9, 29, -(i32)s->off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(2, 9, 8, i)); + i += 4; + } + while (i + 2 <= nbytes) { + aa64_emit32(mc, aa64_ldur(1, 9, 29, -(i32)s->off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(1, 9, 8, i)); + i += 2; + } + while (i < nbytes) { + aa64_emit32(mc, 
aa64_ldur(0, 9, 29, -(i32)s->off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(0, 9, 8, i)); + i += 1; + } + } else if (val->storage.kind == OPK_INDIRECT) { + u32 nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "aarch64 ret indirect: missing aggregate size"); + } + if (a->sret_ptr_slot != FRAME_SLOT_NONE) { + AASlot* sp = aa64_slot_get(a, a->sret_ptr_slot); + if (sp) aa64_emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off)); + } + u32 base_reg = val->storage.v.ind.base & 0x1f; + i32 base_off = val->storage.v.ind.ofs; + u32 i = 0; + while (i + 8 <= nbytes) { + aa64_emit32(mc, aa64_ldur(3, 9, base_reg, base_off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(3, 9, 8, i)); + i += 8; + } + while (i + 4 <= nbytes) { + aa64_emit32(mc, aa64_ldur(2, 9, base_reg, base_off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(2, 9, 8, i)); + i += 4; + } + while (i + 2 <= nbytes) { + aa64_emit32(mc, aa64_ldur(1, 9, base_reg, base_off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(1, 9, 8, i)); + i += 2; + } + while (i < nbytes) { + aa64_emit32(mc, aa64_ldur(0, 9, base_reg, base_off + (i32)i)); + aa64_emit32(mc, aa64_str_uimm(0, 9, 8, i)); + i += 1; + } + } else { + compiler_panic(t->c, a->loc, + "aarch64 ret indirect: storage kind %d unsupported", + (int)val->storage.kind); + } + } else if (val->storage.kind == OPK_REG) { + if (val->storage.cls == RC_FP) { + u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u; + aa64_emit32(mc, aa64_fmov_reg(type, /*Rd=*/0, reg_num(val->storage))); + } else { + u32 sf = type_is_64(val->storage.type) ? 1u : 0u; + aa64_emit32(mc, aa64_mov_reg(sf, /*Rd=*/0, reg_num(val->storage))); + } + } else if (val->storage.kind == OPK_IMM) { + u32 sf = type_is_64(val->storage.type) ? 
1u : 0u; + aa64_emit_load_imm(mc, sf, /*Rd=*/0, val->storage.v.imm); + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot"); + base_reg = 29; + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0x1f; + base_off = val->storage.v.ind.ofs; + } + const ABIArgInfo* ri2 = val->abi; + for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { + const ABIArgPart* pt = &ri2->parts[i]; + u32 sidx = size_idx_for_bytes(pt->size); + i32 off = base_off + (i32)pt->src_offset; + if (pt->cls == ABI_CLASS_INT) { + aa64_emit32(mc, aa64_ldur(sidx, /*Rt=*/i, base_reg, off)); + } else if (pt->cls == ABI_CLASS_FP) { + aa64_emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/i, base_reg, off)); + } else { + compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl", + (int)pt->cls); + } + } + } + } + u32 bpos = mc->pos(mc); + aa64_emit32(mc, aa64_b_base()); + mc->emit_label_ref(mc, a->epilogue_label, R_AARCH64_JUMP26, 4, 0); + (void)bpos; +} + +/* ============================================================ + * alloca + * ============================================================ */ + +static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + if (d.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "aarch64 alloca: dst must be REG"); + } + if (align > 16) { + compiler_panic(t->c, a->loc, + "aarch64 alloca: align %u > 16 not yet supported", align); + } + + if (sz.kind == OPK_IMM) { + i64 v = sz.v.imm; + if (v < 0) { + compiler_panic(t->c, a->loc, "aarch64 alloca: negative size"); + } + u64 aligned = ((u64)v + 15u) & ~(u64)15u; + if (aligned == 0) aligned = 16; + if (aligned > 0xfffu) { + compiler_panic(t->c, a->loc, + "aarch64 alloca: const size %llu too large for v1", + (unsigned 
long long)aligned); + } + aa64_emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=SP*/ 31, (u32)aligned, 0)); + } else if (sz.kind == OPK_REG) { + u32 sz_reg = reg_num(sz); + aa64_emit32(mc, aa64_add_imm(1, 9, sz_reg, 15u, 0)); + aa64_emit32(mc, aa64_ubfm(1, 9, 9, 4, 63)); + aa64_emit32(mc, aa64_ubfm(1, 9, 9, 60, 59)); + aa64_emit32(mc, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, 9)); + } else { + compiler_panic(t->c, a->loc, "aarch64 alloca: size kind %d unsupported", + (int)sz.kind); + } + + if (a->nadd_patches == a->add_patches_cap) { + u32 ncap = a->add_patches_cap ? a->add_patches_cap * 2 : 4; + struct AAAllocaPatch* nb = + arena_array(t->c->tu, struct AAAllocaPatch, ncap); + if (a->add_patches) + memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches); + a->add_patches = nb; + a->add_patches_cap = ncap; + } + u32 dst_reg = reg_num(d); + a->add_patches[a->nadd_patches].pos = mc->pos(mc); + a->add_patches[a->nadd_patches].dst_reg = dst_reg; + a->nadd_patches++; + aa64_emit32(mc, aa64_add_imm(1, dst_reg, /*Rn=SP*/ 31, 0, 0)); + a->has_alloca = 1; +} + +/* ============================================================ + * Varargs + * ============================================================ */ + +static void emit_fp_off(MCEmitter* mc, u32 dst, i32 ofs) { + if (ofs == 0) + aa64_emit32(mc, aa64_mov_reg(1, dst, 29)); + else if (ofs > 0 && (u32)ofs <= 0xfff) + aa64_emit32(mc, aa64_add_imm(1, dst, 29, (u32)ofs, 0)); + else if (ofs < 0 && (u32)(-ofs) <= 0xfff) + aa64_emit32(mc, aa64_sub_imm(1, dst, 29, (u32)(-ofs), 0)); + else { + aa64_emit_load_imm(mc, 1, dst, ofs); + aa64_emit32(mc, aa64_add(1, dst, 29, dst)); + } +} + +static void aa_va_start_(CGTarget* t, Operand ap_op) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (!a->is_variadic) { + compiler_panic(t->c, a->loc, "aarch64 va_start: function not variadic"); + } + u32 ap = reg_num(ap_op); + AASlot* gs = aa64_slot_get(a, a->gp_save_slot); + AASlot* fs = aa64_slot_get(a, a->fp_save_slot); + + { + u32 ofs = 
16u + a->next_param_stack; + if (ofs <= 0xfff) + aa64_emit32(mc, aa64_add_imm(1, 9, 29, ofs, 0)); + else { + aa64_emit_load_imm(mc, 1, 9, (i64)ofs); + aa64_emit32(mc, aa64_add(1, 9, 29, 9)); + } + aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 0)); + } + emit_fp_off(mc, 9, -(i32)gs->off + (i32)gs->size); + aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 8)); + emit_fp_off(mc, 9, -(i32)fs->off + (i32)fs->size); + aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 16)); + aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_int * 8u) - 64)); + aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 24)); + aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_fp * 16u) - 128)); + aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 28)); +} + +static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op, + const Type* ty) { + MCEmitter* mc = t->mc; + u32 ap = reg_num(ap_op); + int is_fp = (dst.cls == RC_FP); + u32 offs_field = is_fp ? 28u : 24u; + u32 top_field = is_fp ? 16u : 8u; + u32 stride_reg = is_fp ? 16u : 8u; + u32 sz = type_byte_size(ty); + u32 sidx = size_idx_for_bytes(sz); + + MCLabel L_stack = mc->label_new(mc); + MCLabel L_done = mc->label_new(mc); + + aa64_emit32(mc, aa64_ldur(2, 9, ap, (i32)offs_field)); + aa64_emit32(mc, aa64_subs_imm(0, 31, 9, 0)); + aa64_emit32(mc, aa64_b_cond(0xa /*GE*/)); + mc->emit_label_ref(mc, L_stack, R_AARCH64_CONDBR19, 4, 0); + + aa64_emit32(mc, aa64_ldur(3, 10, ap, (i32)top_field)); + aa64_emit32(mc, aa64_sbfm(1, 12, 9, 0, 31)); + aa64_emit32(mc, aa64_add(1, 11, 10, 12)); + if (is_fp) + aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 11, 0)); + else + aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 11, 0)); + aa64_emit32(mc, aa64_add_imm(0, 9, 9, stride_reg, 0)); + aa64_emit32(mc, aa64_stur(2, 9, ap, (i32)offs_field)); + aa64_emit32(mc, aa64_b_base()); + mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0); + + mc->label_place(mc, L_stack); + aa64_emit32(mc, aa64_ldur(3, 10, ap, 0)); + if (is_fp) + aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 10, 0)); + else + 
aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 10, 0)); + aa64_emit32(mc, aa64_add_imm(1, 10, 10, 8u, 0)); + aa64_emit32(mc, aa64_stur(3, 10, ap, 0)); + + mc->label_place(mc, L_done); +} + +static void aa_va_end_(CGTarget* t, Operand a) { + (void)t; + (void)a; +} + +static void aa_va_copy_(CGTarget* t, Operand d, Operand s) { + MCEmitter* mc = t->mc; + u32 dr = reg_num(d); + u32 sr = reg_num(s); + for (u32 i = 0; i < 32u; i += 8u) { + aa64_emit32(mc, aa64_ldur(3, 9, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(3, 9, dr, (i32)i)); + } +} + +/* ============================================================ + * Atomics + * ============================================================ */ + +static inline u32 aa64_ldar(u32 sf64, u32 Rt, u32 Rn) { + return (sf64 ? 0xC8DFFC00u : 0x88DFFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_stlr(u32 sf64, u32 Rt, u32 Rn) { + return (sf64 ? 0xC89FFC00u : 0x889FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldxr(u32 sf64, u32 Rt, u32 Rn) { + return (sf64 ? 0xC85F7C00u : 0x885F7C00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldaxr(u32 sf64, u32 Rt, u32 Rn) { + return (sf64 ? 0xC85FFC00u : 0x885FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_stxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) { + return (sf64 ? 0xC8007C00u : 0x88007C00u) | ((Rs & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_stlxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) { + return (sf64 ? 
0xC800FC00u : 0x8800FC00u) | ((Rs & 0x1f) << 16) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_cbnz(u32 sf64, u32 Rt) { + return 0x35000000u | (sf64 << 31) | (Rt & 0x1f); +} + +static int mem_order_is_acquire(MemOrder o) { + return o == MO_ACQUIRE || o == MO_ACQ_REL || o == MO_SEQ_CST || + o == MO_CONSUME; +} +static int mem_order_is_release(MemOrder o) { + return o == MO_RELEASE || o == MO_ACQ_REL || o == MO_SEQ_CST; +} + +static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, + MemOrder ord) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 1u : 0u; + + u32 base; + if (addr.kind == OPK_REG) { + base = reg_num(addr); + } else if (addr.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot"); + base = 9u; + aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 atomic_load: addr kind %d unsupported", + (int)addr.kind); + } + if (mem_order_is_acquire(ord)) { + aa64_emit32(mc, aa64_ldar(sf, reg_num(dst), base)); + } else { + u32 sidx = size_idx_for_bytes(ma.size); + aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), base, 0)); + } +} + +static void aa_atomic_store(CGTarget* t, Operand addr, Operand src, + MemAccess ma, MemOrder ord) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 
1u : 0u; + + u32 src_reg; + if (src.kind == OPK_IMM) { + src_reg = 10u; + aa64_emit_load_imm(mc, sf, src_reg, src.v.imm); + } else if (src.kind == OPK_REG) { + src_reg = reg_num(src); + } else { + compiler_panic(t->c, a->loc, + "aarch64 atomic_store: src kind %d unsupported", + (int)src.kind); + } + u32 base; + if (addr.kind == OPK_REG) { + base = reg_num(addr); + } else if (addr.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot"); + base = 9u; + aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 atomic_store: addr kind %d unsupported", + (int)addr.kind); + } + if (mem_order_is_release(ord)) { + aa64_emit32(mc, aa64_stlr(sf, src_reg, base)); + } else { + u32 sidx = size_idx_for_bytes(ma.size); + aa64_emit32(mc, aa64_stur(sidx, src_reg, base, 0)); + } +} + +static void emit_rmw_combine(MCEmitter* mc, AtomicOp op, u32 sf, u32 dst_new, + u32 prior, u32 val) { + switch (op) { + case AO_XCHG: aa64_emit32(mc, aa64_mov_reg(sf, dst_new, val)); break; + case AO_ADD: aa64_emit32(mc, aa64_add(sf, dst_new, prior, val)); break; + case AO_SUB: aa64_emit32(mc, aa64_sub(sf, dst_new, prior, val)); break; + case AO_AND: aa64_emit32(mc, aa64_and(sf, dst_new, prior, val)); break; + case AO_OR: aa64_emit32(mc, aa64_orr(sf, dst_new, prior, val)); break; + case AO_XOR: aa64_emit32(mc, aa64_eor(sf, dst_new, prior, val)); break; + case AO_NAND: + aa64_emit32(mc, aa64_and(sf, dst_new, prior, val)); + aa64_emit32(mc, aa64_mvn(sf, dst_new, dst_new)); + break; + default: + aa64_emit32(mc, aa64_mov_reg(sf, dst_new, val)); + break; + } +} + +static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, + Operand val, MemAccess ma, MemOrder ord) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 
1u : 0u; + + u32 base = 9u; + if (addr.kind == OPK_REG) { + aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr))); + } else if (addr.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot"); + aa64_emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0)); + } else { + compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported", + (int)addr.kind); + } + u32 vreg = 10u; + if (val.kind == OPK_IMM) { + aa64_emit_load_imm(mc, sf, vreg, val.v.imm); + } else if (val.kind == OPK_REG) { + aa64_emit32(mc, aa64_mov_reg(sf, vreg, reg_num(val))); + } else { + compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: val kind %d unsupported", + (int)val.kind); + } + + int do_acq = mem_order_is_acquire(ord); + int do_rel = mem_order_is_release(ord); + + MCLabel L_retry = mc->label_new(mc); + mc->label_place(mc, L_retry); + + if (do_acq) + aa64_emit32(mc, aa64_ldaxr(sf, reg_num(dst), base)); + else + aa64_emit32(mc, aa64_ldxr(sf, reg_num(dst), base)); + + emit_rmw_combine(mc, op, sf, /*new=*/11u, /*prior=*/reg_num(dst), vreg); + + if (do_rel) + aa64_emit32(mc, aa64_stlxr(sf, /*Rs=*/12u, /*Rt=*/11u, base)); + else + aa64_emit32(mc, aa64_stxr(sf, /*Rs=*/12u, /*Rt=*/11u, base)); + + u32 cbnz_pos = mc->pos(mc); + aa64_emit32(mc, aa64_cbnz(0, /*Rt=*/12u)); + mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0); + (void)cbnz_pos; +} + +static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, + Operand expected, Operand desired, MemAccess ma, + MemOrder succ, MemOrder fail) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 
1u : 0u; + (void)fail; + + u32 base = 9u; + if (addr.kind == OPK_REG) + aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr))); + else if (addr.kind == OPK_LOCAL) { + AASlot* s = aa64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot"); + aa64_emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0)); + } else { + compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported", + (int)addr.kind); + } + if (expected.kind == OPK_IMM) + aa64_emit_load_imm(mc, sf, 10, expected.v.imm); + else if (expected.kind == OPK_REG) + aa64_emit32(mc, aa64_mov_reg(sf, 10, reg_num(expected))); + else + compiler_panic(t->c, a->loc, "aarch64 atomic_cas: exp kind %d unsupported", + (int)expected.kind); + if (desired.kind == OPK_IMM) + aa64_emit_load_imm(mc, sf, 11, desired.v.imm); + else if (desired.kind == OPK_REG) + aa64_emit32(mc, aa64_mov_reg(sf, 11, reg_num(desired))); + else + compiler_panic(t->c, a->loc, "aarch64 atomic_cas: des kind %d unsupported", + (int)desired.kind); + + int do_acq = mem_order_is_acquire(succ); + int do_rel = mem_order_is_release(succ); + + MCLabel L_retry = mc->label_new(mc); + MCLabel L_fail = mc->label_new(mc); + MCLabel L_done = mc->label_new(mc); + + mc->label_place(mc, L_retry); + if (do_acq) + aa64_emit32(mc, aa64_ldaxr(sf, reg_num(prior), base)); + else + aa64_emit32(mc, aa64_ldxr(sf, reg_num(prior), base)); + + aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), 10u)); + aa64_emit32(mc, aa64_b_cond(0x1u /*NE*/)); + mc->emit_label_ref(mc, L_fail, R_AARCH64_CONDBR19, 4, 0); + + if (do_rel) + aa64_emit32(mc, aa64_stlxr(sf, 12u, 11u, base)); + else + aa64_emit32(mc, aa64_stxr(sf, 12u, 11u, base)); + aa64_emit32(mc, aa64_cbnz(0, 12u)); + mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0); + + aa64_emit_load_imm(mc, 0, reg_num(ok), 1); + aa64_emit32(mc, aa64_b_base()); + mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0); + + mc->label_place(mc, L_fail); + aa64_emit32(mc, 
aa64_clrex(AA64_BARRIER_OPT_SY)); + aa64_emit_load_imm(mc, 0, reg_num(ok), 0); + + mc->label_place(mc, L_done); +} + +static void aa_fence(CGTarget* t, MemOrder o) { + (void)o; + if (o == MO_RELAXED) return; + aa64_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); +} + +/* ============================================================ + * Intrinsics + * ============================================================ */ + +static inline u32 aa64_rev16_w(u32 Rd, u32 Rn) { + return 0x5AC00400u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_rev_w(u32 Rd, u32 Rn) { + return 0x5AC00800u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_rev_x(u32 Rd, u32 Rn) { + return 0xDAC00C00u | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_rbit(u32 sf64, u32 Rd, u32 Rn) { + return (sf64 ? 0xDAC00000u : 0x5AC00000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_clz(u32 sf64, u32 Rd, u32 Rn) { + return (sf64 ? 0xDAC01000u : 0x5AC01000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); +} +static inline u32 aa64_cnt_8b(u32 Vd, u32 Vn) { + return 0x0E205800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f); +} +static inline u32 aa64_addv_b_8b(u32 Vd, u32 Vn) { + return 0x0E31B800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f); +} +static inline u32 aa64_adds_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) { + return 0x2B000000u | (sf << 31) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} +static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) { + return aa64_dp3_pack((AA64DP3){ + .sf = 1, .op31 = 1, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd}); +} +static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) { + return aa64_smaddl(Rd, Rn, Rm, AA64_ZR); +} +static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) { + return 0xEB200000u | ((Rm & 0x1f) << 16) | (6u << 13) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} + +static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, + const Operand* args, u32 na) { + AAImpl* a = impl_of(t); + 
MCEmitter* mc = t->mc; + (void)nd; + + switch (kind) { + case INTRIN_POPCOUNT: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 sz_in = type_byte_size(src.type); + if (sz_in == 8) + aa64_emit32(mc, aa64_fmov_d_x(0, reg_num(src))); + else + aa64_emit32(mc, aa64_fmov_s_w(0, reg_num(src))); + aa64_emit32(mc, aa64_cnt_8b(0, 0)); + aa64_emit32(mc, aa64_addv_b_8b(0, 0)); + aa64_emit32(mc, aa64_fmov_w_s(reg_num(dst), 0)); + return; + } + case INTRIN_CLZ: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 sf = type_is_64(src.type) ? 1u : 0u; + aa64_emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(src))); + return; + } + case INTRIN_CTZ: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 sf = type_is_64(src.type) ? 1u : 0u; + aa64_emit32(mc, aa64_rbit(sf, reg_num(dst), reg_num(src))); + aa64_emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(dst))); + return; + } + case INTRIN_BSWAP16: { + aa64_emit32(mc, aa64_rev16_w(reg_num(dsts[0]), reg_num(args[0]))); + return; + } + case INTRIN_BSWAP32: { + aa64_emit32(mc, aa64_rev_w(reg_num(dsts[0]), reg_num(args[0]))); + return; + } + case INTRIN_BSWAP64: { + aa64_emit32(mc, aa64_rev_x(reg_num(dsts[0]), reg_num(args[0]))); + return; + } + case INTRIN_MEMCPY: + case INTRIN_MEMMOVE: { + Operand da = args[0], sa = args[1], nb = args[2]; + if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic(t->c, a->loc, + "aarch64 intrinsic: %s with non-const n or non-REG ptr", + kind == INTRIN_MEMCPY ? 
"memcpy" : "memmove"); + } + u32 dr = reg_num(da); + u32 sr = reg_num(sa); + u32 n = (u32)nb.v.imm; + if (kind == INTRIN_MEMCPY) { + u32 i = 0; + while (i + 8 <= n) { + aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i)); + i += 8; + } + while (i + 4 <= n) { + aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i)); + i += 4; + } + while (i + 2 <= n) { + aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i)); + i += 2; + } + while (i < n) { + aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i)); + i += 1; + } + } else { + u32 i = n; + while (i >= 8) { + i -= 8; + aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i)); + } + while (i >= 4) { + i -= 4; + aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i)); + } + while (i >= 2) { + i -= 2; + aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i)); + } + while (i >= 1) { + i -= 1; + aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); + aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i)); + } + } + return; + } + case INTRIN_MEMSET: { + Operand da = args[0], bv = args[1], nb = args[2]; + if (da.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic( + t->c, a->loc, + "aarch64 intrinsic: memset with non-const n / non-REG ptr"); + } + u32 dr = reg_num(da); + u32 n = (u32)nb.v.imm; + u32 byte; + u32 src_reg; + if (bv.kind == OPK_IMM) { + byte = (u32)(bv.v.imm & 0xffu); + if (byte == 0) { + src_reg = 31u; + } else { + u64 b64 = byte; + b64 |= b64 << 8; + b64 |= b64 << 16; + b64 |= b64 << 32; + aa64_emit_load_imm(mc, 1, 12, (i64)b64); + src_reg = 12u; + } + } else if (bv.kind == OPK_REG) { + aa64_emit_load_imm(mc, 1, 12, (i64)0x0101010101010101ll); + aa64_emit32(mc, aa64_madd(1, 12, reg_num(bv), 12, AA64_ZR)); + src_reg = 12u; + } else { + 
compiler_panic(t->c, a->loc, + "aarch64 intrinsic: memset byte kind %d unsupported", + (int)bv.kind); + } + u32 i = 0; + while (i + 8 <= n) { + aa64_emit32(mc, aa64_stur(3, src_reg, dr, (i32)i)); + i += 8; + } + while (i + 4 <= n) { + aa64_emit32(mc, aa64_stur(2, src_reg, dr, (i32)i)); + i += 4; + } + while (i + 2 <= n) { + aa64_emit32(mc, aa64_stur(1, src_reg, dr, (i32)i)); + i += 2; + } + while (i < n) { + aa64_emit32(mc, aa64_stur(0, src_reg, dr, (i32)i)); + i += 1; + } + return; + } + case INTRIN_PREFETCH: + (void)args; + (void)na; + return; + case INTRIN_ASSUME_ALIGNED: { + Operand src = args[0]; + Operand dst = dsts[0]; + if (reg_num(src) != reg_num(dst)) { + aa64_emit32(mc, aa64_mov_reg(1, reg_num(dst), reg_num(src))); + } + return; + } + case INTRIN_EXPECT: { + Operand val = args[0]; + Operand dst = dsts[0]; + u32 sf = type_is_64(dst.type) ? 1u : 0u; + if (val.kind == OPK_REG) { + if (reg_num(val) != reg_num(dst)) { + aa64_emit32(mc, aa64_mov_reg(sf, reg_num(dst), reg_num(val))); + } + } else if (val.kind == OPK_IMM) { + aa64_emit_load_imm(mc, sf, reg_num(dst), val.v.imm); + } else { + compiler_panic(t->c, a->loc, + "aarch64 intrinsic: expect val kind %d unsupported", + (int)val.kind); + } + return; + } + case INTRIN_UNREACHABLE: + case INTRIN_TRAP: + aa64_emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u)); + return; + case INTRIN_ADD_OVERFLOW: + case INTRIN_SUB_OVERFLOW: { + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + u32 sf = type_is_64(dval.type) ? 1u : 0u; + u32 ra = aa64_force_reg_int(t, a_op, sf, 9); + u32 rb = aa64_force_reg_int(t, b_op, sf, (ra == 9) ? 10u : 9u); + u32 word = (kind == INTRIN_ADD_OVERFLOW) + ? 
aa64_adds_reg(sf, reg_num(dval), ra, rb) + : aa64_subs_reg(sf, reg_num(dval), ra, rb); + aa64_emit32(mc, word); + aa64_emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/)); + return; + } + case INTRIN_MUL_OVERFLOW: { + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + u32 sf = type_is_64(dval.type) ? 1u : 0u; + if (sf) { + compiler_panic( + t->c, a->loc, + "aarch64 intrinsic: mul_overflow on i64 not yet supported"); + } + u32 ra = aa64_force_reg_int(t, a_op, 0, 9); + u32 rb = aa64_force_reg_int(t, b_op, 0, (ra == 9) ? 10u : 9u); + aa64_emit32(mc, aa64_smull(/*X*/ 11u, ra, rb)); + aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, /*Xn=*/11u, /*Wm=*/11u)); + aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/)); + aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), 11u)); + return; + } + default: + compiler_panic(t->c, a->loc, "aarch64 intrinsic: kind %d unsupported", + (int)kind); + } +} + +/* ============================================================ + * Inline asm block + * ============================================================ */ + +static void aa_asm_block(CGTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 no, Operand* oo, + const AsmConstraint* ins, u32 ni, const Operand* io, + const Sym* clobs, u32 nc) { + AAImpl* a_impl = impl_of(t); + for (u32 i = 0; i < nc; ++i) { + Reg phys; + RegClass cls; + if (t->resolve_reg_name(t, clobs[i], &phys, &cls) != 0) continue; + if (cls == RC_INT) { + u32 idx = (u32)phys; + RegPool* p = &a_impl->int_pool; + if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { + u32 off = idx - p->base + 1u; + if (off > p->hwm) p->hwm = off; + } + } else if (cls == RC_FP) { + u32 idx = (u32)phys; + RegPool* p = &a_impl->fp_pool; + if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { + u32 off = idx - p->base + 1u; + if (off > p->hwm) p->hwm = off; + } + } + } + AA64Asm* a = aa64_asm_open(t->c); + aa64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc); + 
aa64_asm_run_template(a, t->mc, tmpl); + aa64_asm_close(a); +} + +/* ============================================================ + * Lifecycle / vtable constructor + * ============================================================ */ + +static void aa_set_loc(CGTarget* t, SrcLoc loc) { + impl_of(t)->loc = loc; + t->mc->set_loc(t->mc, loc); +} + +static void aa_finalize(CGTarget* t) { (void)t; } + +static void aa_destroy(CGTarget* t) { (void)t; } + +static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } + +CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { + AAImpl* a = arena_new(c->tu, AAImpl); + memset(a, 0, sizeof *a); + + CGTarget* t = &a->base; + t->c = c; + t->obj = o; + t->mc = m; + + t->func_begin = aa_func_begin; + t->func_end = aa_func_end; + t->frame_slot = aa_frame_slot; + t->param = aa_param; + + t->load_imm = aa_load_imm; + t->load_const = aa_load_const; + t->copy = aa_copy; + t->load = aa_load; + t->store = aa_store; + t->addr_of = aa_addr_of; + t->tls_addr_of = aa_tls_addr_of; + t->copy_bytes = aa_copy_bytes; + t->set_bytes = aa_set_bytes; + t->bitfield_load = aa_bitfield_load; + t->bitfield_store = aa_bitfield_store; + + t->binop = aa_binop; + t->unop = aa_unop; + t->convert = aa_convert; + + t->call = aa_call; + t->ret = aa_ret; + + t->alloca_ = aa_alloca_; + t->va_start_ = aa_va_start_; + t->va_arg_ = aa_va_arg_; + t->va_end_ = aa_va_end_; + t->va_copy_ = aa_va_copy_; + + t->setjmp_ = NULL; + t->longjmp_ = NULL; + + t->atomic_load = aa_atomic_load; + t->atomic_store = aa_atomic_store; + t->atomic_rmw = aa_atomic_rmw; + t->atomic_cas = aa_atomic_cas; + t->fence = aa_fence; + + t->intrinsic = aa_intrinsic; + t->asm_block = aa_asm_block; + + t->set_loc = aa_set_loc; + t->finalize = aa_finalize; + t->destroy = aa_destroy; + + /* alloc/label/scope vtable entries */ + aa_alloc_vtable_init(t); + + /* Suppress unused warning. 
*/ + (void)type_is_signed; + + compiler_defer(c, cgt_cleanup, t); + return t; +} diff --git a/src/arch/rv64.c b/src/arch/rv64.c @@ -1,2765 +0,0 @@ -/* Minimal RISC-V (RV64IMFD, LP64D) CGTarget. - * - * Single-pass codegen mirroring src/arch/aarch64.c. The frame uses s0 - * (x8) as the frame pointer; locals live at s0-relative negative - * offsets, callee-save spills and outgoing args at sp-relative positive - * offsets. The prologue is reserved as a NOP placeholder at func_begin - * and patched at func_end once frame_size and the callee-save high- - * water marks are known. - * - * Reg allocator: lowest-bit-first over s2..s11 (int) and fs2..fs11 (fp). - * Scratch registers held outside the pools are t0..t3 (x5..x7, x28). - * - * Scope: this backend covers the v1 cg corpus paths the aarch64 backend - * covers, with these explicit gaps that still panic: - * - va_*, alloca, asm_block, atomic_cas (partial), intrinsic - * INTRIN_MUL_OVERFLOW i64. */ - -#include <string.h> - -#include "arch/arch.h" -#include "arch/rv64.h" -#include "arch/rv64_isa.h" -#include "core/arena.h" -#include "obj/obj.h" -#include "type/type.h" - -#define RV_PROLOGUE_WORDS 32u - -/* ============================================================ - * RegPool (copy of the aa64 helper — bit-set free mask). */ -typedef struct RegPool { - u32 free; - u32 hwm; - u8 base; - u8 nregs; - u8 pad[2]; -} RegPool; - -static void regpool_init(RegPool* p, u8 base, u8 nregs) { - p->base = base; - p->nregs = nregs; - p->hwm = 0; - p->free = (nregs >= 32u) ? 
0xFFFFFFFFu : ((1u << nregs) - 1u); -} - -static Reg regpool_alloc(RegPool* p) { - if (p->free == 0) return (Reg)REG_NONE; - u32 idx = (u32)__builtin_ctz(p->free); - p->free &= ~(1u << idx); - if (idx + 1u > p->hwm) p->hwm = idx + 1u; - return (Reg)(p->base + idx); -} - -static int regpool_free(RegPool* p, Reg r) { - u32 rn = (u32)r; - if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0; - u32 idx = rn - p->base; - u32 bit = 1u << idx; - if (p->free & bit) return -1; - p->free |= bit; - return 1; -} - -/* ============================================================ - * RImpl */ - -typedef struct RvSlot { - u32 off; /* bytes below s0 (positive); address = s0 - off */ - u32 size; - u32 align; - u8 kind; - u8 pad[3]; -} RvSlot; - -typedef struct RvScope { - u8 kind; - u8 has_else; - u8 pad[2]; - MCLabel else_label; - MCLabel end_label; - Label break_label; - Label continue_label; -} RvScope; - -typedef struct RImpl { - CGTarget base; - SrcLoc loc; - const CGFuncDesc* fd; - - u32 func_start; - u32 prologue_pos; - MCLabel epilogue_label; - - RvSlot* slots; - u32 nslots; - u32 slots_cap; - u32 cum_off; - u32 max_outgoing; - /* fp_pair_off captures the offset from sp where the saved-s0/ra pair - * sits. Computed at func_end. Stored so post-prologue sret/varargs - * stores written by func_begin don't depend on it (they use s0). */ - u32 fp_pair_off; - - u32 next_param_int; - u32 next_param_fp; - u32 next_param_stack; - u8 has_sret; - FrameSlot sret_ptr_slot; - - RegPool int_pool; - RegPool fp_pool; - - RvScope* scopes; - u32 nscopes; - u32 scopes_cap; - - u8 has_alloca; - /* alloca patch list: each call emits `addi dst, sp, 0` and registers - * the (pos, dst_reg) for patching with max_outgoing at func_end. */ - struct RvAllocaPatch { - u32 pos; - u32 dst_reg; - }* add_patches; - u32 nadd_patches; - u32 add_patches_cap; - - /* Variadic register save area: 64 bytes (a0..a7). Allocated lazily on - * the first va_start. The prologue patcher spills a-regs into it. 
*/ - u8 is_variadic; - FrameSlot gp_save_slot; -} RImpl; - -static RImpl* impl_of(CGTarget* t) { return (RImpl*)t; } - -/* Forward decls. */ -static FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d); -static RvSlot* slot_get(RImpl* a, FrameSlot fs); -static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); -static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); - -/* ---- type helpers ---- */ -static int type_is_64(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 1; - default: - return 0; - } -} -static int type_is_fp_double(const Type* t) { - return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); -} -static u32 type_byte_size(const Type* t) { - if (!t) return 4; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_UCHAR: - case TY_BOOL: - return 1; - case TY_SHORT: - case TY_USHORT: - return 2; - case TY_INT: - case TY_UINT: - case TY_FLOAT: - return 4; - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 8; - default: - return 8; - } -} -static int type_is_signed(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_SHORT: - case TY_INT: - case TY_LONG: - case TY_LLONG: - return 1; - default: - return 0; - } -} - -static u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } - -extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); - -static void emit32(MCEmitter* mc, u32 word) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 b[4]; - b[0] = (u8)(word & 0xff); - b[1] = (u8)((word >> 8) & 0xff); - b[2] = (u8)((word >> 16) & 0xff); - b[3] = (u8)((word >> 24) & 0xff); - mc->emit_bytes(mc, b, 4); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) { - u8 b[4]; - 
b[0] = (u8)(word & 0xff); - b[1] = (u8)((word >> 8) & 0xff); - b[2] = (u8)((word >> 16) & 0xff); - b[3] = (u8)((word >> 24) & 0xff); - obj_patch(obj, sec_id, ofs, b, 4); -} - -static _Noreturn void rv_panic(CGTarget* t, const char* what) { - SrcLoc loc = impl_of(t)->loc; - compiler_panic(t->c, loc, "rv64: %s not implemented", what); -} - -/* ---- immediate materialization ---- - * Load any i64 into `rd`. Strategy: - * - if fits signed 12-bit: addi rd, x0, imm - * - elif fits signed 32-bit: lui rd, hi20; addiw rd, rd, lo12 - * - otherwise: split into high and low 32-bit halves, materialize - * each separately, then shift-and-or. Worst-case sequence is up - * to 8 instructions; good enough for the cg test corpus. */ -static int fits_signed32(i64 v) { return v >= (i64)(i32)0x80000000 && v <= (i64)(i32)0x7fffffff; } - -static void emit_li_32(MCEmitter* mc, u32 rd, i32 imm) { - if (imm >= -2048 && imm <= 2047) { - emit32(mc, rv_addi(rd, RV_ZERO, imm)); - return; - } - /* hi20 + lo12, with 0x800 bias to compensate ADDIW's sign-ext. */ - i32 hi = (i32)((u32)(imm + 0x800) >> 12); - i32 lo = (i32)((i32)imm - (i32)(hi << 12)); - emit32(mc, rv_lui(rd, (u32)hi & 0xfffffu)); - if (lo) emit32(mc, rv_addiw(rd, rd, lo)); -} - -static void emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm) { - if (!sf) { - /* 32-bit destination: low 32 bits, sign-extended. */ - emit_li_32(mc, rd, (i32)imm); - return; - } - if (fits_signed32(imm)) { - emit_li_32(mc, rd, (i32)imm); - return; - } - /* General 64-bit load: split into high and low 32 bits, place high - * into rd << 32, then OR in low via a temp register (t0=x5). The cg - * corpus has no IMM operands that collide with t0, so this is safe. */ - i64 lo32 = (i64)(i32)(imm & 0xffffffffu); /* sign-ext low half */ - i64 hi64 = (imm - lo32) >> 32; /* what remains in hi */ - if (hi64 < (i64)(i32)0x80000000 || - hi64 > (i64)(i32)0x7fffffff) { - /* Out of i32 range — fallback: use a smaller chunked approach. 
- * For the cg corpus this isn't hit; emit a conservative sequence: - * li rd, hi32; slli 32; li t0, lo32; or rd, rd, t0. */ - i32 hi32 = (i32)(imm >> 32); - i32 lo32_i = (i32)imm; - emit_li_32(mc, rd, hi32); - emit32(mc, rv_slli(rd, rd, 32)); - emit_li_32(mc, RV_T0, lo32_i); - /* zero-extend t0 to clear sign-extension before OR */ - emit32(mc, rv_slli(RV_T0, RV_T0, 32)); - emit32(mc, rv_srli(RV_T0, RV_T0, 32)); - emit32(mc, rv_or(rd, rd, RV_T0)); - return; - } - emit_li_32(mc, rd, (i32)hi64); - emit32(mc, rv_slli(rd, rd, 32)); - if (lo32 != 0) { - emit_li_32(mc, RV_T0, (i32)lo32); - emit32(mc, rv_slli(RV_T0, RV_T0, 32)); - emit32(mc, rv_srli(RV_T0, RV_T0, 32)); - emit32(mc, rv_or(rd, rd, RV_T0)); - } -} - -/* sp += imm. imm can be any signed value the caller passes — we pick - * the shortest sequence. */ -static void emit_sp_addi(MCEmitter* mc, i64 imm) { - if (imm >= -2048 && imm <= 2047) { - emit32(mc, rv_addi(RV_SP, RV_SP, (i32)imm)); - return; - } - emit_load_imm(mc, 1, RV_T0, imm); - emit32(mc, rv_add(RV_SP, RV_SP, RV_T0)); -} - -/* ---- function lifecycle ---- */ - -static void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - mc->set_section(mc, fd->text_section_id); - mc->emit_align(mc, 4, 0); - - a->fd = fd; - a->func_start = mc->pos(mc); - a->next_param_int = 0; - a->next_param_fp = 0; - a->next_param_stack = 0; - a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; - a->cum_off = 0; - a->max_outgoing = 0; - a->fp_pair_off = 0; - regpool_init(&a->int_pool, /*base=*/18u, /*nregs=*/10u); /* s2..s11 */ - regpool_init(&a->fp_pool, /*base=*/18u, /*nregs=*/10u); /* fs2..fs11 */ - a->nslots = 0; - a->nscopes = 0; - a->has_alloca = 0; - a->nadd_patches = 0; - a->is_variadic = (fd->abi && fd->abi->variadic) ? 
1 : 0; - a->gp_save_slot = FRAME_SLOT_NONE; - a->sret_ptr_slot = FRAME_SLOT_NONE; - a->epilogue_label = mc->label_new(mc); - - mc->cfi_startproc(mc); - - /* Reserve a NOP-filled prologue placeholder; func_end patches it. */ - a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) emit32(mc, RV_NOP); - - /* For an sret return, the caller passed the destination pointer in - * a0; reserve a hidden slot to spill it into so the body can use a0 - * freely. The actual SD a0, ...(s0) is emitted in the patched - * prologue once the slot offset is known. */ - if (a->has_sret) { - FrameSlotDesc fsd = { - .type = NULL, - .name = 0, - .loc = (SrcLoc){0, 0, 0}, - .size = 8, - .align = 8, - .kind = FS_SPILL, - .flags = 0, - }; - a->sret_ptr_slot = rv_frame_slot(t, &fsd); - /* Consume a0 — it is no longer available for the first real param. */ - a->next_param_int = 1; - } - - /* Variadic: a 64-byte GP save area for a0..a7 lives at the very top - * of the frame, immediately above the saved-s0/ra pair, so its bytes - * are contiguous with the caller's stack args. The patcher spills the - * unnamed a-regs into it as part of the prologue. The slot is implicit - * (not allocated through rv_frame_slot) — it sits at [s0 + 16] when - * is_variadic is set. */ -} - -static void rv_func_end(CGTarget* t) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - ObjBuilder* obj = t->obj; - u32 sec = a->fd->text_section_id; - - u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */ - u32 n_fp_saves = a->fp_pool.hwm; - u32 max_out = (a->max_outgoing + 15u) & ~15u; - u32 int_saves_sz = n_int_saves * 8u; - u32 fp_saves_sz = n_fp_saves * 8u; - - /* Variadic functions reserve a 64-byte save area at the very top of - * the frame so the save area and caller's stack args form a single - * contiguous byte stream walked by the va_list pointer. */ - u32 va_save_sz = a->is_variadic ? 
64u : 0u; - u32 locals_off = max_out + int_saves_sz + fp_saves_sz; /* from sp */ - u32 fp_pair_off = locals_off + a->cum_off; - u32 frame_size = fp_pair_off + 16u + va_save_sz; - frame_size = (frame_size + 15u) & ~15u; - fp_pair_off = frame_size - 16u - va_save_sz; - a->fp_pair_off = fp_pair_off; - - /* Place the epilogue label at current pos. */ - mc->label_place(mc, a->epilogue_label); - - /* Restore int and fp saves using s0-relative addressing so they - * don't depend on the final frame_size encoding (and survive - * alloca-induced sp shifts). */ - /* layout below s0: - * s0 - 8 .. s0 - 16 saved s0/ra ? No — those are at sp+fp_pair_off - * We arranged saved-s0/ra at [sp+fp_pair_off], not below s0. So - * immediately below s0 are: int saves, then fp saves, then locals. - * Wait — let me recompute. - * - * sp + 0 outgoing args (max_out bytes) - * sp + max_out int saves - * sp + max_out + I fp saves - * sp + max_out+I+F locals (cum_off) - * sp + fp_pair_off saved s0_caller (8) - * sp + fp_pair_off+8 saved ra (8) - * sp + frame_size end - * - * s0 = sp + fp_pair_off (so [s0+0] = saved s0_caller). - * Locals at [s0 - off] where off in [1..cum_off]. - * FP saves at [s0 - cum_off - 8*i]. - * Int saves at [s0 - cum_off - F - 8*i]. */ - /* Save slots sit at the start of an 8-byte cell below the locals - * area. fp_save_base = offset of the first fp save (=-(L+8)); each - * subsequent save is 8 bytes lower. int saves start below the fp - * block. */ - i32 fp_save_base = -(i32)a->cum_off - 8; - i32 int_save_base = fp_save_base - (i32)fp_saves_sz; - - /* Reverse order: ints first (lowest address) on restore, but we emit - * the restore loop in reverse to keep the prologue/epilogue symmetric. 
*/ - for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { - u32 r = 18u + (u32)i; /* s2 + i */ - i32 off = int_save_base - 8 * (i32)i; - emit32(mc, rv_ld(r, RV_S0, off)); - } - for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { - u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */ - i32 off = fp_save_base - 8 * (i32)i; - emit32(mc, rv_fld(r, RV_S0, off)); - } - /* Restore sp from s0 first so alloca-induced offsets don't matter. - * After this, sp == its post-prologue value. */ - if (a->has_alloca) { - if ((i32)fp_pair_off > 2047) { - compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large for alloca"); - } - emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fp_pair_off)); - } - emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fp_pair_off)); - emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fp_pair_off + 8)); - emit_sp_addi(mc, (i64)frame_size); - emit32(mc, rv_ret_()); - - /* Now patch the prologue placeholder. */ - u32 pos = a->prologue_pos; - u32 words[RV_PROLOGUE_WORDS]; - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) words[i] = RV_NOP; - u32 wi = 0; - - /* addi sp, sp, -frame_size (or 2-insn if too large) */ - if ((i64)frame_size <= 2048) { - words[wi++] = rv_addi(RV_SP, RV_SP, -(i32)frame_size); - } else { - /* li t0, -frame_size; add sp, sp, t0 */ - /* Use a small two-instruction expansion via LUI+ADDI if it fits 32-bit; - * otherwise we'd need a full load_imm but that's overkill for tests. 
*/ - i64 neg = -(i64)frame_size; - if (fits_signed32(neg)) { - i32 hi = (i32)((u32)((i32)neg + 0x800) >> 12); - i32 lo = (i32)neg - (hi << 12); - words[wi++] = rv_lui(RV_T0, (u32)hi & 0xfffffu); - if (lo) words[wi++] = rv_addiw(RV_T0, RV_T0, lo); - words[wi++] = rv_add(RV_SP, RV_SP, RV_T0); - } else { - compiler_panic(t->c, a->loc, "rv64: frame_size too large to patch"); - } - } - /* sd s0, fp_pair_off(sp); sd ra, fp_pair_off+8(sp); addi s0, sp, fp_pair_off */ - if ((i32)fp_pair_off > 2047 || (i32)(fp_pair_off + 8) > 2047) { - compiler_panic(t->c, a->loc, "rv64: fp_pair_off out of imm12 range"); - } - words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fp_pair_off); - words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fp_pair_off + 8); - words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fp_pair_off); - - /* If sret, spill incoming a0 into the hidden slot. */ - if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { - RvSlot* s = slot_get(a, a->sret_ptr_slot); - if (s) { - if (wi >= RV_PROLOGUE_WORDS) goto overflow; - words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off); - } - } - /* Variadic: spill the still-unconsumed a-regs (a_{nparams_int}..a7) - * into the save area at [s0 + 16 + i*8]. The save area sits between - * the saved-s0/ra pair and the caller's stack args, so save_area[8] - * == caller's first stack arg. 
*/ - if (a->is_variadic) { - for (u32 i = a->next_param_int; i < 8; ++i) { - if (wi >= RV_PROLOGUE_WORDS) goto overflow; - words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8); - } - } - /* int saves */ - for (u32 i = 0; i < n_int_saves; ++i) { - u32 r = 18u + i; - i32 off = int_save_base - 8 * (i32)i; - if (wi >= RV_PROLOGUE_WORDS) goto overflow; - words[wi++] = rv_sd(r, RV_S0, off); - } - /* fp saves */ - for (u32 i = 0; i < n_fp_saves; ++i) { - u32 r = 18u + i; - i32 off = fp_save_base - 8 * (i32)i; - if (wi >= RV_PROLOGUE_WORDS) goto overflow; - words[wi++] = rv_fsd(r, RV_S0, off); - } - if (0) { - overflow: - compiler_panic(t->c, a->loc, - "rv64: prologue placeholder too small (used %u of %u)", wi, - RV_PROLOGUE_WORDS); - } - - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) { - patch32(obj, sec, pos + i * 4u, words[i]); - } - - /* Patch alloca placeholders with max_outgoing. */ - if (max_out > 2047u) { - compiler_panic(t->c, a->loc, - "rv64: max_outgoing %u out of imm12 for alloca patch", - max_out); - } - for (u32 i = 0; i < a->nadd_patches; ++i) { - u32 dr = a->add_patches[i].dst_reg; - u32 word = rv_addi(dr, RV_SP, (i32)max_out); - patch32(obj, sec, a->add_patches[i].pos, word); - } - - /* Define the function symbol. 
*/ - u32 end = mc->pos(mc); - obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start, - (u64)(end - a->func_start)); - - mc->cfi_endproc(mc); - a->fd = NULL; -} - -/* ---- regs / frame ---- */ - -static Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { - RImpl* a = impl_of(t); - (void)ty; - if (cls == RC_INT) return regpool_alloc(&a->int_pool); - if (cls == RC_FP) return regpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls); -} - -static void rv_free_reg(CGTarget* t, Reg r, RegClass cls) { - RImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: - compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls); - } - int rc = regpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); - } - compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); -} - -static FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) { - RImpl* a = impl_of(t); - if (a->nslots == a->slots_cap) { - u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8; - RvSlot* nbuf = arena_array(t->c->tu, RvSlot, ncap); - if (a->slots) memcpy(nbuf, a->slots, sizeof(RvSlot) * a->nslots); - a->slots = nbuf; - a->slots_cap = ncap; - } - u32 size = d->size ? d->size : 8; - u32 align = d->align ? 
d->align : 1; - u32 next = a->cum_off + size; - u32 mask = align - 1; - next = (next + mask) & ~mask; - - RvSlot* s = &a->slots[a->nslots]; - s->off = next; - s->size = size; - s->align = align; - s->kind = d->kind; - - a->cum_off = next; - a->nslots++; - return (FrameSlot)(a->nslots); -} - -static RvSlot* slot_get(RImpl* a, FrameSlot fs) { - if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; - return &a->slots[fs - 1]; -} - -/* For a memory access of `nbytes`, pick the right store opcode. */ -static u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off) { - switch (nbytes) { - case 1: return rv_sb(src, base, off); - case 2: return rv_sh(src, base, off); - case 4: return rv_sw(src, base, off); - default: return rv_sd(src, base, off); - } -} -static u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off) { - switch (nbytes) { - case 1: return sign_ext ? rv_lb(rd, base, off) : rv_lbu(rd, base, off); - case 2: return sign_ext ? rv_lh(rd, base, off) : rv_lhu(rd, base, off); - case 4: return sign_ext ? rv_lw(rd, base, off) : rv_lwu(rd, base, off); - default: return rv_ld(rd, base, off); - } -} - -/* ---- param ---- */ - -static void rv_param(CGTarget* t, const CGParamDesc* p) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - RvSlot* s = slot_get(a, p->slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 param: bad slot"); - const ABIArgInfo* ai = p->abi; - /* Caller's stack args start above the saved-s0/ra pair, plus the - * 64-byte variadic save area when this function is variadic. */ - i32 caller_stack_base = 16 + (a->is_variadic ? 64 : 0); - - if (ai->kind == ABI_ARG_IGNORE) return; - if (ai->kind == ABI_ARG_INDIRECT) { - /* Pointer-to-copy passed in a-register. Copy bytes from there into - * the home slot. Source pointer is in a0..a7. 
*/ - u32 ptr_reg; - if (a->next_param_int < 8) { - ptr_reg = RV_A0 + a->next_param_int; - a->next_param_int++; - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - /* Incoming stack args live in the caller's outgoing-arg area, - * which is `frame_size - fp_pair_off` (= 16 + the saved-s0/ra - * pair) above s0 — same logic as aa64's `16 + caller_off`. */ - emit32(mc, rv_ld(RV_T1, RV_S0, caller_stack_base + (i32)caller_off)); - ptr_reg = RV_T1; - } - u32 nbytes = s->size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, rv_ld(RV_T2, ptr_reg, (i32)i)); - emit32(mc, rv_sd(RV_T2, RV_S0, -(i32)s->off + (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, rv_lwu(RV_T2, ptr_reg, (i32)i)); - emit32(mc, rv_sw(RV_T2, RV_S0, -(i32)s->off + (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, rv_lhu(RV_T2, ptr_reg, (i32)i)); - emit32(mc, rv_sh(RV_T2, RV_S0, -(i32)s->off + (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, rv_lbu(RV_T2, ptr_reg, (i32)i)); - emit32(mc, rv_sb(RV_T2, RV_S0, -(i32)s->off + (i32)i)); - i += 1; - } - return; - } - /* DIRECT */ - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 part_off = pt->src_offset; - u32 sz = pt->size; - - if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 8) { - u32 reg = RV_A0 + a->next_param_int; - a->next_param_int++; - emit32(mc, enc_int_store(sz, reg, RV_S0, - -(i32)s->off + (i32)part_off)); - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit32(mc, enc_int_load(sz, 0, RV_T2, RV_S0, - caller_stack_base + (i32)caller_off)); - emit32(mc, enc_int_store(sz, RV_T2, RV_S0, - -(i32)s->off + (i32)part_off)); - } - } else if (pt->cls == ABI_CLASS_FP) { - if (a->next_param_fp < 8) { - u32 reg = a->next_param_fp; /* fa0..fa7 → freg 10..17 */ - u32 freg = 10u + reg; - a->next_param_fp++; - if (sz == 8) { - emit32(mc, rv_fsd(freg, RV_S0, -(i32)s->off + (i32)part_off)); - } else { - emit32(mc, 
rv_fsw(freg, RV_S0, -(i32)s->off + (i32)part_off)); - } - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - if (sz == 8) { - emit32(mc, rv_fld(0, RV_S0, caller_stack_base + (i32)caller_off)); - emit32(mc, rv_fsd(0, RV_S0, -(i32)s->off + (i32)part_off)); - } else { - emit32(mc, rv_flw(0, RV_S0, caller_stack_base + (i32)caller_off)); - emit32(mc, rv_fsw(0, RV_S0, -(i32)s->off + (i32)part_off)); - } - } - } else { - compiler_panic(t->c, a->loc, "rv64 param: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - rv_panic(t, "clobbers"); -} - -static void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, - MemAccess ma) { - RImpl* a = impl_of(t); - if (src.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "rv64 spill_reg: src is not OPK_REG"); - } - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - rv_store(t, addr, src, ma); - rv_free_reg(t, src.v.reg, src.cls); -} - -static void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, - MemAccess ma) { - RImpl* a = impl_of(t); - if (dst.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "rv64 reload_reg: dst is not OPK_REG"); - } - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - rv_load(t, dst, addr, ma); -} - -/* ---- labels / control flow ---- */ - -static Label rv_label_new(CGTarget* t) { - return (Label)t->mc->label_new(t->mc); -} -static void rv_label_place(CGTarget* t, Label l) { - t->mc->label_place(t->mc, (MCLabel)l); -} -static void rv_jump(CGTarget* t, Label l) { - MCEmitter* mc = t->mc; - emit32(mc, rv_jal(RV_ZERO, 0)); - mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0); -} - -/* Force an integer Operand into a register; materializes IMM via scratch. 
*/ -static u32 force_reg_int(CGTarget* t, Operand op, u32 scratch) { - if (op.kind == OPK_REG) return reg_num(op); - if (op.kind == OPK_IMM) { - u32 sf = type_is_64(op.type) ? 1u : 0u; - emit_load_imm(t->mc, sf, scratch, op.v.imm); - return scratch; - } - compiler_panic(t->c, impl_of(t)->loc, - "rv64: operand kind %d unsupported here", (int)op.kind); -} - -/* Emit a conditional branch (a OP b) → label. Uses BEQ/BNE/BLT/BGE etc. */ -static void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op, - Label l) { - MCEmitter* mc = t->mc; - RImpl* a = impl_of(t); - /* For FP compares, fall through to materialize the result and CBNZ. */ - if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) { - compiler_panic(t->c, a->loc, "rv64 cmp_branch: FP cmp NYI"); - } - u32 ra = force_reg_int(t, a_op, RV_T0); - u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); - u32 word = 0; - switch (op) { - case CMP_EQ: word = rv_beq(ra, rb, 0); break; - case CMP_NE: word = rv_bne(ra, rb, 0); break; - case CMP_LT_S: word = rv_blt(ra, rb, 0); break; - case CMP_GE_S: word = rv_bge(ra, rb, 0); break; - case CMP_LT_U: word = rv_bltu(ra, rb, 0); break; - case CMP_GE_U: word = rv_bgeu(ra, rb, 0); break; - /* >= can become < with operands swapped: a > b ↔ b < a; - * a <= b ↔ b >= a. */ - case CMP_GT_S: word = rv_blt(rb, ra, 0); break; - case CMP_LE_S: word = rv_bge(rb, ra, 0); break; - case CMP_GT_U: word = rv_bltu(rb, ra, 0); break; - case CMP_LE_U: word = rv_bgeu(rb, ra, 0); break; - default: - compiler_panic(t->c, a->loc, "rv64 cmp_branch: op %d unimpl", (int)op); - } - emit32(mc, word); - mc->emit_label_ref(mc, (MCLabel)l, R_RV_BRANCH, 4, 0); -} - -/* Materialize 0/1 into dst from a comparison. 
*/ -static void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op, - Operand b_op) { - MCEmitter* mc = t->mc; - RImpl* a = impl_of(t); - u32 rd = reg_num(dst); - - if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) { - /* FP compare in fa,fb → rd. Use FLT/FLE/FEQ depending on op. */ - int is_d = type_is_fp_double(a_op.type); - u32 fa = reg_num(a_op); - u32 fb = reg_num(b_op); - switch (op) { - case CMP_LT_F: emit32(mc, is_d ? rv_flt_d(rd, fa, fb) : rv_flt_s(rd, fa, fb)); return; - case CMP_LE_F: emit32(mc, is_d ? rv_fle_d(rd, fa, fb) : rv_fle_s(rd, fa, fb)); return; - case CMP_GT_F: emit32(mc, is_d ? rv_flt_d(rd, fb, fa) : rv_flt_s(rd, fb, fa)); return; - case CMP_GE_F: emit32(mc, is_d ? rv_fle_d(rd, fb, fa) : rv_fle_s(rd, fb, fa)); return; - default: break; - } - } - u32 ra = force_reg_int(t, a_op, RV_T0); - u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); - - switch (op) { - case CMP_EQ: - emit32(mc, rv_sub(rd, ra, rb)); - emit32(mc, rv_sltiu(rd, rd, 1)); - return; - case CMP_NE: - emit32(mc, rv_sub(rd, ra, rb)); - emit32(mc, rv_sltu(rd, RV_ZERO, rd)); - return; - case CMP_LT_S: emit32(mc, rv_slt(rd, ra, rb)); return; - case CMP_LT_U: emit32(mc, rv_sltu(rd, ra, rb)); return; - case CMP_GT_S: emit32(mc, rv_slt(rd, rb, ra)); return; - case CMP_GT_U: emit32(mc, rv_sltu(rd, rb, ra)); return; - case CMP_GE_S: - emit32(mc, rv_slt(rd, ra, rb)); - emit32(mc, rv_xori(rd, rd, 1)); - return; - case CMP_GE_U: - emit32(mc, rv_sltu(rd, ra, rb)); - emit32(mc, rv_xori(rd, rd, 1)); - return; - case CMP_LE_S: - emit32(mc, rv_slt(rd, rb, ra)); - emit32(mc, rv_xori(rd, rd, 1)); - return; - case CMP_LE_U: - emit32(mc, rv_sltu(rd, rb, ra)); - emit32(mc, rv_xori(rd, rd, 1)); - return; - default: - compiler_panic(t->c, a->loc, "rv64 cmp: op %d unimpl", (int)op); - } -} - -/* ---- structured scopes (SCOPE_IF + SCOPE_LOOP/BLOCK bookkeep) ---- */ - -static CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d) { - RImpl* a = impl_of(t); - if 
(a->nscopes == a->scopes_cap) { - u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; - RvScope* nb = arena_array(t->c->tu, RvScope, ncap); - if (a->scopes) memcpy(nb, a->scopes, sizeof(RvScope) * a->nscopes); - a->scopes = nb; - a->scopes_cap = ncap; - } - RvScope* sc = &a->scopes[a->nscopes]; - sc->kind = (u8)d->kind; - sc->has_else = 0; - sc->else_label = 0; - sc->end_label = 0; - sc->break_label = d->break_label; - sc->continue_label = d->continue_label; - - if (d->kind == SCOPE_IF) { - sc->else_label = t->mc->label_new(t->mc); - sc->end_label = t->mc->label_new(t->mc); - u32 rn = force_reg_int(t, d->cond, RV_T0); - /* beq rn, x0, else_label */ - emit32(t->mc, rv_beq(rn, RV_ZERO, 0)); - t->mc->emit_label_ref(t->mc, sc->else_label, R_RV_BRANCH, 4, 0); - } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { - /* bookkeep only */ - } else { - compiler_panic(t->c, a->loc, - "rv64 scope_begin: kind %d not yet implemented", - (int)d->kind); - } - a->nscopes++; - return (CGScope)a->nscopes; -} - -static void rv_scope_else(CGTarget* t, CGScope s) { - RImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "rv64 scope_else: bad scope"); - } - RvScope* sc = &a->scopes[s - 1]; - /* jump end ; place else */ - emit32(t->mc, rv_jal(RV_ZERO, 0)); - t->mc->emit_label_ref(t->mc, sc->end_label, R_RV_JAL, 4, 0); - t->mc->label_place(t->mc, sc->else_label); - sc->has_else = 1; -} - -static void rv_scope_end(CGTarget* t, CGScope s) { - RImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "rv64 scope_end: bad scope"); - } - RvScope* sc = &a->scopes[s - 1]; - if (sc->kind == SCOPE_IF) { - if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label); - t->mc->label_place(t->mc, sc->end_label); - } -} - -static void rv_break_to(CGTarget* t, CGScope s) { - RImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "rv64 break_to: bad scope"); - } - 
rv_jump(t, a->scopes[s - 1].break_label); -} - -static void rv_continue_to(CGTarget* t, CGScope s) { - RImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) { - compiler_panic(t->c, a->loc, "rv64 continue_to: bad scope"); - } - rv_jump(t, a->scopes[s - 1].continue_label); -} - -/* ---- data movement ---- */ - -static void rv_load_imm(CGTarget* t, Operand dst, i64 imm) { - u32 sf = type_is_64(dst.type) ? 1u : 0u; - emit_load_imm(t->mc, sf, reg_num(dst), imm); -} - -static void rv_load_const(CGTarget* t, Operand dst, ConstBytes cb) { - RImpl* a = impl_of(t); - if (dst.cls != RC_FP) { - compiler_panic(t->c, a->loc, "rv64 load_const: only FP supported in v1"); - } - Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); - ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); - - u32 cur_section = t->mc->section_id; - t->mc->set_section(t->mc, ro); - u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4); - t->mc->emit_bytes(t->mc, cb.bytes, cb.size); - - char namebuf[64]; - static u32 lit_seq = 0; - int len = 0; - { - const char* prefix = ".LCFP"; - for (; prefix[len]; ++len) namebuf[len] = prefix[len]; - u32 v = lit_seq++; - char tmp[16]; - int tn = 0; - if (v == 0) tmp[tn++] = '0'; - else { - while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } - } - for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; - namebuf[len] = 0; - } - Sym sname = pool_intern_cstr(t->c->global, namebuf); - ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, - (u64)cb.size); - t->mc->set_section(t->mc, cur_section); - - /* auipc t0, %pcrel_hi(sym) ; flw/fld dst, %pcrel_lo(...)(t0) - * The LO12_I reloc references the AUIPC's site address (a label/sym - * placed at the AUIPC). For simplicity we make a local symbol at the - * AUIPC and bind LO12_I to it. 
*/ - u32 sec = t->mc->section_id; - u32 auipc_pos = t->mc->pos(t->mc); - emit32(t->mc, rv_auipc(RV_T0, 0)); - t->mc->emit_reloc_at(t->mc, sec, auipc_pos, R_RV_PCREL_HI20, sym, 0, 0, 0); - /* Create a local symbol at the AUIPC site to anchor PCREL_LO12. */ - char anchor_buf[64]; - int al = 0; - { - const char* p2 = ".LpcrelHi"; - for (; p2[al]; ++al) anchor_buf[al] = p2[al]; - static u32 seq2 = 0; - u32 v = seq2++; - char tmp[16]; int tn = 0; - if (v == 0) tmp[tn++] = '0'; - else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } } - for (int i = tn - 1; i >= 0; --i) anchor_buf[al++] = tmp[i]; - anchor_buf[al] = 0; - } - Sym aname = pool_intern_cstr(t->c->global, anchor_buf); - ObjSymId anchor = obj_symbol(t->obj, aname, SB_LOCAL, SK_OBJ, sec, - (u64)auipc_pos, 0); - u32 lpos = t->mc->pos(t->mc); - if (cb.size == 8) { - emit32(t->mc, rv_fld(reg_num(dst), RV_T0, 0)); - } else { - emit32(t->mc, rv_flw(reg_num(dst), RV_T0, 0)); - } - t->mc->emit_reloc_at(t->mc, sec, lpos, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); -} - -static void rv_copy(CGTarget* t, Operand dst, Operand src) { - if (dst.cls == RC_FP || src.cls == RC_FP) { - u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S; - /* fmv.fmt rd, rs = fsgnj.fmt rd, rs, rs */ - u32 r = reg_num(src); - emit32(t->mc, rv_fsgnj(fmt, reg_num(dst), r, r)); - return; - } - /* mv rd, rs = addi rd, rs, 0 (works for both 32 and 64-bit copies) */ - emit32(t->mc, rv_addi(reg_num(dst), reg_num(src), 0)); -} - -/* ---- address resolution ---- */ - -/* Materialize the address of `addr` (LOCAL or INDIRECT or GLOBAL) into - * `tmp_reg`. Returns the register holding the base and writes the - * effective signed offset to *out_off (0 when we synthesized into tmp). - * For OPK_GLOBAL, emits AUIPC + an LO12 reloc on the caller's load/store. 
*/ -static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { - RImpl* a = impl_of(t); - if (addr.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot"); - i32 off = -(i32)s->off; - if (off >= -2048 && off <= 2047) { - *out_off = off; - return RV_S0; - } - emit_load_imm(t->mc, 1, tmp_reg, (i64)off); - emit32(t->mc, rv_add(tmp_reg, RV_S0, tmp_reg)); - *out_off = 0; - return tmp_reg; - } - if (addr.kind == OPK_INDIRECT) { - i32 off = addr.v.ind.ofs; - u32 base = addr.v.ind.base & 0x1f; - if (off >= -2048 && off <= 2047) { - *out_off = off; - return base; - } - emit_load_imm(t->mc, 1, tmp_reg, (i64)off); - emit32(t->mc, rv_add(tmp_reg, base, tmp_reg)); - *out_off = 0; - return tmp_reg; - } - compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported", - (int)addr.kind); -} - -static int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) { - return obj_symbol_extern_via_got(t->c, t->obj, sym); -} - -/* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a - * fresh local sym; the paired LO12 reloc references the anchor. */ -static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) { - char buf[64]; - int len = 0; - const char* p = ".LpcrelHi"; - for (; p[len]; ++len) buf[len] = p[len]; - static u32 seq = 0; - u32 v = seq++; - char tmp[16]; int tn = 0; - if (v == 0) tmp[tn++] = '0'; - else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } } - for (int i = tn - 1; i >= 0; --i) buf[len++] = tmp[i]; - buf[len] = 0; - Sym n = pool_intern_cstr(t->c->global, buf); - return obj_symbol(t->obj, n, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos, 0); -} - -/* Emit `auipc dst, %got_pcrel_hi(sym) ; ld dst, %pcrel_lo(.)(dst)`, - * leaving the runtime address of `sym` (the GOT slot's contents) in - * `dst_reg`. 
Addends are omitted from the GOT relocs — most loaders - * disallow nonzero addends on GOT-load fixups — so callers apply any - * displacement with a follow-on ADDI/ADD against the loaded base. */ -static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - u32 ap = mc->pos(mc); - emit32(mc, rv_auipc(dst_reg, 0)); - mc->emit_reloc_at(mc, sec, ap, R_RV_GOT_HI20, sym, 0, 0, 0); - ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); - u32 lp = mc->pos(mc); - emit32(mc, rv_ld(dst_reg, dst_reg, 0)); - mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); -} - -/* Add a signed displacement `off` to `base`, writing into `rd`. Uses - * ADDI for ±2047, otherwise materializes the offset via emit_load_imm - * + ADD. Mirrors emit_addr_adjust in aarch64.c. */ -static void emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) { - if (off == 0) { - if (rd != base) emit32(mc, rv_addi(rd, base, 0)); - return; - } - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(rd, base, off)); - return; - } - emit_load_imm(mc, 1, RV_T1, (i64)off); - emit32(mc, rv_add(rd, base, RV_T1)); -} - -static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { - u32 sz = ma.size ? ma.size : type_byte_size(addr.type); - MCEmitter* mc = t->mc; - - if (addr.kind == OPK_GLOBAL) { - u32 sec = mc->section_id; - ObjSymId sym = addr.v.global.sym; - i64 add = addr.v.global.addend; - /* Extern-via-GOT path: load &sym from GOT, then load the value at - * +addend (addend baked into the data load's imm12; relies on the - * common case of `add` fitting ±2047 — larger addends would need a - * follow-on ADD). 
*/ - if (rv64_use_got_for_sym(t, sym)) { - emit_got_load_addr(t, RV_T0, sym); - i32 ao = (i32)add; - if (dst.cls == RC_FP) { - if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, ao)); - else emit32(mc, rv_flw(reg_num(dst), RV_T0, ao)); - } else { - int sx = type_is_signed(addr.type); - emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, ao)); - } - return; - } - u32 ap = mc->pos(mc); - emit32(mc, rv_auipc(RV_T0, 0)); - mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); - ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); - u32 lp = mc->pos(mc); - if (dst.cls == RC_FP) { - if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, 0)); - else emit32(mc, rv_flw(reg_num(dst), RV_T0, 0)); - } else { - int sx = type_is_signed(addr.type); - emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, 0)); - } - mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); - return; - } - - i32 off; - u32 base = addr_base(t, addr, &off, RV_T0); - if (dst.cls == RC_FP) { - if (sz == 8) emit32(mc, rv_fld(reg_num(dst), base, off)); - else emit32(mc, rv_flw(reg_num(dst), base, off)); - } else { - int sx = type_is_signed(addr.type); - emit32(mc, enc_int_load(sz, sx, reg_num(dst), base, off)); - } -} - -static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { - u32 sz = ma.size ? ma.size : type_byte_size(addr.type); - MCEmitter* mc = t->mc; - - if (addr.kind == OPK_GLOBAL) { - u32 sec = mc->section_id; - ObjSymId sym = addr.v.global.sym; - i64 add = addr.v.global.addend; - u32 src_reg; - int src_fp = 0; - if (src.kind == OPK_IMM) { - u32 sf = (sz == 8) ? 1u : 0u; - emit_load_imm(mc, sf, RV_T1, src.v.imm); - src_reg = RV_T1; - } else if (src.cls == RC_FP) { - src_reg = reg_num(src); - src_fp = 1; - } else { - src_reg = reg_num(src); - } - /* Extern-via-GOT path: load &sym from GOT into t0, then store with - * addend baked into the imm12 (no reloc on the store). 
*/ - if (rv64_use_got_for_sym(t, sym)) { - emit_got_load_addr(t, RV_T0, sym); - i32 ao = (i32)add; - if (src_fp) { - if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, ao)); - else emit32(mc, rv_fsw(src_reg, RV_T0, ao)); - } else { - emit32(mc, enc_int_store(sz, src_reg, RV_T0, ao)); - } - return; - } - u32 ap = mc->pos(mc); - emit32(mc, rv_auipc(RV_T0, 0)); - mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); - ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); - u32 sp_pos = mc->pos(mc); - if (src_fp) { - if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, 0)); - else emit32(mc, rv_fsw(src_reg, RV_T0, 0)); - } else { - emit32(mc, enc_int_store(sz, src_reg, RV_T0, 0)); - } - mc->emit_reloc_at(mc, sec, sp_pos, R_RV_PCREL_LO12_S, anchor, 0, 0, 0); - return; - } - - i32 off; - u32 base = addr_base(t, addr, &off, - (src.kind == OPK_IMM) ? RV_T1 : RV_T0); - if (src.kind == OPK_IMM) { - u32 sf = (sz == 8) ? 1u : 0u; - emit_load_imm(mc, sf, RV_T0, src.v.imm); - emit32(mc, enc_int_store(sz, RV_T0, base, off)); - return; - } - if (src.cls == RC_FP) { - if (sz == 8) emit32(mc, rv_fsd(reg_num(src), base, off)); - else emit32(mc, rv_fsw(reg_num(src), base, off)); - } else { - emit32(mc, enc_int_store(sz, reg_num(src), base, off)); - } -} - -static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 rd = reg_num(dst); - if (lv.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, lv.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 addr_of: bad slot"); - i32 off = -(i32)s->off; - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(rd, RV_S0, off)); - } else { - emit_load_imm(mc, 1, rd, (i64)off); - emit32(mc, rv_add(rd, RV_S0, rd)); - } - return; - } - if (lv.kind == OPK_INDIRECT) { - i32 ofs = lv.v.ind.ofs; - u32 base = lv.v.ind.base & 0x1f; - if (ofs >= -2048 && ofs <= 2047) { - emit32(mc, rv_addi(rd, base, ofs)); - } else { - emit_load_imm(mc, 1, rd, (i64)ofs); - emit32(mc, rv_add(rd, base, rd)); - } - 
return; - } - if (lv.kind == OPK_GLOBAL) { - ObjSymId sym = lv.v.global.sym; - i64 addend = lv.v.global.addend; - /* Extern-via-GOT path: GOT load yields &sym directly; apply any - * addend with a follow-on ADDI/ADD (GOT relocs disallow addends). */ - if (rv64_use_got_for_sym(t, sym)) { - emit_got_load_addr(t, rd, sym); - if (addend) emit_addr_adjust(mc, rd, rd, (i32)addend); - return; - } - u32 sec = mc->section_id; - u32 ap = mc->pos(mc); - emit32(mc, rv_auipc(rd, 0)); - mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0); - ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); - u32 ip = mc->pos(mc); - emit32(mc, rv_addi(rd, rd, 0)); - mc->emit_reloc_at(mc, sec, ip, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); - return; - } - rv_panic(t, "addr_of"); -} - -static void rv_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { - /* TLS Local-Exec: lui tmp, %tprel_hi(sym); add tmp, tp, tmp; addi dst, - * tmp, %tprel_lo(sym). Uses R_RV_TPREL_HI20 / R_RV_TPREL_LO12_I. */ - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - u32 rd = reg_num(dst); - u32 hp = mc->pos(mc); - emit32(mc, rv_lui(RV_T0, 0)); - mc->emit_reloc_at(mc, sec, hp, R_RV_TPREL_HI20, sym, addend, 0, 0); - emit32(mc, rv_add(RV_T0, RV_TP, RV_T0)); - u32 lp = mc->pos(mc); - emit32(mc, rv_addi(rd, RV_T0, 0)); - mc->emit_reloc_at(mc, sec, lp, R_RV_TPREL_LO12_I, sym, addend, 0, 0); -} - -/* ---- aggregate ops ---- */ - -static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { - RImpl* a = impl_of(t); - if (op.kind == OPK_REG) return reg_num(op); - if (op.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, op.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 agg: bad slot"); - i32 off = -(i32)s->off; - if (off >= -2048 && off <= 2047) { - emit32(t->mc, rv_addi(scratch, RV_S0, off)); - } else { - emit_load_imm(t->mc, 1, scratch, (i64)off); - emit32(t->mc, rv_add(scratch, RV_S0, scratch)); - } - return scratch; - } - compiler_panic(t->c, a->loc, "rv64 agg: address kind %d 
unsupported", - (int)op.kind); -} - -static void rv_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr, - AggregateAccess agg) { - MCEmitter* mc = t->mc; - u32 dr = agg_addr_reg(t, dst_addr, RV_T0); - u32 sr = agg_addr_reg(t, src_addr, (dr == RV_T1) ? RV_T2 : RV_T1); - u32 n = agg.size; - u32 i = 0; - while (i + 8 <= n) { - emit32(mc, rv_ld(RV_T3, sr, (i32)i)); - emit32(mc, rv_sd(RV_T3, dr, (i32)i)); - i += 8; - } - while (i + 4 <= n) { - emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); - emit32(mc, rv_sw(RV_T3, dr, (i32)i)); - i += 4; - } - while (i + 2 <= n) { - emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); - emit32(mc, rv_sh(RV_T3, dr, (i32)i)); - i += 2; - } - while (i < n) { - emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); - emit32(mc, rv_sb(RV_T3, dr, (i32)i)); - i += 1; - } -} - -static void rv_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value, - AggregateAccess agg) { - MCEmitter* mc = t->mc; - u32 dr = agg_addr_reg(t, dst_addr, RV_T0); - u32 byte; - if (byte_value.kind == OPK_IMM) { - byte = (u32)(byte_value.v.imm & 0xffu); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "rv64 set_bytes: REG byte NYI"); - } - u32 n = agg.size; - u32 src; - if (byte == 0) { - src = RV_ZERO; - } else { - u64 b = byte; - b |= b << 8; b |= b << 16; b |= b << 32; - emit_load_imm(mc, 1, RV_T3, (i64)b); - src = RV_T3; - } - u32 i = 0; - while (i + 8 <= n) { emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; } - while (i + 4 <= n) { emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; } - while (i + 2 <= n) { emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; } - while (i < n) { emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; } -} - -static void rv_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, - BitFieldAccess bf) { - MCEmitter* mc = t->mc; - u32 base = agg_addr_reg(t, record_addr, RV_T0); - u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; - u32 rd = reg_num(dst); - /* Load full storage unit (zero-ext for shifts). 
*/ - emit32(mc, enc_int_load(storage_bytes, 0, rd, base, (i32)bf.storage_offset)); - /* Shift left by (XLEN - (bit_offset + bit_width)) then arithmetic - * right-shift by (XLEN - bit_width). Use 64-bit shifts. */ - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? bf.bit_width : 1u; - u32 sh_left = 64u - (lsb + width); - u32 sh_right = 64u - width; - emit32(mc, rv_slli(rd, rd, sh_left)); - if (bf.signed_) emit32(mc, rv_srai(rd, rd, sh_right)); - else emit32(mc, rv_srli(rd, rd, sh_right)); -} - -static void rv_bitfield_store(CGTarget* t, Operand record_addr, Operand src, - BitFieldAccess bf) { - MCEmitter* mc = t->mc; - u32 base = agg_addr_reg(t, record_addr, RV_T0); - u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; - /* Load current value into t1 */ - emit32(mc, enc_int_load(storage_bytes, 0, RV_T1, base, - (i32)bf.storage_offset)); - u32 src_reg; - if (src.kind == OPK_IMM) { - emit_load_imm(mc, 1, RV_T2, src.v.imm); - src_reg = RV_T2; - } else if (src.kind == OPK_REG) { - src_reg = reg_num(src); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "rv64 bitfield_store: src kind %d NYI", (int)src.kind); - } - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? 
bf.bit_width : 1u; - /* mask = ((1 << width) - 1) << lsb */ - u64 mask = ((u64)1 << width) - 1u; - /* t3 = src & ((1<<width)-1), then shifted to lsb */ - emit_load_imm(mc, 1, RV_T3, (i64)mask); - emit32(mc, rv_and(RV_T3, src_reg, RV_T3)); - if (lsb) emit32(mc, rv_slli(RV_T3, RV_T3, lsb)); - /* clear the field bits in t1: andi or and-not pattern */ - u64 mask_in = mask << lsb; - emit_load_imm(mc, 1, RV_T2, (i64)~mask_in); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T2)); - emit32(mc, rv_or(RV_T1, RV_T1, RV_T3)); - emit32(mc, enc_int_store(storage_bytes, RV_T1, base, - (i32)bf.storage_offset)); -} - -/* ---- arithmetic ---- */ - -static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, - Operand b_op) { - MCEmitter* mc = t->mc; - if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { - u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S; - u32 rd = reg_num(dst); - u32 fa = reg_num(a_op); - u32 fb = reg_num(b_op); - switch (op) { - case BO_FADD: emit32(mc, rv_fadd(fmt, rd, fa, fb)); return; - case BO_FSUB: emit32(mc, rv_fsub(fmt, rd, fa, fb)); return; - case BO_FMUL: emit32(mc, rv_fmul(fmt, rd, fa, fb)); return; - case BO_FDIV: emit32(mc, rv_fdiv(fmt, rd, fa, fb)); return; - default: break; - } - } - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 rd = reg_num(dst); - - /* Canonicalize IMM to the RHS for commutative ops so the imm-form - * check below handles `3 + a` the same as `a + 3`. ISUB is not - * commutative — IMM-on-LHS still materializes. */ - switch (op) { - case BO_IADD: - case BO_AND: - case BO_OR: - case BO_XOR: { - if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { - Operand t_op = a_op; a_op = b_op; b_op = t_op; - } - break; - } - default: break; - } - - /* IMM-form fast paths. RV-I admits a 12-bit signed immediate for - * ADDI/ANDI/ORI/XORI/SLTI/SLTIU (range [-2048, 2047]). 
ISUB has no - * SUBI — we encode it as ADDI with the negated literal when -imm - * fits the same range (i.e., imm ∈ [-2047, 2048]; INT_MIN is - * intentionally excluded since -INT_MIN overflows). Shifts admit a - * shamt: 6 bits (0..63) on the 64-bit forms, 5 bits (0..31) on the - * W-variants. */ - if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { - u32 ra = reg_num(a_op); - i64 imm = b_op.v.imm; - int fits12 = imm >= -2048 && imm <= 2047; - switch (op) { - case BO_IADD: - if (fits12) { - emit32(mc, sf ? rv_addi(rd, ra, (i32)imm) : rv_addiw(rd, ra, (i32)imm)); - return; - } - break; - case BO_ISUB: - if (imm >= -2047 && imm <= 2048) { - emit32(mc, sf ? rv_addi(rd, ra, (i32)-imm) : rv_addiw(rd, ra, (i32)-imm)); - return; - } - break; - case BO_AND: - if (fits12) { emit32(mc, rv_andi(rd, ra, (i32)imm)); return; } - break; - case BO_OR: - if (fits12) { emit32(mc, rv_ori(rd, ra, (i32)imm)); return; } - break; - case BO_XOR: - if (fits12) { emit32(mc, rv_xori(rd, ra, (i32)imm)); return; } - break; - case BO_SHL: { - u32 width = sf ? 64u : 32u; - u32 sh = (u32)((u64)imm & (width - 1u)); - emit32(mc, sf ? rv_slli(rd, ra, sh) : rv_slliw(rd, ra, sh)); - return; - } - case BO_SHR_U: { - u32 width = sf ? 64u : 32u; - u32 sh = (u32)((u64)imm & (width - 1u)); - emit32(mc, sf ? rv_srli(rd, ra, sh) : rv_srliw(rd, ra, sh)); - return; - } - case BO_SHR_S: { - u32 width = sf ? 64u : 32u; - u32 sh = (u32)((u64)imm & (width - 1u)); - emit32(mc, sf ? rv_srai(rd, ra, sh) : rv_sraiw(rd, ra, sh)); - return; - } - default: break; - } - } - - u32 ra = force_reg_int(t, a_op, RV_T0); - u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); - - switch (op) { - case BO_IADD: emit32(mc, sf ? rv_add(rd, ra, rb) : rv_addw(rd, ra, rb)); return; - case BO_ISUB: emit32(mc, sf ? rv_sub(rd, ra, rb) : rv_subw(rd, ra, rb)); return; - case BO_IMUL: emit32(mc, sf ? 
rv_mul(rd, ra, rb) : rv_mulw(rd, ra, rb)); return; - case BO_AND: emit32(mc, rv_and(rd, ra, rb)); return; - case BO_OR: emit32(mc, rv_or(rd, ra, rb)); return; - case BO_XOR: emit32(mc, rv_xor(rd, ra, rb)); return; - case BO_SHL: emit32(mc, sf ? rv_sll(rd, ra, rb) : rv_sllw(rd, ra, rb)); return; - case BO_SHR_U: emit32(mc, sf ? rv_srl(rd, ra, rb) : rv_srlw(rd, ra, rb)); return; - case BO_SHR_S: emit32(mc, sf ? rv_sra(rd, ra, rb) : rv_sraw(rd, ra, rb)); return; - case BO_SDIV: emit32(mc, sf ? rv_div(rd, ra, rb) : rv_divw(rd, ra, rb)); return; - case BO_UDIV: emit32(mc, sf ? rv_divu(rd, ra, rb) : rv_divuw(rd, ra, rb)); return; - case BO_SREM: emit32(mc, sf ? rv_rem(rd, ra, rb) : rv_remw(rd, ra, rb)); return; - case BO_UREM: emit32(mc, sf ? rv_remu(rd, ra, rb) : rv_remuw(rd, ra, rb)); return; - default: - compiler_panic(t->c, impl_of(t)->loc, "rv64 binop: op %d unimpl", (int)op); - } -} - -static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { - MCEmitter* mc = t->mc; - u32 sf = type_is_64(dst.type) ? 1u : 0u; - u32 rd = reg_num(dst); - /* IMM operand is legal per the CGTarget contract (arch.h); materialize - * into t0 when not already a register. cg folds literal unops upstream - * via cg_fold_unop. */ - u32 rn = force_reg_int(t, a_op, RV_T0); - switch (op) { - case UO_NEG: - emit32(mc, sf ? 
rv_sub(rd, RV_ZERO, rn) : rv_subw(rd, RV_ZERO, rn)); - return; - case UO_BNOT: - emit32(mc, rv_xori(rd, rn, -1)); - return; - case UO_NOT: - /* logical: 1 if rn==0 else 0 → sltiu rd, rn, 1 */ - emit32(mc, rv_sltiu(rd, rn, 1)); - return; - default: - compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: op %d unimpl", (int)op); - } -} - -static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 rd = reg_num(dst); - u32 rn = reg_num(src); - - switch (k) { - case CV_SEXT: { - u32 src_bits = type_byte_size(src.type) * 8u; - if (src_bits == 32u) { - /* sext.w rd, rs = addiw rd, rs, 0 */ - emit32(mc, rv_addiw(rd, rn, 0)); - return; - } - /* slli + srai by (64 - src_bits) */ - u32 sh = 64u - src_bits; - emit32(mc, rv_slli(rd, rn, sh)); - emit32(mc, rv_srai(rd, rd, sh)); - return; - } - case CV_ZEXT: { - u32 src_bits = type_byte_size(src.type) * 8u; - if (src_bits == 32u) { - /* zext.w: slli rd, rs, 32; srli rd, rd, 32 */ - emit32(mc, rv_slli(rd, rn, 32)); - emit32(mc, rv_srli(rd, rd, 32)); - } else { - u32 sh = 64u - src_bits; - emit32(mc, rv_slli(rd, rn, sh)); - emit32(mc, rv_srli(rd, rd, sh)); - } - return; - } - case CV_TRUNC: - /* Truncate to W: addiw rd, rs, 0 puts low 32 in rd sign-extended. - * For narrower widths the consumer (store) handles it. */ - emit32(mc, rv_addiw(rd, rn, 0)); - return; - case CV_ITOF_S: { - int sf_src = type_is_64(src.type); - int dst_d = type_is_fp_double(dst.type); - if (dst_d) { - emit32(mc, sf_src ? rv_fcvt_d_l(rd, rn) : rv_fcvt_d_w(rd, rn)); - } else { - emit32(mc, sf_src ? rv_fcvt_s_l(rd, rn) : rv_fcvt_s_w(rd, rn)); - } - return; - } - case CV_ITOF_U: { - int sf_src = type_is_64(src.type); - int dst_d = type_is_fp_double(dst.type); - if (dst_d) { - emit32(mc, sf_src ? rv_fcvt_d_lu(rd, rn) : rv_fcvt_d_wu(rd, rn)); - } else { - emit32(mc, sf_src ? 
rv_fcvt_s_lu(rd, rn) : rv_fcvt_s_wu(rd, rn)); - } - return; - } - case CV_FTOI_S: { - int sf_dst = type_is_64(dst.type); - int src_d = type_is_fp_double(src.type); - if (src_d) { - emit32(mc, sf_dst ? rv_fcvt_l_d(rd, rn) : rv_fcvt_w_d(rd, rn)); - } else { - emit32(mc, sf_dst ? rv_fcvt_l_s(rd, rn) : rv_fcvt_w_s(rd, rn)); - } - return; - } - case CV_FTOI_U: { - int sf_dst = type_is_64(dst.type); - int src_d = type_is_fp_double(src.type); - if (src_d) { - emit32(mc, sf_dst ? rv_fcvt_lu_d(rd, rn) : rv_fcvt_wu_d(rd, rn)); - } else { - emit32(mc, sf_dst ? rv_fcvt_lu_s(rd, rn) : rv_fcvt_wu_s(rd, rn)); - } - return; - } - case CV_FEXT: emit32(mc, rv_fcvt_d_s(rd, rn)); return; - case CV_FTRUNC: emit32(mc, rv_fcvt_s_d(rd, rn)); return; - case CV_BITCAST: { - if (src.cls == RC_INT && dst.cls == RC_FP) { - u32 sz = type_byte_size(dst.type); - emit32(mc, sz == 8 ? rv_fmv_d_x(rd, rn) : rv_fmv_w_x(rd, rn)); - } else if (src.cls == RC_FP && dst.cls == RC_INT) { - u32 sz = type_byte_size(src.type); - emit32(mc, sz == 8 ? rv_fmv_x_d(rd, rn) : rv_fmv_x_w(rd, rn)); - } else { - compiler_panic(t->c, a->loc, "rv64 BITCAST: same-class NYI"); - } - return; - } - default: - compiler_panic(t->c, a->loc, "rv64 convert kind %d unimpl", (int)k); - } -} - -/* ---- calls / return ---- */ - -static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, - u32* next_fp, u32* stack_off) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - /* For variadic args (av->abi NULL) synthesize a one-part DIRECT shape. - * On RV64 LP64D, variadic args go through the integer registers - * regardless of FP-ness (per the psABI). 
*/ - ABIArgInfo va_ai; - ABIArgPart va_pt; - const ABIArgInfo* ai = av->abi; - if (!ai) { - u32 sz = type_byte_size(av->type); - memset(&va_ai, 0, sizeof va_ai); - memset(&va_pt, 0, sizeof va_pt); - va_ai.kind = ABI_ARG_DIRECT; - va_ai.parts = &va_pt; - va_ai.nparts = 1; - va_pt.cls = ABI_CLASS_INT; - va_pt.size = sz; - va_pt.align = sz; - va_pt.src_offset = 0; - ai = &va_ai; - } - if (ai->kind == ABI_ARG_IGNORE) return; - - if (ai->kind == ABI_ARG_INDIRECT) { - /* Pass the address of the storage in the next integer slot. */ - int to_stack = (*next_int >= 8); - u32 dst_reg = to_stack ? RV_T0 : (RV_A0 + (*next_int)++); - if (av->storage.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad byval slot"); - i32 off = -(i32)s->off; - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(dst_reg, RV_S0, off)); - } else { - emit_load_imm(mc, 1, dst_reg, (i64)off); - emit32(mc, rv_add(dst_reg, RV_S0, dst_reg)); - } - } else if (av->storage.kind == OPK_INDIRECT) { - u32 base = av->storage.v.ind.base & 0x1fu; - i32 off = av->storage.v.ind.ofs; - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(dst_reg, base, off)); - } else { - emit_load_imm(mc, 1, dst_reg, (i64)off); - emit32(mc, rv_add(dst_reg, base, dst_reg)); - } - } else { - compiler_panic(t->c, a->loc, - "rv64 call: INDIRECT storage kind %d NYI", - (int)av->storage.kind); - } - if (to_stack) { - emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); - *stack_off += 8; - } - return; - } - - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 sz = pt->size; - - if (pt->cls == ABI_CLASS_INT) { - int to_stack = (*next_int >= 8); - u32 dst_reg = to_stack ? RV_T0 : (RV_A0 + (*next_int)++); - switch (av->storage.kind) { - case OPK_IMM: { - u32 sf = (sz == 8) ? 
1u : 0u; - emit_load_imm(mc, sf, dst_reg, av->storage.v.imm); - break; - } - case OPK_REG: { - /* Variadic FP arg pinned into an integer register: bitcast - * via FMV.X.{D,W}. Otherwise normal MV. */ - if (av->storage.cls == RC_FP) { - emit32(mc, (sz == 8) ? rv_fmv_x_d(dst_reg, reg_num(av->storage)) - : rv_fmv_x_w(dst_reg, reg_num(av->storage))); - } else { - emit32(mc, rv_addi(dst_reg, reg_num(av->storage), 0)); - } - break; - } - case OPK_LOCAL: { - RvSlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad arg slot"); - i32 off = -(i32)s->off + (i32)pt->src_offset; - emit32(mc, enc_int_load(sz, 0, dst_reg, RV_S0, off)); - break; - } - case OPK_INDIRECT: { - /* cg holds INDIRECT base regs in s2..s11, disjoint from arg - * regs a0..a7 and the t0 stack-arg scratch. */ - u32 base = av->storage.v.ind.base & 0x1fu; - i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; - emit32(mc, enc_int_load(sz, 0, dst_reg, base, off)); - break; - } - default: - compiler_panic(t->c, a->loc, - "rv64 call: storage kind %d NYI", - (int)av->storage.kind); - } - if (to_stack) { - emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); - *stack_off += 8; - } - } else if (pt->cls == ABI_CLASS_FP) { - int to_stack = (*next_fp >= 8); - if (!to_stack) { - u32 freg = 10u + (*next_fp)++; - switch (av->storage.kind) { - case OPK_REG: { - u32 fmt = (sz == 8) ? RV_FMT_D : RV_FMT_S; - u32 r = reg_num(av->storage); - emit32(mc, rv_fsgnj(fmt, freg, r, r)); - break; - } - case OPK_INDIRECT: { - u32 base = av->storage.v.ind.base & 0x1fu; - i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; - emit32(mc, (sz == 8) ? 
rv_fld(freg, base, off) - : rv_flw(freg, base, off)); - break; - } - default: - compiler_panic(t->c, a->loc, "rv64 call: FP storage kind %d NYI", - (int)av->storage.kind); - } - } else { - switch (av->storage.kind) { - case OPK_REG: - if (sz == 8) emit32(mc, rv_fsd(reg_num(av->storage), RV_SP, (i32)*stack_off)); - else emit32(mc, rv_fsw(reg_num(av->storage), RV_SP, (i32)*stack_off)); - break; - case OPK_INDIRECT: { - /* Route through ft0 — it is in {ft0..ft7}, caller-saved - * scratch outside the cg fs2..fs11 pool. */ - u32 base = av->storage.v.ind.base & 0x1fu; - i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; - if (sz == 8) { - emit32(mc, rv_fld(/*ft0=*/0u, base, off)); - emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off)); - } else { - emit32(mc, rv_flw(/*ft0=*/0u, base, off)); - emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off)); - } - break; - } - default: - compiler_panic(t->c, a->loc, "rv64 call: FP stack-arg NYI"); - } - *stack_off += 8; - } - } else { - compiler_panic(t->c, a->loc, "rv64 call: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static void rv_call(CGTarget* t, const CGCallDesc* d) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - u32 next_int = 0, next_fp = 0, stack_off = 0; - - /* sret: caller passes destination pointer in a0. 
*/ - if (d->abi && d->abi->has_sret) { - if (d->ret.storage.kind != OPK_LOCAL) { - compiler_panic(t->c, a->loc, "rv64 call: sret dst must be LOCAL"); - } - RvSlot* s = slot_get(a, d->ret.storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad sret slot"); - i32 off = -(i32)s->off; - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(RV_A0, RV_S0, off)); - } else { - emit_load_imm(mc, 1, RV_A0, (i64)off); - emit32(mc, rv_add(RV_A0, RV_S0, RV_A0)); - } - next_int = 1; - } - - for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); - } - u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) a->max_outgoing = needed; - - if (d->callee.kind == OPK_GLOBAL) { - /* AUIPC ra, 0 ; JALR ra, ra, 0 with R_RV_CALL on AUIPC */ - u32 sec = mc->section_id; - u32 pos = mc->pos(mc); - emit32(mc, rv_auipc(RV_RA, 0)); - emit32(mc, rv_jalr(RV_RA, RV_RA, 0)); - mc->emit_reloc_at(mc, sec, pos, R_RV_CALL, - d->callee.v.global.sym, d->callee.v.global.addend, 0, 0); - } else if (d->callee.kind == OPK_REG) { - emit32(mc, rv_jalr(RV_RA, reg_num(d->callee), 0)); - } else { - compiler_panic(t->c, a->loc, "rv64 call: callee kind %d unsupported", - (int)d->callee.kind); - } - - /* Receive return value. */ - const ABIArgInfo* ri = &d->abi->ret; - if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return; - if (ri->nparts == 0) return; - - Operand rs = d->ret.storage; - u32 nir = 0, nfr = 0; - for (u16 i = 0; i < ri->nparts; ++i) { - const ABIArgPart* p = &ri->parts[i]; - u32 src_reg = (p->cls == ABI_CLASS_INT) ? (RV_A0 + nir++) : (10u + nfr++); - - if (rs.kind == OPK_REG) { - if (ri->nparts != 1) { - compiler_panic(t->c, a->loc, "rv64 call: REG ret with %u parts", - (unsigned)ri->nparts); - } - if (p->cls == ABI_CLASS_INT) { - emit32(mc, rv_addi(reg_num(rs), src_reg, 0)); - } else { - u32 fmt = (p->size == 8) ? 
RV_FMT_D : RV_FMT_S; - emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg)); - } - } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { - u32 base_reg; - i32 base_off; - if (rs.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot"); - base_reg = RV_S0; - base_off = -(i32)s->off; - } else { - base_reg = rs.v.ind.base & 0x1fu; - base_off = rs.v.ind.ofs; - } - i32 off = base_off + (i32)p->src_offset; - if (p->cls == ABI_CLASS_INT) { - emit32(mc, enc_int_store(p->size, src_reg, base_reg, off)); - } else { - if (p->size == 8) emit32(mc, rv_fsd(src_reg, base_reg, off)); - else emit32(mc, rv_fsw(src_reg, base_reg, off)); - } - } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { - /* void return placeholder — nothing to do. */ - } else { - compiler_panic(t->c, a->loc, "rv64 call: ret_storage kind %d unsupported", - (int)rs.kind); - } - } -} - -static void rv_ret(CGTarget* t, const CGABIValue* val) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - if (val) { - const ABIArgInfo* ri = val->abi; - if (ri && ri->kind == ABI_ARG_INDIRECT) { - /* sret: reload destination pointer from sret_ptr_slot into t0, - * then memcpy from val->storage into [t0]. */ - u32 src_base; - i32 src_base_off; - u32 nbytes; - if (val->storage.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot"); - src_base = RV_S0; - src_base_off = -(i32)s->off; - nbytes = s->size; - } else if (val->storage.kind == OPK_INDIRECT) { - src_base = val->storage.v.ind.base & 0x1fu; - src_base_off = val->storage.v.ind.ofs; - nbytes = val->size; - if (!nbytes) { - compiler_panic(t->c, a->loc, - "rv64 ret indirect: missing aggregate size"); - } - } else { - compiler_panic(t->c, a->loc, - "rv64 ret indirect: storage kind %d NYI", - (int)val->storage.kind); - } - RvSlot* sp = (a->sret_ptr_slot != FRAME_SLOT_NONE) - ? 
slot_get(a, a->sret_ptr_slot) - : NULL; - if (sp) emit32(mc, rv_ld(RV_T0, RV_S0, -(i32)sp->off)); - u32 i = 0; - while (i + 8 <= nbytes) { - emit32(mc, rv_ld(RV_T1, src_base, src_base_off + (i32)i)); - emit32(mc, rv_sd(RV_T1, RV_T0, (i32)i)); - i += 8; - } - while (i + 4 <= nbytes) { - emit32(mc, rv_lwu(RV_T1, src_base, src_base_off + (i32)i)); - emit32(mc, rv_sw(RV_T1, RV_T0, (i32)i)); - i += 4; - } - while (i + 2 <= nbytes) { - emit32(mc, rv_lhu(RV_T1, src_base, src_base_off + (i32)i)); - emit32(mc, rv_sh(RV_T1, RV_T0, (i32)i)); - i += 2; - } - while (i < nbytes) { - emit32(mc, rv_lbu(RV_T1, src_base, src_base_off + (i32)i)); - emit32(mc, rv_sb(RV_T1, RV_T0, (i32)i)); - i += 1; - } - } else if (val->storage.kind == OPK_REG) { - if (val->storage.cls == RC_FP) { - u32 fmt = type_is_fp_double(val->storage.type) ? RV_FMT_D : RV_FMT_S; - u32 r = reg_num(val->storage); - emit32(mc, rv_fsgnj(fmt, 10u, r, r)); /* fa0 = freg 10 */ - } else { - emit32(mc, rv_addi(RV_A0, reg_num(val->storage), 0)); - } - } else if (val->storage.kind == OPK_IMM) { - u32 sf = type_is_64(val->storage.type) ? 1u : 0u; - emit_load_imm(mc, sf, RV_A0, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL || - val->storage.kind == OPK_INDIRECT) { - u32 base_reg; - i32 base_off; - if (val->storage.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot"); - base_reg = RV_S0; - base_off = -(i32)s->off; - } else { - base_reg = val->storage.v.ind.base & 0x1fu; - base_off = val->storage.v.ind.ofs; - } - const ABIArgInfo* ri2 = val->abi; - u32 nir = 0, nfr = 0; - for (u16 i = 0; i < (ri2 ? 
ri2->nparts : 0); ++i) { - const ABIArgPart* pt = &ri2->parts[i]; - i32 off = base_off + (i32)pt->src_offset; - if (pt->cls == ABI_CLASS_INT) { - emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off)); - } else if (pt->cls == ABI_CLASS_FP) { - u32 freg = 10u + nfr++; - if (pt->size == 8) emit32(mc, rv_fld(freg, base_reg, off)); - else emit32(mc, rv_flw(freg, base_reg, off)); - } else { - compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl", - (int)pt->cls); - } - } - } - } - /* Jump to epilogue. */ - emit32(mc, rv_jal(RV_ZERO, 0)); - mc->emit_label_ref(mc, a->epilogue_label, R_RV_JAL, 4, 0); -} - -/* ---- panic stubs for features we don't yet cover ---- */ - -static void rv_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - if (d.kind != OPK_REG) { - compiler_panic(t->c, a->loc, "rv64 alloca: dst must be REG"); - } - if (align > 16) { - compiler_panic(t->c, a->loc, - "rv64 alloca: align %u > 16 not yet supported", align); - } - if (sz.kind == OPK_IMM) { - i64 v = sz.v.imm; - if (v < 0) compiler_panic(t->c, a->loc, "rv64 alloca: negative size"); - u64 aligned = ((u64)v + 15u) & ~(u64)15u; - if (aligned == 0) aligned = 16; - if (aligned > 2047u) { - compiler_panic(t->c, a->loc, - "rv64 alloca: const size %llu too large for v1", - (unsigned long long)aligned); - } - emit32(mc, rv_addi(RV_SP, RV_SP, -(i32)aligned)); - } else if (sz.kind == OPK_REG) { - u32 sz_reg = reg_num(sz); - /* t0 = (sz + 15) & ~15; sp -= t0 */ - emit32(mc, rv_addi(RV_T0, sz_reg, 15)); - emit32(mc, rv_andi(RV_T0, RV_T0, -16)); - emit32(mc, rv_sub(RV_SP, RV_SP, RV_T0)); - } else { - compiler_panic(t->c, a->loc, "rv64 alloca: size kind %d unsupported", - (int)sz.kind); - } - - /* Placeholder: addi dst, sp, max_outgoing (imm patched at func_end). */ - if (a->nadd_patches == a->add_patches_cap) { - u32 ncap = a->add_patches_cap ? 
a->add_patches_cap * 2 : 4; - struct RvAllocaPatch* nb = arena_array(t->c->tu, struct RvAllocaPatch, ncap); - if (a->add_patches) - memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches); - a->add_patches = nb; - a->add_patches_cap = ncap; - } - u32 dst_reg = reg_num(d); - a->add_patches[a->nadd_patches].pos = mc->pos(mc); - a->add_patches[a->nadd_patches].dst_reg = dst_reg; - a->nadd_patches++; - emit32(mc, rv_addi(dst_reg, RV_SP, 0)); - a->has_alloca = 1; -} -/* RV64 LP64D va_list: a single `void*` pointing at the next argument - * slot. The prologue spills a_{nparams_int}..a7 into the save area at - * [s0 + 16]. The save area lives at the top of the callee frame, - * immediately above the saved-s0/ra pair, so save_area[8] coincides - * with the caller's first stack arg — a single 8-byte stride covers - * register and stack args alike. */ -static void rv_va_start_(CGTarget* t, Operand ap_op) { - RImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - if (!a->is_variadic) { - compiler_panic(t->c, a->loc, "rv64 va_start: function not variadic"); - } - u32 ap = reg_num(ap_op); - /* *ap = s0 + 16 + next_param_int*8 (skip past named-int slots). */ - i32 off = 16 + (i32)(a->next_param_int * 8u); - emit32(mc, rv_addi(RV_T0, RV_S0, off)); - emit32(mc, rv_sd(RV_T0, ap, 0)); -} - -static void rv_va_arg_(CGTarget* t, Operand dst, Operand ap_op, - const Type* ty) { - MCEmitter* mc = t->mc; - u32 ap = reg_num(ap_op); - u32 sz = type_byte_size(ty); - /* t1 = *ap; load value; *ap = t1 + 8 (rounded up). - * On RV64 LP64D every var arg occupies an 8-byte slot. */ - emit32(mc, rv_ld(RV_T1, ap, 0)); - if (dst.cls == RC_FP) { - /* For variadic FP args on RV64 LP64D, the value sits in the integer - * save area at the same bit pattern as a double bit-cast. Load and - * bitcast. 
*/ - if (sz == 8) { - emit32(mc, rv_ld(RV_T2, RV_T1, 0)); - emit32(mc, rv_fmv_d_x(reg_num(dst), RV_T2)); - } else { - emit32(mc, rv_lw(RV_T2, RV_T1, 0)); - emit32(mc, rv_fmv_w_x(reg_num(dst), RV_T2)); - } - } else { - int sx = type_is_signed(ty); - emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T1, 0)); - } - /* advance ap by 8 bytes. */ - emit32(mc, rv_addi(RV_T1, RV_T1, 8)); - emit32(mc, rv_sd(RV_T1, ap, 0)); -} - -static void rv_va_end_(CGTarget* t, Operand a) { - (void)t; (void)a; -} - -static void rv_va_copy_(CGTarget* t, Operand d, Operand s) { - MCEmitter* mc = t->mc; - u32 dr = reg_num(d); - u32 sr = reg_num(s); - /* va_list is a single pointer (8 bytes). */ - emit32(mc, rv_ld(RV_T0, sr, 0)); - emit32(mc, rv_sd(RV_T0, dr, 0)); -} - -/* ---- atomics (LL/SC + AMO) ---- */ - -static int mem_order_is_acquire(MemOrder o) { - return o == MO_ACQUIRE || o == MO_ACQ_REL || o == MO_SEQ_CST || o == MO_CONSUME; -} -static int mem_order_is_release(MemOrder o) { - return o == MO_RELEASE || o == MO_ACQ_REL || o == MO_SEQ_CST; -} - -static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, - MemOrder o) { - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 1u : 0u; - /* Resolve address to a register. */ - u32 base; - if (addr.kind == OPK_REG) { - base = reg_num(addr); - } else if (addr.kind == OPK_LOCAL) { - i32 off; - base = addr_base(t, addr, &off, RV_T0); - if (off) { - emit32(mc, rv_addi(RV_T0, base, off)); - base = RV_T0; - } - } else { - compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_load: addr kind %d NYI", - (int)addr.kind); - } - if (mem_order_is_acquire(o)) { - /* lr.w/d as ordered load (aq=1, rl=0). */ - emit32(mc, sf ? rv_lr_d(reg_num(dst), base, 1, 0) - : rv_lr_w(reg_num(dst), base, 1, 0)); - } else { - emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0)); - } -} - -static void rv_atomic_store(CGTarget* t, Operand addr, Operand src, - MemAccess ma, MemOrder o) { - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 
1u : 0u; - u32 src_reg; - if (src.kind == OPK_IMM) { - emit_load_imm(mc, sf, RV_T1, src.v.imm); - src_reg = RV_T1; - } else if (src.kind == OPK_REG) { - src_reg = reg_num(src); - } else { - compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: src kind %d NYI", - (int)src.kind); - } - u32 base; - if (addr.kind == OPK_REG) { - base = reg_num(addr); - } else if (addr.kind == OPK_LOCAL) { - i32 off; - base = addr_base(t, addr, &off, RV_T0); - if (off) { emit32(mc, rv_addi(RV_T0, base, off)); base = RV_T0; } - } else { - compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI", - (int)addr.kind); - } - if (mem_order_is_release(o)) { - /* fence rw,w; sw/sd src, 0(base). Conservative for SEQ_CST. */ - emit32(mc, rv_fence_rw_rw()); - emit32(mc, enc_int_store(ma.size, src_reg, base, 0)); - if (o == MO_SEQ_CST) emit32(mc, rv_fence_rw_rw()); - } else { - emit32(mc, enc_int_store(ma.size, src_reg, base, 0)); - } -} - -static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, - Operand val, MemAccess ma, MemOrder o) { - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 1u : 0u; - u32 base = RV_T0; - if (addr.kind == OPK_REG) { - emit32(mc, rv_addi(base, reg_num(addr), 0)); - } else if (addr.kind == OPK_LOCAL) { - i32 off; - u32 b = addr_base(t, addr, &off, RV_T0); - if (b != RV_T0 || off) { - emit32(mc, rv_addi(base, b, off)); - } - } else { - compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI"); - } - u32 vreg = RV_T1; - if (val.kind == OPK_IMM) emit_load_imm(mc, sf, vreg, val.v.imm); - else if (val.kind == OPK_REG) emit32(mc, rv_addi(vreg, reg_num(val), 0)); - else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: val kind NYI"); - - int aq = mem_order_is_acquire(o); - int rl = mem_order_is_release(o); - - /* LR/SC loop for any op (simpler than per-op AMO encodings, but AMO is - * preferred for the cases the corpus exercises). 
*/ - MCLabel L_retry = mc->label_new(mc); - mc->label_place(mc, L_retry); - emit32(mc, sf ? rv_lr_d(reg_num(dst), base, (u32)aq, 0) - : rv_lr_w(reg_num(dst), base, (u32)aq, 0)); - u32 new_r = RV_T2; - switch (op) { - case AO_XCHG: emit32(mc, rv_addi(new_r, vreg, 0)); break; - case AO_ADD: emit32(mc, sf ? rv_add(new_r, reg_num(dst), vreg) : rv_addw(new_r, reg_num(dst), vreg)); break; - case AO_SUB: emit32(mc, sf ? rv_sub(new_r, reg_num(dst), vreg) : rv_subw(new_r, reg_num(dst), vreg)); break; - case AO_AND: emit32(mc, rv_and(new_r, reg_num(dst), vreg)); break; - case AO_OR: emit32(mc, rv_or(new_r, reg_num(dst), vreg)); break; - case AO_XOR: emit32(mc, rv_xor(new_r, reg_num(dst), vreg)); break; - case AO_NAND: - emit32(mc, rv_and(new_r, reg_num(dst), vreg)); - emit32(mc, rv_xori(new_r, new_r, -1)); - break; - default: emit32(mc, rv_addi(new_r, vreg, 0)); break; - } - /* sc.w/d t3, new_r, (base); bnez t3, retry. */ - emit32(mc, sf ? rv_sc_d(RV_T3, base, new_r, 0, (u32)rl) - : rv_sc_w(RV_T3, base, new_r, 0, (u32)rl)); - emit32(mc, rv_bne(RV_T3, RV_ZERO, 0)); - mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0); -} - -static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, - Operand exp, Operand des, MemAccess ma, - MemOrder succ, MemOrder fail) { - MCEmitter* mc = t->mc; - u32 sf = (ma.size == 8) ? 
1u : 0u; - (void)fail; - u32 base = RV_T0; - if (addr.kind == OPK_REG) emit32(mc, rv_addi(base, reg_num(addr), 0)); - else if (addr.kind == OPK_LOCAL) { - i32 off; u32 b = addr_base(t, addr, &off, RV_T0); - if (b != RV_T0 || off) emit32(mc, rv_addi(base, b, off)); - } else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI"); - u32 ereg = RV_T1, dreg = RV_T2; - if (exp.kind == OPK_IMM) emit_load_imm(mc, sf, ereg, exp.v.imm); - else emit32(mc, rv_addi(ereg, reg_num(exp), 0)); - if (des.kind == OPK_IMM) emit_load_imm(mc, sf, dreg, des.v.imm); - else emit32(mc, rv_addi(dreg, reg_num(des), 0)); - - int aq = mem_order_is_acquire(succ); - int rl = mem_order_is_release(succ); - - MCLabel L_retry = mc->label_new(mc); - MCLabel L_fail = mc->label_new(mc); - MCLabel L_done = mc->label_new(mc); - - mc->label_place(mc, L_retry); - emit32(mc, sf ? rv_lr_d(reg_num(prior), base, (u32)aq, 0) - : rv_lr_w(reg_num(prior), base, (u32)aq, 0)); - /* if (prior != expected) -> fail */ - emit32(mc, rv_bne(reg_num(prior), ereg, 0)); - mc->emit_label_ref(mc, L_fail, R_RV_BRANCH, 4, 0); - /* sc.w/d t3, des, (base); bnez t3, retry */ - emit32(mc, sf ? rv_sc_d(RV_T3, base, dreg, 0, (u32)rl) - : rv_sc_w(RV_T3, base, dreg, 0, (u32)rl)); - emit32(mc, rv_bne(RV_T3, RV_ZERO, 0)); - mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0); - /* ok = 1; jump done */ - emit_load_imm(mc, 0, reg_num(ok), 1); - emit32(mc, rv_jal(RV_ZERO, 0)); - mc->emit_label_ref(mc, L_done, R_RV_JAL, 4, 0); - - mc->label_place(mc, L_fail); - emit_load_imm(mc, 0, reg_num(ok), 0); - - mc->label_place(mc, L_done); -} - -static void rv_fence(CGTarget* t, MemOrder o) { - if (o == MO_RELAXED) return; - emit32(t->mc, rv_fence_rw_rw()); -} - -/* ---- intrinsics: do what we can; panic on the rest. 
---- */ -static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, - const Operand* args, u32 na) { - (void)nd; (void)na; - MCEmitter* mc = t->mc; - RImpl* a = impl_of(t); - switch (kind) { - case INTRIN_ASSUME_ALIGNED: - case INTRIN_EXPECT: { - /* dst = val (hint dropped). */ - Operand val = args[0]; - Operand dst = dsts[0]; - u32 sf = type_is_64(dst.type) ? 1u : 0u; - if (val.kind == OPK_REG) { - if (reg_num(val) != reg_num(dst)) - emit32(mc, rv_addi(reg_num(dst), reg_num(val), 0)); - } else if (val.kind == OPK_IMM) { - emit_load_imm(mc, sf, reg_num(dst), val.v.imm); - } else { - compiler_panic(t->c, a->loc, "rv64 intrinsic: val kind %d NYI", - (int)val.kind); - } - return; - } - case INTRIN_PREFETCH: return; - case INTRIN_UNREACHABLE: - case INTRIN_TRAP: - emit32(mc, rv_ebreak()); - return; - case INTRIN_BSWAP16: { - /* rd = ((rs & 0xff) << 8) | ((rs >> 8) & 0xff) */ - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - emit32(mc, rv_slli(RV_T1, rs, 8)); /* t1 = rs << 8 */ - emit32(mc, rv_andi(RV_T1, RV_T1, 0)); /* placeholder */ - /* Use lui mask approach for portability: build mask 0xff00 in t2. */ - emit32(mc, rv_addi(RV_T2, RV_ZERO, 0)); - /* Simpler: 0xff00 fits in lui+addi pattern but is also small enough: - * we can build via shift: t2 = 0xff << 8 = (0xff << 8). */ - emit32(mc, rv_addi(RV_T2, RV_ZERO, 0xff)); - emit32(mc, rv_slli(RV_T2, RV_T2, 8)); - /* t1 = (rs << 8) & 0xff00 */ - emit32(mc, rv_slli(RV_T1, rs, 8)); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T2)); - /* t3 = (rs >> 8) & 0xff (use srli on RV64 — high bits zeroed by - * preceding ANDI mask if input is uint16, but be safe and mask). */ - emit32(mc, rv_srli(RV_T3, rs, 8)); - emit32(mc, rv_andi(RV_T3, RV_T3, 0xff)); - emit32(mc, rv_or(rd, RV_T1, RV_T3)); - return; - } - case INTRIN_BSWAP32: { - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - /* result = (b0<<24)|(b1<<16)|(b2<<8)|b3, where bi = (rs >> (8*i)) & 0xff. 
*/ - /* t1 = ((rs >> 24) & 0xff) */ - emit32(mc, rv_srliw(RV_T1, rs, 24)); - emit32(mc, rv_andi(RV_T1, RV_T1, 0xff)); - /* t2 = ((rs >> 16) & 0xff) << 8 */ - emit32(mc, rv_srliw(RV_T2, rs, 16)); - emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); - emit32(mc, rv_slli(RV_T2, RV_T2, 8)); - emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); - /* t2 = ((rs >> 8) & 0xff) << 16 */ - emit32(mc, rv_srliw(RV_T2, rs, 8)); - emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); - emit32(mc, rv_slli(RV_T2, RV_T2, 16)); - emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); - /* t2 = (rs & 0xff) << 24 */ - emit32(mc, rv_andi(RV_T2, rs, 0xff)); - emit32(mc, rv_slli(RV_T2, RV_T2, 24)); - emit32(mc, rv_or(rd, RV_T1, RV_T2)); - /* zero-extend to 32 bits if dest is u32 */ - emit32(mc, rv_slli(rd, rd, 32)); - emit32(mc, rv_srli(rd, rd, 32)); - return; - } - case INTRIN_BSWAP64: { - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - /* General bswap64: iterate over the 8 bytes. */ - /* t1 accumulator */ - emit32(mc, rv_addi(RV_T1, RV_ZERO, 0)); - for (int i = 0; i < 8; ++i) { - /* t2 = (rs >> (8*i)) & 0xff */ - if (i == 0) { - emit32(mc, rv_andi(RV_T2, rs, 0xff)); - } else { - emit32(mc, rv_srli(RV_T2, rs, (u32)(8 * i))); - emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); - } - /* t2 <<= (56 - 8*i) (so byte 0 goes to top) */ - int sh = 56 - 8 * i; - if (sh) emit32(mc, rv_slli(RV_T2, RV_T2, (u32)sh)); - emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); - } - emit32(mc, rv_addi(rd, RV_T1, 0)); - return; - } - case INTRIN_POPCOUNT: { - /* Software popcount. Use the bit-twiddling sequence on the - * appropriate width. dst type drives width. */ - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - int is64 = type_is_64(args[0].type); - /* Move rs into t1 to avoid clobbering input. */ - emit32(mc, rv_addi(RV_T1, rs, 0)); - if (!is64) { - /* zext.w t1, t1 */ - emit32(mc, rv_slli(RV_T1, RV_T1, 32)); - emit32(mc, rv_srli(RV_T1, RV_T1, 32)); - } - /* t1 = t1 - ((t1 >> 1) & 0x5555...) 
*/ - emit32(mc, rv_srli(RV_T2, RV_T1, 1)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll - : (i64)0x55555555); - emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); - emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); - /* t1 = (t1 & 0x3333...) + ((t1 >> 2) & 0x3333...) */ - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll - : (i64)0x33333333); - emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); - emit32(mc, rv_srli(RV_T1, RV_T1, 2)); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - /* t1 = (t1 + (t1 >> 4)) & 0x0f0f... */ - emit32(mc, rv_srli(RV_T2, RV_T1, 4)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll - : (i64)0x0f0f0f0f); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - /* t1 *= 0x0101010101... ; result in top byte */ - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll - : (i64)0x01010101); - emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); - /* shift right by (XLEN - 8) */ - emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); - return; - } - case INTRIN_CTZ: { - /* ctz(x) = popcount((x & -x) - 1) for x != 0. */ - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - int is64 = type_is_64(args[0].type); - /* t1 = -x */ - emit32(mc, rv_sub(RV_T1, RV_ZERO, rs)); - /* t1 = x & -x */ - emit32(mc, rv_and(RV_T1, RV_T1, rs)); - /* t1 = t1 - 1 */ - emit32(mc, rv_addi(RV_T1, RV_T1, -1)); - if (!is64) { - emit32(mc, rv_slli(RV_T1, RV_T1, 32)); - emit32(mc, rv_srli(RV_T1, RV_T1, 32)); - } - /* popcount(t1) into rd */ - emit32(mc, rv_srli(RV_T2, RV_T1, 1)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll - : (i64)0x55555555); - emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); - emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); - emit_load_imm(mc, 1, RV_T3, is64 ? 
(i64)0x3333333333333333ll - : (i64)0x33333333); - emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); - emit32(mc, rv_srli(RV_T1, RV_T1, 2)); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - emit32(mc, rv_srli(RV_T2, RV_T1, 4)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll - : (i64)0x0f0f0f0f); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll - : (i64)0x01010101); - emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); - emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); - return; - } - case INTRIN_CLZ: { - /* Software clz: fold the high bit downward, then popcount the - * inverted result. Standard recipe: - * x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; - * [x |= x>>32;] // 64-bit - * clz = popcount(~x) [for the appropriate width]. - */ - u32 rd = reg_num(dsts[0]); - u32 rs = reg_num(args[0]); - int is64 = type_is_64(args[0].type); - emit32(mc, rv_addi(RV_T1, rs, 0)); - if (!is64) { - /* zero-ext to 32 to make srli safe */ - emit32(mc, rv_slli(RV_T1, RV_T1, 32)); - emit32(mc, rv_srli(RV_T1, RV_T1, 32)); - } - u32 shifts[6] = {1, 2, 4, 8, 16, 32}; - u32 ns = is64 ? 6u : 5u; - for (u32 i = 0; i < ns; ++i) { - emit32(mc, rv_srli(RV_T2, RV_T1, shifts[i])); - emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); - } - /* t1 = ~t1, then popcount and we want the (width - popcount) ... wait. - * Actually clz(x) for the folded x = popcount(~x). Let me verify. - * If x = 0b00011010, fold => 0b00011111. ~ => 0b11100000. - * popcount(~folded) = 3 = clz(0b00011010) ✓. */ - emit32(mc, rv_xori(RV_T1, RV_T1, -1)); - if (!is64) { - emit32(mc, rv_slli(RV_T1, RV_T1, 32)); - emit32(mc, rv_srli(RV_T1, RV_T1, 32)); - } - /* popcount(t1) into rd */ - emit32(mc, rv_srli(RV_T2, RV_T1, 1)); - emit_load_imm(mc, 1, RV_T3, is64 ? 
(i64)0x5555555555555555ll - : (i64)0x55555555); - emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); - emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll - : (i64)0x33333333); - emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); - emit32(mc, rv_srli(RV_T1, RV_T1, 2)); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - emit32(mc, rv_srli(RV_T2, RV_T1, 4)); - emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll - : (i64)0x0f0f0f0f); - emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); - emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll - : (i64)0x01010101); - emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); - emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); - return; - } - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { - /* dsts: [val, ovf]. Signed overflow check. - * For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1) - * For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - int is64 = type_is_64(dval.type); - u32 ra = force_reg_int(t, a_op, RV_T0); - u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); - u32 rd = reg_num(dval); - u32 rovf = reg_num(dovf); - /* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */ - if (kind == INTRIN_ADD_OVERFLOW) { - emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb)); - } else { - emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb)); - } - /* t3 = a XOR t2 */ - emit32(mc, rv_xor(RV_T3, ra, RV_T2)); - if (kind == INTRIN_ADD_OVERFLOW) { - /* t4 = b XOR t2 */ - emit32(mc, rv_xor(rovf, rb, RV_T2)); - emit32(mc, rv_and(rovf, rovf, RV_T3)); - } else { - /* t4 = a XOR b */ - emit32(mc, rv_xor(rovf, ra, rb)); - emit32(mc, rv_and(rovf, rovf, RV_T3)); - } - /* shift right to extract sign bit */ - u32 sh = is64 ? 63u : 31u; - emit32(mc, is64 ? 
rv_srli(rovf, rovf, sh) : rv_srliw(rovf, rovf, sh)); - emit32(mc, rv_andi(rovf, rovf, 1)); - /* Now write the value. */ - emit32(mc, rv_addi(rd, RV_T2, 0)); - return; - } - case INTRIN_MUL_OVERFLOW: { - /* SMULL: full 64-bit signed product of two i32s, then compare - * with sign-extend of low 32. For i64 inputs we panic for now. */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - int is64 = type_is_64(dval.type); - if (is64) { - compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI"); - } - u32 ra = force_reg_int(t, a_op, RV_T0); - u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); - u32 rd = reg_num(dval); - u32 rovf = reg_num(dovf); - /* Sign-extend inputs from 32 to 64. */ - emit32(mc, rv_addiw(RV_T2, ra, 0)); - emit32(mc, rv_addiw(RV_T3, rb, 0)); - /* Full 64-bit product */ - emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3)); - /* sign-ext of low 32 of product */ - emit32(mc, rv_addiw(RV_T3, RV_T2, 0)); - /* ovf = (T2 != T3) */ - emit32(mc, rv_xor(rovf, RV_T2, RV_T3)); - emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); - /* dval = low 32, sign-extended */ - emit32(mc, rv_addiw(rd, RV_T2, 0)); - return; - } - case INTRIN_MEMCPY: - case INTRIN_MEMMOVE: { - Operand da = args[0], sa = args[1], nb = args[2]; - if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic(t->c, a->loc, - "rv64 intrinsic: memcpy/memmove non-const NYI"); - } - u32 dr = reg_num(da), sr = reg_num(sa), n = (u32)nb.v.imm; - if (kind == INTRIN_MEMCPY) { - u32 i = 0; - while (i + 8 <= n) { emit32(mc, rv_ld(RV_T3, sr, (i32)i)); emit32(mc, rv_sd(RV_T3, dr, (i32)i)); i += 8; } - while (i + 4 <= n) { emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); emit32(mc, rv_sw(RV_T3, dr, (i32)i)); i += 4; } - while (i + 2 <= n) { emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); emit32(mc, rv_sh(RV_T3, dr, (i32)i)); i += 2; } - while (i < n) { emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); emit32(mc, rv_sb(RV_T3, dr, (i32)i)); i += 1; } - } else { - u32 i = n; - 
while (i >= 8) { i -= 8; emit32(mc, rv_ld(RV_T3, sr, (i32)i)); emit32(mc, rv_sd(RV_T3, dr, (i32)i)); } - while (i >= 4) { i -= 4; emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); emit32(mc, rv_sw(RV_T3, dr, (i32)i)); } - while (i >= 2) { i -= 2; emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); emit32(mc, rv_sh(RV_T3, dr, (i32)i)); } - while (i >= 1) { i -= 1; emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); emit32(mc, rv_sb(RV_T3, dr, (i32)i)); } - } - return; - } - case INTRIN_MEMSET: { - Operand da = args[0], bv = args[1], nb = args[2]; - if (da.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic(t->c, a->loc, "rv64 intrinsic: memset non-const NYI"); - } - u32 dr = reg_num(da), n = (u32)nb.v.imm; - u32 src; - if (bv.kind == OPK_IMM) { - u32 byte = (u32)(bv.v.imm & 0xffu); - if (byte == 0) src = RV_ZERO; - else { - u64 b = byte; b |= b << 8; b |= b << 16; b |= b << 32; - emit_load_imm(mc, 1, RV_T3, (i64)b); - src = RV_T3; - } - } else { - compiler_panic(t->c, a->loc, "rv64 intrinsic: memset REG byte NYI"); - } - u32 i = 0; - while (i + 8 <= n) { emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; } - while (i + 4 <= n) { emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; } - while (i + 2 <= n) { emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; } - while (i < n) { emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; } - return; - } - default: - compiler_panic(t->c, a->loc, "rv64 intrinsic kind %d NYI", (int)kind); - } -} - -static void rv_asm_block(CGTarget* t, const char* tmpl, - const AsmConstraint* outs, u32 no, Operand* oo, - const AsmConstraint* ins, u32 ni, const Operand* io, - const Sym* clobs, u32 nc) { - (void)tmpl; (void)outs; (void)no; (void)oo; - (void)ins; (void)ni; (void)io; (void)clobs; (void)nc; - rv_panic(t, "asm_block"); -} - -static void rv_set_loc(CGTarget* t, SrcLoc l) { - ((RImpl*)t)->loc = l; - if (t->mc) t->mc->set_loc(t->mc, l); -} - -static void rv_finalize(CGTarget* t) { (void)t; } -static void rv_destroy(CGTarget* t) { (void)t; } - -static void cgt_cleanup(void* arg) { 
cgtarget_free((CGTarget*)arg); } - -CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { - RImpl* x = arena_new(c->tu, RImpl); - memset(x, 0, sizeof *x); - - CGTarget* t = &x->base; - t->c = c; - t->obj = o; - t->mc = m; - - t->func_begin = rv_func_begin; - t->func_end = rv_func_end; - - t->alloc_reg = rv_alloc_reg; - t->free_reg = rv_free_reg; - t->frame_slot = rv_frame_slot; - t->param = rv_param; - t->clobbers = rv_clobbers; - t->spill_reg = rv_spill_reg; - t->reload_reg = rv_reload_reg; - - t->label_new = rv_label_new; - t->label_place = rv_label_place; - t->jump = rv_jump; - t->cmp_branch = rv_cmp_branch; - - t->scope_begin = rv_scope_begin; - t->scope_else = rv_scope_else; - t->scope_end = rv_scope_end; - t->break_to = rv_break_to; - t->continue_to = rv_continue_to; - - t->load_imm = rv_load_imm; - t->load_const = rv_load_const; - t->copy = rv_copy; - t->load = rv_load; - t->store = rv_store; - t->addr_of = rv_addr_of; - t->tls_addr_of = rv_tls_addr_of; - t->copy_bytes = rv_copy_bytes; - t->set_bytes = rv_set_bytes; - t->bitfield_load = rv_bitfield_load; - t->bitfield_store = rv_bitfield_store; - - t->binop = rv_binop; - t->unop = rv_unop; - t->cmp = rv_cmp; - t->convert = rv_convert; - - t->call = rv_call; - t->ret = rv_ret; - - t->alloca_ = rv_alloca_; - t->va_start_ = rv_va_start_; - t->va_arg_ = rv_va_arg_; - t->va_end_ = rv_va_end_; - t->va_copy_ = rv_va_copy_; - - t->setjmp_ = NULL; - t->longjmp_ = NULL; - - t->atomic_load = rv_atomic_load; - t->atomic_store = rv_atomic_store; - t->atomic_rmw = rv_atomic_rmw; - t->atomic_cas = rv_atomic_cas; - t->fence = rv_fence; - - t->intrinsic = rv_intrinsic; - t->asm_block = rv_asm_block; - - t->set_loc = rv_set_loc; - t->finalize = rv_finalize; - t->destroy = rv_destroy; - - (void)type_is_signed; - compiler_defer(c, cgt_cleanup, t); - return t; -} diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c @@ -0,0 +1,394 @@ +/* src/arch/rv64/alloc.c — register pool, spill/reload, labels, control 
flow. */ + +#include "arch/rv64/internal.h" + +/* ---- regs / frame ---- */ + +Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { + RImpl* a = impl_of(t); + (void)ty; + if (cls == RC_INT) return regpool_alloc(&a->int_pool); + if (cls == RC_FP) return regpool_alloc(&a->fp_pool); + compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls); +} + +void rv_free_reg(CGTarget* t, Reg r, RegClass cls) { + RImpl* a = impl_of(t); + RegPool* p; + switch (cls) { + case RC_INT: p = &a->int_pool; break; + case RC_FP: p = &a->fp_pool; break; + default: + compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls); + } + int rc = regpool_free(p, r); + if (rc == 1) return; + if (rc == -1) { + compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool", + (unsigned)r, cls == RC_FP ? "fp" : "int"); + } + compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool", + (unsigned)r, cls == RC_FP ? "fp" : "int"); +} + +FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) { + RImpl* a = impl_of(t); + if (a->nslots == a->slots_cap) { + u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8; + RvSlot* nbuf = arena_array(t->c->tu, RvSlot, ncap); + if (a->slots) memcpy(nbuf, a->slots, sizeof(RvSlot) * a->nslots); + a->slots = nbuf; + a->slots_cap = ncap; + } + u32 size = d->size ? d->size : 8; + u32 align = d->align ? 
d->align : 1; + u32 next = a->cum_off + size; + u32 mask = align - 1; + next = (next + mask) & ~mask; + + RvSlot* s = &a->slots[a->nslots]; + s->off = next; + s->size = size; + s->align = align; + s->kind = d->kind; + + a->cum_off = next; + a->nslots++; + return (FrameSlot)(a->nslots); +} + +RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs) { + if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; + return &a->slots[fs - 1]; +} + +/* ---- param ---- */ + +void rv_param(CGTarget* t, const CGParamDesc* p) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + RvSlot* s = rv64_slot_get(a, p->slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 param: bad slot"); + const ABIArgInfo* ai = p->abi; + /* Caller's stack args start above the saved-s0/ra pair, plus the + * 64-byte variadic save area when this function is variadic. */ + i32 caller_stack_base = 16 + (a->is_variadic ? 64 : 0); + + if (ai->kind == ABI_ARG_IGNORE) return; + if (ai->kind == ABI_ARG_INDIRECT) { + /* Pointer-to-copy passed in a-register. Copy bytes from there into + * the home slot. Source pointer is in a0..a7. */ + u32 ptr_reg; + if (a->next_param_int < 8) { + ptr_reg = RV_A0 + a->next_param_int; + a->next_param_int++; + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + /* Incoming stack args live in the caller's outgoing-arg area, + * which is `frame_size - fp_pair_off` (= 16 + the saved-s0/ra + * pair) above s0 — same logic as aa64's `16 + caller_off`. 
*/ + rv64_emit32(mc, rv_ld(RV_T1, RV_S0, caller_stack_base + (i32)caller_off)); + ptr_reg = RV_T1; + } + u32 nbytes = s->size; + u32 i = 0; + while (i + 8 <= nbytes) { + rv64_emit32(mc, rv_ld(RV_T2, ptr_reg, (i32)i)); + rv64_emit32(mc, rv_sd(RV_T2, RV_S0, -(i32)s->off + (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + rv64_emit32(mc, rv_lwu(RV_T2, ptr_reg, (i32)i)); + rv64_emit32(mc, rv_sw(RV_T2, RV_S0, -(i32)s->off + (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + rv64_emit32(mc, rv_lhu(RV_T2, ptr_reg, (i32)i)); + rv64_emit32(mc, rv_sh(RV_T2, RV_S0, -(i32)s->off + (i32)i)); + i += 2; + } + while (i < nbytes) { + rv64_emit32(mc, rv_lbu(RV_T2, ptr_reg, (i32)i)); + rv64_emit32(mc, rv_sb(RV_T2, RV_S0, -(i32)s->off + (i32)i)); + i += 1; + } + return; + } + /* DIRECT */ + for (u16 i = 0; i < ai->nparts; ++i) { + const ABIArgPart* pt = &ai->parts[i]; + u32 part_off = pt->src_offset; + u32 sz = pt->size; + + if (pt->cls == ABI_CLASS_INT) { + if (a->next_param_int < 8) { + u32 reg = RV_A0 + a->next_param_int; + a->next_param_int++; + rv64_emit32(mc, enc_int_store(sz, reg, RV_S0, + -(i32)s->off + (i32)part_off)); + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + rv64_emit32(mc, enc_int_load(sz, 0, RV_T2, RV_S0, + caller_stack_base + (i32)caller_off)); + rv64_emit32(mc, enc_int_store(sz, RV_T2, RV_S0, + -(i32)s->off + (i32)part_off)); + } + } else if (pt->cls == ABI_CLASS_FP) { + if (a->next_param_fp < 8) { + u32 reg = a->next_param_fp; /* fa0..fa7 → freg 10..17 */ + u32 freg = 10u + reg; + a->next_param_fp++; + if (sz == 8) { + rv64_emit32(mc, rv_fsd(freg, RV_S0, -(i32)s->off + (i32)part_off)); + } else { + rv64_emit32(mc, rv_fsw(freg, RV_S0, -(i32)s->off + (i32)part_off)); + } + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + if (sz == 8) { + rv64_emit32(mc, rv_fld(0, RV_S0, caller_stack_base + (i32)caller_off)); + rv64_emit32(mc, rv_fsd(0, RV_S0, -(i32)s->off + (i32)part_off)); + } else { + 
rv64_emit32(mc, rv_flw(0, RV_S0, caller_stack_base + (i32)caller_off)); + rv64_emit32(mc, rv_fsw(0, RV_S0, -(i32)s->off + (i32)part_off)); + } + } + } else { + compiler_panic(t->c, a->loc, "rv64 param: ABI class %d unimpl", + (int)pt->cls); + } + } +} + +const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n) { + (void)c; + (void)n; + rv_panic(t, "clobbers"); +} + +void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, + MemAccess ma) { + RImpl* a = impl_of(t); + if (src.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "rv64 spill_reg: src is not OPK_REG"); + } + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + rv_store(t, addr, src, ma); + rv_free_reg(t, src.v.reg, src.cls); +} + +void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, + MemAccess ma) { + RImpl* a = impl_of(t); + if (dst.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "rv64 reload_reg: dst is not OPK_REG"); + } + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + rv_load(t, dst, addr, ma); +} + +/* ---- labels / control flow ---- */ + +Label rv_label_new(CGTarget* t) { + return (Label)t->mc->label_new(t->mc); +} +void rv_label_place(CGTarget* t, Label l) { + t->mc->label_place(t->mc, (MCLabel)l); +} +void rv_jump(CGTarget* t, Label l) { + MCEmitter* mc = t->mc; + rv64_emit32(mc, rv_jal(RV_ZERO, 0)); + mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0); +} + +/* Force an integer Operand into a register; materializes IMM via scratch. */ +u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch) { + if (op.kind == OPK_REG) return reg_num(op); + if (op.kind == OPK_IMM) { + u32 sf = type_is_64(op.type) ? 
1u : 0u; + rv64_emit_load_imm(t->mc, sf, scratch, op.v.imm); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, + "rv64: operand kind %d unsupported here", (int)op.kind); +} + +/* Emit a conditional branch (a OP b) → label. Uses BEQ/BNE/BLT/BGE etc. */ +void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op, + Label l) { + MCEmitter* mc = t->mc; + RImpl* a = impl_of(t); + /* For FP compares, fall through to materialize the result and CBNZ. */ + if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) { + compiler_panic(t->c, a->loc, "rv64 cmp_branch: FP cmp NYI"); + } + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + u32 word = 0; + switch (op) { + case CMP_EQ: word = rv_beq(ra, rb, 0); break; + case CMP_NE: word = rv_bne(ra, rb, 0); break; + case CMP_LT_S: word = rv_blt(ra, rb, 0); break; + case CMP_GE_S: word = rv_bge(ra, rb, 0); break; + case CMP_LT_U: word = rv_bltu(ra, rb, 0); break; + case CMP_GE_U: word = rv_bgeu(ra, rb, 0); break; + /* >= can become < with operands swapped: a > b ↔ b < a; + * a <= b ↔ b >= a. */ + case CMP_GT_S: word = rv_blt(rb, ra, 0); break; + case CMP_LE_S: word = rv_bge(rb, ra, 0); break; + case CMP_GT_U: word = rv_bltu(rb, ra, 0); break; + case CMP_LE_U: word = rv_bgeu(rb, ra, 0); break; + default: + compiler_panic(t->c, a->loc, "rv64 cmp_branch: op %d unimpl", (int)op); + } + rv64_emit32(mc, word); + mc->emit_label_ref(mc, (MCLabel)l, R_RV_BRANCH, 4, 0); +} + +/* Materialize 0/1 into dst from a comparison. */ +void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op, + Operand b_op) { + MCEmitter* mc = t->mc; + RImpl* a = impl_of(t); + u32 rd = reg_num(dst); + + if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) { + /* FP compare in fa,fb → rd. Use FLT/FLE/FEQ depending on op. 
*/ + int is_d = type_is_fp_double(a_op.type); + u32 fa = reg_num(a_op); + u32 fb = reg_num(b_op); + switch (op) { + case CMP_LT_F: rv64_emit32(mc, is_d ? rv_flt_d(rd, fa, fb) : rv_flt_s(rd, fa, fb)); return; + case CMP_LE_F: rv64_emit32(mc, is_d ? rv_fle_d(rd, fa, fb) : rv_fle_s(rd, fa, fb)); return; + case CMP_GT_F: rv64_emit32(mc, is_d ? rv_flt_d(rd, fb, fa) : rv_flt_s(rd, fb, fa)); return; + case CMP_GE_F: rv64_emit32(mc, is_d ? rv_fle_d(rd, fb, fa) : rv_fle_s(rd, fb, fa)); return; + default: break; + } + } + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + + switch (op) { + case CMP_EQ: + rv64_emit32(mc, rv_sub(rd, ra, rb)); + rv64_emit32(mc, rv_sltiu(rd, rd, 1)); + return; + case CMP_NE: + rv64_emit32(mc, rv_sub(rd, ra, rb)); + rv64_emit32(mc, rv_sltu(rd, RV_ZERO, rd)); + return; + case CMP_LT_S: rv64_emit32(mc, rv_slt(rd, ra, rb)); return; + case CMP_LT_U: rv64_emit32(mc, rv_sltu(rd, ra, rb)); return; + case CMP_GT_S: rv64_emit32(mc, rv_slt(rd, rb, ra)); return; + case CMP_GT_U: rv64_emit32(mc, rv_sltu(rd, rb, ra)); return; + case CMP_GE_S: + rv64_emit32(mc, rv_slt(rd, ra, rb)); + rv64_emit32(mc, rv_xori(rd, rd, 1)); + return; + case CMP_GE_U: + rv64_emit32(mc, rv_sltu(rd, ra, rb)); + rv64_emit32(mc, rv_xori(rd, rd, 1)); + return; + case CMP_LE_S: + rv64_emit32(mc, rv_slt(rd, rb, ra)); + rv64_emit32(mc, rv_xori(rd, rd, 1)); + return; + case CMP_LE_U: + rv64_emit32(mc, rv_sltu(rd, rb, ra)); + rv64_emit32(mc, rv_xori(rd, rd, 1)); + return; + default: + compiler_panic(t->c, a->loc, "rv64 cmp: op %d unimpl", (int)op); + } +} + +/* ---- structured scopes (SCOPE_IF + SCOPE_LOOP/BLOCK bookkeep) ---- */ + +CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d) { + RImpl* a = impl_of(t); + if (a->nscopes == a->scopes_cap) { + u32 ncap = a->scopes_cap ? 
a->scopes_cap * 2u : 4u; + RvScope* nb = arena_array(t->c->tu, RvScope, ncap); + if (a->scopes) memcpy(nb, a->scopes, sizeof(RvScope) * a->nscopes); + a->scopes = nb; + a->scopes_cap = ncap; + } + RvScope* sc = &a->scopes[a->nscopes]; + sc->kind = (u8)d->kind; + sc->has_else = 0; + sc->else_label = 0; + sc->end_label = 0; + sc->break_label = d->break_label; + sc->continue_label = d->continue_label; + + if (d->kind == SCOPE_IF) { + sc->else_label = t->mc->label_new(t->mc); + sc->end_label = t->mc->label_new(t->mc); + u32 rn = rv64_force_reg_int(t, d->cond, RV_T0); + /* beq rn, x0, else_label */ + rv64_emit32(t->mc, rv_beq(rn, RV_ZERO, 0)); + t->mc->emit_label_ref(t->mc, sc->else_label, R_RV_BRANCH, 4, 0); + } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { + /* bookkeep only */ + } else { + compiler_panic(t->c, a->loc, + "rv64 scope_begin: kind %d not yet implemented", + (int)d->kind); + } + a->nscopes++; + return (CGScope)a->nscopes; +} + +void rv_scope_else(CGTarget* t, CGScope s) { + RImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "rv64 scope_else: bad scope"); + } + RvScope* sc = &a->scopes[s - 1]; + /* jump end ; place else */ + rv64_emit32(t->mc, rv_jal(RV_ZERO, 0)); + t->mc->emit_label_ref(t->mc, sc->end_label, R_RV_JAL, 4, 0); + t->mc->label_place(t->mc, sc->else_label); + sc->has_else = 1; +} + +void rv_scope_end(CGTarget* t, CGScope s) { + RImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "rv64 scope_end: bad scope"); + } + RvScope* sc = &a->scopes[s - 1]; + if (sc->kind == SCOPE_IF) { + if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label); + t->mc->label_place(t->mc, sc->end_label); + } +} + +void rv_break_to(CGTarget* t, CGScope s) { + RImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "rv64 break_to: bad scope"); + } + rv_jump(t, a->scopes[s - 1].break_label); +} + +void 
rv_continue_to(CGTarget* t, CGScope s) { + RImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "rv64 continue_to: bad scope"); + } + rv_jump(t, a->scopes[s - 1].continue_label); +} diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c @@ -0,0 +1,332 @@ +/* src/arch/rv64/emit.c — immediate encoding, function lifecycle, frame setup. */ + +#include "arch/rv64/internal.h" + +void rv64_emit32(MCEmitter* mc, u32 word) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 b[4]; + b[0] = (u8)(word & 0xff); + b[1] = (u8)((word >> 8) & 0xff); + b[2] = (u8)((word >> 16) & 0xff); + b[3] = (u8)((word >> 24) & 0xff); + mc->emit_bytes(mc, b, 4); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void rv64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) { + u8 b[4]; + b[0] = (u8)(word & 0xff); + b[1] = (u8)((word >> 8) & 0xff); + b[2] = (u8)((word >> 16) & 0xff); + b[3] = (u8)((word >> 24) & 0xff); + obj_patch(obj, sec_id, ofs, b, 4); +} + +_Noreturn void rv_panic(CGTarget* t, const char* what) { + SrcLoc loc = impl_of(t)->loc; + compiler_panic(t->c, loc, "rv64: %s not implemented", what); +} + +int fits_signed32(i64 v) { return v >= (i64)(i32)0x80000000 && v <= (i64)(i32)0x7fffffff; } + +void emit_li_32(MCEmitter* mc, u32 rd, i32 imm) { + if (imm >= -2048 && imm <= 2047) { + rv64_emit32(mc, rv_addi(rd, RV_ZERO, imm)); + return; + } + /* hi20 + lo12, with 0x800 bias to compensate ADDIW's sign-ext. */ + i32 hi = (i32)((u32)(imm + 0x800) >> 12); + i32 lo = (i32)((i32)imm - (i32)(hi << 12)); + rv64_emit32(mc, rv_lui(rd, (u32)hi & 0xfffffu)); + if (lo) rv64_emit32(mc, rv_addiw(rd, rd, lo)); +} + +void rv64_emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm) { + if (!sf) { + /* 32-bit destination: low 32 bits, sign-extended. 
*/ + emit_li_32(mc, rd, (i32)imm); + return; + } + if (fits_signed32(imm)) { + emit_li_32(mc, rd, (i32)imm); + return; + } + /* General 64-bit load: split into high and low 32 bits, place high + * into rd << 32, then OR in low via a temp register (t0=x5). The cg + * corpus has no IMM operands that collide with t0, so this is safe. */ + i64 lo32 = (i64)(i32)(imm & 0xffffffffu); /* sign-ext low half */ + i64 hi64 = (imm - lo32) >> 32; /* what remains in hi */ + if (hi64 < (i64)(i32)0x80000000 || + hi64 > (i64)(i32)0x7fffffff) { + /* Out of i32 range — fallback: use a smaller chunked approach. + * For the cg corpus this isn't hit; emit a conservative sequence: + * li rd, hi32; slli 32; li t0, lo32; or rd, rd, t0. */ + i32 hi32 = (i32)(imm >> 32); + i32 lo32_i = (i32)imm; + emit_li_32(mc, rd, hi32); + rv64_emit32(mc, rv_slli(rd, rd, 32)); + emit_li_32(mc, RV_T0, lo32_i); + /* zero-extend t0 to clear sign-extension before OR */ + rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32)); + rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32)); + rv64_emit32(mc, rv_or(rd, rd, RV_T0)); + return; + } + emit_li_32(mc, rd, (i32)hi64); + rv64_emit32(mc, rv_slli(rd, rd, 32)); + if (lo32 != 0) { + emit_li_32(mc, RV_T0, (i32)lo32); + rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32)); + rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32)); + rv64_emit32(mc, rv_or(rd, rd, RV_T0)); + } +} + +/* sp += imm. imm can be any signed value the caller passes — we pick + * the shortest sequence. 
*/ +void emit_sp_addi(MCEmitter* mc, i64 imm) { + if (imm >= -2048 && imm <= 2047) { + rv64_emit32(mc, rv_addi(RV_SP, RV_SP, (i32)imm)); + return; + } + rv64_emit_load_imm(mc, 1, RV_T0, imm); + rv64_emit32(mc, rv_add(RV_SP, RV_SP, RV_T0)); +} + +/* ---- function lifecycle ---- */ + +void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + mc->set_section(mc, fd->text_section_id); + mc->emit_align(mc, 4, 0); + + a->fd = fd; + a->func_start = mc->pos(mc); + a->next_param_int = 0; + a->next_param_fp = 0; + a->next_param_stack = 0; + a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; + a->cum_off = 0; + a->max_outgoing = 0; + a->fp_pair_off = 0; + regpool_init(&a->int_pool, /*base=*/18u, /*nregs=*/10u); /* s2..s11 */ + regpool_init(&a->fp_pool, /*base=*/18u, /*nregs=*/10u); /* fs2..fs11 */ + a->nslots = 0; + a->nscopes = 0; + a->has_alloca = 0; + a->nadd_patches = 0; + a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; + a->gp_save_slot = FRAME_SLOT_NONE; + a->sret_ptr_slot = FRAME_SLOT_NONE; + a->epilogue_label = mc->label_new(mc); + + mc->cfi_startproc(mc); + + /* Reserve a NOP-filled prologue placeholder; func_end patches it. */ + a->prologue_pos = mc->pos(mc); + for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) rv64_emit32(mc, RV_NOP); + + /* For an sret return, the caller passed the destination pointer in + * a0; reserve a hidden slot to spill it into so the body can use a0 + * freely. The actual SD a0, ...(s0) is emitted in the patched + * prologue once the slot offset is known. */ + if (a->has_sret) { + FrameSlotDesc fsd = { + .type = NULL, + .name = 0, + .loc = (SrcLoc){0, 0, 0}, + .size = 8, + .align = 8, + .kind = FS_SPILL, + .flags = 0, + }; + a->sret_ptr_slot = rv_frame_slot(t, &fsd); + /* Consume a0 — it is no longer available for the first real param. 
*/ + a->next_param_int = 1; + } + + /* Variadic: a 64-byte GP save area for a0..a7 lives at the very top + * of the frame, immediately above the saved-s0/ra pair, so its bytes + * are contiguous with the caller's stack args. The patcher spills the + * unnamed a-regs into it as part of the prologue. The slot is implicit + * (not allocated through rv_frame_slot) — it sits at [s0 + 16] when + * is_variadic is set. */ +} + +void rv_func_end(CGTarget* t) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + ObjBuilder* obj = t->obj; + u32 sec = a->fd->text_section_id; + + u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */ + u32 n_fp_saves = a->fp_pool.hwm; + u32 max_out = (a->max_outgoing + 15u) & ~15u; + u32 int_saves_sz = n_int_saves * 8u; + u32 fp_saves_sz = n_fp_saves * 8u; + + /* Variadic functions reserve a 64-byte save area at the very top of + * the frame so the save area and caller's stack args form a single + * contiguous byte stream walked by the va_list pointer. */ + u32 va_save_sz = a->is_variadic ? 64u : 0u; + u32 locals_off = max_out + int_saves_sz + fp_saves_sz; /* from sp */ + u32 fp_pair_off = locals_off + a->cum_off; + u32 frame_size = fp_pair_off + 16u + va_save_sz; + frame_size = (frame_size + 15u) & ~15u; + fp_pair_off = frame_size - 16u - va_save_sz; + a->fp_pair_off = fp_pair_off; + + /* Place the epilogue label at current pos. */ + mc->label_place(mc, a->epilogue_label); + + /* Restore int and fp saves using s0-relative addressing so they + * don't depend on the final frame_size encoding (and survive + * alloca-induced sp shifts). */ + /* layout below s0: + * s0 - 8 .. s0 - 16 saved s0/ra ? No — those are at sp+fp_pair_off + * We arranged saved-s0/ra at [sp+fp_pair_off], not below s0. So + * immediately below s0 are: int saves, then fp saves, then locals. + * Wait — let me recompute. 
+ * + * sp + 0 outgoing args (max_out bytes) + * sp + max_out int saves + * sp + max_out + I fp saves + * sp + max_out+I+F locals (cum_off) + * sp + fp_pair_off saved s0_caller (8) + * sp + fp_pair_off+8 saved ra (8) + * sp + frame_size end + * + * s0 = sp + fp_pair_off (so [s0+0] = saved s0_caller). + * Locals at [s0 - off] where off in [1..cum_off]. + * FP saves at [s0 - cum_off - 8*i]. + * Int saves at [s0 - cum_off - F - 8*i]. */ + /* Save slots sit at the start of an 8-byte cell below the locals + * area. fp_save_base = offset of the first fp save (=-(L+8)); each + * subsequent save is 8 bytes lower. int saves start below the fp + * block. */ + i32 fp_save_base = -(i32)a->cum_off - 8; + i32 int_save_base = fp_save_base - (i32)fp_saves_sz; + + /* Reverse order: ints first (lowest address) on restore, but we emit + * the restore loop in reverse to keep the prologue/epilogue symmetric. */ + for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { + u32 r = 18u + (u32)i; /* s2 + i */ + i32 off = int_save_base - 8 * (i32)i; + rv64_emit32(mc, rv_ld(r, RV_S0, off)); + } + for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { + u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */ + i32 off = fp_save_base - 8 * (i32)i; + rv64_emit32(mc, rv_fld(r, RV_S0, off)); + } + /* Restore sp from s0 first so alloca-induced offsets don't matter. + * After this, sp == its post-prologue value. */ + if (a->has_alloca) { + if ((i32)fp_pair_off > 2047) { + compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large for alloca"); + } + rv64_emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fp_pair_off)); + } + rv64_emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fp_pair_off)); + rv64_emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fp_pair_off + 8)); + emit_sp_addi(mc, (i64)frame_size); + rv64_emit32(mc, rv_ret_()); + + /* Now patch the prologue placeholder. 
*/ + u32 pos = a->prologue_pos; + u32 words[RV_PROLOGUE_WORDS]; + for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) words[i] = RV_NOP; + u32 wi = 0; + + /* addi sp, sp, -frame_size (or 2-insn if too large) */ + if ((i64)frame_size <= 2048) { + words[wi++] = rv_addi(RV_SP, RV_SP, -(i32)frame_size); + } else { + /* li t0, -frame_size; add sp, sp, t0 */ + /* Use a small two-instruction expansion via LUI+ADDI if it fits 32-bit; + * otherwise we'd need a full load_imm but that's overkill for tests. */ + i64 neg = -(i64)frame_size; + if (fits_signed32(neg)) { + i32 hi = (i32)((u32)((i32)neg + 0x800) >> 12); + i32 lo = (i32)neg - (hi << 12); + words[wi++] = rv_lui(RV_T0, (u32)hi & 0xfffffu); + if (lo) words[wi++] = rv_addiw(RV_T0, RV_T0, lo); + words[wi++] = rv_add(RV_SP, RV_SP, RV_T0); + } else { + compiler_panic(t->c, a->loc, "rv64: frame_size too large to patch"); + } + } + /* sd s0, fp_pair_off(sp); sd ra, fp_pair_off+8(sp); addi s0, sp, fp_pair_off */ + if ((i32)fp_pair_off > 2047 || (i32)(fp_pair_off + 8) > 2047) { + compiler_panic(t->c, a->loc, "rv64: fp_pair_off out of imm12 range"); + } + words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fp_pair_off); + words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fp_pair_off + 8); + words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fp_pair_off); + + /* If sret, spill incoming a0 into the hidden slot. */ + if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { + RvSlot* s = rv64_slot_get(a, a->sret_ptr_slot); + if (s) { + if (wi >= RV_PROLOGUE_WORDS) goto overflow; + words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off); + } + } + /* Variadic: spill the still-unconsumed a-regs (a_{nparams_int}..a7) + * into the save area at [s0 + 16 + i*8]. The save area sits between + * the saved-s0/ra pair and the caller's stack args, so save_area[8] + * == caller's first stack arg. 
*/ + if (a->is_variadic) { + for (u32 i = a->next_param_int; i < 8; ++i) { + if (wi >= RV_PROLOGUE_WORDS) goto overflow; + words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8); + } + } + /* int saves */ + for (u32 i = 0; i < n_int_saves; ++i) { + u32 r = 18u + i; + i32 off = int_save_base - 8 * (i32)i; + if (wi >= RV_PROLOGUE_WORDS) goto overflow; + words[wi++] = rv_sd(r, RV_S0, off); + } + /* fp saves */ + for (u32 i = 0; i < n_fp_saves; ++i) { + u32 r = 18u + i; + i32 off = fp_save_base - 8 * (i32)i; + if (wi >= RV_PROLOGUE_WORDS) goto overflow; + words[wi++] = rv_fsd(r, RV_S0, off); + } + if (0) { + overflow: + compiler_panic(t->c, a->loc, + "rv64: prologue placeholder too small (used %u of %u)", wi, + RV_PROLOGUE_WORDS); + } + + for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) { + rv64_patch32(obj, sec, pos + i * 4u, words[i]); + } + + /* Patch alloca placeholders with max_outgoing. */ + if (max_out > 2047u) { + compiler_panic(t->c, a->loc, + "rv64: max_outgoing %u out of imm12 for alloca patch", + max_out); + } + for (u32 i = 0; i < a->nadd_patches; ++i) { + u32 dr = a->add_patches[i].dst_reg; + u32 word = rv_addi(dr, RV_SP, (i32)max_out); + rv64_patch32(obj, sec, a->add_patches[i].pos, word); + } + + /* Define the function symbol. */ + u32 end = mc->pos(mc); + obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start, + (u64)(end - a->func_start)); + + mc->cfi_endproc(mc); + a->fd = NULL; +} + diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -0,0 +1,222 @@ +/* src/arch/rv64/internal.h — private header shared by emit.c, alloc.c, ops.c. + * Do not include from outside src/arch/rv64/. 
*/ +#pragma once + +#include <string.h> + +#include "arch/arch.h" +#include "arch/rv64.h" +#include "arch/rv64_isa.h" +#include "core/arena.h" +#include "obj/obj.h" +#include "type/type.h" + +#define RV_PROLOGUE_WORDS 32u + +/* ---- RegPool ---- */ +typedef struct RegPool { + u32 free; + u32 hwm; + u8 base; + u8 nregs; + u8 pad[2]; +} RegPool; + +/* ---- RvSlot / RvScope ---- */ +typedef struct RvSlot { + u32 off; /* bytes below s0 (positive); address = s0 - off */ + u32 size; + u32 align; + u8 kind; + u8 pad[3]; +} RvSlot; + +typedef struct RvScope { + u8 kind; + u8 has_else; + u8 pad[2]; + MCLabel else_label; + MCLabel end_label; + Label break_label; + Label continue_label; +} RvScope; + +/* ---- RImpl ---- */ +typedef struct RImpl { + CGTarget base; + SrcLoc loc; + const CGFuncDesc* fd; + + u32 func_start; + u32 prologue_pos; + MCLabel epilogue_label; + + RvSlot* slots; + u32 nslots; + u32 slots_cap; + u32 cum_off; + u32 max_outgoing; + u32 fp_pair_off; + + u32 next_param_int; + u32 next_param_fp; + u32 next_param_stack; + u8 has_sret; + FrameSlot sret_ptr_slot; + + RegPool int_pool; + RegPool fp_pool; + + RvScope* scopes; + u32 nscopes; + u32 scopes_cap; + + u8 has_alloca; + struct RvAllocaPatch { + u32 pos; + u32 dst_reg; + }* add_patches; + u32 nadd_patches; + u32 add_patches_cap; + + u8 is_variadic; + FrameSlot gp_save_slot; +} RImpl; + +/* ---- impl_of ---- */ +static inline RImpl* impl_of(CGTarget* t) { return (RImpl*)t; } + +/* ---- type helpers ---- */ +static inline int type_is_64(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 1; + default: + return 0; + } +} +static inline int type_is_fp_double(const Type* t) { + return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); +} +static inline u32 type_byte_size(const Type* t) { + if (!t) return 4; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + case TY_BOOL: + 
return 1; + case TY_SHORT: + case TY_USHORT: + return 2; + case TY_INT: + case TY_UINT: + case TY_FLOAT: + return 4; + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 8; + default: + return 8; + } +} +static inline int type_is_signed(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_LLONG: + return 1; + default: + return 0; + } +} + +static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } + +/* ---- RegPool ops (inlined — identical in each caller) ---- */ +static inline void regpool_init(RegPool* p, u8 base, u8 nregs) { + p->base = base; + p->nregs = nregs; + p->hwm = 0; + p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u); +} +static inline Reg regpool_alloc(RegPool* p) { + if (p->free == 0) return (Reg)REG_NONE; + u32 idx = (u32)__builtin_ctz(p->free); + p->free &= ~(1u << idx); + if (idx + 1u > p->hwm) p->hwm = idx + 1u; + return (Reg)(p->base + idx); +} +static inline int regpool_free(RegPool* p, Reg r) { + u32 rn = (u32)r; + if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0; + u32 idx = rn - p->base; + u32 bit = 1u << idx; + if (p->free & bit) return -1; + p->free |= bit; + return 1; +} + +/* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */ +void rv_func_begin(CGTarget* t, const CGFuncDesc* fd); +void rv_func_end(CGTarget* t); + +/* ---- emit helpers (defined in emit.c, used cross-file) ---- */ +extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); + +void rv64_emit32(MCEmitter* mc, u32 word); +void rv64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word); +int fits_signed32(i64 v); +void emit_li_32(MCEmitter* mc, u32 rd, i32 imm); +void rv64_emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm); +void emit_sp_addi(MCEmitter* mc, i64 imm); +_Noreturn void rv_panic(CGTarget* t, const char* what); + +/* ---- alloc.c: all 
functions (non-static; referenced by ops.c vtable) ---- */ +Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty); +void rv_free_reg(CGTarget* t, Reg r, RegClass cls); +FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d); +RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs); +void rv_param(CGTarget* t, const CGParamDesc* p); +const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n); +void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma); +void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); +Label rv_label_new(CGTarget* t); +void rv_label_place(CGTarget* t, Label l); +void rv_jump(CGTarget* t, Label l); +u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch); +void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op, Label l); +void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op, Operand b_op); +CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d); +void rv_scope_else(CGTarget* t, CGScope s); +void rv_scope_end(CGTarget* t, CGScope s); +void rv_break_to(CGTarget* t, CGScope s); +void rv_continue_to(CGTarget* t, CGScope s); + +/* ---- ops.c: functions used cross-file ---- */ +void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); +void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); +u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off); +u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off); +u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg); +void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off); +ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos); +void rv64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym); +u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch); +int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym); +int mem_order_is_acquire(MemOrder o); +int mem_order_is_release(MemOrder o); diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -0,0 +1,1840 @@ +/* 
src/arch/rv64/ops.c — data movement, arithmetic, calls, atomics, vtable. */ + +#include "arch/rv64/internal.h" + +/* ---- For a memory access of `nbytes`, pick the right store opcode. ---- */ +u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off) { + switch (nbytes) { + case 1: return rv_sb(src, base, off); + case 2: return rv_sh(src, base, off); + case 4: return rv_sw(src, base, off); + default: return rv_sd(src, base, off); + } +} +u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off) { + switch (nbytes) { + case 1: return sign_ext ? rv_lb(rd, base, off) : rv_lbu(rd, base, off); + case 2: return sign_ext ? rv_lh(rd, base, off) : rv_lhu(rd, base, off); + case 4: return sign_ext ? rv_lw(rd, base, off) : rv_lwu(rd, base, off); + default: return rv_ld(rd, base, off); + } +} + +/* ---- data movement ---- */ + +static void rv_load_imm(CGTarget* t, Operand dst, i64 imm) { + u32 sf = type_is_64(dst.type) ? 1u : 0u; + rv64_emit_load_imm(t->mc, sf, reg_num(dst), imm); +} + +static void rv_load_const(CGTarget* t, Operand dst, ConstBytes cb) { + RImpl* a = impl_of(t); + if (dst.cls != RC_FP) { + compiler_panic(t->c, a->loc, "rv64 load_const: only FP supported in v1"); + } + Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); + ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); + + u32 cur_section = t->mc->section_id; + t->mc->set_section(t->mc, ro); + u32 ro_off = obj_align_to(t->obj, ro, cb.align ? 
cb.align : 4); + t->mc->emit_bytes(t->mc, cb.bytes, cb.size); + + char namebuf[64]; + static u32 lit_seq = 0; + int len = 0; + { + const char* prefix = ".LCFP"; + for (; prefix[len]; ++len) namebuf[len] = prefix[len]; + u32 v = lit_seq++; + char tmp[16]; + int tn = 0; + if (v == 0) tmp[tn++] = '0'; + else { + while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } + } + for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; + namebuf[len] = 0; + } + Sym sname = pool_intern_cstr(t->c->global, namebuf); + ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, + (u64)cb.size); + t->mc->set_section(t->mc, cur_section); + + /* auipc t0, %pcrel_hi(sym) ; flw/fld dst, %pcrel_lo(...)(t0) + * The LO12_I reloc references the AUIPC's site address (a label/sym + * placed at the AUIPC). For simplicity we make a local symbol at the + * AUIPC and bind LO12_I to it. */ + u32 sec = t->mc->section_id; + u32 auipc_pos = t->mc->pos(t->mc); + rv64_emit32(t->mc, rv_auipc(RV_T0, 0)); + t->mc->emit_reloc_at(t->mc, sec, auipc_pos, R_RV_PCREL_HI20, sym, 0, 0, 0); + /* Create a local symbol at the AUIPC site to anchor PCREL_LO12. 
*/ + char anchor_buf[64]; + int al = 0; + { + const char* p2 = ".LpcrelHi"; + for (; p2[al]; ++al) anchor_buf[al] = p2[al]; + static u32 seq2 = 0; + u32 v = seq2++; + char tmp[16]; int tn = 0; + if (v == 0) tmp[tn++] = '0'; + else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } } + for (int i = tn - 1; i >= 0; --i) anchor_buf[al++] = tmp[i]; + anchor_buf[al] = 0; + } + Sym aname = pool_intern_cstr(t->c->global, anchor_buf); + ObjSymId anchor = obj_symbol(t->obj, aname, SB_LOCAL, SK_OBJ, sec, + (u64)auipc_pos, 0); + u32 lpos = t->mc->pos(t->mc); + if (cb.size == 8) { + rv64_emit32(t->mc, rv_fld(reg_num(dst), RV_T0, 0)); + } else { + rv64_emit32(t->mc, rv_flw(reg_num(dst), RV_T0, 0)); + } + t->mc->emit_reloc_at(t->mc, sec, lpos, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); +} + +static void rv_copy(CGTarget* t, Operand dst, Operand src) { + if (dst.cls == RC_FP || src.cls == RC_FP) { + u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S; + /* fmv.fmt rd, rs = fsgnj.fmt rd, rs, rs */ + u32 r = reg_num(src); + rv64_emit32(t->mc, rv_fsgnj(fmt, reg_num(dst), r, r)); + return; + } + /* mv rd, rs = addi rd, rs, 0 (works for both 32 and 64-bit copies) */ + rv64_emit32(t->mc, rv_addi(reg_num(dst), reg_num(src), 0)); +} + +/* ---- address resolution ---- */ + +/* Materialize the address of `addr` (LOCAL or INDIRECT or GLOBAL) into + * `tmp_reg`. Returns the register holding the base and writes the + * effective signed offset to *out_off (0 when we synthesized into tmp). + * For OPK_GLOBAL, emits AUIPC + an LO12 reloc on the caller's load/store. 
*/ +u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { + RImpl* a = impl_of(t); + if (addr.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot"); + i32 off = -(i32)s->off; + if (off >= -2048 && off <= 2047) { + *out_off = off; + return RV_S0; + } + rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off); + rv64_emit32(t->mc, rv_add(tmp_reg, RV_S0, tmp_reg)); + *out_off = 0; + return tmp_reg; + } + if (addr.kind == OPK_INDIRECT) { + i32 off = addr.v.ind.ofs; + u32 base = addr.v.ind.base & 0x1f; + if (off >= -2048 && off <= 2047) { + *out_off = off; + return base; + } + rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)off); + rv64_emit32(t->mc, rv_add(tmp_reg, base, tmp_reg)); + *out_off = 0; + return tmp_reg; + } + compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported", + (int)addr.kind); +} + +int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) { + return obj_symbol_extern_via_got(t->c, t->obj, sym); +} + +/* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a + * fresh local sym; the paired LO12 reloc references the anchor. */ +ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) { + char buf[64]; + int len = 0; + const char* p = ".LpcrelHi"; + for (; p[len]; ++len) buf[len] = p[len]; + static u32 seq = 0; + u32 v = seq++; + char tmp[16]; int tn = 0; + if (v == 0) tmp[tn++] = '0'; + else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } } + for (int i = tn - 1; i >= 0; --i) buf[len++] = tmp[i]; + buf[len] = 0; + Sym n = pool_intern_cstr(t->c->global, buf); + return obj_symbol(t->obj, n, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos, 0); +} + +/* Emit `auipc dst, %got_pcrel_hi(sym) ; ld dst, %pcrel_lo(.)(dst)`, + * leaving the runtime address of `sym` (the GOT slot's contents) in + * `dst_reg`. 
Addends are omitted from the GOT relocs — most loaders + * disallow nonzero addends on GOT-load fixups — so callers apply any + * displacement with a follow-on ADDI/ADD against the loaded base. */ +void rv64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(dst_reg, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_GOT_HI20, sym, 0, 0, 0); + ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); + u32 lp = mc->pos(mc); + rv64_emit32(mc, rv_ld(dst_reg, dst_reg, 0)); + mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); +} + +/* Add a signed displacement `off` to `base`, writing into `rd`. Uses + * ADDI for ±2047, otherwise materializes the offset via rv64_emit_load_imm + * + ADD. Mirrors rv64_emit_addr_adjust in aarch64.c. */ +void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) { + if (off == 0) { + if (rd != base) rv64_emit32(mc, rv_addi(rd, base, 0)); + return; + } + if (off >= -2048 && off <= 2047) { + rv64_emit32(mc, rv_addi(rd, base, off)); + return; + } + rv64_emit_load_imm(mc, 1, RV_T1, (i64)off); + rv64_emit32(mc, rv_add(rd, base, RV_T1)); +} + +void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + MCEmitter* mc = t->mc; + + if (addr.kind == OPK_GLOBAL) { + u32 sec = mc->section_id; + ObjSymId sym = addr.v.global.sym; + i64 add = addr.v.global.addend; + /* Extern-via-GOT path: load &sym from GOT, then load the value at + * +addend (addend baked into the data load's imm12; relies on the + * common case of `add` fitting ±2047 — larger addends would need a + * follow-on ADD). 
*/ + if (rv64_use_got_for_sym(t, sym)) { + rv64_emit_got_load_addr(t, RV_T0, sym); + i32 ao = (i32)add; + if (dst.cls == RC_FP) { + if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), RV_T0, ao)); + else rv64_emit32(mc, rv_flw(reg_num(dst), RV_T0, ao)); + } else { + int sx = type_is_signed(addr.type); + rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, ao)); + } + return; + } + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(RV_T0, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); + ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); + u32 lp = mc->pos(mc); + if (dst.cls == RC_FP) { + if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), RV_T0, 0)); + else rv64_emit32(mc, rv_flw(reg_num(dst), RV_T0, 0)); + } else { + int sx = type_is_signed(addr.type); + rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, 0)); + } + mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); + return; + } + + i32 off; + u32 base = addr_base(t, addr, &off, RV_T0); + if (dst.cls == RC_FP) { + if (sz == 8) rv64_emit32(mc, rv_fld(reg_num(dst), base, off)); + else rv64_emit32(mc, rv_flw(reg_num(dst), base, off)); + } else { + int sx = type_is_signed(addr.type); + rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), base, off)); + } +} + +void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + MCEmitter* mc = t->mc; + + if (addr.kind == OPK_GLOBAL) { + u32 sec = mc->section_id; + ObjSymId sym = addr.v.global.sym; + i64 add = addr.v.global.addend; + u32 src_reg; + int src_fp = 0; + if (src.kind == OPK_IMM) { + u32 sf = (sz == 8) ? 1u : 0u; + rv64_emit_load_imm(mc, sf, RV_T1, src.v.imm); + src_reg = RV_T1; + } else if (src.cls == RC_FP) { + src_reg = reg_num(src); + src_fp = 1; + } else { + src_reg = reg_num(src); + } + /* Extern-via-GOT path: load &sym from GOT into t0, then store with + * addend baked into the imm12 (no reloc on the store). 
*/ + if (rv64_use_got_for_sym(t, sym)) { + rv64_emit_got_load_addr(t, RV_T0, sym); + i32 ao = (i32)add; + if (src_fp) { + if (sz == 8) rv64_emit32(mc, rv_fsd(src_reg, RV_T0, ao)); + else rv64_emit32(mc, rv_fsw(src_reg, RV_T0, ao)); + } else { + rv64_emit32(mc, enc_int_store(sz, src_reg, RV_T0, ao)); + } + return; + } + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(RV_T0, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); + ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); + u32 sp_pos = mc->pos(mc); + if (src_fp) { + if (sz == 8) rv64_emit32(mc, rv_fsd(src_reg, RV_T0, 0)); + else rv64_emit32(mc, rv_fsw(src_reg, RV_T0, 0)); + } else { + rv64_emit32(mc, enc_int_store(sz, src_reg, RV_T0, 0)); + } + mc->emit_reloc_at(mc, sec, sp_pos, R_RV_PCREL_LO12_S, anchor, 0, 0, 0); + return; + } + + i32 off; + u32 base = addr_base(t, addr, &off, + (src.kind == OPK_IMM) ? RV_T1 : RV_T0); + if (src.kind == OPK_IMM) { + u32 sf = (sz == 8) ? 1u : 0u; + rv64_emit_load_imm(mc, sf, RV_T0, src.v.imm); + rv64_emit32(mc, enc_int_store(sz, RV_T0, base, off)); + return; + } + if (src.cls == RC_FP) { + if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(src), base, off)); + else rv64_emit32(mc, rv_fsw(reg_num(src), base, off)); + } else { + rv64_emit32(mc, enc_int_store(sz, reg_num(src), base, off)); + } +} + +static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 rd = reg_num(dst); + if (lv.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, lv.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 addr_of: bad slot"); + i32 off = -(i32)s->off; + if (off >= -2048 && off <= 2047) { + rv64_emit32(mc, rv_addi(rd, RV_S0, off)); + } else { + rv64_emit_load_imm(mc, 1, rd, (i64)off); + rv64_emit32(mc, rv_add(rd, RV_S0, rd)); + } + return; + } + if (lv.kind == OPK_INDIRECT) { + i32 ofs = lv.v.ind.ofs; + u32 base = lv.v.ind.base & 0x1f; + if (ofs >= -2048 && ofs <= 2047) { + rv64_emit32(mc, rv_addi(rd, base, ofs)); + } 
else { + rv64_emit_load_imm(mc, 1, rd, (i64)ofs); + rv64_emit32(mc, rv_add(rd, base, rd)); + } + return; + } + if (lv.kind == OPK_GLOBAL) { + ObjSymId sym = lv.v.global.sym; + i64 addend = lv.v.global.addend; + /* Extern-via-GOT path: GOT load yields &sym directly; apply any + * addend with a follow-on ADDI/ADD (GOT relocs disallow addends). */ + if (rv64_use_got_for_sym(t, sym)) { + rv64_emit_got_load_addr(t, rd, sym); + if (addend) rv64_emit_addr_adjust(mc, rd, rd, (i32)addend); + return; + } + u32 sec = mc->section_id; + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(rd, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0); + ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); + u32 ip = mc->pos(mc); + rv64_emit32(mc, rv_addi(rd, rd, 0)); + mc->emit_reloc_at(mc, sec, ip, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); + return; + } + rv_panic(t, "addr_of"); +} + +static void rv_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { + /* TLS Local-Exec: lui tmp, %tprel_hi(sym); add tmp, tp, tmp; addi dst, + * tmp, %tprel_lo(sym). Uses R_RV_TPREL_HI20 / R_RV_TPREL_LO12_I. 
*/ + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + u32 rd = reg_num(dst); + u32 hp = mc->pos(mc); + rv64_emit32(mc, rv_lui(RV_T0, 0)); + mc->emit_reloc_at(mc, sec, hp, R_RV_TPREL_HI20, sym, addend, 0, 0); + rv64_emit32(mc, rv_add(RV_T0, RV_TP, RV_T0)); + u32 lp = mc->pos(mc); + rv64_emit32(mc, rv_addi(rd, RV_T0, 0)); + mc->emit_reloc_at(mc, sec, lp, R_RV_TPREL_LO12_I, sym, addend, 0, 0); +} + +/* ---- aggregate ops ---- */ + +u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { + RImpl* a = impl_of(t); + if (op.kind == OPK_REG) return reg_num(op); + if (op.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, op.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 agg: bad slot"); + i32 off = -(i32)s->off; + if (off >= -2048 && off <= 2047) { + rv64_emit32(t->mc, rv_addi(scratch, RV_S0, off)); + } else { + rv64_emit_load_imm(t->mc, 1, scratch, (i64)off); + rv64_emit32(t->mc, rv_add(scratch, RV_S0, scratch)); + } + return scratch; + } + compiler_panic(t->c, a->loc, "rv64 agg: address kind %d unsupported", + (int)op.kind); +} + +static void rv_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr, + AggregateAccess agg) { + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, RV_T0); + u32 sr = agg_addr_reg(t, src_addr, (dr == RV_T1) ? 
RV_T2 : RV_T1); + u32 n = agg.size; + u32 i = 0; + while (i + 8 <= n) { + rv64_emit32(mc, rv_ld(RV_T3, sr, (i32)i)); + rv64_emit32(mc, rv_sd(RV_T3, dr, (i32)i)); + i += 8; + } + while (i + 4 <= n) { + rv64_emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); + rv64_emit32(mc, rv_sw(RV_T3, dr, (i32)i)); + i += 4; + } + while (i + 2 <= n) { + rv64_emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); + rv64_emit32(mc, rv_sh(RV_T3, dr, (i32)i)); + i += 2; + } + while (i < n) { + rv64_emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); + rv64_emit32(mc, rv_sb(RV_T3, dr, (i32)i)); + i += 1; + } +} + +static void rv_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value, + AggregateAccess agg) { + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, RV_T0); + u32 byte; + if (byte_value.kind == OPK_IMM) { + byte = (u32)(byte_value.v.imm & 0xffu); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "rv64 set_bytes: REG byte NYI"); + } + u32 n = agg.size; + u32 src; + if (byte == 0) { + src = RV_ZERO; + } else { + u64 b = byte; + b |= b << 8; b |= b << 16; b |= b << 32; + rv64_emit_load_imm(mc, 1, RV_T3, (i64)b); + src = RV_T3; + } + u32 i = 0; + while (i + 8 <= n) { rv64_emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; } + while (i + 4 <= n) { rv64_emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; } + while (i + 2 <= n) { rv64_emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; } + while (i < n) { rv64_emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; } +} + +static void rv_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, + BitFieldAccess bf) { + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, RV_T0); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + u32 rd = reg_num(dst); + /* Load full storage unit (zero-ext for shifts). */ + rv64_emit32(mc, enc_int_load(storage_bytes, 0, rd, base, (i32)bf.storage_offset)); + /* Shift left by (XLEN - (bit_offset + bit_width)) then arithmetic + * right-shift by (XLEN - bit_width). Use 64-bit shifts. 
*/ + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + u32 sh_left = 64u - (lsb + width); + u32 sh_right = 64u - width; + rv64_emit32(mc, rv_slli(rd, rd, sh_left)); + if (bf.signed_) rv64_emit32(mc, rv_srai(rd, rd, sh_right)); + else rv64_emit32(mc, rv_srli(rd, rd, sh_right)); +} + +static void rv_bitfield_store(CGTarget* t, Operand record_addr, Operand src, + BitFieldAccess bf) { + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, RV_T0); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + /* Load current value into t1 */ + rv64_emit32(mc, enc_int_load(storage_bytes, 0, RV_T1, base, + (i32)bf.storage_offset)); + u32 src_reg; + if (src.kind == OPK_IMM) { + rv64_emit_load_imm(mc, 1, RV_T2, src.v.imm); + src_reg = RV_T2; + } else if (src.kind == OPK_REG) { + src_reg = reg_num(src); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "rv64 bitfield_store: src kind %d NYI", (int)src.kind); + } + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + /* mask = ((1 << width) - 1) << lsb */ + u64 mask = ((u64)1 << width) - 1u; + /* t3 = src & ((1<<width)-1), then shifted to lsb */ + rv64_emit_load_imm(mc, 1, RV_T3, (i64)mask); + rv64_emit32(mc, rv_and(RV_T3, src_reg, RV_T3)); + if (lsb) rv64_emit32(mc, rv_slli(RV_T3, RV_T3, lsb)); + /* clear the field bits in t1: andi or and-not pattern */ + u64 mask_in = mask << lsb; + rv64_emit_load_imm(mc, 1, RV_T2, (i64)~mask_in); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T2)); + rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, enc_int_store(storage_bytes, RV_T1, base, + (i32)bf.storage_offset)); +} + +/* ---- arithmetic ---- */ + +static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, + Operand b_op) { + MCEmitter* mc = t->mc; + if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { + u32 fmt = type_is_fp_double(dst.type) ? 
RV_FMT_D : RV_FMT_S; + u32 rd = reg_num(dst); + u32 fa = reg_num(a_op); + u32 fb = reg_num(b_op); + switch (op) { + case BO_FADD: rv64_emit32(mc, rv_fadd(fmt, rd, fa, fb)); return; + case BO_FSUB: rv64_emit32(mc, rv_fsub(fmt, rd, fa, fb)); return; + case BO_FMUL: rv64_emit32(mc, rv_fmul(fmt, rd, fa, fb)); return; + case BO_FDIV: rv64_emit32(mc, rv_fdiv(fmt, rd, fa, fb)); return; + default: break; + } + } + u32 sf = type_is_64(dst.type) ? 1u : 0u; + u32 rd = reg_num(dst); + + /* Canonicalize IMM to the RHS for commutative ops so the imm-form + * check below handles `3 + a` the same as `a + 3`. ISUB is not + * commutative — IMM-on-LHS still materializes. */ + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + /* IMM-form fast paths. RV-I admits a 12-bit signed immediate for + * ADDI/ANDI/ORI/XORI/SLTI/SLTIU (range [-2048, 2047]). ISUB has no + * SUBI — we encode it as ADDI with the negated literal when -imm + * fits the same range (i.e., imm ∈ [-2047, 2048]; INT_MIN is + * intentionally excluded since -INT_MIN overflows). Shifts admit a + * shamt: 6 bits (0..63) on the 64-bit forms, 5 bits (0..31) on the + * W-variants. */ + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 ra = reg_num(a_op); + i64 imm = b_op.v.imm; + int fits12 = imm >= -2048 && imm <= 2047; + switch (op) { + case BO_IADD: + if (fits12) { + rv64_emit32(mc, sf ? rv_addi(rd, ra, (i32)imm) : rv_addiw(rd, ra, (i32)imm)); + return; + } + break; + case BO_ISUB: + if (imm >= -2047 && imm <= 2048) { + rv64_emit32(mc, sf ? 
rv_addi(rd, ra, (i32)-imm) : rv_addiw(rd, ra, (i32)-imm)); + return; + } + break; + case BO_AND: + if (fits12) { rv64_emit32(mc, rv_andi(rd, ra, (i32)imm)); return; } + break; + case BO_OR: + if (fits12) { rv64_emit32(mc, rv_ori(rd, ra, (i32)imm)); return; } + break; + case BO_XOR: + if (fits12) { rv64_emit32(mc, rv_xori(rd, ra, (i32)imm)); return; } + break; + case BO_SHL: { + u32 width = sf ? 64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + rv64_emit32(mc, sf ? rv_slli(rd, ra, sh) : rv_slliw(rd, ra, sh)); + return; + } + case BO_SHR_U: { + u32 width = sf ? 64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + rv64_emit32(mc, sf ? rv_srli(rd, ra, sh) : rv_srliw(rd, ra, sh)); + return; + } + case BO_SHR_S: { + u32 width = sf ? 64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + rv64_emit32(mc, sf ? rv_srai(rd, ra, sh) : rv_sraiw(rd, ra, sh)); + return; + } + default: break; + } + } + + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + + switch (op) { + case BO_IADD: rv64_emit32(mc, sf ? rv_add(rd, ra, rb) : rv_addw(rd, ra, rb)); return; + case BO_ISUB: rv64_emit32(mc, sf ? rv_sub(rd, ra, rb) : rv_subw(rd, ra, rb)); return; + case BO_IMUL: rv64_emit32(mc, sf ? rv_mul(rd, ra, rb) : rv_mulw(rd, ra, rb)); return; + case BO_AND: rv64_emit32(mc, rv_and(rd, ra, rb)); return; + case BO_OR: rv64_emit32(mc, rv_or(rd, ra, rb)); return; + case BO_XOR: rv64_emit32(mc, rv_xor(rd, ra, rb)); return; + case BO_SHL: rv64_emit32(mc, sf ? rv_sll(rd, ra, rb) : rv_sllw(rd, ra, rb)); return; + case BO_SHR_U: rv64_emit32(mc, sf ? rv_srl(rd, ra, rb) : rv_srlw(rd, ra, rb)); return; + case BO_SHR_S: rv64_emit32(mc, sf ? rv_sra(rd, ra, rb) : rv_sraw(rd, ra, rb)); return; + case BO_SDIV: rv64_emit32(mc, sf ? rv_div(rd, ra, rb) : rv_divw(rd, ra, rb)); return; + case BO_UDIV: rv64_emit32(mc, sf ? rv_divu(rd, ra, rb) : rv_divuw(rd, ra, rb)); return; + case BO_SREM: rv64_emit32(mc, sf ? 
rv_rem(rd, ra, rb) : rv_remw(rd, ra, rb)); return; + case BO_UREM: rv64_emit32(mc, sf ? rv_remu(rd, ra, rb) : rv_remuw(rd, ra, rb)); return; + default: + compiler_panic(t->c, impl_of(t)->loc, "rv64 binop: op %d unimpl", (int)op); + } +} + +static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { + MCEmitter* mc = t->mc; + u32 sf = type_is_64(dst.type) ? 1u : 0u; + u32 rd = reg_num(dst); + /* IMM operand is legal per the CGTarget contract (arch.h); materialize + * into t0 when not already a register. cg folds literal unops upstream + * via cg_fold_unop. */ + u32 rn = rv64_force_reg_int(t, a_op, RV_T0); + switch (op) { + case UO_NEG: + rv64_emit32(mc, sf ? rv_sub(rd, RV_ZERO, rn) : rv_subw(rd, RV_ZERO, rn)); + return; + case UO_BNOT: + rv64_emit32(mc, rv_xori(rd, rn, -1)); + return; + case UO_NOT: + /* logical: 1 if rn==0 else 0 → sltiu rd, rn, 1 */ + rv64_emit32(mc, rv_sltiu(rd, rn, 1)); + return; + default: + compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: op %d unimpl", (int)op); + } +} + +static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 rd = reg_num(dst); + u32 rn = reg_num(src); + + switch (k) { + case CV_SEXT: { + u32 src_bits = type_byte_size(src.type) * 8u; + if (src_bits == 32u) { + /* sext.w rd, rs = addiw rd, rs, 0 */ + rv64_emit32(mc, rv_addiw(rd, rn, 0)); + return; + } + /* slli + srai by (64 - src_bits) */ + u32 sh = 64u - src_bits; + rv64_emit32(mc, rv_slli(rd, rn, sh)); + rv64_emit32(mc, rv_srai(rd, rd, sh)); + return; + } + case CV_ZEXT: { + u32 src_bits = type_byte_size(src.type) * 8u; + if (src_bits == 32u) { + /* zext.w: slli rd, rs, 32; srli rd, rd, 32 */ + rv64_emit32(mc, rv_slli(rd, rn, 32)); + rv64_emit32(mc, rv_srli(rd, rd, 32)); + } else { + u32 sh = 64u - src_bits; + rv64_emit32(mc, rv_slli(rd, rn, sh)); + rv64_emit32(mc, rv_srli(rd, rd, sh)); + } + return; + } + case CV_TRUNC: + /* Truncate to W: addiw rd, rs, 0 puts low 32 in rd 
sign-extended. + * For narrower widths the consumer (store) handles it. */ + rv64_emit32(mc, rv_addiw(rd, rn, 0)); + return; + case CV_ITOF_S: { + int sf_src = type_is_64(src.type); + int dst_d = type_is_fp_double(dst.type); + if (dst_d) { + rv64_emit32(mc, sf_src ? rv_fcvt_d_l(rd, rn) : rv_fcvt_d_w(rd, rn)); + } else { + rv64_emit32(mc, sf_src ? rv_fcvt_s_l(rd, rn) : rv_fcvt_s_w(rd, rn)); + } + return; + } + case CV_ITOF_U: { + int sf_src = type_is_64(src.type); + int dst_d = type_is_fp_double(dst.type); + if (dst_d) { + rv64_emit32(mc, sf_src ? rv_fcvt_d_lu(rd, rn) : rv_fcvt_d_wu(rd, rn)); + } else { + rv64_emit32(mc, sf_src ? rv_fcvt_s_lu(rd, rn) : rv_fcvt_s_wu(rd, rn)); + } + return; + } + case CV_FTOI_S: { + int sf_dst = type_is_64(dst.type); + int src_d = type_is_fp_double(src.type); + if (src_d) { + rv64_emit32(mc, sf_dst ? rv_fcvt_l_d(rd, rn) : rv_fcvt_w_d(rd, rn)); + } else { + rv64_emit32(mc, sf_dst ? rv_fcvt_l_s(rd, rn) : rv_fcvt_w_s(rd, rn)); + } + return; + } + case CV_FTOI_U: { + int sf_dst = type_is_64(dst.type); + int src_d = type_is_fp_double(src.type); + if (src_d) { + rv64_emit32(mc, sf_dst ? rv_fcvt_lu_d(rd, rn) : rv_fcvt_wu_d(rd, rn)); + } else { + rv64_emit32(mc, sf_dst ? rv_fcvt_lu_s(rd, rn) : rv_fcvt_wu_s(rd, rn)); + } + return; + } + case CV_FEXT: rv64_emit32(mc, rv_fcvt_d_s(rd, rn)); return; + case CV_FTRUNC: rv64_emit32(mc, rv_fcvt_s_d(rd, rn)); return; + case CV_BITCAST: { + if (src.cls == RC_INT && dst.cls == RC_FP) { + u32 sz = type_byte_size(dst.type); + rv64_emit32(mc, sz == 8 ? rv_fmv_d_x(rd, rn) : rv_fmv_w_x(rd, rn)); + } else if (src.cls == RC_FP && dst.cls == RC_INT) { + u32 sz = type_byte_size(src.type); + rv64_emit32(mc, sz == 8 ? 
rv_fmv_x_d(rd, rn) : rv_fmv_x_w(rd, rn)); + } else { + compiler_panic(t->c, a->loc, "rv64 BITCAST: same-class NYI"); + } + return; + } + default: + compiler_panic(t->c, a->loc, "rv64 convert kind %d unimpl", (int)k); + } +} + +/* ---- calls / return ---- */ + +static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, + u32* next_fp, u32* stack_off) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + /* For variadic args (av->abi NULL) synthesize a one-part DIRECT shape. + * On RV64 LP64D, variadic args go through the integer registers + * regardless of FP-ness (per the psABI). */ + ABIArgInfo va_ai; + ABIArgPart va_pt; + const ABIArgInfo* ai = av->abi; + if (!ai) { + u32 sz = type_byte_size(av->type); + memset(&va_ai, 0, sizeof va_ai); + memset(&va_pt, 0, sizeof va_pt); + va_ai.kind = ABI_ARG_DIRECT; + va_ai.parts = &va_pt; + va_ai.nparts = 1; + va_pt.cls = ABI_CLASS_INT; + va_pt.size = sz; + va_pt.align = sz; + va_pt.src_offset = 0; + ai = &va_ai; + } + if (ai->kind == ABI_ARG_IGNORE) return; + + if (ai->kind == ABI_ARG_INDIRECT) { + /* Pass the address of the storage in the next integer slot. */ + int to_stack = (*next_int >= 8); + u32 dst_reg = to_stack ? 
RV_T0 : (RV_A0 + (*next_int)++); + if (av->storage.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad byval slot"); + i32 off = -(i32)s->off; + if (off >= -2048 && off <= 2047) { + rv64_emit32(mc, rv_addi(dst_reg, RV_S0, off)); + } else { + rv64_emit_load_imm(mc, 1, dst_reg, (i64)off); + rv64_emit32(mc, rv_add(dst_reg, RV_S0, dst_reg)); + } + } else if (av->storage.kind == OPK_INDIRECT) { + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs; + if (off >= -2048 && off <= 2047) { + rv64_emit32(mc, rv_addi(dst_reg, base, off)); + } else { + rv64_emit_load_imm(mc, 1, dst_reg, (i64)off); + rv64_emit32(mc, rv_add(dst_reg, base, dst_reg)); + } + } else { + compiler_panic(t->c, a->loc, + "rv64 call: INDIRECT storage kind %d NYI", + (int)av->storage.kind); + } + if (to_stack) { + rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); + *stack_off += 8; + } + return; + } + + for (u16 i = 0; i < ai->nparts; ++i) { + const ABIArgPart* pt = &ai->parts[i]; + u32 sz = pt->size; + + if (pt->cls == ABI_CLASS_INT) { + int to_stack = (*next_int >= 8); + u32 dst_reg = to_stack ? RV_T0 : (RV_A0 + (*next_int)++); + switch (av->storage.kind) { + case OPK_IMM: { + u32 sf = (sz == 8) ? 1u : 0u; + rv64_emit_load_imm(mc, sf, dst_reg, av->storage.v.imm); + break; + } + case OPK_REG: { + /* Variadic FP arg pinned into an integer register: bitcast + * via FMV.X.{D,W}. Otherwise normal MV. */ + if (av->storage.cls == RC_FP) { + rv64_emit32(mc, (sz == 8) ? 
rv_fmv_x_d(dst_reg, reg_num(av->storage)) + : rv_fmv_x_w(dst_reg, reg_num(av->storage))); + } else { + rv64_emit32(mc, rv_addi(dst_reg, reg_num(av->storage), 0)); + } + break; + } + case OPK_LOCAL: { + RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad arg slot"); + i32 off = -(i32)s->off + (i32)pt->src_offset; + rv64_emit32(mc, enc_int_load(sz, 0, dst_reg, RV_S0, off)); + break; + } + case OPK_INDIRECT: { + /* cg holds INDIRECT base regs in s2..s11, disjoint from arg + * regs a0..a7 and the t0 stack-arg scratch. */ + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + rv64_emit32(mc, enc_int_load(sz, 0, dst_reg, base, off)); + break; + } + default: + compiler_panic(t->c, a->loc, + "rv64 call: storage kind %d NYI", + (int)av->storage.kind); + } + if (to_stack) { + rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); + *stack_off += 8; + } + } else if (pt->cls == ABI_CLASS_FP) { + int to_stack = (*next_fp >= 8); + if (!to_stack) { + u32 freg = 10u + (*next_fp)++; + switch (av->storage.kind) { + case OPK_REG: { + u32 fmt = (sz == 8) ? RV_FMT_D : RV_FMT_S; + u32 r = reg_num(av->storage); + rv64_emit32(mc, rv_fsgnj(fmt, freg, r, r)); + break; + } + case OPK_INDIRECT: { + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + rv64_emit32(mc, (sz == 8) ? rv_fld(freg, base, off) + : rv_flw(freg, base, off)); + break; + } + default: + compiler_panic(t->c, a->loc, "rv64 call: FP storage kind %d NYI", + (int)av->storage.kind); + } + } else { + switch (av->storage.kind) { + case OPK_REG: + if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(av->storage), RV_SP, (i32)*stack_off)); + else rv64_emit32(mc, rv_fsw(reg_num(av->storage), RV_SP, (i32)*stack_off)); + break; + case OPK_INDIRECT: { + /* Route through ft0 — it is in {ft0..ft7}, caller-saved + * scratch outside the cg fs2..fs11 pool. 
*/ + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + if (sz == 8) { + rv64_emit32(mc, rv_fld(/*ft0=*/0u, base, off)); + rv64_emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + } else { + rv64_emit32(mc, rv_flw(/*ft0=*/0u, base, off)); + rv64_emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + } + break; + } + default: + compiler_panic(t->c, a->loc, "rv64 call: FP stack-arg NYI"); + } + *stack_off += 8; + } + } else { + compiler_panic(t->c, a->loc, "rv64 call: ABI class %d unimpl", + (int)pt->cls); + } + } +} + +static void rv_call(CGTarget* t, const CGCallDesc* d) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + u32 next_int = 0, next_fp = 0, stack_off = 0; + + /* sret: caller passes destination pointer in a0. */ + if (d->abi && d->abi->has_sret) { + if (d->ret.storage.kind != OPK_LOCAL) { + compiler_panic(t->c, a->loc, "rv64 call: sret dst must be LOCAL"); + } + RvSlot* s = rv64_slot_get(a, d->ret.storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad sret slot"); + i32 off = -(i32)s->off; + if (off >= -2048 && off <= 2047) { + rv64_emit32(mc, rv_addi(RV_A0, RV_S0, off)); + } else { + rv64_emit_load_imm(mc, 1, RV_A0, (i64)off); + rv64_emit32(mc, rv_add(RV_A0, RV_S0, RV_A0)); + } + next_int = 1; + } + + for (u32 i = 0; i < d->nargs; ++i) { + emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); + } + u32 needed = (stack_off + 15u) & ~15u; + if (needed > a->max_outgoing) a->max_outgoing = needed; + + if (d->callee.kind == OPK_GLOBAL) { + /* AUIPC ra, 0 ; JALR ra, ra, 0 with R_RV_CALL on AUIPC */ + u32 sec = mc->section_id; + u32 pos = mc->pos(mc); + rv64_emit32(mc, rv_auipc(RV_RA, 0)); + rv64_emit32(mc, rv_jalr(RV_RA, RV_RA, 0)); + mc->emit_reloc_at(mc, sec, pos, R_RV_CALL, + d->callee.v.global.sym, d->callee.v.global.addend, 0, 0); + } else if (d->callee.kind == OPK_REG) { + rv64_emit32(mc, rv_jalr(RV_RA, reg_num(d->callee), 0)); + } else { + 
compiler_panic(t->c, a->loc, "rv64 call: callee kind %d unsupported", + (int)d->callee.kind); + } + + /* Receive return value. */ + const ABIArgInfo* ri = &d->abi->ret; + if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return; + if (ri->nparts == 0) return; + + Operand rs = d->ret.storage; + u32 nir = 0, nfr = 0; + for (u16 i = 0; i < ri->nparts; ++i) { + const ABIArgPart* p = &ri->parts[i]; + u32 src_reg = (p->cls == ABI_CLASS_INT) ? (RV_A0 + nir++) : (10u + nfr++); + + if (rs.kind == OPK_REG) { + if (ri->nparts != 1) { + compiler_panic(t->c, a->loc, "rv64 call: REG ret with %u parts", + (unsigned)ri->nparts); + } + if (p->cls == ABI_CLASS_INT) { + rv64_emit32(mc, rv_addi(reg_num(rs), src_reg, 0)); + } else { + u32 fmt = (p->size == 8) ? RV_FMT_D : RV_FMT_S; + rv64_emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg)); + } + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot"); + base_reg = RV_S0; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0x1fu; + base_off = rs.v.ind.ofs; + } + i32 off = base_off + (i32)p->src_offset; + if (p->cls == ABI_CLASS_INT) { + rv64_emit32(mc, enc_int_store(p->size, src_reg, base_reg, off)); + } else { + if (p->size == 8) rv64_emit32(mc, rv_fsd(src_reg, base_reg, off)); + else rv64_emit32(mc, rv_fsw(src_reg, base_reg, off)); + } + } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { + /* void return placeholder — nothing to do. 
*/ + } else { + compiler_panic(t->c, a->loc, "rv64 call: ret_storage kind %d unsupported", + (int)rs.kind); + } + } +} + +static void rv_ret(CGTarget* t, const CGABIValue* val) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + if (val) { + const ABIArgInfo* ri = val->abi; + if (ri && ri->kind == ABI_ARG_INDIRECT) { + /* sret: reload destination pointer from sret_ptr_slot into t0, + * then memcpy from val->storage into [t0]. */ + u32 src_base; + i32 src_base_off; + u32 nbytes; + if (val->storage.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot"); + src_base = RV_S0; + src_base_off = -(i32)s->off; + nbytes = s->size; + } else if (val->storage.kind == OPK_INDIRECT) { + src_base = val->storage.v.ind.base & 0x1fu; + src_base_off = val->storage.v.ind.ofs; + nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "rv64 ret indirect: missing aggregate size"); + } + } else { + compiler_panic(t->c, a->loc, + "rv64 ret indirect: storage kind %d NYI", + (int)val->storage.kind); + } + RvSlot* sp = (a->sret_ptr_slot != FRAME_SLOT_NONE) + ? 
rv64_slot_get(a, a->sret_ptr_slot) + : NULL; + if (sp) rv64_emit32(mc, rv_ld(RV_T0, RV_S0, -(i32)sp->off)); + u32 i = 0; + while (i + 8 <= nbytes) { + rv64_emit32(mc, rv_ld(RV_T1, src_base, src_base_off + (i32)i)); + rv64_emit32(mc, rv_sd(RV_T1, RV_T0, (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + rv64_emit32(mc, rv_lwu(RV_T1, src_base, src_base_off + (i32)i)); + rv64_emit32(mc, rv_sw(RV_T1, RV_T0, (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + rv64_emit32(mc, rv_lhu(RV_T1, src_base, src_base_off + (i32)i)); + rv64_emit32(mc, rv_sh(RV_T1, RV_T0, (i32)i)); + i += 2; + } + while (i < nbytes) { + rv64_emit32(mc, rv_lbu(RV_T1, src_base, src_base_off + (i32)i)); + rv64_emit32(mc, rv_sb(RV_T1, RV_T0, (i32)i)); + i += 1; + } + } else if (val->storage.kind == OPK_REG) { + if (val->storage.cls == RC_FP) { + u32 fmt = type_is_fp_double(val->storage.type) ? RV_FMT_D : RV_FMT_S; + u32 r = reg_num(val->storage); + rv64_emit32(mc, rv_fsgnj(fmt, 10u, r, r)); /* fa0 = freg 10 */ + } else { + rv64_emit32(mc, rv_addi(RV_A0, reg_num(val->storage), 0)); + } + } else if (val->storage.kind == OPK_IMM) { + u32 sf = type_is_64(val->storage.type) ? 1u : 0u; + rv64_emit_load_imm(mc, sf, RV_A0, val->storage.v.imm); + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + RvSlot* s = rv64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot"); + base_reg = RV_S0; + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0x1fu; + base_off = val->storage.v.ind.ofs; + } + const ABIArgInfo* ri2 = val->abi; + u32 nir = 0, nfr = 0; + for (u16 i = 0; i < (ri2 ? 
ri2->nparts : 0); ++i) { + const ABIArgPart* pt = &ri2->parts[i]; + i32 off = base_off + (i32)pt->src_offset; + if (pt->cls == ABI_CLASS_INT) { + rv64_emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off)); + } else if (pt->cls == ABI_CLASS_FP) { + u32 freg = 10u + nfr++; + if (pt->size == 8) rv64_emit32(mc, rv_fld(freg, base_reg, off)); + else rv64_emit32(mc, rv_flw(freg, base_reg, off)); + } else { + compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl", + (int)pt->cls); + } + } + } + } + /* Jump to epilogue. */ + rv64_emit32(mc, rv_jal(RV_ZERO, 0)); + mc->emit_label_ref(mc, a->epilogue_label, R_RV_JAL, 4, 0); +} + +/* ---- panic stubs for features we don't yet cover ---- */ + +static void rv_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (d.kind != OPK_REG) { + compiler_panic(t->c, a->loc, "rv64 alloca: dst must be REG"); + } + if (align > 16) { + compiler_panic(t->c, a->loc, + "rv64 alloca: align %u > 16 not yet supported", align); + } + if (sz.kind == OPK_IMM) { + i64 v = sz.v.imm; + if (v < 0) compiler_panic(t->c, a->loc, "rv64 alloca: negative size"); + u64 aligned = ((u64)v + 15u) & ~(u64)15u; + if (aligned == 0) aligned = 16; + if (aligned > 2047u) { + compiler_panic(t->c, a->loc, + "rv64 alloca: const size %llu too large for v1", + (unsigned long long)aligned); + } + rv64_emit32(mc, rv_addi(RV_SP, RV_SP, -(i32)aligned)); + } else if (sz.kind == OPK_REG) { + u32 sz_reg = reg_num(sz); + /* t0 = (sz + 15) & ~15; sp -= t0 */ + rv64_emit32(mc, rv_addi(RV_T0, sz_reg, 15)); + rv64_emit32(mc, rv_andi(RV_T0, RV_T0, -16)); + rv64_emit32(mc, rv_sub(RV_SP, RV_SP, RV_T0)); + } else { + compiler_panic(t->c, a->loc, "rv64 alloca: size kind %d unsupported", + (int)sz.kind); + } + + /* Placeholder: addi dst, sp, max_outgoing (imm patched at func_end). */ + if (a->nadd_patches == a->add_patches_cap) { + u32 ncap = a->add_patches_cap ? 
a->add_patches_cap * 2 : 4; + struct RvAllocaPatch* nb = arena_array(t->c->tu, struct RvAllocaPatch, ncap); + if (a->add_patches) + memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches); + a->add_patches = nb; + a->add_patches_cap = ncap; + } + u32 dst_reg = reg_num(d); + a->add_patches[a->nadd_patches].pos = mc->pos(mc); + a->add_patches[a->nadd_patches].dst_reg = dst_reg; + a->nadd_patches++; + rv64_emit32(mc, rv_addi(dst_reg, RV_SP, 0)); + a->has_alloca = 1; +} +/* RV64 LP64D va_list: a single `void*` pointing at the next argument + * slot. The prologue spills a_{nparams_int}..a7 into the save area at + * [s0 + 16]. The save area lives at the top of the callee frame, + * immediately above the saved-s0/ra pair, so save_area[8] coincides + * with the caller's first stack arg — a single 8-byte stride covers + * register and stack args alike. */ +static void rv_va_start_(CGTarget* t, Operand ap_op) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (!a->is_variadic) { + compiler_panic(t->c, a->loc, "rv64 va_start: function not variadic"); + } + u32 ap = reg_num(ap_op); + /* *ap = s0 + 16 + next_param_int*8 (skip past named-int slots). */ + i32 off = 16 + (i32)(a->next_param_int * 8u); + rv64_emit32(mc, rv_addi(RV_T0, RV_S0, off)); + rv64_emit32(mc, rv_sd(RV_T0, ap, 0)); +} + +static void rv_va_arg_(CGTarget* t, Operand dst, Operand ap_op, + const Type* ty) { + MCEmitter* mc = t->mc; + u32 ap = reg_num(ap_op); + u32 sz = type_byte_size(ty); + /* t1 = *ap; load value; *ap = t1 + 8 (rounded up). + * On RV64 LP64D every var arg occupies an 8-byte slot. */ + rv64_emit32(mc, rv_ld(RV_T1, ap, 0)); + if (dst.cls == RC_FP) { + /* For variadic FP args on RV64 LP64D, the value sits in the integer + * save area at the same bit pattern as a double bit-cast. Load and + * bitcast. 
*/ + if (sz == 8) { + rv64_emit32(mc, rv_ld(RV_T2, RV_T1, 0)); + rv64_emit32(mc, rv_fmv_d_x(reg_num(dst), RV_T2)); + } else { + rv64_emit32(mc, rv_lw(RV_T2, RV_T1, 0)); + rv64_emit32(mc, rv_fmv_w_x(reg_num(dst), RV_T2)); + } + } else { + int sx = type_is_signed(ty); + rv64_emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T1, 0)); + } + /* advance ap by 8 bytes. */ + rv64_emit32(mc, rv_addi(RV_T1, RV_T1, 8)); + rv64_emit32(mc, rv_sd(RV_T1, ap, 0)); +} + +static void rv_va_end_(CGTarget* t, Operand a) { + (void)t; (void)a; +} + +static void rv_va_copy_(CGTarget* t, Operand d, Operand s) { + MCEmitter* mc = t->mc; + u32 dr = reg_num(d); + u32 sr = reg_num(s); + /* va_list is a single pointer (8 bytes). */ + rv64_emit32(mc, rv_ld(RV_T0, sr, 0)); + rv64_emit32(mc, rv_sd(RV_T0, dr, 0)); +} + +/* ---- atomics (LL/SC + AMO) ---- */ + +int mem_order_is_acquire(MemOrder o) { + return o == MO_ACQUIRE || o == MO_ACQ_REL || o == MO_SEQ_CST || o == MO_CONSUME; +} +int mem_order_is_release(MemOrder o) { + return o == MO_RELEASE || o == MO_ACQ_REL || o == MO_SEQ_CST; +} + +static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, + MemOrder o) { + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 1u : 0u; + /* Resolve address to a register. */ + u32 base; + if (addr.kind == OPK_REG) { + base = reg_num(addr); + } else if (addr.kind == OPK_LOCAL) { + i32 off; + base = addr_base(t, addr, &off, RV_T0); + if (off) { + rv64_emit32(mc, rv_addi(RV_T0, base, off)); + base = RV_T0; + } + } else { + compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_load: addr kind %d NYI", + (int)addr.kind); + } + if (mem_order_is_acquire(o)) { + /* lr.w/d as ordered load (aq=1, rl=0). */ + rv64_emit32(mc, sf ? 
rv_lr_d(reg_num(dst), base, 1, 0) + : rv_lr_w(reg_num(dst), base, 1, 0)); + } else { + rv64_emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0)); + } +} + +static void rv_atomic_store(CGTarget* t, Operand addr, Operand src, + MemAccess ma, MemOrder o) { + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 1u : 0u; + u32 src_reg; + if (src.kind == OPK_IMM) { + rv64_emit_load_imm(mc, sf, RV_T1, src.v.imm); + src_reg = RV_T1; + } else if (src.kind == OPK_REG) { + src_reg = reg_num(src); + } else { + compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: src kind %d NYI", + (int)src.kind); + } + u32 base; + if (addr.kind == OPK_REG) { + base = reg_num(addr); + } else if (addr.kind == OPK_LOCAL) { + i32 off; + base = addr_base(t, addr, &off, RV_T0); + if (off) { rv64_emit32(mc, rv_addi(RV_T0, base, off)); base = RV_T0; } + } else { + compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI", + (int)addr.kind); + } + if (mem_order_is_release(o)) { + /* fence rw,w; sw/sd src, 0(base). Conservative for SEQ_CST. */ + rv64_emit32(mc, rv_fence_rw_rw()); + rv64_emit32(mc, enc_int_store(ma.size, src_reg, base, 0)); + if (o == MO_SEQ_CST) rv64_emit32(mc, rv_fence_rw_rw()); + } else { + rv64_emit32(mc, enc_int_store(ma.size, src_reg, base, 0)); + } +} + +static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, + Operand val, MemAccess ma, MemOrder o) { + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 
1u : 0u; + u32 base = RV_T0; + if (addr.kind == OPK_REG) { + rv64_emit32(mc, rv_addi(base, reg_num(addr), 0)); + } else if (addr.kind == OPK_LOCAL) { + i32 off; + u32 b = addr_base(t, addr, &off, RV_T0); + if (b != RV_T0 || off) { + rv64_emit32(mc, rv_addi(base, b, off)); + } + } else { + compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI"); + } + u32 vreg = RV_T1; + if (val.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, vreg, val.v.imm); + else if (val.kind == OPK_REG) rv64_emit32(mc, rv_addi(vreg, reg_num(val), 0)); + else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: val kind NYI"); + + int aq = mem_order_is_acquire(o); + int rl = mem_order_is_release(o); + + /* LR/SC loop for any op (simpler than per-op AMO encodings, but AMO is + * preferred for the cases the corpus exercises). */ + MCLabel L_retry = mc->label_new(mc); + mc->label_place(mc, L_retry); + rv64_emit32(mc, sf ? rv_lr_d(reg_num(dst), base, (u32)aq, 0) + : rv_lr_w(reg_num(dst), base, (u32)aq, 0)); + u32 new_r = RV_T2; + switch (op) { + case AO_XCHG: rv64_emit32(mc, rv_addi(new_r, vreg, 0)); break; + case AO_ADD: rv64_emit32(mc, sf ? rv_add(new_r, reg_num(dst), vreg) : rv_addw(new_r, reg_num(dst), vreg)); break; + case AO_SUB: rv64_emit32(mc, sf ? rv_sub(new_r, reg_num(dst), vreg) : rv_subw(new_r, reg_num(dst), vreg)); break; + case AO_AND: rv64_emit32(mc, rv_and(new_r, reg_num(dst), vreg)); break; + case AO_OR: rv64_emit32(mc, rv_or(new_r, reg_num(dst), vreg)); break; + case AO_XOR: rv64_emit32(mc, rv_xor(new_r, reg_num(dst), vreg)); break; + case AO_NAND: + rv64_emit32(mc, rv_and(new_r, reg_num(dst), vreg)); + rv64_emit32(mc, rv_xori(new_r, new_r, -1)); + break; + default: rv64_emit32(mc, rv_addi(new_r, vreg, 0)); break; + } + /* sc.w/d t3, new_r, (base); bnez t3, retry. */ + rv64_emit32(mc, sf ? 
rv_sc_d(RV_T3, base, new_r, 0, (u32)rl) + : rv_sc_w(RV_T3, base, new_r, 0, (u32)rl)); + rv64_emit32(mc, rv_bne(RV_T3, RV_ZERO, 0)); + mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0); +} + +static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, + Operand exp, Operand des, MemAccess ma, + MemOrder succ, MemOrder fail) { + MCEmitter* mc = t->mc; + u32 sf = (ma.size == 8) ? 1u : 0u; + (void)fail; + u32 base = RV_T0; + if (addr.kind == OPK_REG) rv64_emit32(mc, rv_addi(base, reg_num(addr), 0)); + else if (addr.kind == OPK_LOCAL) { + i32 off; u32 b = addr_base(t, addr, &off, RV_T0); + if (b != RV_T0 || off) rv64_emit32(mc, rv_addi(base, b, off)); + } else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI"); + u32 ereg = RV_T1, dreg = RV_T2; + if (exp.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, ereg, exp.v.imm); + else rv64_emit32(mc, rv_addi(ereg, reg_num(exp), 0)); + if (des.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, dreg, des.v.imm); + else rv64_emit32(mc, rv_addi(dreg, reg_num(des), 0)); + + int aq = mem_order_is_acquire(succ); + int rl = mem_order_is_release(succ); + + MCLabel L_retry = mc->label_new(mc); + MCLabel L_fail = mc->label_new(mc); + MCLabel L_done = mc->label_new(mc); + + mc->label_place(mc, L_retry); + rv64_emit32(mc, sf ? rv_lr_d(reg_num(prior), base, (u32)aq, 0) + : rv_lr_w(reg_num(prior), base, (u32)aq, 0)); + /* if (prior != expected) -> fail */ + rv64_emit32(mc, rv_bne(reg_num(prior), ereg, 0)); + mc->emit_label_ref(mc, L_fail, R_RV_BRANCH, 4, 0); + /* sc.w/d t3, des, (base); bnez t3, retry */ + rv64_emit32(mc, sf ? 
rv_sc_d(RV_T3, base, dreg, 0, (u32)rl) + : rv_sc_w(RV_T3, base, dreg, 0, (u32)rl)); + rv64_emit32(mc, rv_bne(RV_T3, RV_ZERO, 0)); + mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0); + /* ok = 1; jump done */ + rv64_emit_load_imm(mc, 0, reg_num(ok), 1); + rv64_emit32(mc, rv_jal(RV_ZERO, 0)); + mc->emit_label_ref(mc, L_done, R_RV_JAL, 4, 0); + + mc->label_place(mc, L_fail); + rv64_emit_load_imm(mc, 0, reg_num(ok), 0); + + mc->label_place(mc, L_done); +} + +static void rv_fence(CGTarget* t, MemOrder o) { + if (o == MO_RELAXED) return; + rv64_emit32(t->mc, rv_fence_rw_rw()); +} + +/* ---- intrinsics: do what we can; panic on the rest. ---- */ +static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, + const Operand* args, u32 na) { + (void)nd; (void)na; + MCEmitter* mc = t->mc; + RImpl* a = impl_of(t); + switch (kind) { + case INTRIN_ASSUME_ALIGNED: + case INTRIN_EXPECT: { + /* dst = val (hint dropped). */ + Operand val = args[0]; + Operand dst = dsts[0]; + u32 sf = type_is_64(dst.type) ? 1u : 0u; + if (val.kind == OPK_REG) { + if (reg_num(val) != reg_num(dst)) + rv64_emit32(mc, rv_addi(reg_num(dst), reg_num(val), 0)); + } else if (val.kind == OPK_IMM) { + rv64_emit_load_imm(mc, sf, reg_num(dst), val.v.imm); + } else { + compiler_panic(t->c, a->loc, "rv64 intrinsic: val kind %d NYI", + (int)val.kind); + } + return; + } + case INTRIN_PREFETCH: return; + case INTRIN_UNREACHABLE: + case INTRIN_TRAP: + rv64_emit32(mc, rv_ebreak()); + return; + case INTRIN_BSWAP16: { + /* rd = ((rs & 0xff) << 8) | ((rs >> 8) & 0xff) */ + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + rv64_emit32(mc, rv_slli(RV_T1, rs, 8)); /* t1 = rs << 8 */ + rv64_emit32(mc, rv_andi(RV_T1, RV_T1, 0)); /* placeholder */ + /* Use lui mask approach for portability: build mask 0xff00 in t2. */ + rv64_emit32(mc, rv_addi(RV_T2, RV_ZERO, 0)); + /* Simpler: 0xff00 fits in lui+addi pattern but is also small enough: + * we can build via shift: t2 = 0xff << 8 = (0xff << 8). 
*/ + rv64_emit32(mc, rv_addi(RV_T2, RV_ZERO, 0xff)); + rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 8)); + /* t1 = (rs << 8) & 0xff00 */ + rv64_emit32(mc, rv_slli(RV_T1, rs, 8)); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T2)); + /* t3 = (rs >> 8) & 0xff (use srli on RV64 — high bits zeroed by + * preceding ANDI mask if input is uint16, but be safe and mask). */ + rv64_emit32(mc, rv_srli(RV_T3, rs, 8)); + rv64_emit32(mc, rv_andi(RV_T3, RV_T3, 0xff)); + rv64_emit32(mc, rv_or(rd, RV_T1, RV_T3)); + return; + } + case INTRIN_BSWAP32: { + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + /* result = (b0<<24)|(b1<<16)|(b2<<8)|b3, where bi = (rs >> (8*i)) & 0xff. */ + /* t1 = ((rs >> 24) & 0xff) */ + rv64_emit32(mc, rv_srliw(RV_T1, rs, 24)); + rv64_emit32(mc, rv_andi(RV_T1, RV_T1, 0xff)); + /* t2 = ((rs >> 16) & 0xff) << 8 */ + rv64_emit32(mc, rv_srliw(RV_T2, rs, 16)); + rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); + rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 8)); + rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); + /* t2 = ((rs >> 8) & 0xff) << 16 */ + rv64_emit32(mc, rv_srliw(RV_T2, rs, 8)); + rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); + rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 16)); + rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); + /* t2 = (rs & 0xff) << 24 */ + rv64_emit32(mc, rv_andi(RV_T2, rs, 0xff)); + rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 24)); + rv64_emit32(mc, rv_or(rd, RV_T1, RV_T2)); + /* zero-extend to 32 bits if dest is u32 */ + rv64_emit32(mc, rv_slli(rd, rd, 32)); + rv64_emit32(mc, rv_srli(rd, rd, 32)); + return; + } + case INTRIN_BSWAP64: { + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + /* General bswap64: iterate over the 8 bytes. 
*/ + /* t1 accumulator */ + rv64_emit32(mc, rv_addi(RV_T1, RV_ZERO, 0)); + for (int i = 0; i < 8; ++i) { + /* t2 = (rs >> (8*i)) & 0xff */ + if (i == 0) { + rv64_emit32(mc, rv_andi(RV_T2, rs, 0xff)); + } else { + rv64_emit32(mc, rv_srli(RV_T2, rs, (u32)(8 * i))); + rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff)); + } + /* t2 <<= (56 - 8*i) (so byte 0 goes to top) */ + int sh = 56 - 8 * i; + if (sh) rv64_emit32(mc, rv_slli(RV_T2, RV_T2, (u32)sh)); + rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); + } + rv64_emit32(mc, rv_addi(rd, RV_T1, 0)); + return; + } + case INTRIN_POPCOUNT: { + /* Software popcount. Use the bit-twiddling sequence on the + * appropriate width. dst type drives width. */ + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + int is64 = type_is_64(args[0].type); + /* Move rs into t1 to avoid clobbering input. */ + rv64_emit32(mc, rv_addi(RV_T1, rs, 0)); + if (!is64) { + /* zext.w t1, t1 */ + rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32)); + } + /* t1 = t1 - ((t1 >> 1) & 0x5555...) */ + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll + : (i64)0x55555555); + rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); + rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); + /* t1 = (t1 & 0x3333...) + ((t1 >> 2) & 0x3333...) */ + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll + : (i64)0x33333333); + rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2)); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + /* t1 = (t1 + (t1 >> 4)) & 0x0f0f... */ + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll + : (i64)0x0f0f0f0f); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + /* t1 *= 0x0101010101... ; result in top byte */ + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? 
(i64)0x0101010101010101ll + : (i64)0x01010101); + rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); + /* shift right by (XLEN - 8) */ + rv64_emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); + return; + } + case INTRIN_CTZ: { + /* ctz(x) = popcount((x & -x) - 1) for x != 0. */ + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + int is64 = type_is_64(args[0].type); + /* t1 = -x */ + rv64_emit32(mc, rv_sub(RV_T1, RV_ZERO, rs)); + /* t1 = x & -x */ + rv64_emit32(mc, rv_and(RV_T1, RV_T1, rs)); + /* t1 = t1 - 1 */ + rv64_emit32(mc, rv_addi(RV_T1, RV_T1, -1)); + if (!is64) { + rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32)); + } + /* popcount(t1) into rd */ + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll + : (i64)0x55555555); + rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); + rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll + : (i64)0x33333333); + rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2)); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll + : (i64)0x0f0f0f0f); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll + : (i64)0x01010101); + rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); + return; + } + case INTRIN_CLZ: { + /* Software clz: fold the high bit downward, then popcount the + * inverted result. Standard recipe: + * x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; + * [x |= x>>32;] // 64-bit + * clz = popcount(~x) [for the appropriate width]. 
+ */ + u32 rd = reg_num(dsts[0]); + u32 rs = reg_num(args[0]); + int is64 = type_is_64(args[0].type); + rv64_emit32(mc, rv_addi(RV_T1, rs, 0)); + if (!is64) { + /* zero-ext to 32 to make srli safe */ + rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32)); + } + u32 shifts[6] = {1, 2, 4, 8, 16, 32}; + u32 ns = is64 ? 6u : 5u; + for (u32 i = 0; i < ns; ++i) { + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, shifts[i])); + rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2)); + } + /* t1 = ~t1, then popcount and we want the (width - popcount) ... wait. + * Actually clz(x) for the folded x = popcount(~x). Let me verify. + * If x = 0b00011010, fold => 0b00011111. ~ => 0b11100000. + * popcount(~folded) = 3 = clz(0b00011010) ✓. */ + rv64_emit32(mc, rv_xori(RV_T1, RV_T1, -1)); + if (!is64) { + rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32)); + } + /* popcount(t1) into rd */ + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll + : (i64)0x55555555); + rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3)); + rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll + : (i64)0x33333333); + rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3)); + rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2)); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4)); + rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll + : (i64)0x0f0f0f0f); + rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3)); + rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll + : (i64)0x01010101); + rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3)); + rv64_emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); + return; + } + case INTRIN_ADD_OVERFLOW: + case INTRIN_SUB_OVERFLOW: { + /* dsts: [val, ovf]. Signed overflow check. 
+ * For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1) + * For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */ + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int is64 = type_is_64(dval.type); + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + u32 rd = reg_num(dval); + u32 rovf = reg_num(dovf); + /* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */ + if (kind == INTRIN_ADD_OVERFLOW) { + rv64_emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb)); + } else { + rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb)); + } + /* t3 = a XOR t2 */ + rv64_emit32(mc, rv_xor(RV_T3, ra, RV_T2)); + if (kind == INTRIN_ADD_OVERFLOW) { + /* t4 = b XOR t2 */ + rv64_emit32(mc, rv_xor(rovf, rb, RV_T2)); + rv64_emit32(mc, rv_and(rovf, rovf, RV_T3)); + } else { + /* t4 = a XOR b */ + rv64_emit32(mc, rv_xor(rovf, ra, rb)); + rv64_emit32(mc, rv_and(rovf, rovf, RV_T3)); + } + /* shift right to extract sign bit */ + u32 sh = is64 ? 63u : 31u; + rv64_emit32(mc, is64 ? rv_srli(rovf, rovf, sh) : rv_srliw(rovf, rovf, sh)); + rv64_emit32(mc, rv_andi(rovf, rovf, 1)); + /* Now write the value. */ + rv64_emit32(mc, rv_addi(rd, RV_T2, 0)); + return; + } + case INTRIN_MUL_OVERFLOW: { + /* SMULL: full 64-bit signed product of two i32s, then compare + * with sign-extend of low 32. For i64 inputs we panic for now. */ + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int is64 = type_is_64(dval.type); + if (is64) { + compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI"); + } + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + u32 rd = reg_num(dval); + u32 rovf = reg_num(dovf); + /* Sign-extend inputs from 32 to 64. 
*/ + rv64_emit32(mc, rv_addiw(RV_T2, ra, 0)); + rv64_emit32(mc, rv_addiw(RV_T3, rb, 0)); + /* Full 64-bit product */ + rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3)); + /* sign-ext of low 32 of product */ + rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0)); + /* ovf = (T2 != T3) */ + rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + /* dval = low 32, sign-extended */ + rv64_emit32(mc, rv_addiw(rd, RV_T2, 0)); + return; + } + case INTRIN_MEMCPY: + case INTRIN_MEMMOVE: { + Operand da = args[0], sa = args[1], nb = args[2]; + if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic(t->c, a->loc, + "rv64 intrinsic: memcpy/memmove non-const NYI"); + } + u32 dr = reg_num(da), sr = reg_num(sa), n = (u32)nb.v.imm; + if (kind == INTRIN_MEMCPY) { + u32 i = 0; + while (i + 8 <= n) { rv64_emit32(mc, rv_ld(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sd(RV_T3, dr, (i32)i)); i += 8; } + while (i + 4 <= n) { rv64_emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sw(RV_T3, dr, (i32)i)); i += 4; } + while (i + 2 <= n) { rv64_emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sh(RV_T3, dr, (i32)i)); i += 2; } + while (i < n) { rv64_emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sb(RV_T3, dr, (i32)i)); i += 1; } + } else { + u32 i = n; + while (i >= 8) { i -= 8; rv64_emit32(mc, rv_ld(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sd(RV_T3, dr, (i32)i)); } + while (i >= 4) { i -= 4; rv64_emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sw(RV_T3, dr, (i32)i)); } + while (i >= 2) { i -= 2; rv64_emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sh(RV_T3, dr, (i32)i)); } + while (i >= 1) { i -= 1; rv64_emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sb(RV_T3, dr, (i32)i)); } + } + return; + } + case INTRIN_MEMSET: { + Operand da = args[0], bv = args[1], nb = args[2]; + if (da.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic(t->c, a->loc, "rv64 intrinsic: memset non-const 
NYI"); + } + u32 dr = reg_num(da), n = (u32)nb.v.imm; + u32 src; + if (bv.kind == OPK_IMM) { + u32 byte = (u32)(bv.v.imm & 0xffu); + if (byte == 0) src = RV_ZERO; + else { + u64 b = byte; b |= b << 8; b |= b << 16; b |= b << 32; + rv64_emit_load_imm(mc, 1, RV_T3, (i64)b); + src = RV_T3; + } + } else { + compiler_panic(t->c, a->loc, "rv64 intrinsic: memset REG byte NYI"); + } + u32 i = 0; + while (i + 8 <= n) { rv64_emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; } + while (i + 4 <= n) { rv64_emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; } + while (i + 2 <= n) { rv64_emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; } + while (i < n) { rv64_emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; } + return; + } + default: + compiler_panic(t->c, a->loc, "rv64 intrinsic kind %d NYI", (int)kind); + } +} + +static void rv_asm_block(CGTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 no, Operand* oo, + const AsmConstraint* ins, u32 ni, const Operand* io, + const Sym* clobs, u32 nc) { + (void)tmpl; (void)outs; (void)no; (void)oo; + (void)ins; (void)ni; (void)io; (void)clobs; (void)nc; + rv_panic(t, "asm_block"); +} + +static void rv_set_loc(CGTarget* t, SrcLoc l) { + ((RImpl*)t)->loc = l; + if (t->mc) t->mc->set_loc(t->mc, l); +} + +static void rv_finalize(CGTarget* t) { (void)t; } +static void rv_destroy(CGTarget* t) { (void)t; } + +static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } + +CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { + RImpl* x = arena_new(c->tu, RImpl); + memset(x, 0, sizeof *x); + + CGTarget* t = &x->base; + t->c = c; + t->obj = o; + t->mc = m; + + t->func_begin = rv_func_begin; + t->func_end = rv_func_end; + + t->alloc_reg = rv_alloc_reg; + t->free_reg = rv_free_reg; + t->frame_slot = rv_frame_slot; + t->param = rv_param; + t->clobbers = rv_clobbers; + t->spill_reg = rv_spill_reg; + t->reload_reg = rv_reload_reg; + + t->label_new = rv_label_new; + t->label_place = rv_label_place; + t->jump = rv_jump; + t->cmp_branch = 
rv_cmp_branch; + + t->scope_begin = rv_scope_begin; + t->scope_else = rv_scope_else; + t->scope_end = rv_scope_end; + t->break_to = rv_break_to; + t->continue_to = rv_continue_to; + + t->load_imm = rv_load_imm; + t->load_const = rv_load_const; + t->copy = rv_copy; + t->load = rv_load; + t->store = rv_store; + t->addr_of = rv_addr_of; + t->tls_addr_of = rv_tls_addr_of; + t->copy_bytes = rv_copy_bytes; + t->set_bytes = rv_set_bytes; + t->bitfield_load = rv_bitfield_load; + t->bitfield_store = rv_bitfield_store; + + t->binop = rv_binop; + t->unop = rv_unop; + t->cmp = rv_cmp; + t->convert = rv_convert; + + t->call = rv_call; + t->ret = rv_ret; + + t->alloca_ = rv_alloca_; + t->va_start_ = rv_va_start_; + t->va_arg_ = rv_va_arg_; + t->va_end_ = rv_va_end_; + t->va_copy_ = rv_va_copy_; + + t->setjmp_ = NULL; + t->longjmp_ = NULL; + + t->atomic_load = rv_atomic_load; + t->atomic_store = rv_atomic_store; + t->atomic_rmw = rv_atomic_rmw; + t->atomic_cas = rv_atomic_cas; + t->fence = rv_fence; + + t->intrinsic = rv_intrinsic; + t->asm_block = rv_asm_block; + + t->set_loc = rv_set_loc; + t->finalize = rv_finalize; + t->destroy = rv_destroy; + + (void)type_is_signed; + compiler_defer(c, cgt_cleanup, t); + return t; +} diff --git a/src/arch/x64.c b/src/arch/x64.c @@ -1,3075 +0,0 @@ -/* Minimal x86_64 (SysV AMD64, Linux ELF) CGTarget. - * - * Single-pass codegen mirroring the structure of src/arch/aarch64.c - * and src/arch/rv64.c. The frame uses rbp as a frame pointer; locals - * live at negative offsets from rbp, callee-save spills live below - * the local area at known offsets, and outgoing args sit at sp+0. - * The prologue is reserved as a NOP-filled placeholder at func_begin - * and patched at func_end once frame_size and the callee-save high- - * water mark are known. - * - * Reg allocator: lowest-bit-first over a fixed preference list. 
INT - * pool has callee-saves (rbx, r12..r15) at the low bits, then a single - * caller-saved tail (r10) — so the first reg handed out is callee-saved, - * which is what tests like g11_caller_saved_live_across_call rely on. - * FP pool is xmm6..xmm15 (10 regs, all caller-saved on SysV). - * - * The six SysV arg-passing GPRs (rdi, rsi, rdx, rcx, r8, r9) are - * deliberately kept OUT of the pool. If they were in the pool, the - * arg-emit loop in x_call could clobber an arg's source register - * before reading it: e.g. `mov rdi, [arg1_local]; mov r8d, edi` for - * arg5 reads the wrong edi. Mirrors aarch64, which keeps x0..x7 out - * of its allocator pool for the same reason. - * - * Scratches kept outside the pools: rax (primary), rcx, rdx, r11 - * (secondary). rax is also the int return reg; xmm0 is the FP return - * reg. - * - * Scope: the test/cg spine (Groups A–H) plus alloca/VLA (Group I) and - * SysV varargs (Group J). Remaining unimplemented methods past that - * panic with a clear message — see doc/X64.md. */ - -#include <string.h> - -#include "arch/arch.h" -#include "arch/x64.h" -#include "arch/x64_isa.h" -#include "core/arena.h" -#include "core/pool.h" -#include "obj/obj.h" -#include "type/type.h" - -#define X64_PROLOGUE_BYTES 96u - -/* ============================================================ - * Custom register pool. - * - * Unlike aa64/rv64 the x64 pool is non-contiguous (skipping rax, - * rcx, rdx, rsp, rbp, r11). So we keep a bitmap over a static - * preference order rather than a (base, nregs) range. */ -typedef struct XRegPool { - u32 free; /* bit i set ⇔ alloc_order[i] is free */ - u32 hwm; /* highest index+1 ever allocated */ - const u8* order; /* alloc_order; first n_cs are callee-saved */ - u8 nregs; - u8 n_cs; - u8 pad[2]; -} XRegPool; - -static void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) { - p->order = order; - p->nregs = nregs; - p->n_cs = n_cs; - p->hwm = 0; - p->free = (nregs >= 32u) ? 
0xFFFFFFFFu : ((1u << nregs) - 1u); -} - -static Reg xpool_alloc(XRegPool* p) { - if (p->free == 0) return (Reg)REG_NONE; - u32 idx = (u32)__builtin_ctz(p->free); - p->free &= ~(1u << idx); - if (idx + 1u > p->hwm) p->hwm = idx + 1u; - return (Reg)p->order[idx]; -} - -static int xpool_free(XRegPool* p, Reg r) { - for (u8 i = 0; i < p->nregs; ++i) { - if (p->order[i] == (u8)r) { - u32 bit = 1u << i; - if (p->free & bit) return -1; - p->free |= bit; - return 1; - } - } - return 0; -} - -static const u8 g_int_order[6] = { - X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */ - X64_R10, /* caller-saved tail */ -}; - -static const u8 g_fp_order[10] = { - /* All xmm regs are caller-saved on SysV; preference order is xmm6 - * upward to keep the low arg/return regs (xmm0..5) clear for calls. */ - X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10, - X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15, -}; - -static const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX, - X64_RCX, X64_R8, X64_R9}; - -/* ============================================================ - * XImpl */ - -typedef struct XSlot { - u32 off; /* bytes below rbp (positive); address = rbp - off */ - u32 size; - u32 align; - u8 kind; - u8 pad[3]; -} XSlot; - -typedef struct XScope { - u8 kind; - u8 has_else; - u8 pad[2]; - MCLabel else_label; - MCLabel end_label; - Label break_label; - Label continue_label; -} XScope; - -/* alloca emits a placeholder `lea dst, [rsp + 0]` whose disp32 is patched - * at func_end with the final max_outgoing value. disp_pos records the - * byte offset of that disp32 in the active text section. 
*/ -typedef struct XAllocaPatch { - u32 disp_pos; -} XAllocaPatch; - -typedef struct XImpl { - CGTarget base; - SrcLoc loc; - const CGFuncDesc* fd; - - u32 func_start; - u32 prologue_pos; - MCLabel epilogue_label; - - XSlot* slots; - u32 nslots; - u32 slots_cap; - u32 cum_off; - u32 max_outgoing; - - u32 next_param_int; - u32 next_param_fp; - u32 next_param_stack; - u8 has_sret; - u8 has_alloca; - u8 is_variadic; - u8 pad0; - FrameSlot sret_ptr_slot; - FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */ - - XRegPool int_pool; - XRegPool fp_pool; - - XScope* scopes; - u32 nscopes; - u32 scopes_cap; - - XAllocaPatch* alloca_patches; - u32 nalloca_patches; - u32 alloca_patches_cap; -} XImpl; - -static XImpl* impl_of(CGTarget* t) { return (XImpl*)t; } - -/* Forward declarations. */ -static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); -static XSlot* slot_get(XImpl* a, FrameSlot fs); -static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); -static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); -static void x_free_reg(CGTarget* t, Reg r, RegClass cls); - -extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); - -/* ---- type helpers ---- */ -static int type_is_64(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 1; - default: - return 0; - } -} -static int type_is_fp_double(const Type* t) { - return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); -} -static u32 type_byte_size(const Type* t) { - if (!t) return 4; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_UCHAR: - case TY_BOOL: - return 1; - case TY_SHORT: - case TY_USHORT: - return 2; - case TY_INT: - case TY_UINT: - case TY_FLOAT: - return 4; - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_PTR: - case TY_DOUBLE: - return 8; - default: - return 8; 
- } -} -static int type_is_signed(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_CHAR: - case TY_SCHAR: - case TY_SHORT: - case TY_INT: - case TY_LONG: - case TY_LLONG: - return 1; - default: - return 0; - } -} - -static _Noreturn void x_panic(CGTarget* t, const char* what) { - SrcLoc loc = impl_of(t)->loc; - compiler_panic(t->c, loc, "x64: %s not implemented", what); -} - -/* ============================================================ - * Byte-level emit helpers. - * - * x64 instructions are variable length: optional legacy prefix(es), - * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional - * displacement, optional immediate. Helpers below build sequences - * into the active MCEmitter section, recording one Debug row per - * instruction-start. */ -static void emit1(MCEmitter* mc, u8 b) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - mc->emit_bytes(mc, &b, 1); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} -static void emit_u32le(MCEmitter* mc, u32 v) { - u8 b[4]; - b[0] = (u8)v; - b[1] = (u8)(v >> 8); - b[2] = (u8)(v >> 16); - b[3] = (u8)(v >> 24); - mc->emit_bytes(mc, b, 4); -} -static void emit_u64le(MCEmitter* mc, u64 v) { - u8 b[8]; - for (int i = 0; i < 8; ++i) b[i] = (u8)(v >> (i * 8)); - mc->emit_bytes(mc, b, 8); -} - -static u8 make_rex(int w, u32 reg, u32 index, u32 rm) { - u8 r = 0; - if (w) r |= X64_REX_W; - if (reg & 8) r |= X64_REX_R; - if (index & 8) r |= X64_REX_X; - if (rm & 8) r |= X64_REX_B; - return r ? (u8)(X64_REX_BASE | r) : 0; -} -static void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { - u8 r = make_rex(w, reg, index, rm); - if (r) mc->emit_bytes(mc, &r, 1); -} -/* Force REX (even REX=0x40) — required for byte-reg encodings that - * promote SIL/DIL/etc. */ -static void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { - u8 r = (u8)(X64_REX_BASE | (w ? X64_REX_W : 0) | ((reg & 8) ? X64_REX_R : 0) | - ((index & 8) ? X64_REX_X : 0) | ((rm & 8) ? 
X64_REX_B : 0)); - mc->emit_bytes(mc, &r, 1); -} - -static u8 modrm(u32 mod, u32 reg, u32 rm) { - return (u8)(((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u)); -} -static u8 sib(u32 scale, u32 index, u32 base) { - return (u8)(((scale & 3u) << 6) | ((index & 7u) << 3) | (base & 7u)); -} - -static u32 disp_mod(u32 base, i32 disp) { - if (disp == 0 && (base & 7u) != 5u) return 0u; /* [base] */ - if (disp >= -128 && disp <= 127) return 1u; /* [base + disp8] */ - return 2u; /* [base + disp32] */ -} - -static void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) { - u32 m = disp_mod(base, disp); - if ((base & 7u) == 4u) { - /* SIB byte required: index=4 (none), base=base. */ - u8 mr = modrm(m, reg, 4u); - mc->emit_bytes(mc, &mr, 1); - u8 s = sib(0, 4u, base); - mc->emit_bytes(mc, &s, 1); - } else { - u8 mr = modrm(m, reg, base); - mc->emit_bytes(mc, &mr, 1); - } - if (m == 1u) { - u8 d = (u8)(i8)disp; - mc->emit_bytes(mc, &d, 1); - } else if (m == 2u) { - emit_u32le(mc, (u32)disp); - } -} -static void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) { - u8 mr = modrm(3u, reg, rm); - mc->emit_bytes(mc, &mr, 1); -} - -/* ---- specific instruction emitters ---- */ - -/* mov rd, rs (64-bit if w, else 32-bit). */ -static void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, src, 0, dst); - u8 op = 0x89; /* MOV r/m, r */ - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, src, dst); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* mov reg, [base + disp]; size 1/2/4/8. 
*/ -static void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, - u32 base, i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (size == 8) { - emit_rex(mc, 1, dst, 0, base); - u8 op = 0x8B; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, dst, base, disp); - } else if (size == 4) { - emit_rex(mc, 0, dst, 0, base); - u8 op = 0x8B; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, dst, base, disp); - } else if (size == 2) { - emit_rex(mc, 0, dst, 0, base); - u8 op[2] = {0x0F, signed_ext ? 0xBF : 0xB7}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, dst, base, disp); - } else if (size == 1) { - emit_rex(mc, 0, dst, 0, base); - u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, dst, base, disp); - } - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* mov [base + disp], src; size 1/2/4/8. */ -static void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, - i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (size == 8) { - emit_rex(mc, 1, src, 0, base); - u8 op = 0x89; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, src, base, disp); - } else if (size == 4) { - emit_rex(mc, 0, src, 0, base); - u8 op = 0x89; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, src, base, disp); - } else if (size == 2) { - u8 p = 0x66; - mc->emit_bytes(mc, &p, 1); - emit_rex(mc, 0, src, 0, base); - u8 op = 0x89; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, src, base, disp); - } else if (size == 1) { - /* Force REX so SIL/DIL/etc are addressable as byte regs. 
*/ - emit_rex_force(mc, 0, src, 0, base); - u8 op = 0x88; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, src, base, disp); - } - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, 1, dst, 0, base); - u8 op = 0x8D; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, dst, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r - * imm32) for !is64. Both 10/5 bytes. */ -static void emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (is64) { - emit_rex(mc, 1, 0, 0, dst); - u8 op = (u8)(0xB8 | (dst & 7)); - mc->emit_bytes(mc, &op, 1); - emit_u64le(mc, (u64)imm); - } else { - emit_rex(mc, 0, 0, 0, dst); - u8 op = (u8)(0xB8 | (dst & 7)); - mc->emit_bytes(mc, &op, 1); - emit_u32le(mc, (u32)imm); - } - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* Two-operand ALU r/m, r. op picks ADD(01)/SUB(29)/AND(21)/OR(09)/XOR(31)/ - * CMP(39)/MOV(89)/TEST(85). 
*/ -static void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, src, 0, dst); - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, src, dst); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, dst, 0, src); - u8 op[2] = {0x0F, 0xAF}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 op = 0xF7; - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, sub, reg); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 op = 0xD3; - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, sub, reg); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* Shift r/m by imm8: opcode C1 /sub ib. sub: SHL=4, SHR=5, SAR=7. */ -static void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 buf[3]; - buf[0] = 0xC1; - buf[1] = modrm(3u, sub, reg); - buf[2] = imm; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_cqo_or_cdq(MCEmitter* mc, int w) { - if (w) { - u8 buf[2] = {X64_REX_BASE | X64_REX_W, 0x99}; - mc->emit_bytes(mc, buf, 2); - } else { - u8 op = 0x99; - mc->emit_bytes(mc, &op, 1); - } -} - -static void emit_xor_self(MCEmitter* mc, int w, u32 r) { - emit_alu_rr(mc, w, 0x31, r, r); -} - -/* cmp r/m, imm8 (0x83 /7). 
*/ -static void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 buf[3]; - buf[0] = 0x83; - buf[1] = modrm(3u, 7u, reg); - buf[2] = (u8)imm; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* ALU r/m, imm8: opcode 0x83 /sub ib (sign-extended). sub: ADD=0, - * OR=1, ADC=2, SBB=3, AND=4, SUB=5, XOR=6, CMP=7. */ -static void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 buf[3]; - buf[0] = 0x83; - buf[1] = modrm(3u, sub, reg); - buf[2] = (u8)imm; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* ALU r/m, imm32: opcode 0x81 /sub id (sign-extended for w=1). */ -static void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 buf[6]; - buf[0] = 0x81; - buf[1] = modrm(3u, sub, reg); - buf[2] = (u8)(imm & 0xFF); - buf[3] = (u8)((imm >> 8) & 0xFF); - buf[4] = (u8)((imm >> 16) & 0xFF); - buf[5] = (u8)((imm >> 24) & 0xFF); - mc->emit_bytes(mc, buf, 6); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* IMUL r, r/m, imm: 0x6B /r ib (imm8 sext) or 0x69 /r id (imm32 sext). - * Both forms write the result back to the same `dst` register so the - * caller doesn't need an explicit copy beforehand — unlike the ALU - * forms which read-modify-write a single operand. 
*/ -static void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, dst, 0, src); - u8 buf[3]; - buf[0] = 0x6B; - buf[1] = modrm(3u, dst, src); - buf[2] = (u8)imm; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} -static void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, dst, 0, src); - u8 buf[6]; - buf[0] = 0x69; - buf[1] = modrm(3u, dst, src); - buf[2] = (u8)(imm & 0xFF); - buf[3] = (u8)((imm >> 8) & 0xFF); - buf[4] = (u8)((imm >> 16) & 0xFF); - buf[5] = (u8)((imm >> 24) & 0xFF); - mc->emit_bytes(mc, buf, 6); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* Width predicate: does `imm` fit in an i8 (used by the 0x83/0x6B - * imm8-sign-extended forms)? */ -static int imm_fits_i8(i64 imm) { return imm >= -128 && imm <= 127; } -/* Width predicate: does `imm` fit in a signed 32-bit value (the 0x81/ - * 0x69 imm32-sign-extended forms; for w=1 the imm is sign-extended to - * 64). Returns 0 for values outside [INT32_MIN, INT32_MAX] — those - * require a full materialization through emit_load_imm. 
*/ -static int imm_fits_i32(i64 imm) { - return imm >= -2147483648LL && imm <= 2147483647LL; -} - -static void emit_test_self(MCEmitter* mc, int w, u32 reg) { - emit_alu_rr(mc, w, 0x85, reg, reg); -} - -static void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex_force(mc, 0, 0, 0, reg); - u8 op[2] = {0x0F, (u8)(0x90 | (cc & 0xF))}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, 0u, reg); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex_force(mc, 0, dst, 0, src); - u8 op[2] = {0x0F, 0xB6}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* movzx/movsx r→r. src_size is source byte width. */ -static void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size, - u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (src_size == 4 && signed_ext) { - /* movsxd r64, r32: REX.W 0x63 ModRM */ - emit_rex(mc, 1, dst, 0, src); - u8 op = 0x63; - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, dst, src); - } else if (src_size == 4 && !signed_ext) { - /* zext 32→64 is `mov r32, r32` (clears high 32). */ - emit_rex(mc, 0, src, 0, dst); - u8 op = 0x89; - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, src, dst); - } else if (src_size == 1) { - emit_rex_force(mc, w, dst, 0, src); - u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - } else if (src_size == 2) { - emit_rex(mc, w, dst, 0, src); - u8 op[2] = {0x0F, signed_ext ? 
0xBF : 0xB7}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - } - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_ret(MCEmitter* mc) { - u8 op = 0xC3; - mc->emit_bytes(mc, &op, 1); -} -static void emit_leave(MCEmitter* mc) { - u8 op = 0xC9; - mc->emit_bytes(mc, &op, 1); -} - -/* ---- SSE scalar FP encoders ---- */ -static void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (prefix) mc->emit_bytes(mc, &prefix, 1); - emit_rex(mc, 0, dst, 0, src); - u8 op[2] = {0x0F, opcode}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} -static void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, - u32 base, i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (prefix) mc->emit_bytes(mc, &prefix, 1); - emit_rex(mc, 0, dst, 0, base); - u8 op[2] = {0x0F, opcode}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, dst, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} -static void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, - u32 base, i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (prefix) mc->emit_bytes(mc, &prefix, 1); - emit_rex(mc, 0, src, 0, base); - u8 op[2] = {0x0F, opcode}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, src, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} -static void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, - u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - if (prefix) mc->emit_bytes(mc, &prefix, 1); - emit_rex(mc, w, dst, 0, src); - u8 op[2] = {0x0F, opcode}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* ============================================================ - * 
Function lifecycle */ - -static void x_func_begin(CGTarget* t, const CGFuncDesc* fd) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - mc->set_section(mc, fd->text_section_id); - mc->emit_align(mc, 16, 0x90); - - a->fd = fd; - a->func_start = mc->pos(mc); - a->next_param_int = 0; - a->next_param_fp = 0; - a->next_param_stack = 0; - a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; - a->has_alloca = 0; - a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; - a->cum_off = 0; - a->max_outgoing = 0; - xpool_init(&a->int_pool, g_int_order, 6u, 5u); - xpool_init(&a->fp_pool, g_fp_order, 10u, 0u); - a->nslots = 0; - a->nscopes = 0; - a->nalloca_patches = 0; - a->sret_ptr_slot = FRAME_SLOT_NONE; - a->reg_save_slot = FRAME_SLOT_NONE; - a->epilogue_label = mc->label_new(mc); - - mc->cfi_startproc(mc); - - /* Reserve a fixed-size prologue placeholder filled with NOPs. */ - a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) emit1(mc, 0x90); - - /* sret: rdi at entry holds the destination pointer. Spill it to a - * hidden slot so the body can use rdi freely. */ - if (a->has_sret) { - FrameSlotDesc fsd = { - .type = NULL, .name = 0, .loc = {0, 0, 0}, - .size = 8, .align = 8, .kind = FS_SPILL, .flags = 0, - }; - a->sret_ptr_slot = x_frame_slot(t, &fsd); - /* Subsequent int args start at rsi (next_param_int = 1). */ - a->next_param_int = 1; - } - - /* Variadic: reserve the SysV reg-save area (rdi..r9 at +0..+40, then - * xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the saves - * directly after the prologue placeholder so the original register - * args are preserved before x_param() spills the named ones. 
*/ - if (a->is_variadic) { - FrameSlotDesc rsd = { - .type = NULL, .name = 0, .loc = {0, 0, 0}, - .size = 176, .align = 8, .kind = FS_SPILL, .flags = 0, - }; - a->reg_save_slot = x_frame_slot(t, &rsd); - XSlot* rs = slot_get(a, a->reg_save_slot); - static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX, - X64_RCX, X64_R8, X64_R9}; - for (u32 i = 0; i < 6; ++i) { - emit_mov_store(mc, 8, gprs[i], X64_RBP, - -(i32)rs->off + (i32)(i * 8u)); - } - /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per - * FP slot, so the upper half of the 16-byte stride stays unused. */ - for (u32 i = 0; i < 8; ++i) { - emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP, - -(i32)rs->off + (i32)(48u + i * 16u)); - } - } -} - -static u32 align_up_u32(u32 v, u32 a) { return (v + (a - 1u)) & ~(a - 1u); } - -static void x_func_end(CGTarget* t) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - u32 cs_used = a->int_pool.hwm; - if (cs_used > a->int_pool.n_cs) cs_used = a->int_pool.n_cs; - u32 cs_size = cs_used * 8u; - - /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call, - * which means rsp ≡ 8 mod 16 inside the function (after the return - * address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it - * is 0 mod 16; after `sub rsp, frame_size` we need it back to 0 - * mod 16, so frame_size must be a multiple of 16. */ - u32 raw = a->max_outgoing + cs_size + a->cum_off; - u32 frame_size = align_up_u32(raw, 16u); - if (frame_size == 0) frame_size = 16; - - mc->label_place(mc, a->epilogue_label); - - /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */ - for (i32 i = (i32)cs_used - 1; i >= 0; --i) { - u32 reg = a->int_pool.order[i]; - i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; - emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off); - } - - /* leave; ret. */ - emit_leave(mc); - emit_ret(mc); - - /* Patch prologue placeholder. 
*/ - u8 buf[X64_PROLOGUE_BYTES]; - for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) buf[i] = 0x90; - u32 wi = 0; - - /* push rbp (1 byte). */ - buf[wi++] = 0x55; - /* mov rbp, rsp: REX.W 89 E5. */ - buf[wi++] = X64_REX_BASE | X64_REX_W; - buf[wi++] = 0x89; - buf[wi++] = modrm(3u, X64_RSP, X64_RBP); - /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */ - buf[wi++] = X64_REX_BASE | X64_REX_W; - buf[wi++] = 0x81; - buf[wi++] = modrm(3u, 5u, X64_RSP); - buf[wi++] = (u8)frame_size; - buf[wi++] = (u8)(frame_size >> 8); - buf[wi++] = (u8)(frame_size >> 16); - buf[wi++] = (u8)(frame_size >> 24); - - /* sret: mov [rbp + disp32], rdi. */ - if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { - XSlot* s = slot_get(a, a->sret_ptr_slot); - if (s) { - i32 off = -(i32)s->off; - if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow; - buf[wi++] = X64_REX_BASE | X64_REX_W; - buf[wi++] = 0x89; - buf[wi++] = modrm(2u, X64_RDI, X64_RBP); - buf[wi++] = (u8)off; - buf[wi++] = (u8)(off >> 8); - buf[wi++] = (u8)(off >> 16); - buf[wi++] = (u8)(off >> 24); - } - } - - /* Spill callee-saves. */ - for (u32 i = 0; i < cs_used; ++i) { - u32 reg = a->int_pool.order[i]; - i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; - if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow; - buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0)); - buf[wi++] = 0x89; - buf[wi++] = modrm(2u, (reg & 7u), X64_RBP); - buf[wi++] = (u8)off; - buf[wi++] = (u8)(off >> 8); - buf[wi++] = (u8)(off >> 16); - buf[wi++] = (u8)(off >> 24); - } - - if (0) { - overflow: - compiler_panic(t->c, a->loc, - "x64: prologue placeholder overflow (%u of %u bytes)", wi, - X64_PROLOGUE_BYTES); - } - obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf, - X64_PROLOGUE_BYTES); - - /* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final - * max_outgoing (already 16-aligned via the `(stack_off+15)&~15` round - * at every call site). 
*/ - for (u32 i = 0; i < a->nalloca_patches; ++i) { - u8 dbuf[4]; - u32 m = a->max_outgoing; - dbuf[0] = (u8)m; - dbuf[1] = (u8)(m >> 8); - dbuf[2] = (u8)(m >> 16); - dbuf[3] = (u8)(m >> 24); - obj_patch(t->obj, a->fd->text_section_id, - a->alloca_patches[i].disp_pos, dbuf, 4); - } - - /* Define the function symbol. */ - u32 end = mc->pos(mc); - obj_symbol_define(t->obj, a->fd->sym, a->fd->text_section_id, - (u64)a->func_start, (u64)(end - a->func_start)); - - mc->cfi_endproc(mc); - a->fd = NULL; -} - -/* ============================================================ - * Registers / frame */ - -static Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { - XImpl* a = impl_of(t); - (void)ty; - if (cls == RC_INT) return xpool_alloc(&a->int_pool); - if (cls == RC_FP) return xpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls); -} - -static void x_free_reg(CGTarget* t, Reg r, RegClass cls) { - XImpl* a = impl_of(t); - XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool; - int rc = xpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free", - (unsigned)r); - } - compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); -} - -static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) { - XImpl* a = impl_of(t); - if (a->nslots == a->slots_cap) { - u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8; - XSlot* nbuf = arena_array(t->c->tu, XSlot, ncap); - if (a->slots) memcpy(nbuf, a->slots, sizeof(XSlot) * a->nslots); - a->slots = nbuf; - a->slots_cap = ncap; - } - u32 size = d->size ? d->size : 8; - u32 align = d->align ? 
d->align : 1; - u32 next = a->cum_off + size; - u32 mask = align - 1u; - next = (next + mask) & ~mask; - XSlot* s = &a->slots[a->nslots]; - s->off = next; - s->size = size; - s->align = align; - s->kind = d->kind; - a->cum_off = next; - a->nslots++; - return (FrameSlot)(a->nslots); -} - -static XSlot* slot_get(XImpl* a, FrameSlot fs) { - if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; - return &a->slots[fs - 1]; -} - -/* ---- param: store incoming arg(s) into the home slot ---- */ -static void x_param(CGTarget* t, const CGParamDesc* p) { - XImpl* a = impl_of(t); - XSlot* s = slot_get(a, p->slot); - if (!s) compiler_panic(t->c, a->loc, "x64 param: bad slot"); - const ABIArgInfo* ai = p->abi; - - if (ai->kind == ABI_ARG_IGNORE) return; - if (ai->kind == ABI_ARG_INDIRECT) { - /* Incoming pointer to byval copy: load pointer, memcpy into slot. */ - u32 ptr_reg; - if (a->next_param_int < 6) { - ptr_reg = g_int_arg_regs[a->next_param_int++]; - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit_mov_load(t->mc, 8, 0, X64_R11, X64_RBP, (i32)(16 + caller_off)); - ptr_reg = X64_R11; - } - u32 nbytes = s->size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit_mov_load(t->mc, 8, 0, X64_RAX, ptr_reg, (i32)i); - emit_mov_store(t->mc, 8, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); - i += 8; - } - while (i + 4 <= nbytes) { - emit_mov_load(t->mc, 4, 0, X64_RAX, ptr_reg, (i32)i); - emit_mov_store(t->mc, 4, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); - i += 4; - } - while (i + 2 <= nbytes) { - emit_mov_load(t->mc, 2, 0, X64_RAX, ptr_reg, (i32)i); - emit_mov_store(t->mc, 2, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); - i += 2; - } - while (i < nbytes) { - emit_mov_load(t->mc, 1, 0, X64_RAX, ptr_reg, (i32)i); - emit_mov_store(t->mc, 1, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); - i += 1; - } - return; - } - /* DIRECT */ - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 part_off = pt->src_offset; - u32 sz = pt->size; 
- if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 6) { - u32 reg = g_int_arg_regs[a->next_param_int++]; - emit_mov_store(t->mc, sz, reg, X64_RBP, - -(i32)s->off + (i32)part_off); - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - emit_mov_load(t->mc, sz, 0, X64_RAX, X64_RBP, - (i32)(16 + caller_off)); - emit_mov_store(t->mc, sz, X64_RAX, X64_RBP, - -(i32)s->off + (i32)part_off); - } - } else if (pt->cls == ABI_CLASS_FP) { - if (a->next_param_fp < 8) { - u32 xmm = a->next_param_fp++; - u8 prefix = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP, - -(i32)s->off + (i32)part_off); - } else { - u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - u8 prefix = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(t->mc, prefix, 0x10, X64_XMM0, X64_RBP, - (i32)(16 + caller_off)); - emit_sse_store(t->mc, prefix, 0x11, X64_XMM0, X64_RBP, - -(i32)s->off + (i32)part_off); - } - } else { - compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - x_panic(t, "clobbers"); -} -static void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, - MemAccess ma) { - XImpl* a = impl_of(t); - if (src.kind != OPK_REG) - compiler_panic(t->c, a->loc, "x64 spill_reg: src is not OPK_REG"); - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - x_store(t, addr, src, ma); - x_free_reg(t, src.v.reg, src.cls); -} - -static void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, - MemAccess ma) { - XImpl* a = impl_of(t); - if (dst.kind != OPK_REG) - compiler_panic(t->c, a->loc, "x64 reload_reg: dst is not OPK_REG"); - Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_LOCAL; - addr.cls = RC_INT; - addr.type = ma.type; - addr.v.frame_slot = slot; - x_load(t, dst, addr, ma); -} - -/* 
============================================================ - * Labels / control flow */ - -static Label x_label_new(CGTarget* t) { - return (Label)t->mc->label_new(t->mc); -} -static void x_label_place(CGTarget* t, Label l) { - t->mc->label_place(t->mc, (MCLabel)l); -} - -/* Emit `jmp rel32` (E9 + 4-byte disp) with a label fixup. R_PC32 applied - * at the disp32 site with addend=-4 yields target - end_of_insn. */ -static void emit_jmp_label(MCEmitter* mc, MCLabel l) { - u8 op = 0xE9; - mc->emit_bytes(mc, &op, 1); - emit_u32le(mc, 0); - mc->emit_label_ref(mc, l, R_PC32, 4, -4); -} - -/* Emit `Jcc rel32` (0F 8x + 4-byte disp) with a label fixup. */ -static void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) { - u8 op[2] = {0x0F, (u8)(0x80 | (cc & 0xF))}; - mc->emit_bytes(mc, op, 2); - emit_u32le(mc, 0); - mc->emit_label_ref(mc, l, R_PC32, 4, -4); -} - -static void x_jump(CGTarget* t, Label l) { emit_jmp_label(t->mc, (MCLabel)l); } - -static u32 cmp_to_cc(CmpOp op) { - switch (op) { - case CMP_EQ: return X64_CC_E; - case CMP_NE: return X64_CC_NE; - case CMP_LT_U: return X64_CC_B; - case CMP_LE_U: return X64_CC_BE; - case CMP_GT_U: return X64_CC_A; - case CMP_GE_U: return X64_CC_AE; - case CMP_LT_S: return X64_CC_L; - case CMP_LE_S: return X64_CC_LE; - case CMP_GT_S: return X64_CC_G; - case CMP_GE_S: return X64_CC_GE; - default: return X64_CC_E; - } -} - -static u32 force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) { - if (op.kind == OPK_REG) return op.v.reg & 0xFu; - if (op.kind == OPK_IMM) { - emit_load_imm(t->mc, w, scratch, op.v.imm); - return scratch; - } - compiler_panic(t->c, impl_of(t)->loc, "x64: operand kind %d not REG/IMM", - (int)op.kind); -} - -static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { - int w = type_is_64(a_op.type) ? 1 : 0; - /* IMM RHS imm8 / imm32 fast paths. CMP is not commutative across the - * cond codes, so IMM-on-LHS still has to materialize. 
*/ - if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG) { - if (imm_fits_i8(b_op.v.imm)) { - emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm); - return; - } - if (imm_fits_i32(b_op.v.imm)) { - emit_alu_imm32(t->mc, w, /*sub=CMP*/ 7u, a_op.v.reg & 0xFu, - (i32)b_op.v.imm); - return; - } - } - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - u32 rb = force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11); - /* cmp r/m, r — opcode 0x39 (encoded as `cmp ra, rb` ⇒ flags = ra - rb). */ - emit_alu_rr(t->mc, w, 0x39, ra, rb); -} - -static void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, - Label l) { - emit_cmp_ab(t, a, b); - emit_jcc_label(t->mc, cmp_to_cc(op), (MCLabel)l); -} - -static void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { - emit_cmp_ab(t, a, b); - u32 d = dst.v.reg & 0xFu; - emit_setcc(t->mc, cmp_to_cc(op), d); - emit_movzx_r32_r8(t->mc, d, d); -} - -/* ---- structured scopes ---- */ -static CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) { - XImpl* a = impl_of(t); - if (a->nscopes == a->scopes_cap) { - u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; - XScope* nb = arena_array(t->c->tu, XScope, ncap); - if (a->scopes) memcpy(nb, a->scopes, sizeof(XScope) * a->nscopes); - a->scopes = nb; - a->scopes_cap = ncap; - } - XScope* sc = &a->scopes[a->nscopes]; - sc->kind = (u8)d->kind; - sc->has_else = 0; - sc->else_label = 0; - sc->end_label = 0; - sc->break_label = d->break_label; - sc->continue_label = d->continue_label; - - if (d->kind == SCOPE_IF) { - sc->else_label = t->mc->label_new(t->mc); - sc->end_label = t->mc->label_new(t->mc); - int w = type_is_64(d->cond.type) ? 1 : 0; - u32 rc = force_reg_int(t, d->cond, w, X64_RAX); - emit_test_self(t->mc, w, rc); - emit_jcc_label(t->mc, X64_CC_E, sc->else_label); - } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { - /* Bookkeeping only. 
*/ - } else { - compiler_panic(t->c, a->loc, - "x64 scope_begin: kind %d not yet implemented", - (int)d->kind); - } - a->nscopes++; - return (CGScope)a->nscopes; -} - -static void x_scope_else(CGTarget* t, CGScope s) { - XImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) - compiler_panic(t->c, a->loc, "x64 scope_else: bad scope"); - XScope* sc = &a->scopes[s - 1]; - emit_jmp_label(t->mc, sc->end_label); - t->mc->label_place(t->mc, sc->else_label); - sc->has_else = 1; -} - -static void x_scope_end(CGTarget* t, CGScope s) { - XImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) - compiler_panic(t->c, a->loc, "x64 scope_end: bad scope"); - XScope* sc = &a->scopes[s - 1]; - if (sc->kind == SCOPE_IF) { - if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label); - t->mc->label_place(t->mc, sc->end_label); - } -} - -static void x_break_to(CGTarget* t, CGScope s) { - XImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) - compiler_panic(t->c, a->loc, "x64 break_to: bad scope"); - x_jump(t, a->scopes[s - 1].break_label); -} -static void x_continue_to(CGTarget* t, CGScope s) { - XImpl* a = impl_of(t); - if (s == CG_SCOPE_NONE || s > a->nscopes) - compiler_panic(t->c, a->loc, "x64 continue_to: bad scope"); - x_jump(t, a->scopes[s - 1].continue_label); -} - -/* ============================================================ - * Data movement */ - -static void x_load_imm(CGTarget* t, Operand dst, i64 imm) { - int w = type_is_64(dst.type) ? 1 : 0; - emit_load_imm(t->mc, w, dst.v.reg & 0xFu, imm); -} - -/* Materialize an FP literal: stash bytes in .rodata as a fresh local - * symbol, then load via RIP-relative movss/movsd. 
*/ -static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) { - XImpl* a = impl_of(t); - if (dst.cls != RC_FP) - compiler_panic(t->c, a->loc, "x64 load_const: only FP supported in v1"); - - Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); - ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); - - u32 cur_section = t->mc->section_id; - t->mc->set_section(t->mc, ro); - u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4); - t->mc->emit_bytes(t->mc, cb.bytes, cb.size); - - char namebuf[64]; - static u32 lit_seq = 0; - int len = 0; - const char* prefix = ".LCFP_x64_"; - for (; prefix[len]; ++len) namebuf[len] = prefix[len]; - u32 v = lit_seq++; - char tmp[16]; - int tn = 0; - if (v == 0) - tmp[tn++] = '0'; - else - while (v) { - tmp[tn++] = '0' + (char)(v % 10); - v /= 10; - } - for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; - namebuf[len] = 0; - - Sym sname = pool_intern_cstr(t->c->global, namebuf); - ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, - (u64)cb.size); - t->mc->set_section(t->mc, cur_section); - - /* movs{s,d} xmm, [rip+disp32]. Reloc R_PC32 with addend=-4 at the - * disp32 site so the linker resolves to target relative to end-of-insn. */ - u8 prefix2 = (cb.size == 8) ? 0xF2 : 0xF3; - u32 dst_x = dst.v.reg & 0xFu; - t->mc->emit_bytes(t->mc, &prefix2, 1); - emit_rex(t->mc, 0, dst_x, 0, 0); - u8 op[2] = {0x0F, 0x10}; - t->mc->emit_bytes(t->mc, op, 2); - u8 mr = modrm(0u, (dst_x & 7u), 5u); /* [RIP + disp32] */ - t->mc->emit_bytes(t->mc, &mr, 1); - u32 disp_pos = t->mc->pos(t->mc); - emit_u32le(t->mc, 0); - t->mc->emit_reloc_at(t->mc, cur_section, disp_pos, R_PC32, sym, -4, 1, 0); -} - -static void x_copy(CGTarget* t, Operand dst, Operand src) { - if (dst.cls == RC_FP || src.cls == RC_FP) { - u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; - emit_sse_rr(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, src.v.reg & 0xFu); - return; - } - int w = type_is_64(dst.type) ? 
1 : 0; - emit_mov_rr(t->mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu); -} - -static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) { - XImpl* a = impl_of(t); - if (addr.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, addr.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 addr_base: bad slot"); - *out_off = -(i32)s->off; - return X64_RBP; - } - if (addr.kind == OPK_INDIRECT) { - *out_off = addr.v.ind.ofs; - return addr.v.ind.base & 0xFu; - } - compiler_panic(t->c, a->loc, "x64 addr_base: kind %d unsupported", - (int)addr.kind); -} - -static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) { - return obj_symbol_extern_via_got(t->c, t->obj, sym); -} - -/* Materialize `&sym + addend` into `dst_reg`. For locally-defined or - * static-link extern symbols, emit `lea rd, [rip + disp32]` with - * R_X64_PLT32 (PLT32 collapses to a plain PC-relative LEA at link time - * — the PLT routing only fires when the linker actually needs the - * trampoline, i.e. function calls into a DSO). For undef externs in - * PIC/PIE we instead emit `mov rd, [rip + disp32]` against a GOT slot - * (R_X64_REX_GOTPCRELX) so the loader can resolve the symbol by - * patching a single slot rather than touching .text. - * - * Addend -4 because the PC is end-of-instruction. When routing - * through the GOT we omit any extra addend on the reloc (most loaders - * disallow nonzero addends on GOT-load fixups); a follow-up `add` / - * `lea` would have to add it after the load if the codegen needed - * `&sym + nonzero`. In practice the caller only ever passes - * addend=0 for global references that go through the GOT path. 
*/ -static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym, - i64 addend) { - if (x64_use_got_for_sym(t, sym)) { - /* mov rd, [rip + disp32] */ - emit_rex(t->mc, 1, dst_reg, 0, 0); - u8 op = 0x8B; - t->mc->emit_bytes(t->mc, &op, 1); - u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ - t->mc->emit_bytes(t->mc, &mr, 1); - u32 disp_pos = t->mc->pos(t->mc); - emit_u32le(t->mc, 0); - t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos, - R_X64_REX_GOTPCRELX, sym, -4, 1, 0); - /* Apply any nonzero addend by adjusting the loaded value. */ - if (addend) { - i32 a = (i32)addend; - if (a >= -128 && a <= 127) { - /* add r/m64, imm8 (REX.W + 0x83 /0 ib) */ - emit_rex(t->mc, 1, 0, 0, dst_reg); - u8 add_op[2] = {0x83, modrm(3u, 0u, (u8)(dst_reg & 7u))}; - t->mc->emit_bytes(t->mc, add_op, 2); - u8 ib = (u8)a; - t->mc->emit_bytes(t->mc, &ib, 1); - } else { - /* add r/m64, imm32 (REX.W + 0x81 /0 id) */ - emit_rex(t->mc, 1, 0, 0, dst_reg); - u8 add_op[2] = {0x81, modrm(3u, 0u, (u8)(dst_reg & 7u))}; - t->mc->emit_bytes(t->mc, add_op, 2); - emit_u32le(t->mc, (u32)a); - } - } - return; - } - emit_rex(t->mc, 1, dst_reg, 0, 0); - u8 op = 0x8D; - t->mc->emit_bytes(t->mc, &op, 1); - u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ - t->mc->emit_bytes(t->mc, &mr, 1); - u32 disp_pos = t->mc->pos(t->mc); - emit_u32le(t->mc, 0); - t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos, R_X64_PLT32, sym, - addend - 4, 1, 0); -} - -static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { - u32 sz = ma.size ? ma.size : type_byte_size(addr.type); - - if (addr.kind == OPK_GLOBAL) { - /* Materialize &sym into R11, then load from [r11]. */ - emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend); - if (dst.cls == RC_FP) { - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, X64_R11, 0); - } else { - int signed_ = type_is_signed(ma.type ? 
ma.type : addr.type); - emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, X64_R11, 0); - } - return; - } - - i32 off; - u32 base = addr_base(t, addr, &off); - if (dst.cls == RC_FP) { - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, off); - } else { - int signed_ = type_is_signed(ma.type ? ma.type : addr.type); - emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, base, off); - } -} - -static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { - u32 sz = ma.size ? ma.size : type_byte_size(addr.type); - - if (addr.kind == OPK_GLOBAL) { - /* Materialize &sym into R11, then store via [r11]. The IMM source - * branch below uses RAX as a scratch for the value, so R11 stays - * untouched between the LEA and the store. */ - emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend); - if (src.kind == OPK_IMM) { - int w = (sz == 8) ? 1 : 0; - emit_load_imm(t->mc, w, X64_RAX, src.v.imm); - emit_mov_store(t->mc, sz, X64_RAX, X64_R11, 0); - return; - } - if (src.cls == RC_FP) { - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, X64_R11, 0); - return; - } - emit_mov_store(t->mc, sz, src.v.reg & 0xFu, X64_R11, 0); - return; - } - - i32 off; - u32 base = addr_base(t, addr, &off); - - if (src.kind == OPK_IMM) { - int w = (sz == 8) ? 1 : 0; - emit_load_imm(t->mc, w, X64_RAX, src.v.imm); - emit_mov_store(t->mc, sz, X64_RAX, base, off); - return; - } - if (src.cls == RC_FP) { - u8 prefix2 = (sz == 8) ? 
0xF2 : 0xF3; - emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, off); - return; - } - emit_mov_store(t->mc, sz, src.v.reg & 0xFu, base, off); -} - -static void x_addr_of(CGTarget* t, Operand dst, Operand lv) { - XImpl* a = impl_of(t); - if (lv.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, lv.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 addr_of: bad slot"); - emit_lea(t->mc, dst.v.reg & 0xFu, X64_RBP, -(i32)s->off); - return; - } - if (lv.kind == OPK_INDIRECT) { - emit_lea(t->mc, dst.v.reg & 0xFu, lv.v.ind.base & 0xFu, lv.v.ind.ofs); - return; - } - if (lv.kind == OPK_GLOBAL) { - emit_global_lea(t, dst.v.reg & 0xFu, lv.v.global.sym, lv.v.global.addend); - return; - } - x_panic(t, "addr_of: kind unsupported"); -} - -/* x86_64 TLS Local-Exec materialization. - * mov rd, fs:0 ; read thread pointer (FS base + 0) - * lea rd, [rd + sym@tpoff] ; add TP-relative offset - * The disp32 of the LEA carries an R_X64_TPOFF32 reloc; the linker fills - * in the signed TP-relative offset (negative under variant II — TLS image - * sits below the TCB that FS points at). 
*/ -static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { - MCEmitter* mc = t->mc; - u32 sec = mc->section_id; - u32 rd = dst.v.reg & 0xFu; - - /* mov rd, qword ptr fs:[0] - * 64 [REX.W|REX.R] 8B mod=00/reg=rd/rm=100 sib(0,4,5) disp32=0 */ - u8 fs_prefix = 0x64; - mc->emit_bytes(mc, &fs_prefix, 1); - emit_rex(mc, 1, rd, 0, 0); - u8 op_mov = 0x8B; - mc->emit_bytes(mc, &op_mov, 1); - u8 mr1 = modrm(0u, rd & 7u, 4u); - mc->emit_bytes(mc, &mr1, 1); - u8 s1 = sib(0u, 4u, 5u); - mc->emit_bytes(mc, &s1, 1); - emit_u32le(mc, 0); - - /* lea rd, [rd + disp32] - * [REX.W|REX.R|REX.B] 8D mod=10/reg=rd/rm=rd [SIB if rd&7==4] disp32 */ - emit_rex(mc, 1, rd, 0, rd); - u8 op_lea = 0x8D; - mc->emit_bytes(mc, &op_lea, 1); - u32 disp_pos; - if ((rd & 7u) == 4u) { - u8 mr2 = modrm(2u, rd & 7u, 4u); - mc->emit_bytes(mc, &mr2, 1); - u8 s2 = sib(0u, 4u, rd & 7u); - mc->emit_bytes(mc, &s2, 1); - disp_pos = mc->pos(mc); - emit_u32le(mc, 0); - } else { - u8 mr2 = modrm(2u, rd & 7u, rd & 7u); - mc->emit_bytes(mc, &mr2, 1); - disp_pos = mc->pos(mc); - emit_u32le(mc, 0); - } - mc->emit_reloc_at(mc, sec, disp_pos, R_X64_TPOFF32, sym, addend, 0, 0); -} - -/* Aggregate ops — small unrolled memcpy/memset. */ -static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { - if (op.kind == OPK_REG) return op.v.reg & 0xFu; - if (op.kind == OPK_LOCAL) { - XImpl* a = impl_of(t); - XSlot* s = slot_get(a, op.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 agg: bad slot"); - emit_lea(t->mc, scratch, X64_RBP, -(i32)s->off); - return scratch; - } - compiler_panic(t->c, impl_of(t)->loc, - "x64 agg: address kind %d unsupported", (int)op.kind); -} - -static void x_copy_bytes(CGTarget* t, Operand da, Operand sa, - AggregateAccess g) { - u32 dr = agg_addr_reg(t, da, X64_R11); - u32 sr = agg_addr_reg(t, sa, (dr == X64_RAX) ? 
X64_RCX : X64_RAX); - u32 nbytes = g.size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit_mov_load(t->mc, 8, 0, X64_RDX, sr, (i32)i); - emit_mov_store(t->mc, 8, X64_RDX, dr, (i32)i); - i += 8; - } - while (i + 4 <= nbytes) { - emit_mov_load(t->mc, 4, 0, X64_RDX, sr, (i32)i); - emit_mov_store(t->mc, 4, X64_RDX, dr, (i32)i); - i += 4; - } - while (i + 2 <= nbytes) { - emit_mov_load(t->mc, 2, 0, X64_RDX, sr, (i32)i); - emit_mov_store(t->mc, 2, X64_RDX, dr, (i32)i); - i += 2; - } - while (i < nbytes) { - emit_mov_load(t->mc, 1, 0, X64_RDX, sr, (i32)i); - emit_mov_store(t->mc, 1, X64_RDX, dr, (i32)i); - i += 1; - } -} - -static void x_set_bytes(CGTarget* t, Operand da, Operand bv, - AggregateAccess g) { - u32 dr = agg_addr_reg(t, da, X64_R11); - if (bv.kind != OPK_IMM) - compiler_panic(t->c, impl_of(t)->loc, - "x64 set_bytes: non-IMM byte not yet supported"); - u8 b = (u8)(bv.v.imm & 0xff); - u64 b64 = b; - b64 |= b64 << 8; - b64 |= b64 << 16; - b64 |= b64 << 32; - emit_load_imm(t->mc, 1, X64_RAX, (i64)b64); - u32 nbytes = g.size; - u32 i = 0; - while (i + 8 <= nbytes) { - emit_mov_store(t->mc, 8, X64_RAX, dr, (i32)i); - i += 8; - } - while (i + 4 <= nbytes) { - emit_mov_store(t->mc, 4, X64_RAX, dr, (i32)i); - i += 4; - } - while (i + 2 <= nbytes) { - emit_mov_store(t->mc, 2, X64_RAX, dr, (i32)i); - i += 2; - } - while (i < nbytes) { - emit_mov_store(t->mc, 1, X64_RAX, dr, (i32)i); - i += 1; - } -} - -/* Load the storage unit, then extract the field by shifting it to the - * top of the register and shifting back. SAR for signed, SHR for unsigned. */ -static void x_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, - BitFieldAccess bf) { - u32 base = agg_addr_reg(t, record_addr, X64_R11); - u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; - int w = (storage_bytes == 8u) ? 1 : 0; - u32 reg_size = w ? 64u : 32u; - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? 
bf.bit_width : 1u; - u32 rd = dst.v.reg & 0xFu; - - emit_mov_load(t->mc, storage_bytes, 0, rd, base, (i32)bf.storage_offset); - u8 left = (u8)(reg_size - lsb - width); - u8 right = (u8)(reg_size - width); - if (left) emit_shift_imm(t->mc, w, 4u, rd, left); - if (right) emit_shift_imm(t->mc, w, bf.signed_ ? 7u : 5u, rd, right); -} - -/* Read-modify-write: clear the field bits in the storage unit via AND ~mask, - * mask/shift the source into place, OR it in, write back. RAX holds the - * storage word; RCX is the staged value; RDX holds the source-side mask when - * needed. Avoids touching the base register. */ -static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src, - BitFieldAccess bf) { - u32 base = agg_addr_reg(t, record_addr, X64_R11); - u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; - int w = (storage_bytes == 8u) ? 1 : 0; - u32 lsb = bf.bit_offset; - u32 width = bf.bit_width ? bf.bit_width : 1u; - u64 ones = (width >= 64u) ? ~(u64)0 : (((u64)1 << width) - 1u); - u64 mask = ones << lsb; - - emit_mov_load(t->mc, storage_bytes, 0, X64_RAX, base, (i32)bf.storage_offset); - emit_load_imm(t->mc, w, X64_RCX, (i64)~mask); - emit_alu_rr(t->mc, w, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */ - - if (src.kind == OPK_IMM) { - u64 v = ((u64)src.v.imm & ones) << lsb; - emit_load_imm(t->mc, w, X64_RCX, (i64)v); - } else if (src.kind == OPK_REG) { - emit_mov_rr(t->mc, w, X64_RCX, src.v.reg & 0xFu); - emit_load_imm(t->mc, w, X64_RDX, (i64)ones); - emit_alu_rr(t->mc, w, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */ - if (lsb) emit_shift_imm(t->mc, w, 4u, X64_RCX, (u8)lsb); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "x64 bitfield_store: src kind %d unsupported", - (int)src.kind); - } - emit_alu_rr(t->mc, w, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */ - emit_mov_store(t->mc, storage_bytes, X64_RAX, base, (i32)bf.storage_offset); -} - -/* ============================================================ - * Arithmetic */ - -static void 
x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, - Operand b_op) { - MCEmitter* mc = t->mc; - - /* FP binops. */ - if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { - u32 rd = dst.v.reg & 0xFu; - u32 ra = a_op.v.reg & 0xFu; - u32 rb = b_op.v.reg & 0xFu; - u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; - if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra); - u8 opcode; - switch (op) { - case BO_FADD: opcode = 0x58; break; - case BO_FSUB: opcode = 0x5C; break; - case BO_FMUL: opcode = 0x59; break; - case BO_FDIV: opcode = 0x5E; break; - default: opcode = 0x58; break; - } - emit_sse_rr(mc, prefix2, opcode, rd, rb); - return; - } - - int w = type_is_64(dst.type) ? 1 : 0; - u32 rd = dst.v.reg & 0xFu; - - /* Division: idiv/div uses rax/rdx implicitly. Route divisor through r11 - * if it would otherwise be rax/rdx. */ - if (op == BO_SDIV || op == BO_UDIV || op == BO_SREM || op == BO_UREM) { - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra); - u32 rb; - if (b_op.kind == OPK_REG) { - rb = b_op.v.reg & 0xFu; - if (rb == X64_RAX || rb == X64_RDX) { - emit_mov_rr(mc, w, X64_R11, rb); - rb = X64_R11; - } - } else if (b_op.kind == OPK_IMM) { - emit_load_imm(mc, w, X64_R11, b_op.v.imm); - rb = X64_R11; - } else { - compiler_panic(t->c, impl_of(t)->loc, - "x64 div: divisor kind %d unsupported", (int)b_op.kind); - } - if (op == BO_SDIV || op == BO_SREM) { - emit_cqo_or_cdq(mc, w); - emit_f7_rm(mc, w, 7u, rb); /* idiv */ - } else { - emit_xor_self(mc, w, X64_RDX); - emit_f7_rm(mc, w, 6u, rb); /* div */ - } - u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX; - if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg); - return; - } - - /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1 - * /sub ib). Use the imm form when b is OPK_IMM and skip materializing - * into cl. 
*/ - if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) { - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u); - if (b_op.kind == OPK_IMM) { - u32 width = w ? 64u : 32u; - emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u))); - return; - } - if (b_op.kind == OPK_REG) { - u32 rb = b_op.v.reg & 0xFu; - if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb); - } else { - compiler_panic(t->c, impl_of(t)->loc, - "x64 shift: count kind %d unsupported", (int)b_op.kind); - } - emit_shift_cl(mc, w, sub, rd); - return; - } - - /* For commutative ops, canonicalize IMM to the RHS so the imm-form - * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS - * still materializes. */ - switch (op) { - case BO_IADD: - case BO_AND: - case BO_OR: - case BO_XOR: - case BO_IMUL: { - if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { - Operand t_op = a_op; a_op = b_op; b_op = t_op; - } - break; - } - default: break; - } - - /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding - * reads-and-writes a single reg — copy ra → dst first, then `dst OP= - * imm`. For IMUL the imm form is three-operand (`dst = src * imm`) - * and reads from `ra` directly without the prep copy. 
*/ - if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG && - (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR || - op == BO_XOR || op == BO_IMUL)) { - i64 imm = b_op.v.imm; - u32 ra = a_op.v.reg & 0xFu; - if (op == BO_IMUL) { - if (imm_fits_i8(imm)) { - emit_imul_imm8(mc, w, rd, ra, (i8)imm); - return; - } - if (imm_fits_i32(imm)) { - emit_imul_imm32(mc, w, rd, ra, (i32)imm); - return; - } - } else { - u32 sub; - switch (op) { - case BO_IADD: sub = 0u; break; - case BO_OR: sub = 1u; break; - case BO_AND: sub = 4u; break; - case BO_ISUB: sub = 5u; break; - case BO_XOR: sub = 6u; break; - default: sub = 0u; break; /* unreachable */ - } - if (imm_fits_i8(imm)) { - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - emit_alu_imm8(mc, w, sub, rd, (i8)imm); - return; - } - if (imm_fits_i32(imm)) { - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - emit_alu_imm32(mc, w, sub, rd, (i32)imm); - return; - } - } - /* Fall through to materialize for >32-bit literals. */ - } - - /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */ - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - u32 rb = force_reg_int(t, b_op, w, X64_R11); - switch (op) { - case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break; - case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break; - case BO_AND: emit_alu_rr(mc, w, 0x21, rd, rb); break; - case BO_OR: emit_alu_rr(mc, w, 0x09, rd, rb); break; - case BO_XOR: emit_alu_rr(mc, w, 0x31, rd, rb); break; - case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break; - default: - compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl", - (int)op); - } -} - -static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { - MCEmitter* mc = t->mc; - int w = type_is_64(dst.type) ? 1 : 0; - u32 rd = dst.v.reg & 0xFu; - /* IMM operand is legal per the CGTarget contract (arch.h); materialize - * into a scratch register when not already a register. 
cg folds - * literal unops upstream (cg_fold_unop), so this path is reached only - * when opt's emit hands us an unfolded literal. */ - u32 ra = force_reg_int(t, a_op, w, X64_R11); - switch (op) { - case UO_NEG: - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - emit_f7_rm(mc, w, 3u, rd); - return; - case UO_BNOT: - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - emit_f7_rm(mc, w, 2u, rd); - return; - case UO_NOT: - /* !x → (x == 0) materialized as 0/1 in dst. */ - emit_test_self(mc, w, ra); - emit_setcc(mc, X64_CC_E, rd); - emit_movzx_r32_r8(mc, rd, rd); - return; - default: - compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl", - (int)op); - } -} - -static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 rd = dst.v.reg & 0xFu; - u32 rs = src.v.reg & 0xFu; - switch (k) { - case CV_SEXT: { - u32 src_bytes = type_byte_size(src.type); - int w = type_is_64(dst.type) ? 1 : 0; - emit_extend_rr(mc, w, /*signed=*/1, src_bytes, rd, rs); - return; - } - case CV_ZEXT: { - u32 src_bytes = type_byte_size(src.type); - int w = type_is_64(dst.type) ? 1 : 0; - emit_extend_rr(mc, w, /*signed=*/0, src_bytes, rd, rs); - return; - } - case CV_TRUNC: { - /* In-reg truncation: `mov r32, r32` clears high 32. Narrower stores - * select width themselves. */ - emit_mov_rr(mc, 0, rd, rs); - return; - } - case CV_ITOF_S: - case CV_ITOF_U: { - int w_src = type_is_64(src.type) ? 1 : 0; - u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; - if (k == CV_ITOF_U && w_src == 1) { - compiler_panic(t->c, a->loc, - "x64 convert: u64→fp not yet implemented"); - } - if (k == CV_ITOF_U) { - /* u32→fp: zero-extend to 64-bit, then signed cvtsi2sd works. */ - emit_extend_rr(mc, 0, 0, 4, X64_R11, rs); - rs = X64_R11; - w_src = 1; - } - emit_sse_rr_w(mc, prefix2, 0x2A, w_src, rd, rs); - return; - } - case CV_FTOI_S: - case CV_FTOI_U: { - int w_dst = type_is_64(dst.type) ? 1 : 0; - u8 prefix2 = type_is_fp_double(src.type) ? 
0xF2 : 0xF3; - if (k == CV_FTOI_U && w_dst == 1) { - compiler_panic(t->c, a->loc, - "x64 convert: fp→u64 not yet implemented"); - } - emit_sse_rr_w(mc, prefix2, 0x2C, w_dst, rd, rs); - return; - } - case CV_FEXT: - emit_sse_rr(mc, 0xF3, 0x5A, rd, rs); - return; - case CV_FTRUNC: - emit_sse_rr(mc, 0xF2, 0x5A, rd, rs); - return; - case CV_BITCAST: { - /* movd/movq between xmm and GPR. */ - if (src.cls == RC_INT && dst.cls == RC_FP) { - int w = type_is_64(dst.type) ? 1 : 0; - emit_sse_rr_w(mc, 0x66, 0x6E, w, rd, rs); - } else if (src.cls == RC_FP && dst.cls == RC_INT) { - int w = type_is_64(src.type) ? 1 : 0; - emit_sse_rr_w(mc, 0x66, 0x7E, w, rs, rd); - } else { - compiler_panic(t->c, a->loc, - "x64 convert BITCAST: same-class not supported"); - } - return; - } - default: - compiler_panic(t->c, a->loc, "x64 convert kind %d unimpl", (int)k); - } -} - -/* ============================================================ - * Calls / return */ - -static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, - u32* next_fp, u32* stack_off) { - XImpl* a = impl_of(t); - /* Synthesize one-part DIRECT for variadic args (av->abi NULL). */ - ABIArgInfo va_ai; - ABIArgPart va_pt; - const ABIArgInfo* ai = av->abi; - if (!ai) { - u32 sz = type_byte_size(av->type); - memset(&va_ai, 0, sizeof va_ai); - memset(&va_pt, 0, sizeof va_pt); - va_ai.kind = ABI_ARG_DIRECT; - va_ai.parts = &va_pt; - va_ai.nparts = 1; - va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT; - va_pt.size = sz; - va_pt.align = sz; - va_pt.src_offset = 0; - ai = &va_ai; - } - if (ai->kind == ABI_ARG_IGNORE) return; - if (ai->kind == ABI_ARG_INDIRECT) { - /* Pass &av->storage_local in the next int arg reg. */ - u32 dst_reg = (*next_int < 6) ? g_int_arg_regs[(*next_int)++] : X64_RAX; - int to_stack = (*next_int > 6) || (dst_reg == X64_RAX && *next_int == 6); - /* Above is awkward — recompute clearly: */ - if (*next_int >= 6 + (a->has_sret ? 
0 : 0)) { - /* (next_int was already bumped past 6) — stack route */ - } - to_stack = (dst_reg == X64_RAX); - if (av->storage.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot"); - emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off); - } else if (av->storage.kind == OPK_INDIRECT) { - emit_lea(t->mc, dst_reg, av->storage.v.ind.base & 0xFu, - av->storage.v.ind.ofs); - } else { - compiler_panic(t->c, a->loc, - "x64 call: INDIRECT arg storage kind %d unsupported", - (int)av->storage.kind); - } - if (to_stack) { - emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); - *stack_off += 8; - } - return; - } - - for (u16 i = 0; i < ai->nparts; ++i) { - const ABIArgPart* pt = &ai->parts[i]; - u32 sz = pt->size; - if (pt->cls == ABI_CLASS_INT) { - int to_stack = (*next_int >= 6); - u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++]; - switch (av->storage.kind) { - case OPK_IMM: { - int w = (sz == 8) ? 1 : 0; - emit_load_imm(t->mc, w, dst_reg, av->storage.v.imm); - break; - } - case OPK_REG: { - int w = (sz == 8) ? 1 : 0; - u32 sr = av->storage.v.reg & 0xFu; - if (sr != dst_reg) emit_mov_rr(t->mc, w, dst_reg, sr); - break; - } - case OPK_LOCAL: { - XSlot* s = slot_get(a, av->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 call: bad arg slot"); - emit_mov_load(t->mc, sz, 0, dst_reg, X64_RBP, - -(i32)s->off + (i32)pt->src_offset); - break; - } - case OPK_INDIRECT: { - /* cg holds INDIRECT base regs in {RBX, R10, R12..R15}, disjoint - * from arg regs (RDI/RSI/RDX/RCX/R8/R9) and the RAX scratch, so - * the base survives across the part loop. 
*/ - emit_mov_load(t->mc, sz, 0, dst_reg, av->storage.v.ind.base & 0xFu, - av->storage.v.ind.ofs + (i32)pt->src_offset); - break; - } - default: - compiler_panic(t->c, a->loc, - "x64 call: arg storage kind %d unsupported", - (int)av->storage.kind); - } - if (to_stack) { - emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); - *stack_off += 8; - } - } else if (pt->cls == ABI_CLASS_FP) { - int to_stack = (*next_fp >= 8); - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - if (!to_stack) { - u32 dst_x = (*next_fp)++; - if (av->storage.kind == OPK_REG) { - u32 sx = av->storage.v.reg & 0xFu; - if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx); - } else if (av->storage.kind == OPK_INDIRECT) { - emit_sse_load(t->mc, prefix2, 0x10, dst_x, - av->storage.v.ind.base & 0xFu, - av->storage.v.ind.ofs + (i32)pt->src_offset); - } else { - compiler_panic(t->c, a->loc, - "x64 call: FP arg storage kind %d unsupported", - (int)av->storage.kind); - } - } else { - if (av->storage.kind == OPK_REG) { - emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu, - X64_RSP, (i32)*stack_off); - } else if (av->storage.kind == OPK_INDIRECT) { - /* Load through xmm15 (scratch — last in g_fp_order so cg won't - * have it live mid-call) then store. */ - emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, - av->storage.v.ind.base & 0xFu, - av->storage.v.ind.ofs + (i32)pt->src_offset); - emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP, - (i32)*stack_off); - } else { - compiler_panic(t->c, a->loc, - "x64 call: FP stack-arg storage kind %d unsupported", - (int)av->storage.kind); - } - *stack_off += 8; - } - } else { - compiler_panic(t->c, a->loc, "x64 call: ABI class %d unimpl", - (int)pt->cls); - } - } -} - -static void x_call(CGTarget* t, const CGCallDesc* d) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - u32 next_int = 0, next_fp = 0, stack_off = 0; - - /* sret: caller puts destination pointer in rdi. 
*/ - if (d->abi && d->abi->has_sret) { - if (d->ret.storage.kind != OPK_LOCAL) { - compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL"); - } - XSlot* s = slot_get(a, d->ret.storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot"); - emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off); - next_int = 1; - } - for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); - } - u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) a->max_outgoing = needed; - - /* Variadic calls: AL = number of XMM regs used. */ - if (d->abi && d->abi->variadic) { - emit_load_imm(mc, 0, X64_RAX, (i64)next_fp); - } - - if (d->callee.kind == OPK_GLOBAL) { - /* call rel32: E8 + disp32 + R_X64_PLT32. */ - u8 op = 0xE8; - mc->emit_bytes(mc, &op, 1); - u32 disp_pos = mc->pos(mc); - emit_u32le(mc, 0); - mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, - d->callee.v.global.sym, - d->callee.v.global.addend - 4, 1, 0); - } else if (d->callee.kind == OPK_REG) { - u32 r = d->callee.v.reg & 0xFu; - emit_rex(mc, 0, 0, 0, r); - u8 buf[2] = {0xFF, modrm(3u, 2u, r)}; - mc->emit_bytes(mc, buf, 2); - } else { - compiler_panic(t->c, a->loc, "x64 call: callee kind %d unsupported", - (int)d->callee.kind); - } - - /* Receive return value. 
*/ - const ABIArgInfo* ri = &d->abi->ret; - if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return; - if (ri->nparts == 0) return; - - Operand rs = d->ret.storage; - u32 next_int_ret = 0, next_fp_ret = 0; - static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX}; - for (u16 i = 0; i < ri->nparts; ++i) { - const ABIArgPart* p = &ri->parts[i]; - u32 src_reg; - if (p->cls == ABI_CLASS_INT) src_reg = ret_int_regs[next_int_ret++]; - else if (p->cls == ABI_CLASS_FP) src_reg = (u32)(X64_XMM0 + next_fp_ret++); - else compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl", - (int)p->cls); - - if (rs.kind == OPK_REG) { - if (ri->nparts != 1) { - compiler_panic(t->c, a->loc, - "x64 call: REG ret_storage with %u parts", - (unsigned)ri->nparts); - } - if (p->cls == ABI_CLASS_INT) { - int w = (p->size == 8) ? 1 : 0; - u32 dr = rs.v.reg & 0xFu; - if (dr != src_reg) emit_mov_rr(mc, w, dr, src_reg); - } else { - u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; - u32 dr = rs.v.reg & 0xFu; - if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg); - } - } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { - u32 base_reg; - i32 base_off; - if (rs.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot"); - base_reg = X64_RBP; - base_off = -(i32)s->off; - } else { - base_reg = rs.v.ind.base & 0xFu; - base_off = rs.v.ind.ofs; - } - i32 off = base_off + (i32)p->src_offset; - if (p->cls == ABI_CLASS_INT) { - emit_mov_store(mc, p->size, src_reg, base_reg, off); - } else { - u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; - emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off); - } - } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { - /* void ret placeholder — nothing to do. 
*/ - } else { - compiler_panic(t->c, a->loc, - "x64 call: ret_storage kind %d unsupported", - (int)rs.kind); - } - } -} - -static void x_ret(CGTarget* t, const CGABIValue* val) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - - if (val) { - const ABIArgInfo* ri = val->abi; - if (ri && ri->kind == ABI_ARG_INDIRECT) { - /* sret: reload destination pointer into rdi, memcpy source into [rdi]. */ - u32 src_base; - i32 src_base_off; - u32 nbytes; - if (val->storage.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot"); - src_base = X64_RBP; - src_base_off = -(i32)s->off; - nbytes = s->size; - } else if (val->storage.kind == OPK_INDIRECT) { - src_base = val->storage.v.ind.base & 0xFu; - src_base_off = val->storage.v.ind.ofs; - nbytes = val->size; - if (!nbytes) { - compiler_panic(t->c, a->loc, - "x64 ret indirect: missing aggregate size"); - } - } else { - compiler_panic(t->c, a->loc, - "x64 ret indirect: storage kind %d unsupported", - (int)val->storage.kind); - } - if (a->sret_ptr_slot != FRAME_SLOT_NONE) { - XSlot* sp = slot_get(a, a->sret_ptr_slot); - if (sp) emit_mov_load(mc, 8, 0, X64_RDI, X64_RBP, -(i32)sp->off); - } - u32 i = 0; - while (i + 8 <= nbytes) { - emit_mov_load(mc, 8, 0, X64_RAX, src_base, src_base_off + (i32)i); - emit_mov_store(mc, 8, X64_RAX, X64_RDI, (i32)i); - i += 8; - } - while (i + 4 <= nbytes) { - emit_mov_load(mc, 4, 0, X64_RAX, src_base, src_base_off + (i32)i); - emit_mov_store(mc, 4, X64_RAX, X64_RDI, (i32)i); - i += 4; - } - while (i + 2 <= nbytes) { - emit_mov_load(mc, 2, 0, X64_RAX, src_base, src_base_off + (i32)i); - emit_mov_store(mc, 2, X64_RAX, X64_RDI, (i32)i); - i += 2; - } - while (i < nbytes) { - emit_mov_load(mc, 1, 0, X64_RAX, src_base, src_base_off + (i32)i); - emit_mov_store(mc, 1, X64_RAX, X64_RDI, (i32)i); - i += 1; - } - /* Convention: return sret pointer in rax. 
*/ - emit_mov_rr(mc, 1, X64_RAX, X64_RDI); - } else if (val->storage.kind == OPK_REG) { - if (val->storage.cls == RC_FP) { - u8 prefix2 = type_is_fp_double(val->storage.type) ? 0xF2 : 0xF3; - u32 sr = val->storage.v.reg & 0xFu; - if (sr != X64_XMM0) emit_sse_rr(mc, prefix2, 0x10, X64_XMM0, sr); - } else { - int w = type_is_64(val->storage.type) ? 1 : 0; - u32 sr = val->storage.v.reg & 0xFu; - if (sr != X64_RAX) emit_mov_rr(mc, w, X64_RAX, sr); - } - } else if (val->storage.kind == OPK_IMM) { - int w = type_is_64(val->storage.type) ? 1 : 0; - emit_load_imm(mc, w, X64_RAX, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL || - val->storage.kind == OPK_INDIRECT) { - /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */ - u32 base_reg; - i32 base_off; - if (val->storage.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad local slot"); - base_reg = X64_RBP; - base_off = -(i32)s->off; - } else { - base_reg = val->storage.v.ind.base & 0xFu; - base_off = val->storage.v.ind.ofs; - } - const ABIArgInfo* ri2 = val->abi; - u32 next_int_ret = 0, next_fp_ret = 0; - static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX}; - for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { - const ABIArgPart* pt = &ri2->parts[i]; - i32 off = base_off + (i32)pt->src_offset; - if (pt->cls == ABI_CLASS_INT) { - emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++], - base_reg, off); - } else if (pt->cls == ABI_CLASS_FP) { - u8 prefix2 = (pt->size == 8) ? 0xF2 : 0xF3; - emit_sse_load(mc, prefix2, 0x10, (u32)(X64_XMM0 + next_fp_ret++), - base_reg, off); - } else { - compiler_panic(t->c, a->loc, "x64 ret: ret part cls %d unimpl", - (int)pt->cls); - } - } - } - } - emit_jmp_label(mc, a->epilogue_label); -} - -/* ============================================================ - * Alloca / VLA. 
- * - * Layout (low → high addresses, after a `sub rsp, aligned_size`): - * [rsp + 0, +max_outgoing): outgoing-arg area - * [rsp + max_outgoing, +max_outgoing +aligned_size): newly allocated block - * - * max_outgoing is only known at func_end (it is the max across all - * x_call sites in the function), so each alloca emits a placeholder - * `lea dst, [rsp + 0]` whose 4-byte disp is patched at func_end. The - * epilogue restores rsp via `leave` (mov rsp, rbp; pop rbp), so no - * extra dance is needed when alloca is present. */ - -static void emit_lea_rsp_disp32(MCEmitter* mc, u32 dst, u32* out_disp_pos) { - /* Force the disp32 form (mod=10, rm=SIB, base=rsp, no index, scale=0) - * regardless of the displacement value so func_end has a fixed-width - * field to patch. 8 bytes: REX.W [+R] | 0x8D | ModRM | SIB | disp32. */ - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, 1, dst, 0, X64_RSP); - u8 op = 0x8D; - mc->emit_bytes(mc, &op, 1); - u8 mr = modrm(2u, dst & 7u, 4u); - mc->emit_bytes(mc, &mr, 1); - u8 s = sib(0, 4u, X64_RSP); - mc->emit_bytes(mc, &s, 1); - *out_disp_pos = mc->pos(mc); - emit_u32le(mc, 0); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void x_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - if (d.kind != OPK_REG) - compiler_panic(t->c, a->loc, "x64 alloca: dst must be REG"); - if (align > 16) { - compiler_panic(t->c, a->loc, - "x64 alloca: align %u > 16 not yet supported", align); - } - - if (sz.kind == OPK_IMM) { - i64 v = sz.v.imm; - if (v < 0) compiler_panic(t->c, a->loc, "x64 alloca: negative size"); - u64 aligned = ((u64)v + 15u) & ~(u64)15u; - if (aligned == 0) aligned = 16; - /* sub rsp, imm32 : REX.W 0x81 /5 imm32 (7 bytes). 
*/ - emit_rex(mc, 1, 0, 0, X64_RSP); - u8 buf[2] = {0x81, modrm(3u, 5u, X64_RSP)}; - mc->emit_bytes(mc, buf, 2); - emit_u32le(mc, (u32)aligned); - } else if (sz.kind == OPK_REG) { - u32 sz_reg = sz.v.reg & 0xFu; - /* rax = (sz_reg + 15) & ~15 */ - emit_lea(mc, X64_RAX, sz_reg, 15); - /* and rax, -16 : REX.W 0x83 /4 imm8(0xF0). */ - emit_rex(mc, 1, 0, 0, X64_RAX); - u8 abuf[3] = {0x83, modrm(3u, 4u, X64_RAX), 0xF0}; - mc->emit_bytes(mc, abuf, 3); - /* sub rsp, rax */ - emit_alu_rr(mc, 1, 0x29, X64_RSP, X64_RAX); - } else { - compiler_panic(t->c, a->loc, "x64 alloca: size kind %d unsupported", - (int)sz.kind); - } - - /* lea dst, [rsp + max_outgoing] — placeholder, patched at func_end. */ - if (a->nalloca_patches == a->alloca_patches_cap) { - u32 ncap = a->alloca_patches_cap ? a->alloca_patches_cap * 2u : 4u; - XAllocaPatch* nb = arena_array(t->c->tu, XAllocaPatch, ncap); - if (a->alloca_patches) - memcpy(nb, a->alloca_patches, sizeof(XAllocaPatch) * a->nalloca_patches); - a->alloca_patches = nb; - a->alloca_patches_cap = ncap; - } - u32 disp_pos; - emit_lea_rsp_disp32(mc, d.v.reg & 0xFu, &disp_pos); - a->alloca_patches[a->nalloca_patches].disp_pos = disp_pos; - a->nalloca_patches++; - a->has_alloca = 1; -} - -/* SysV AMD64 __va_list_tag (24 bytes, 8-aligned): - * off 0 u32 gp_offset next free GP slot in reg_save_area (0..48) - * off 4 u32 fp_offset next free FP slot (48..176) - * off 8 ptr overflow_arg_area pointer to next stack-passed arg - * off 16 ptr reg_save_area pointer to the 176-byte save area - * - * The reg_save_area layout (filled in func_begin): - * +0..+40 : rdi, rsi, rdx, rcx, r8, r9 (8B each) - * +48..+168 : xmm0..xmm7 at 16B stride (low 8B written via movsd) - * - * va_arg dispatches on dst class. When the relevant offset reaches its - * max (48 for GP, 176 for FP), fall through to overflow_arg_area at - * 8-byte stride. 
*/ - -static void x_va_start_(CGTarget* t, Operand ap_op) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - if (!a->is_variadic) - compiler_panic(t->c, a->loc, "x64 va_start: function not variadic"); - u32 ap = ap_op.v.reg & 0xFu; - XSlot* rs = slot_get(a, a->reg_save_slot); - if (!rs) compiler_panic(t->c, a->loc, "x64 va_start: no reg_save_slot"); - - /* gp_offset = next_param_int * 8 */ - emit_load_imm(mc, 0, X64_RAX, (i64)(a->next_param_int * 8u)); - emit_mov_store(mc, 4, X64_RAX, ap, 0); - /* fp_offset = 48 + next_param_fp * 16 */ - emit_load_imm(mc, 0, X64_RAX, (i64)(48u + a->next_param_fp * 16u)); - emit_mov_store(mc, 4, X64_RAX, ap, 4); - /* overflow_arg_area = rbp + 16 + next_param_stack */ - emit_lea(mc, X64_RAX, X64_RBP, (i32)(16u + a->next_param_stack)); - emit_mov_store(mc, 8, X64_RAX, ap, 8); - /* reg_save_area = rbp - reg_save_slot.off */ - emit_lea(mc, X64_RAX, X64_RBP, -(i32)rs->off); - emit_mov_store(mc, 8, X64_RAX, ap, 16); -} - -static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op, - const Type* ty) { - MCEmitter* mc = t->mc; - u32 ap = ap_op.v.reg & 0xFu; - u32 sz = type_byte_size(ty); - int is_fp = (dst.cls == RC_FP); - u32 offs_field = is_fp ? 4u : 0u; - u32 max_offs = is_fp ? 176u : 48u; - u32 stride = is_fp ? 16u : 8u; - u32 dr = dst.v.reg & 0xFu; - - MCLabel L_stack = mc->label_new(mc); - MCLabel L_done = mc->label_new(mc); - - /* eax = ap[offs_field]; cmp eax, max_offs; jae L_stack. */ - emit_mov_load(mc, 4, 0, X64_RAX, ap, (i32)offs_field); - if (max_offs <= 127u) { - emit_cmp_imm8(mc, 0, X64_RAX, (i8)max_offs); - } else { - /* cmp eax, imm32 : 0x3D imm32 (5 bytes, EAX-specific form). 
*/ - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 op = 0x3D; - mc->emit_bytes(mc, &op, 1); - emit_u32le(mc, max_offs); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); - } - emit_jcc_label(mc, X64_CC_AE, L_stack); - - /* Reg path: - * r11 = ap[16] (reg_save_area) - * r11 = r11 + rax - * load dst from [r11 + 0] - * eax += stride; ap[offs_field] = eax - * jmp L_done */ - emit_mov_load(mc, 8, 0, X64_R11, ap, 16); - emit_alu_rr(mc, 1, 0x01, X64_R11, X64_RAX); - if (is_fp) { - u8 prefix = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0); - } else { - int sx = type_is_signed(ty); - emit_mov_load(mc, sz, sx, dr, X64_R11, 0); - } - /* add eax, imm8 : 0x83 /0 imm8 (no REX needed for eax). */ - { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 buf[3] = {0x83, modrm(3u, 0u, X64_RAX), (u8)stride}; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); - } - emit_mov_store(mc, 4, X64_RAX, ap, (i32)offs_field); - emit_jmp_label(mc, L_done); - - /* L_stack: - * r11 = ap[8] (overflow_arg_area) - * load dst from [r11 + 0] - * r11 += 8; ap[8] = r11 */ - mc->label_place(mc, L_stack); - emit_mov_load(mc, 8, 0, X64_R11, ap, 8); - if (is_fp) { - u8 prefix = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0); - } else { - int sx = type_is_signed(ty); - emit_mov_load(mc, sz, sx, dr, X64_R11, 0); - } - /* add r11, 8 : REX.WB 0x83 /0 imm8. 
*/ - { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 rex = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B); - mc->emit_bytes(mc, &rex, 1); - u8 buf[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8}; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); - } - emit_mov_store(mc, 8, X64_R11, ap, 8); - - mc->label_place(mc, L_done); -} - -static void x_va_end_(CGTarget* t, Operand a) { - (void)t; - (void)a; -} - -static void x_va_copy_(CGTarget* t, Operand d, Operand s) { - MCEmitter* mc = t->mc; - u32 dr = d.v.reg & 0xFu; - u32 sr = s.v.reg & 0xFu; - /* va_list is 24 bytes; three 8B loads + stores via rax. */ - for (u32 i = 0; i < 24u; i += 8u) { - emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); - } -} - -/* ============================================================ - * Atomics (Group K). - * - * x86 has a strong memory model: plain MOV is acquire on loads and - * release on stores, so most MemOrders need no extra fence. The - * exception is SEQ_CST stores, which need a full StoreLoad barrier — - * realized either via XCHG (which has implicit LOCK) or MOV+MFENCE. - * All LOCK-prefixed RMWs (XADD/XCHG/CMPXCHG) act as full barriers, - * subsuming any MemOrder the front end requests. */ - -static void emit_lock_prefix(MCEmitter* mc) { - u8 b = 0xF0; - mc->emit_bytes(mc, &b, 1); -} - -static void emit_mfence(MCEmitter* mc) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 b[3] = {0x0F, 0xAE, 0xF0}; - mc->emit_bytes(mc, b, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -static void emit_ud2(MCEmitter* mc) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 b[2] = {0x0F, 0x0B}; - mc->emit_bytes(mc, b, 2); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* LOCK XADD [base+disp], src. Opcode 0F C1 /r (32/64-bit; sets src=prior, - * mem=mem+src). 
*/ -static void emit_lock_xadd(MCEmitter* mc, int w, u32 src, u32 base, i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_lock_prefix(mc); - emit_rex(mc, w, src, 0, base); - u8 op[2] = {0x0F, 0xC1}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, src, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* XCHG [base+disp], src. Opcode 87 /r. LOCK is implicit when the - * destination is memory, but we emit it explicitly for clarity. */ -static void emit_lock_xchg_mem(MCEmitter* mc, int w, u32 src, u32 base, - i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_lock_prefix(mc); - emit_rex(mc, w, src, 0, base); - u8 op = 0x87; - mc->emit_bytes(mc, &op, 1); - emit_mem_operand(mc, src, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* LOCK CMPXCHG [base+disp], src. Opcode 0F B1 /r. Compares RAX with [mem]; - * if equal, [mem]=src and ZF=1; else RAX=[mem] and ZF=0. */ -static void emit_lock_cmpxchg(MCEmitter* mc, int w, u32 src, u32 base, - i32 disp) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_lock_prefix(mc); - emit_rex(mc, w, src, 0, base); - u8 op[2] = {0x0F, 0xB1}; - mc->emit_bytes(mc, op, 2); - emit_mem_operand(mc, src, base, disp); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* POPCNT rd, rs. Encoding: F3 0F B8 /r. */ -static void emit_popcnt(MCEmitter* mc, int w, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 p = 0xF3; - mc->emit_bytes(mc, &p, 1); - emit_rex(mc, w, dst, 0, src); - u8 op[2] = {0x0F, 0xB8}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* BSF/BSR rd, rs. opcode2 = 0xBC (BSF) or 0xBD (BSR). 
*/ -static void emit_bs(MCEmitter* mc, int w, u8 opcode2, u32 dst, u32 src) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, dst, 0, src); - u8 op[2] = {0x0F, opcode2}; - mc->emit_bytes(mc, op, 2); - emit_rm_reg(mc, dst, src); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* BSWAP r32/r64. Opcode 0F C8+r; REX.W for r64; REX.B if reg>=8. */ -static void emit_bswap(MCEmitter* mc, int w, u32 reg) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 op[2] = {0x0F, (u8)(0xC8 + (reg & 7))}; - mc->emit_bytes(mc, op, 2); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* ROL r/m16, imm8. Used to swap bytes in a 16-bit value (ROL by 8). */ -static void emit_rol16_imm8(MCEmitter* mc, u32 reg, u8 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - u8 p = 0x66; - mc->emit_bytes(mc, &p, 1); - emit_rex(mc, 0, 0, 0, reg); - u8 buf[3]; - buf[0] = 0xC1; - buf[1] = modrm(3u, 0u, reg & 7u); - buf[2] = imm; - mc->emit_bytes(mc, buf, 3); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* XOR r/m, imm32 — opcode 81 /6. Used to compute (bits-1) - x via XOR. */ -static void emit_xor_imm32(MCEmitter* mc, int w, u32 reg, i32 imm) { - u32 ofs = obj_pos(mc->obj, mc->section_id); - emit_rex(mc, w, 0, 0, reg); - u8 op = 0x81; - mc->emit_bytes(mc, &op, 1); - emit_rm_reg(mc, 6u, reg); - emit_u32le(mc, (u32)imm); - if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); -} - -/* Resolve an atomic addr operand to (base, disp) for a memory operand. - * Accepts OPK_REG (pointer in reg, disp=0), OPK_LOCAL, or OPK_INDIRECT. 
*/ -static u32 atomic_addr_base(CGTarget* t, Operand addr, i32* out_disp) { - if (addr.kind == OPK_REG) { - *out_disp = 0; - return addr.v.reg & 0xFu; - } - return addr_base(t, addr, out_disp); -} - -static void x_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, - MemOrder ord) { - MCEmitter* mc = t->mc; - (void)ord; /* x86: plain MOV satisfies all orders for loads. */ - u32 sz = ma.size ? ma.size : type_byte_size(dst.type); - i32 disp; - u32 base = atomic_addr_base(t, addr, &disp); - int signed_ = type_is_signed(ma.type ? ma.type : dst.type); - emit_mov_load(mc, sz, signed_, dst.v.reg & 0xFu, base, disp); -} - -static void x_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess ma, - MemOrder ord) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - u32 sz = ma.size ? ma.size : type_byte_size(src.type); - int w = (sz == 8) ? 1 : 0; - i32 disp; - u32 base = atomic_addr_base(t, addr, &disp); - - /* Materialize src into a register. */ - u32 sr; - if (src.kind == OPK_IMM) { - emit_load_imm(mc, w, X64_R11, src.v.imm); - sr = X64_R11; - } else if (src.kind == OPK_REG) { - sr = src.v.reg & 0xFu; - } else { - compiler_panic(t->c, a->loc, "x64 atomic_store: src kind %d unsupported", - (int)src.kind); - } - - if (ord == MO_SEQ_CST) { - /* SEQ_CST store: XCHG implicitly fences. Move src into r11 so the - * caller's reg is unmodified, then xchg [mem], r11. */ - if (sr != X64_R11) emit_mov_rr(mc, w, X64_R11, sr); - emit_lock_xchg_mem(mc, w, X64_R11, base, disp); - return; - } - /* Plain store covers RELAXED / RELEASE. */ - emit_mov_store(mc, sz, sr, base, disp); -} - -static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, - Operand val, MemAccess ma, MemOrder ord) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */ - u32 sz = ma.size ? ma.size : type_byte_size(dst.type); - int w = (sz == 8) ? 
1 : 0; - i32 disp; - u32 base = atomic_addr_base(t, addr, &disp); - u32 dr = dst.v.reg & 0xFu; - - /* Materialize val into r11 (it's our working temp). For SUB we negate - * it so the XADD does the subtraction. */ - if (val.kind == OPK_IMM) { - i64 v = val.v.imm; - if (op == AO_SUB) v = -v; - emit_load_imm(mc, w, X64_R11, v); - } else if (val.kind == OPK_REG) { - u32 vr = val.v.reg & 0xFu; - if (vr != X64_R11) emit_mov_rr(mc, w, X64_R11, vr); - if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */ - } else { - compiler_panic(t->c, a->loc, "x64 atomic_rmw: val kind %d unsupported", - (int)val.kind); - } - - if (op == AO_ADD || op == AO_SUB) { - /* LOCK XADD [base], r11 — afterwards r11 holds prior. */ - emit_lock_xadd(mc, w, X64_R11, base, disp); - if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11); - return; - } - if (op == AO_XCHG) { - emit_lock_xchg_mem(mc, w, X64_R11, base, disp); - if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11); - return; - } - - /* AND/OR/XOR/NAND: CMPXCHG retry loop. - * - * mov rax, [mem] - * .retry: - * mov rcx, rax ; new = prior - * <op> rcx, r11 ; combine with val - * [NAND: not rcx] - * lock cmpxchg [mem], rcx - * jne .retry - * mov dr, rax - * - * rax = prior (cmpxchg implicit), rcx = new (scratch), r11 = val. 
*/ - emit_mov_load(mc, sz, 0, X64_RAX, base, disp); - MCLabel L_retry = mc->label_new(mc); - mc->label_place(mc, L_retry); - emit_mov_rr(mc, w, X64_RCX, X64_RAX); - switch (op) { - case AO_AND: - emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11); - break; - case AO_OR: - emit_alu_rr(mc, w, 0x09, X64_RCX, X64_R11); - break; - case AO_XOR: - emit_alu_rr(mc, w, 0x31, X64_RCX, X64_R11); - break; - case AO_NAND: - emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11); - emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */ - break; - default: - compiler_panic(t->c, a->loc, "x64 atomic_rmw: op %d unimpl", (int)op); - } - emit_lock_cmpxchg(mc, w, X64_RCX, base, disp); - emit_jcc_label(mc, X64_CC_NE, L_retry); - if (dr != X64_RAX) emit_mov_rr(mc, w, dr, X64_RAX); -} - -static void x_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, - Operand expected, Operand desired, MemAccess ma, - MemOrder succ, MemOrder fail) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - (void)succ; - (void)fail; - u32 sz = ma.size ? ma.size : type_byte_size(prior.type); - int w = (sz == 8) ? 1 : 0; - i32 disp; - u32 base = atomic_addr_base(t, addr, &disp); - - /* RAX = expected. */ - if (expected.kind == OPK_IMM) { - emit_load_imm(mc, w, X64_RAX, expected.v.imm); - } else if (expected.kind == OPK_REG) { - u32 er = expected.v.reg & 0xFu; - if (er != X64_RAX) emit_mov_rr(mc, w, X64_RAX, er); - } else { - compiler_panic(t->c, a->loc, "x64 atomic_cas: exp kind %d unsupported", - (int)expected.kind); - } - /* R11 = desired. */ - if (desired.kind == OPK_IMM) { - emit_load_imm(mc, w, X64_R11, desired.v.imm); - } else if (desired.kind == OPK_REG) { - u32 dr2 = desired.v.reg & 0xFu; - if (dr2 != X64_R11) emit_mov_rr(mc, w, X64_R11, dr2); - } else { - compiler_panic(t->c, a->loc, "x64 atomic_cas: des kind %d unsupported", - (int)desired.kind); - } - - emit_lock_cmpxchg(mc, w, X64_R11, base, disp); - - /* ok = ZF (success). 
*/ - u32 ok_r = ok.v.reg & 0xFu; - emit_setcc(mc, X64_CC_E, ok_r); - emit_movzx_r32_r8(mc, ok_r, ok_r); - - /* prior = rax. */ - u32 pr = prior.v.reg & 0xFu; - if (pr != X64_RAX) emit_mov_rr(mc, w, pr, X64_RAX); -} - -static void x_fence(CGTarget* t, MemOrder o) { - /* x86: only SEQ_CST needs an explicit StoreLoad barrier. RELAXED is - * a no-op; ACQUIRE/RELEASE/ACQ_REL are satisfied by plain MOV. */ - if (o == MO_SEQ_CST) emit_mfence(t->mc); -} - -/* ============================================================ - * Intrinsics (Group L). */ - -static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, - const Operand* args, u32 na) { - XImpl* a = impl_of(t); - MCEmitter* mc = t->mc; - (void)nd; - (void)na; - - switch (kind) { - case INTRIN_POPCOUNT: { - Operand src = args[0]; - Operand dst = dsts[0]; - int w = type_is_64(src.type) ? 1 : 0; - emit_popcnt(mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu); - return; - } - case INTRIN_CTZ: { - /* BSF gives the index of the lowest set bit (undefined for 0). */ - Operand src = args[0]; - Operand dst = dsts[0]; - int w = type_is_64(src.type) ? 1 : 0; - emit_bs(mc, w, 0xBC, dst.v.reg & 0xFu, src.v.reg & 0xFu); - return; - } - case INTRIN_CLZ: { - /* BSR gives the index of the highest set bit; clz = (bits-1) - bsr. - * XOR with (bits-1) computes the subtraction for in-range values. */ - Operand src = args[0]; - Operand dst = dsts[0]; - int w = type_is_64(src.type) ? 1 : 0; - u32 dr = dst.v.reg & 0xFu; - emit_bs(mc, w, 0xBD, dr, src.v.reg & 0xFu); - emit_xor_imm32(mc, w, dr, w ? 
63 : 31); - return; - } - case INTRIN_BSWAP16: { - Operand src = args[0]; - Operand dst = dsts[0]; - u32 dr = dst.v.reg & 0xFu; - u32 sr = src.v.reg & 0xFu; - if (dr != sr) emit_mov_rr(mc, 0, dr, sr); - emit_rol16_imm8(mc, dr, 8); - return; - } - case INTRIN_BSWAP32: { - Operand src = args[0]; - Operand dst = dsts[0]; - u32 dr = dst.v.reg & 0xFu; - u32 sr = src.v.reg & 0xFu; - if (dr != sr) emit_mov_rr(mc, 0, dr, sr); - emit_bswap(mc, 0, dr); - return; - } - case INTRIN_BSWAP64: { - Operand src = args[0]; - Operand dst = dsts[0]; - u32 dr = dst.v.reg & 0xFu; - u32 sr = src.v.reg & 0xFu; - if (dr != sr) emit_mov_rr(mc, 1, dr, sr); - emit_bswap(mc, 1, dr); - return; - } - case INTRIN_MEMCPY: - case INTRIN_MEMMOVE: { - /* args = (dst_addr, src_addr, n_bytes). v1: const n, REG ptrs. */ - Operand da = args[0], sa = args[1], nb = args[2]; - if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic(t->c, a->loc, - "x64 intrinsic: %s with non-const n or non-REG ptr", - kind == INTRIN_MEMCPY ? "memcpy" : "memmove"); - } - u32 dr = da.v.reg & 0xFu; - u32 sr = sa.v.reg & 0xFu; - u32 n = (u32)nb.v.imm; - if (kind == INTRIN_MEMCPY) { - u32 i = 0; - while (i + 8 <= n) { - emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); - i += 8; - } - while (i + 4 <= n) { - emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); - i += 4; - } - while (i + 2 <= n) { - emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); - i += 2; - } - while (i < n) { - emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); - i += 1; - } - } else { - /* memmove: copy backward so dst>src overlap is safe. 
*/ - u32 i = n; - while (i >= 8) { - i -= 8; - emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); - } - while (i >= 4) { - i -= 4; - emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); - } - while (i >= 2) { - i -= 2; - emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); - } - while (i >= 1) { - i -= 1; - emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i); - emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); - } - } - return; - } - case INTRIN_MEMSET: { - /* args = (dst_addr, byte, n). */ - Operand da = args[0], bv = args[1], nb = args[2]; - if (da.kind != OPK_REG || nb.kind != OPK_IMM) { - compiler_panic(t->c, a->loc, - "x64 intrinsic: memset with non-const n / non-REG ptr"); - } - u32 dr = da.v.reg & 0xFu; - u32 n = (u32)nb.v.imm; - /* Build a 64-bit value with the byte broadcast across all 8 bytes. */ - if (bv.kind == OPK_IMM) { - u8 byte = (u8)(bv.v.imm & 0xffu); - u64 b64 = byte; - b64 |= b64 << 8; - b64 |= b64 << 16; - b64 |= b64 << 32; - emit_load_imm(mc, 1, X64_RAX, (i64)b64); - } else if (bv.kind == OPK_REG) { - /* Broadcast low byte of bv across 8 bytes: rax = bv * 0x0101010101010101. */ - emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll); - emit_mov_rr(mc, 1, X64_RAX, bv.v.reg & 0xFu); - emit_imul_rr(mc, 1, X64_RAX, X64_R11); - } else { - compiler_panic(t->c, a->loc, - "x64 intrinsic: memset byte kind %d unsupported", - (int)bv.kind); - } - u32 i = 0; - while (i + 8 <= n) { - emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); - i += 8; - } - while (i + 4 <= n) { - emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); - i += 4; - } - while (i + 2 <= n) { - emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); - i += 2; - } - while (i < n) { - emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); - i += 1; - } - return; - } - case INTRIN_PREFETCH: - /* Drop the hint. */ - return; - case INTRIN_ASSUME_ALIGNED: { - /* dst = src (alignment is a hint only). 
*/ - Operand src = args[0]; - Operand dst = dsts[0]; - u32 dr = dst.v.reg & 0xFu; - u32 sr = src.v.reg & 0xFu; - if (dr != sr) emit_mov_rr(mc, 1, dr, sr); - return; - } - case INTRIN_EXPECT: { - /* dst = val; expected hint dropped. */ - Operand val = args[0]; - Operand dst = dsts[0]; - int w = type_is_64(dst.type) ? 1 : 0; - u32 dr = dst.v.reg & 0xFu; - if (val.kind == OPK_REG) { - u32 sr = val.v.reg & 0xFu; - if (sr != dr) emit_mov_rr(mc, w, dr, sr); - } else if (val.kind == OPK_IMM) { - emit_load_imm(mc, w, dr, val.v.imm); - } else { - compiler_panic(t->c, a->loc, - "x64 intrinsic: expect val kind %d unsupported", - (int)val.kind); - } - return; - } - case INTRIN_UNREACHABLE: - case INTRIN_TRAP: - emit_ud2(mc); - return; - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { - /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO captures. */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - int w = type_is_64(dval.type) ? 1 : 0; - u32 rd = dval.v.reg & 0xFu; - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - u32 rb = force_reg_int(t, b_op, w, X64_R11); - u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29; - emit_alu_rr(mc, w, op, rd, rb); - u32 dovf_r = dovf.v.reg & 0xFu; - emit_setcc(mc, X64_CC_O, dovf_r); - emit_movzx_r32_r8(mc, dovf_r, dovf_r); - return; - } - case INTRIN_MUL_OVERFLOW: { - /* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed - * two-operand form: low 32 bits of product go to dst, OF set if - * the result didn't fit. i64 not yet supported. */ - Operand a_op = args[0], b_op = args[1]; - Operand dval = dsts[0], dovf = dsts[1]; - int w = type_is_64(dval.type) ? 
1 : 0; - if (w) { - compiler_panic(t->c, a->loc, - "x64 intrinsic: mul_overflow on i64 not yet supported"); - } - u32 rd = dval.v.reg & 0xFu; - u32 ra = force_reg_int(t, a_op, w, X64_RAX); - if (rd != ra) emit_mov_rr(mc, w, rd, ra); - u32 rb = force_reg_int(t, b_op, w, X64_R11); - emit_imul_rr(mc, w, rd, rb); - u32 dovf_r = dovf.v.reg & 0xFu; - emit_setcc(mc, X64_CC_O, dovf_r); - emit_movzx_r32_r8(mc, dovf_r, dovf_r); - return; - } - default: - compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported", - (int)kind); - } -} -static void x_asm_block(CGTarget* t, const char* tmpl, - const AsmConstraint* outs, u32 no, Operand* oo, - const AsmConstraint* ins, u32 ni, const Operand* io, - const Sym* clobs, u32 nc) { - (void)tmpl; - (void)outs; - (void)no; - (void)oo; - (void)ins; - (void)ni; - (void)io; - (void)clobs; - (void)nc; - x_panic(t, "asm_block"); -} - -static void x_set_loc(CGTarget* t, SrcLoc l) { - ((XImpl*)t)->loc = l; - if (t->mc) t->mc->set_loc(t->mc, l); -} - -static void x_finalize(CGTarget* t) { (void)t; } -static void x_destroy(CGTarget* t) { (void)t; } - -static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } - -CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { - XImpl* x = arena_new(c->tu, XImpl); - memset(x, 0, sizeof *x); - - CGTarget* t = &x->base; - t->c = c; - t->obj = o; - t->mc = m; - - t->func_begin = x_func_begin; - t->func_end = x_func_end; - - t->alloc_reg = x_alloc_reg; - t->free_reg = x_free_reg; - t->frame_slot = x_frame_slot; - t->param = x_param; - t->clobbers = x_clobbers; - t->spill_reg = x_spill_reg; - t->reload_reg = x_reload_reg; - - t->label_new = x_label_new; - t->label_place = x_label_place; - t->jump = x_jump; - t->cmp_branch = x_cmp_branch; - - t->scope_begin = x_scope_begin; - t->scope_else = x_scope_else; - t->scope_end = x_scope_end; - t->break_to = x_break_to; - t->continue_to = x_continue_to; - - t->load_imm = x_load_imm; - t->load_const = x_load_const; - t->copy = x_copy; - 
t->load = x_load; - t->store = x_store; - t->addr_of = x_addr_of; - t->tls_addr_of = x_tls_addr_of; - t->copy_bytes = x_copy_bytes; - t->set_bytes = x_set_bytes; - t->bitfield_load = x_bitfield_load; - t->bitfield_store = x_bitfield_store; - - t->binop = x_binop; - t->unop = x_unop; - t->cmp = x_cmp; - t->convert = x_convert; - - t->call = x_call; - t->ret = x_ret; - - t->alloca_ = x_alloca_; - t->va_start_ = x_va_start_; - t->va_arg_ = x_va_arg_; - t->va_end_ = x_va_end_; - t->va_copy_ = x_va_copy_; - - t->setjmp_ = NULL; - t->longjmp_ = NULL; - - t->atomic_load = x_atomic_load; - t->atomic_store = x_atomic_store; - t->atomic_rmw = x_atomic_rmw; - t->atomic_cas = x_atomic_cas; - t->fence = x_fence; - - t->intrinsic = x_intrinsic; - t->asm_block = x_asm_block; - - t->set_loc = x_set_loc; - t->finalize = x_finalize; - t->destroy = x_destroy; - - compiler_defer(c, cgt_cleanup, t); - return t; -} diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -0,0 +1,378 @@ +/* arch/x64/alloc.c — register pool, spill/reload, labels, control flow. + * + * Covers: xpool_init/alloc/free, x_alloc_reg, x_free_reg, x_frame_slot, + * x64_slot_get, x_param, x_clobbers, x_spill_reg, x_reload_reg, x_label_*, + * emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab, + * x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */ + +#include <string.h> + +#include "arch/arch.h" +#include "arch/x64.h" +#include "arch/x64_isa.h" +#include "core/arena.h" +#include "core/pool.h" +#include "obj/obj.h" +#include "type/type.h" + +#include "arch/x64/internal.h" + +/* ============================================================ + * XRegPool implementation. */ + +void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) { + p->order = order; + p->nregs = nregs; + p->n_cs = n_cs; + p->hwm = 0; + p->free = (nregs >= 32u) ? 
0xFFFFFFFFu : ((1u << nregs) - 1u); +} + +static Reg xpool_alloc(XRegPool* p) { + if (p->free == 0) return (Reg)REG_NONE; + u32 idx = (u32)__builtin_ctz(p->free); + p->free &= ~(1u << idx); + if (idx + 1u > p->hwm) p->hwm = idx + 1u; + return (Reg)p->order[idx]; +} + +static int xpool_free(XRegPool* p, Reg r) { + for (u8 i = 0; i < p->nregs; ++i) { + if (p->order[i] == (u8)r) { + u32 bit = 1u << i; + if (p->free & bit) return -1; + p->free |= bit; + return 1; + } + } + return 0; +} + +/* ============================================================ + * Registers / frame */ + +Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { + XImpl* a = impl_of(t); + (void)ty; + if (cls == RC_INT) return xpool_alloc(&a->int_pool); + if (cls == RC_FP) return xpool_alloc(&a->fp_pool); + compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls); +} + +void x_free_reg(CGTarget* t, Reg r, RegClass cls) { + XImpl* a = impl_of(t); + XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool; + int rc = xpool_free(p, r); + if (rc == 1) return; + if (rc == -1) { + compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free", + (unsigned)r); + } + compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool", + (unsigned)r, cls == RC_FP ? "fp" : "int"); +} + +FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) { + XImpl* a = impl_of(t); + if (a->nslots == a->slots_cap) { + u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8; + XSlot* nbuf = arena_array(t->c->tu, XSlot, ncap); + if (a->slots) memcpy(nbuf, a->slots, sizeof(XSlot) * a->nslots); + a->slots = nbuf; + a->slots_cap = ncap; + } + u32 size = d->size ? d->size : 8; + u32 align = d->align ? 
d->align : 1; + u32 next = a->cum_off + size; + u32 mask = align - 1u; + next = (next + mask) & ~mask; + XSlot* s = &a->slots[a->nslots]; + s->off = next; + s->size = size; + s->align = align; + s->kind = d->kind; + a->cum_off = next; + a->nslots++; + return (FrameSlot)(a->nslots); +} + +XSlot* x64_slot_get(XImpl* a, FrameSlot fs) { + if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL; + return &a->slots[fs - 1]; +} + +/* ---- param: store incoming arg(s) into the home slot ---- */ +void x_param(CGTarget* t, const CGParamDesc* p) { + XImpl* a = impl_of(t); + XSlot* s = x64_slot_get(a, p->slot); + if (!s) compiler_panic(t->c, a->loc, "x64 param: bad slot"); + const ABIArgInfo* ai = p->abi; + + if (ai->kind == ABI_ARG_IGNORE) return; + if (ai->kind == ABI_ARG_INDIRECT) { + /* Incoming pointer to byval copy: load pointer, memcpy into slot. */ + u32 ptr_reg; + if (a->next_param_int < 6) { + ptr_reg = g_int_arg_regs[a->next_param_int++]; + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + emit_mov_load(t->mc, 8, 0, X64_R11, X64_RBP, (i32)(16 + caller_off)); + ptr_reg = X64_R11; + } + u32 nbytes = s->size; + u32 i = 0; + while (i + 8 <= nbytes) { + emit_mov_load(t->mc, 8, 0, X64_RAX, ptr_reg, (i32)i); + emit_mov_store(t->mc, 8, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + i += 8; + } + while (i + 4 <= nbytes) { + emit_mov_load(t->mc, 4, 0, X64_RAX, ptr_reg, (i32)i); + emit_mov_store(t->mc, 4, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + i += 4; + } + while (i + 2 <= nbytes) { + emit_mov_load(t->mc, 2, 0, X64_RAX, ptr_reg, (i32)i); + emit_mov_store(t->mc, 2, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + i += 2; + } + while (i < nbytes) { + emit_mov_load(t->mc, 1, 0, X64_RAX, ptr_reg, (i32)i); + emit_mov_store(t->mc, 1, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + i += 1; + } + return; + } + /* DIRECT */ + for (u16 i = 0; i < ai->nparts; ++i) { + const ABIArgPart* pt = &ai->parts[i]; + u32 part_off = pt->src_offset; + u32 sz = pt->size; + if 
(pt->cls == ABI_CLASS_INT) { + if (a->next_param_int < 6) { + u32 reg = g_int_arg_regs[a->next_param_int++]; + emit_mov_store(t->mc, sz, reg, X64_RBP, + -(i32)s->off + (i32)part_off); + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + emit_mov_load(t->mc, sz, 0, X64_RAX, X64_RBP, + (i32)(16 + caller_off)); + emit_mov_store(t->mc, sz, X64_RAX, X64_RBP, + -(i32)s->off + (i32)part_off); + } + } else if (pt->cls == ABI_CLASS_FP) { + if (a->next_param_fp < 8) { + u32 xmm = a->next_param_fp++; + u8 prefix = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP, + -(i32)s->off + (i32)part_off); + } else { + u32 caller_off = a->next_param_stack; + a->next_param_stack += 8; + u8 prefix = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(t->mc, prefix, 0x10, X64_XMM0, X64_RBP, + (i32)(16 + caller_off)); + emit_sse_store(t->mc, prefix, 0x11, X64_XMM0, X64_RBP, + -(i32)s->off + (i32)part_off); + } + } else { + compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl", + (int)pt->cls); + } + } +} + +const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) { + (void)c; + (void)n; + x_panic(t, "clobbers"); +} +void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, + MemAccess ma) { + XImpl* a = impl_of(t); + if (src.kind != OPK_REG) + compiler_panic(t->c, a->loc, "x64 spill_reg: src is not OPK_REG"); + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + x_store(t, addr, src, ma); + x_free_reg(t, src.v.reg, src.cls); +} + +void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, + MemAccess ma) { + XImpl* a = impl_of(t); + if (dst.kind != OPK_REG) + compiler_panic(t->c, a->loc, "x64 reload_reg: dst is not OPK_REG"); + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_LOCAL; + addr.cls = RC_INT; + addr.type = ma.type; + addr.v.frame_slot = slot; + x_load(t, dst, addr, ma); +} + +/* 
============================================================ + * Labels / control flow */ + +Label x_label_new(CGTarget* t) { + return (Label)t->mc->label_new(t->mc); +} +void x_label_place(CGTarget* t, Label l) { + t->mc->label_place(t->mc, (MCLabel)l); +} + +/* Emit `jmp rel32` (E9 + 4-byte disp) with a label fixup. R_PC32 applied + * at the disp32 site with addend=-4 yields target - end_of_insn. */ +void emit_jmp_label(MCEmitter* mc, MCLabel l) { + u8 op = 0xE9; + mc->emit_bytes(mc, &op, 1); + emit_u32le(mc, 0); + mc->emit_label_ref(mc, l, R_PC32, 4, -4); +} + +/* Emit `Jcc rel32` (0F 8x + 4-byte disp) with a label fixup. */ +void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) { + u8 op[2] = {0x0F, (u8)(0x80 | (cc & 0xF))}; + mc->emit_bytes(mc, op, 2); + emit_u32le(mc, 0); + mc->emit_label_ref(mc, l, R_PC32, 4, -4); +} + +void x_jump(CGTarget* t, Label l) { emit_jmp_label(t->mc, (MCLabel)l); } + +static u32 cmp_to_cc(CmpOp op) { + switch (op) { + case CMP_EQ: return X64_CC_E; + case CMP_NE: return X64_CC_NE; + case CMP_LT_U: return X64_CC_B; + case CMP_LE_U: return X64_CC_BE; + case CMP_GT_U: return X64_CC_A; + case CMP_GE_U: return X64_CC_AE; + case CMP_LT_S: return X64_CC_L; + case CMP_LE_S: return X64_CC_LE; + case CMP_GT_S: return X64_CC_G; + case CMP_GE_S: return X64_CC_GE; + default: return X64_CC_E; + } +} + +u32 x64_force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) { + if (op.kind == OPK_REG) return op.v.reg & 0xFu; + if (op.kind == OPK_IMM) { + x64_emit_load_imm(t->mc, w, scratch, op.v.imm); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, "x64: operand kind %d not REG/IMM", + (int)op.kind); +} + +static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { + int w = type_is_64(a_op.type) ? 1 : 0; + /* IMM RHS imm8 / imm32 fast paths. CMP is not commutative across the + * cond codes, so IMM-on-LHS still has to materialize. 
*/ + if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG) { + if (imm_fits_i8(b_op.v.imm)) { + emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm); + return; + } + if (imm_fits_i32(b_op.v.imm)) { + emit_alu_imm32(t->mc, w, /*sub=CMP*/ 7u, a_op.v.reg & 0xFu, + (i32)b_op.v.imm); + return; + } + } + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + u32 rb = x64_force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11); + /* cmp r/m, r — opcode 0x39 (encoded as `cmp ra, rb` ⇒ flags = ra - rb). */ + emit_alu_rr(t->mc, w, 0x39, ra, rb); +} + +void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, + Label l) { + emit_cmp_ab(t, a, b); + emit_jcc_label(t->mc, cmp_to_cc(op), (MCLabel)l); +} + +void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { + emit_cmp_ab(t, a, b); + u32 d = dst.v.reg & 0xFu; + emit_setcc(t->mc, cmp_to_cc(op), d); + emit_movzx_r32_r8(t->mc, d, d); +} + +/* ---- structured scopes ---- */ +CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) { + XImpl* a = impl_of(t); + if (a->nscopes == a->scopes_cap) { + u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; + XScope* nb = arena_array(t->c->tu, XScope, ncap); + if (a->scopes) memcpy(nb, a->scopes, sizeof(XScope) * a->nscopes); + a->scopes = nb; + a->scopes_cap = ncap; + } + XScope* sc = &a->scopes[a->nscopes]; + sc->kind = (u8)d->kind; + sc->has_else = 0; + sc->else_label = 0; + sc->end_label = 0; + sc->break_label = d->break_label; + sc->continue_label = d->continue_label; + + if (d->kind == SCOPE_IF) { + sc->else_label = t->mc->label_new(t->mc); + sc->end_label = t->mc->label_new(t->mc); + int w = type_is_64(d->cond.type) ? 1 : 0; + u32 rc = x64_force_reg_int(t, d->cond, w, X64_RAX); + emit_test_self(t->mc, w, rc); + emit_jcc_label(t->mc, X64_CC_E, sc->else_label); + } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { + /* Bookkeeping only. 
*/ + } else { + compiler_panic(t->c, a->loc, + "x64 scope_begin: kind %d not yet implemented", + (int)d->kind); + } + a->nscopes++; + return (CGScope)a->nscopes; +} + +void x_scope_else(CGTarget* t, CGScope s) { + XImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) + compiler_panic(t->c, a->loc, "x64 scope_else: bad scope"); + XScope* sc = &a->scopes[s - 1]; + emit_jmp_label(t->mc, sc->end_label); + t->mc->label_place(t->mc, sc->else_label); + sc->has_else = 1; +} + +void x_scope_end(CGTarget* t, CGScope s) { + XImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) + compiler_panic(t->c, a->loc, "x64 scope_end: bad scope"); + XScope* sc = &a->scopes[s - 1]; + if (sc->kind == SCOPE_IF) { + if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label); + t->mc->label_place(t->mc, sc->end_label); + } +} + +void x_break_to(CGTarget* t, CGScope s) { + XImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) + compiler_panic(t->c, a->loc, "x64 break_to: bad scope"); + x_jump(t, a->scopes[s - 1].break_label); +} +void x_continue_to(CGTarget* t, CGScope s) { + XImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) + compiler_panic(t->c, a->loc, "x64 continue_to: bad scope"); + x_jump(t, a->scopes[s - 1].continue_label); +} diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -0,0 +1,647 @@ +/* arch/x64/emit.c — byte-level emit helpers, function prologue/epilogue. + * + * Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin, + * x_func_end, and the shared constant tables (g_int_order, g_fp_order, + * g_int_arg_regs). */ + +#include <string.h> + +#include "arch/arch.h" +#include "arch/x64.h" +#include "arch/x64_isa.h" +#include "core/arena.h" +#include "core/pool.h" +#include "obj/obj.h" +#include "type/type.h" + +#include "arch/x64/internal.h" + +/* ============================================================ + * Shared constant tables. 
*/ + +const u8 g_int_order[6] = { + X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */ + X64_R10, /* caller-saved tail */ +}; + +const u8 g_fp_order[10] = { + /* All xmm regs are caller-saved on SysV; preference order is xmm6 + * upward to keep the low arg/return regs (xmm0..5) clear for calls. */ + X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10, + X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15, +}; + +const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX, + X64_RCX, X64_R8, X64_R9}; + +/* ============================================================ + * Byte-level emit helpers. + * + * x64 instructions are variable length: optional legacy prefix(es), + * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional + * displacement, optional immediate. Helpers below build sequences + * into the active MCEmitter section, recording one Debug row per + * instruction-start. */ +static void emit1(MCEmitter* mc, u8 b) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + mc->emit_bytes(mc, &b, 1); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_u32le(MCEmitter* mc, u32 v) { + u8 b[4]; + b[0] = (u8)v; + b[1] = (u8)(v >> 8); + b[2] = (u8)(v >> 16); + b[3] = (u8)(v >> 24); + mc->emit_bytes(mc, b, 4); +} +static void emit_u64le(MCEmitter* mc, u64 v) { + u8 b[8]; + for (int i = 0; i < 8; ++i) b[i] = (u8)(v >> (i * 8)); + mc->emit_bytes(mc, b, 8); +} + +static u8 make_rex(int w, u32 reg, u32 index, u32 rm) { + u8 r = 0; + if (w) r |= X64_REX_W; + if (reg & 8) r |= X64_REX_R; + if (index & 8) r |= X64_REX_X; + if (rm & 8) r |= X64_REX_B; + return r ? (u8)(X64_REX_BASE | r) : 0; +} +void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { + u8 r = make_rex(w, reg, index, rm); + if (r) mc->emit_bytes(mc, &r, 1); +} +/* Force REX (even REX=0x40) — required for byte-reg encodings that + * promote SIL/DIL/etc. 
*/ +void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { + u8 r = (u8)(X64_REX_BASE | (w ? X64_REX_W : 0) | ((reg & 8) ? X64_REX_R : 0) | + ((index & 8) ? X64_REX_X : 0) | ((rm & 8) ? X64_REX_B : 0)); + mc->emit_bytes(mc, &r, 1); +} + +u8 modrm(u32 mod, u32 reg, u32 rm) { + return (u8)(((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u)); +} +u8 sib(u32 scale, u32 index, u32 base) { + return (u8)(((scale & 3u) << 6) | ((index & 7u) << 3) | (base & 7u)); +} + +static u32 disp_mod(u32 base, i32 disp) { + if (disp == 0 && (base & 7u) != 5u) return 0u; /* [base] */ + if (disp >= -128 && disp <= 127) return 1u; /* [base + disp8] */ + return 2u; /* [base + disp32] */ +} + +void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) { + u32 m = disp_mod(base, disp); + if ((base & 7u) == 4u) { + /* SIB byte required: index=4 (none), base=base. */ + u8 mr = modrm(m, reg, 4u); + mc->emit_bytes(mc, &mr, 1); + u8 s = sib(0, 4u, base); + mc->emit_bytes(mc, &s, 1); + } else { + u8 mr = modrm(m, reg, base); + mc->emit_bytes(mc, &mr, 1); + } + if (m == 1u) { + u8 d = (u8)(i8)disp; + mc->emit_bytes(mc, &d, 1); + } else if (m == 2u) { + emit_u32le(mc, (u32)disp); + } +} +void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) { + u8 mr = modrm(3u, reg, rm); + mc->emit_bytes(mc, &mr, 1); +} + +/* ---- specific instruction emitters ---- */ + +/* mov rd, rs (64-bit if w, else 32-bit). */ +void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, src, 0, dst); + u8 op = 0x89; /* MOV r/m, r */ + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, src, dst); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* mov reg, [base + disp]; size 1/2/4/8. 
*/ +void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, + u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (size == 8) { + emit_rex(mc, 1, dst, 0, base); + u8 op = 0x8B; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, dst, base, disp); + } else if (size == 4) { + emit_rex(mc, 0, dst, 0, base); + u8 op = 0x8B; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, dst, base, disp); + } else if (size == 2) { + emit_rex(mc, 0, dst, 0, base); + u8 op[2] = {0x0F, signed_ext ? 0xBF : 0xB7}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, dst, base, disp); + } else if (size == 1) { + emit_rex(mc, 0, dst, 0, base); + u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, dst, base, disp); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* mov [base + disp], src; size 1/2/4/8. */ +void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (size == 8) { + emit_rex(mc, 1, src, 0, base); + u8 op = 0x89; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, src, base, disp); + } else if (size == 4) { + emit_rex(mc, 0, src, 0, base); + u8 op = 0x89; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, src, base, disp); + } else if (size == 2) { + u8 p = 0x66; + mc->emit_bytes(mc, &p, 1); + emit_rex(mc, 0, src, 0, base); + u8 op = 0x89; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, src, base, disp); + } else if (size == 1) { + /* Force REX so SIL/DIL/etc are addressable as byte regs. 
*/ + emit_rex_force(mc, 0, src, 0, base); + u8 op = 0x88; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, src, base, disp); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, 1, dst, 0, base); + u8 op = 0x8D; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, dst, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r + * imm32) for !is64. Both 10/5 bytes. */ +void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (is64) { + emit_rex(mc, 1, 0, 0, dst); + u8 op = (u8)(0xB8 | (dst & 7)); + mc->emit_bytes(mc, &op, 1); + emit_u64le(mc, (u64)imm); + } else { + emit_rex(mc, 0, 0, 0, dst); + u8 op = (u8)(0xB8 | (dst & 7)); + mc->emit_bytes(mc, &op, 1); + emit_u32le(mc, (u32)imm); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* Two-operand ALU r/m, r. op picks ADD(01)/SUB(29)/AND(21)/OR(09)/XOR(31)/ + * CMP(39)/MOV(89)/TEST(85). 
*/ +void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, src, 0, dst); + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, src, dst); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 op[2] = {0x0F, 0xAF}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 op = 0xF7; + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, sub, reg); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 op = 0xD3; + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, sub, reg); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* Shift r/m by imm8: opcode C1 /sub ib. sub: SHL=4, SHR=5, SAR=7. */ +void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[3]; + buf[0] = 0xC1; + buf[1] = modrm(3u, sub, reg); + buf[2] = imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_cqo_or_cdq(MCEmitter* mc, int w) { + if (w) { + u8 buf[2] = {X64_REX_BASE | X64_REX_W, 0x99}; + mc->emit_bytes(mc, buf, 2); + } else { + u8 op = 0x99; + mc->emit_bytes(mc, &op, 1); + } +} + +void emit_xor_self(MCEmitter* mc, int w, u32 r) { + emit_alu_rr(mc, w, 0x31, r, r); +} + +/* cmp r/m, imm8 (0x83 /7). 
*/ +void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[3]; + buf[0] = 0x83; + buf[1] = modrm(3u, 7u, reg); + buf[2] = (u8)imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* ALU r/m, imm8: opcode 0x83 /sub ib (sign-extended). sub: ADD=0, + * OR=1, ADC=2, SBB=3, AND=4, SUB=5, XOR=6, CMP=7. */ +void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[3]; + buf[0] = 0x83; + buf[1] = modrm(3u, sub, reg); + buf[2] = (u8)imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* ALU r/m, imm32: opcode 0x81 /sub id (sign-extended for w=1). */ +void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[6]; + buf[0] = 0x81; + buf[1] = modrm(3u, sub, reg); + buf[2] = (u8)(imm & 0xFF); + buf[3] = (u8)((imm >> 8) & 0xFF); + buf[4] = (u8)((imm >> 16) & 0xFF); + buf[5] = (u8)((imm >> 24) & 0xFF); + mc->emit_bytes(mc, buf, 6); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* IMUL r, r/m, imm: 0x6B /r ib (imm8 sext) or 0x69 /r id (imm32 sext). + * Both forms write the result back to the same `dst` register so the + * caller doesn't need an explicit copy beforehand — unlike the ALU + * forms which read-modify-write a single operand. 
*/ +void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 buf[3]; + buf[0] = 0x6B; + buf[1] = modrm(3u, dst, src); + buf[2] = (u8)imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 buf[6]; + buf[0] = 0x69; + buf[1] = modrm(3u, dst, src); + buf[2] = (u8)(imm & 0xFF); + buf[3] = (u8)((imm >> 8) & 0xFF); + buf[4] = (u8)((imm >> 16) & 0xFF); + buf[5] = (u8)((imm >> 24) & 0xFF); + mc->emit_bytes(mc, buf, 6); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* Width predicate: does `imm` fit in an i8 (used by the 0x83/0x6B + * imm8-sign-extended forms)? */ +int imm_fits_i8(i64 imm) { return imm >= -128 && imm <= 127; } +/* Width predicate: does `imm` fit in a signed 32-bit value (the 0x81/ + * 0x69 imm32-sign-extended forms; for w=1 the imm is sign-extended to + * 64). Returns 0 for values outside [INT32_MIN, INT32_MAX] — those + * require a full materialization through x64_emit_load_imm. 
*/ +int imm_fits_i32(i64 imm) { + return imm >= -2147483648LL && imm <= 2147483647LL; +} + +void emit_test_self(MCEmitter* mc, int w, u32 reg) { + emit_alu_rr(mc, w, 0x85, reg, reg); +} + +void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex_force(mc, 0, 0, 0, reg); + u8 op[2] = {0x0F, (u8)(0x90 | (cc & 0xF))}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, 0u, reg); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex_force(mc, 0, dst, 0, src); + u8 op[2] = {0x0F, 0xB6}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* movzx/movsx r→r. src_size is source byte width. */ +void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size, + u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (src_size == 4 && signed_ext) { + /* movsxd r64, r32: REX.W 0x63 ModRM */ + emit_rex(mc, 1, dst, 0, src); + u8 op = 0x63; + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, dst, src); + } else if (src_size == 4 && !signed_ext) { + /* zext 32→64 is `mov r32, r32` (clears high 32). */ + emit_rex(mc, 0, src, 0, dst); + u8 op = 0x89; + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, src, dst); + } else if (src_size == 1) { + emit_rex_force(mc, w, dst, 0, src); + u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + } else if (src_size == 2) { + emit_rex(mc, w, dst, 0, src); + u8 op[2] = {0x0F, signed_ext ? 
0xBF : 0xB7}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + } + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +static void emit_ret(MCEmitter* mc) { + u8 op = 0xC3; + mc->emit_bytes(mc, &op, 1); +} +static void emit_leave(MCEmitter* mc) { + u8 op = 0xC9; + mc->emit_bytes(mc, &op, 1); +} + +/* ---- SSE scalar FP encoders ---- */ +void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (prefix) mc->emit_bytes(mc, &prefix, 1); + emit_rex(mc, 0, dst, 0, src); + u8 op[2] = {0x0F, opcode}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, + u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (prefix) mc->emit_bytes(mc, &prefix, 1); + emit_rex(mc, 0, dst, 0, base); + u8 op[2] = {0x0F, opcode}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, dst, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, + u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (prefix) mc->emit_bytes(mc, &prefix, 1); + emit_rex(mc, 0, src, 0, base); + u8 op[2] = {0x0F, opcode}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, src, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, + u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + if (prefix) mc->emit_bytes(mc, &prefix, 1); + emit_rex(mc, w, dst, 0, src); + u8 op[2] = {0x0F, opcode}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* ============================================================ + * Function lifecycle */ + +void 
x_func_begin(CGTarget* t, const CGFuncDesc* fd) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + mc->set_section(mc, fd->text_section_id); + mc->emit_align(mc, 16, 0x90); + + a->fd = fd; + a->func_start = mc->pos(mc); + a->next_param_int = 0; + a->next_param_fp = 0; + a->next_param_stack = 0; + a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; + a->has_alloca = 0; + a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; + a->cum_off = 0; + a->max_outgoing = 0; + xpool_init(&a->int_pool, g_int_order, 6u, 5u); + xpool_init(&a->fp_pool, g_fp_order, 10u, 0u); + a->nslots = 0; + a->nscopes = 0; + a->nalloca_patches = 0; + a->sret_ptr_slot = FRAME_SLOT_NONE; + a->reg_save_slot = FRAME_SLOT_NONE; + a->epilogue_label = mc->label_new(mc); + + mc->cfi_startproc(mc); + + /* Reserve a fixed-size prologue placeholder filled with NOPs. */ + a->prologue_pos = mc->pos(mc); + for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) emit1(mc, 0x90); + + /* sret: rdi at entry holds the destination pointer. Spill it to a + * hidden slot so the body can use rdi freely. */ + if (a->has_sret) { + FrameSlotDesc fsd = { + .type = NULL, .name = 0, .loc = {0, 0, 0}, + .size = 8, .align = 8, .kind = FS_SPILL, .flags = 0, + }; + a->sret_ptr_slot = x_frame_slot(t, &fsd); + /* Subsequent int args start at rsi (next_param_int = 1). */ + a->next_param_int = 1; + } + + /* Variadic: reserve the SysV reg-save area (rdi..r9 at +0..+40, then + * xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the saves + * directly after the prologue placeholder so the original register + * args are preserved before x_param() spills the named ones. 
*/ + if (a->is_variadic) { + FrameSlotDesc rsd = { + .type = NULL, .name = 0, .loc = {0, 0, 0}, + .size = 176, .align = 8, .kind = FS_SPILL, .flags = 0, + }; + a->reg_save_slot = x_frame_slot(t, &rsd); + XSlot* rs = x64_slot_get(a, a->reg_save_slot); + static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX, + X64_RCX, X64_R8, X64_R9}; + for (u32 i = 0; i < 6; ++i) { + emit_mov_store(mc, 8, gprs[i], X64_RBP, + -(i32)rs->off + (i32)(i * 8u)); + } + /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per + * FP slot, so the upper half of the 16-byte stride stays unused. */ + for (u32 i = 0; i < 8; ++i) { + emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP, + -(i32)rs->off + (i32)(48u + i * 16u)); + } + } +} + +static u32 align_up_u32(u32 v, u32 a) { return (v + (a - 1u)) & ~(a - 1u); } + +void x_func_end(CGTarget* t) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + u32 cs_used = a->int_pool.hwm; + if (cs_used > a->int_pool.n_cs) cs_used = a->int_pool.n_cs; + u32 cs_size = cs_used * 8u; + + /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call, + * which means rsp ≡ 8 mod 16 inside the function (after the return + * address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it + * is 0 mod 16; after `sub rsp, frame_size` we need it back to 0 + * mod 16, so frame_size must be a multiple of 16. */ + u32 raw = a->max_outgoing + cs_size + a->cum_off; + u32 frame_size = align_up_u32(raw, 16u); + if (frame_size == 0) frame_size = 16; + + mc->label_place(mc, a->epilogue_label); + + /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */ + for (i32 i = (i32)cs_used - 1; i >= 0; --i) { + u32 reg = a->int_pool.order[i]; + i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off); + } + + /* leave; ret. */ + emit_leave(mc); + emit_ret(mc); + + /* Patch prologue placeholder. 
*/ + u8 buf[X64_PROLOGUE_BYTES]; + for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) buf[i] = 0x90; + u32 wi = 0; + + /* push rbp (1 byte). */ + buf[wi++] = 0x55; + /* mov rbp, rsp: REX.W 89 E5. */ + buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = 0x89; + buf[wi++] = modrm(3u, X64_RSP, X64_RBP); + /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */ + buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = 0x81; + buf[wi++] = modrm(3u, 5u, X64_RSP); + buf[wi++] = (u8)frame_size; + buf[wi++] = (u8)(frame_size >> 8); + buf[wi++] = (u8)(frame_size >> 16); + buf[wi++] = (u8)(frame_size >> 24); + + /* sret: mov [rbp + disp32], rdi. */ + if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { + XSlot* s = x64_slot_get(a, a->sret_ptr_slot); + if (s) { + i32 off = -(i32)s->off; + if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow; + buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = 0x89; + buf[wi++] = modrm(2u, X64_RDI, X64_RBP); + buf[wi++] = (u8)off; + buf[wi++] = (u8)(off >> 8); + buf[wi++] = (u8)(off >> 16); + buf[wi++] = (u8)(off >> 24); + } + } + + /* Spill callee-saves. */ + for (u32 i = 0; i < cs_used; ++i) { + u32 reg = a->int_pool.order[i]; + i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow; + buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0)); + buf[wi++] = 0x89; + buf[wi++] = modrm(2u, (reg & 7u), X64_RBP); + buf[wi++] = (u8)off; + buf[wi++] = (u8)(off >> 8); + buf[wi++] = (u8)(off >> 16); + buf[wi++] = (u8)(off >> 24); + } + + if (0) { + overflow: + compiler_panic(t->c, a->loc, + "x64: prologue placeholder overflow (%u of %u bytes)", wi, + X64_PROLOGUE_BYTES); + } + obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf, + X64_PROLOGUE_BYTES); + + /* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final + * max_outgoing (already 16-aligned via the `(stack_off+15)&~15` round + * at every call site). 
*/ + for (u32 i = 0; i < a->nalloca_patches; ++i) { + u8 dbuf[4]; + u32 m = a->max_outgoing; + dbuf[0] = (u8)m; + dbuf[1] = (u8)(m >> 8); + dbuf[2] = (u8)(m >> 16); + dbuf[3] = (u8)(m >> 24); + obj_patch(t->obj, a->fd->text_section_id, + a->alloca_patches[i].disp_pos, dbuf, 4); + } + + /* Define the function symbol. */ + u32 end = mc->pos(mc); + obj_symbol_define(t->obj, a->fd->sym, a->fd->text_section_id, + (u64)a->func_start, (u64)(end - a->func_start)); + + mc->cfi_endproc(mc); + a->fd = NULL; +} diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -0,0 +1,257 @@ +/* arch/x64/internal.h — private header shared by emit.c, alloc.c, ops.c. + * + * Contains: + * - XRegPool, XSlot, XScope, XAllocaPatch, XImpl struct definitions + * - impl_of() accessor + * - Small type helpers (static inline) + * - Forward declarations of cross-file functions + * + * NOT included by external consumers; use arch/x64.h for the public API. */ + +#pragma once + +#include <string.h> + +#include "arch/arch.h" +#include "arch/x64.h" +#include "arch/x64_isa.h" +#include "core/arena.h" +#include "core/pool.h" +#include "obj/obj.h" +#include "type/type.h" + +#define X64_PROLOGUE_BYTES 96u + +/* ============================================================ + * Custom register pool. */ + +typedef struct XRegPool { + u32 free; /* bit i set ⇔ alloc_order[i] is free */ + u32 hwm; /* highest index+1 ever allocated */ + const u8* order; /* alloc_order; first n_cs are callee-saved */ + u8 nregs; + u8 n_cs; + u8 pad[2]; +} XRegPool; + +/* ============================================================ + * XImpl and friends. 
*/ + +typedef struct XSlot { + u32 off; /* bytes below rbp (positive); address = rbp - off */ + u32 size; + u32 align; + u8 kind; + u8 pad[3]; +} XSlot; + +typedef struct XScope { + u8 kind; + u8 has_else; + u8 pad[2]; + MCLabel else_label; + MCLabel end_label; + Label break_label; + Label continue_label; +} XScope; + +/* alloca emits a placeholder `lea dst, [rsp + 0]` whose disp32 is patched + * at func_end with the final max_outgoing value. disp_pos records the + * byte offset of that disp32 in the active text section. */ +typedef struct XAllocaPatch { + u32 disp_pos; +} XAllocaPatch; + +typedef struct XImpl { + CGTarget base; + SrcLoc loc; + const CGFuncDesc* fd; + + u32 func_start; + u32 prologue_pos; + MCLabel epilogue_label; + + XSlot* slots; + u32 nslots; + u32 slots_cap; + u32 cum_off; + u32 max_outgoing; + + u32 next_param_int; + u32 next_param_fp; + u32 next_param_stack; + u8 has_sret; + u8 has_alloca; + u8 is_variadic; + u8 pad0; + FrameSlot sret_ptr_slot; + FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */ + + XRegPool int_pool; + XRegPool fp_pool; + + XScope* scopes; + u32 nscopes; + u32 scopes_cap; + + XAllocaPatch* alloca_patches; + u32 nalloca_patches; + u32 alloca_patches_cap; +} XImpl; + +static inline XImpl* impl_of(CGTarget* t) { return (XImpl*)t; } + +extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); + +/* ============================================================ + * Type helpers (static inline — used in all three translation units). 
*/ + +static inline int type_is_64(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 1; + default: + return 0; + } +} +static inline int type_is_fp_double(const Type* t) { + return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE); +} +static inline u32 type_byte_size(const Type* t) { + if (!t) return 4; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + case TY_BOOL: + return 1; + case TY_SHORT: + case TY_USHORT: + return 2; + case TY_INT: + case TY_UINT: + case TY_FLOAT: + return 4; + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_PTR: + case TY_DOUBLE: + return 8; + default: + return 8; + } +} +static inline int type_is_signed(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_LLONG: + return 1; + default: + return 0; + } +} + +static inline _Noreturn void x_panic(CGTarget* t, const char* what) { + SrcLoc loc = impl_of(t)->loc; + compiler_panic(t->c, loc, "x64: %s not implemented", what); +} + +/* ============================================================ + * Shared constant tables (defined in alloc.c, used in emit.c and ops.c). */ + +extern const u8 g_int_order[6]; +extern const u8 g_fp_order[10]; +extern const u32 g_int_arg_regs[6]; + +/* ============================================================ + * Cross-file function declarations. + * + * Functions that are defined in one translation unit but called from + * another cannot remain static; they are declared here. 
*/ + +/* --- emit.c exports (lifecycle used by ops.c vtable constructor, + * encoding helpers used by alloc.c and ops.c) --- */ +void x_func_begin(CGTarget* t, const CGFuncDesc* fd); +void x_func_end(CGTarget* t); + +/* encoding helpers */ +void emit_u32le(MCEmitter* mc, u32 v); +void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm); +void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm); +u8 modrm(u32 mod, u32 reg, u32 rm); +u8 sib(u32 scale, u32 index, u32 base); +void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp); +void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm); +void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src); +void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base, + i32 disp); +void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp); +void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp); +void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm); +void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src); +void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src); +void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg); +void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg); +void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm); +void emit_cqo_or_cdq(MCEmitter* mc, int w); +void emit_xor_self(MCEmitter* mc, int w, u32 r); +void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm); +void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm); +void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm); +void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm); +void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm); +int imm_fits_i8(i64 imm); +int imm_fits_i32(i64 imm); +void emit_test_self(MCEmitter* mc, int w, u32 reg); +void emit_setcc(MCEmitter* mc, u32 cc, u32 reg); +void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src); +void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, 
u32 src_size, + u32 dst, u32 src); +void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src); +void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, + i32 disp); +void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, + i32 disp); +void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, + u32 src); + +/* --- alloc.c exports (used by emit.c and/or ops.c) --- */ +void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs); +XSlot* x64_slot_get(XImpl* a, FrameSlot fs); +FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); +Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty); +void x_free_reg(CGTarget* t, Reg r, RegClass cls); +void x_param(CGTarget* t, const CGParamDesc* p); +const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n); +void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma); +void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); +Label x_label_new(CGTarget* t); +void x_label_place(CGTarget* t, Label l); +void emit_jmp_label(MCEmitter* mc, MCLabel l); +void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l); +void x_jump(CGTarget* t, Label l); +void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l); +void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b); +CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d); +void x_scope_else(CGTarget* t, CGScope s); +void x_scope_end(CGTarget* t, CGScope s); +void x_break_to(CGTarget* t, CGScope s); +void x_continue_to(CGTarget* t, CGScope s); +u32 x64_force_reg_int(CGTarget* t, Operand op, int w, u32 scratch); + +/* --- ops.c exports (used by alloc.c) --- */ +void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); +void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -0,0 +1,1916 @@ +/* arch/x64/ops.c — data movement, arithmetic, calls, atomics, intrinsics, + * and the vtable 
constructor x64_cgtarget_new. + * + * Covers: x_load_imm, x_load_const, x_copy, x_load, x_store, x_addr_of, + * x_tls_addr_of, x_copy_bytes, x_set_bytes, x_bitfield_load/store, + * x_binop, x_unop, x_convert, emit_arg_value, x_call, x_ret, + * x_alloca_, x_va_start_, x_va_arg_, x_va_end_, x_va_copy_, + * emit_lock_*, x_atomic_load/store/rmw/cas, x_fence, + * emit_popcnt, emit_bs, emit_bswap, emit_rol16_imm8, emit_xor_imm32, + * x_intrinsic, x_asm_block, x_set_loc, x_finalize, x_destroy, + * x64_cgtarget_new. */ + +#include <string.h> + +#include "arch/arch.h" +#include "arch/x64.h" +#include "arch/x64_isa.h" +#include "core/arena.h" +#include "core/pool.h" +#include "obj/obj.h" +#include "type/type.h" + +#include "arch/x64/internal.h" + +/* ============================================================ + * Data movement */ + +static void x_load_imm(CGTarget* t, Operand dst, i64 imm) { + int w = type_is_64(dst.type) ? 1 : 0; + x64_emit_load_imm(t->mc, w, dst.v.reg & 0xFu, imm); +} + +/* Materialize an FP literal: stash bytes in .rodata as a fresh local + * symbol, then load via RIP-relative movss/movsd. */ +static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) { + XImpl* a = impl_of(t); + if (dst.cls != RC_FP) + compiler_panic(t->c, a->loc, "x64 load_const: only FP supported in v1"); + + Sym ro_name = pool_intern_cstr(t->c->global, ".rodata"); + ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u); + + u32 cur_section = t->mc->section_id; + t->mc->set_section(t->mc, ro); + u32 ro_off = obj_align_to(t->obj, ro, cb.align ? 
cb.align : 4); + t->mc->emit_bytes(t->mc, cb.bytes, cb.size); + + char namebuf[64]; + static u32 lit_seq = 0; + int len = 0; + const char* prefix = ".LCFP_x64_"; + for (; prefix[len]; ++len) namebuf[len] = prefix[len]; + u32 v = lit_seq++; + char tmp[16]; + int tn = 0; + if (v == 0) + tmp[tn++] = '0'; + else + while (v) { + tmp[tn++] = '0' + (char)(v % 10); + v /= 10; + } + for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i]; + namebuf[len] = 0; + + Sym sname = pool_intern_cstr(t->c->global, namebuf); + ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off, + (u64)cb.size); + t->mc->set_section(t->mc, cur_section); + + /* movs{s,d} xmm, [rip+disp32]. Reloc R_PC32 with addend=-4 at the + * disp32 site so the linker resolves to target relative to end-of-insn. */ + u8 prefix2 = (cb.size == 8) ? 0xF2 : 0xF3; + u32 dst_x = dst.v.reg & 0xFu; + t->mc->emit_bytes(t->mc, &prefix2, 1); + emit_rex(t->mc, 0, dst_x, 0, 0); + u8 op[2] = {0x0F, 0x10}; + t->mc->emit_bytes(t->mc, op, 2); + u8 mr = modrm(0u, (dst_x & 7u), 5u); /* [RIP + disp32] */ + t->mc->emit_bytes(t->mc, &mr, 1); + u32 disp_pos = t->mc->pos(t->mc); + emit_u32le(t->mc, 0); + t->mc->emit_reloc_at(t->mc, cur_section, disp_pos, R_PC32, sym, -4, 1, 0); +} + +static void x_copy(CGTarget* t, Operand dst, Operand src) { + if (dst.cls == RC_FP || src.cls == RC_FP) { + u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; + emit_sse_rr(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, src.v.reg & 0xFu); + return; + } + int w = type_is_64(dst.type) ? 
1 : 0; + emit_mov_rr(t->mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu); +} + +static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) { + XImpl* a = impl_of(t); + if (addr.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, addr.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 addr_base: bad slot"); + *out_off = -(i32)s->off; + return X64_RBP; + } + if (addr.kind == OPK_INDIRECT) { + *out_off = addr.v.ind.ofs; + return addr.v.ind.base & 0xFu; + } + compiler_panic(t->c, a->loc, "x64 addr_base: kind %d unsupported", + (int)addr.kind); +} + +static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) { + return obj_symbol_extern_via_got(t->c, t->obj, sym); +} + +/* Materialize `&sym + addend` into `dst_reg`. For locally-defined or + * static-link extern symbols, emit `lea rd, [rip + disp32]` with + * R_X64_PLT32 (PLT32 collapses to a plain PC-relative LEA at link time + * — the PLT routing only fires when the linker actually needs the + * trampoline, i.e. function calls into a DSO). For undef externs in + * PIC/PIE we instead emit `mov rd, [rip + disp32]` against a GOT slot + * (R_X64_REX_GOTPCRELX) so the loader can resolve the symbol by + * patching a single slot rather than touching .text. + * + * Addend -4 because the PC is end-of-instruction. When routing + * through the GOT we omit any extra addend on the reloc (most loaders + * disallow nonzero addends on GOT-load fixups); a follow-up `add` / + * `lea` would have to add it after the load if the codegen needed + * `&sym + nonzero`. In practice the caller only ever passes + * addend=0 for global references that go through the GOT path. 
*/ +static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym, + i64 addend) { + if (x64_use_got_for_sym(t, sym)) { + /* mov rd, [rip + disp32] */ + emit_rex(t->mc, 1, dst_reg, 0, 0); + u8 op = 0x8B; + t->mc->emit_bytes(t->mc, &op, 1); + u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ + t->mc->emit_bytes(t->mc, &mr, 1); + u32 disp_pos = t->mc->pos(t->mc); + emit_u32le(t->mc, 0); + t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos, + R_X64_REX_GOTPCRELX, sym, -4, 1, 0); + /* Apply any nonzero addend by adjusting the loaded value. */ + if (addend) { + i32 a = (i32)addend; + if (a >= -128 && a <= 127) { + /* add r/m64, imm8 (REX.W + 0x83 /0 ib) */ + emit_rex(t->mc, 1, 0, 0, dst_reg); + u8 add_op[2] = {0x83, modrm(3u, 0u, (u8)(dst_reg & 7u))}; + t->mc->emit_bytes(t->mc, add_op, 2); + u8 ib = (u8)a; + t->mc->emit_bytes(t->mc, &ib, 1); + } else { + /* add r/m64, imm32 (REX.W + 0x81 /0 id) */ + emit_rex(t->mc, 1, 0, 0, dst_reg); + u8 add_op[2] = {0x81, modrm(3u, 0u, (u8)(dst_reg & 7u))}; + t->mc->emit_bytes(t->mc, add_op, 2); + emit_u32le(t->mc, (u32)a); + } + } + return; + } + emit_rex(t->mc, 1, dst_reg, 0, 0); + u8 op = 0x8D; + t->mc->emit_bytes(t->mc, &op, 1); + u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ + t->mc->emit_bytes(t->mc, &mr, 1); + u32 disp_pos = t->mc->pos(t->mc); + emit_u32le(t->mc, 0); + t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos, R_X64_PLT32, sym, + addend - 4, 1, 0); +} + +void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + + if (addr.kind == OPK_GLOBAL) { + /* Materialize &sym into R11, then load from [r11]. */ + emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend); + if (dst.cls == RC_FP) { + u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, X64_R11, 0); + } else { + int signed_ = type_is_signed(ma.type ? 
ma.type : addr.type); + emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, X64_R11, 0); + } + return; + } + + i32 off; + u32 base = addr_base(t, addr, &off); + if (dst.cls == RC_FP) { + u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, base, off); + } else { + int signed_ = type_is_signed(ma.type ? ma.type : addr.type); + emit_mov_load(t->mc, sz, signed_, dst.v.reg & 0xFu, base, off); + } +} + +void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { + u32 sz = ma.size ? ma.size : type_byte_size(addr.type); + + if (addr.kind == OPK_GLOBAL) { + /* Materialize &sym into R11, then store via [r11]. The IMM source + * branch below uses RAX as a scratch for the value, so R11 stays + * untouched between the LEA and the store. */ + emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend); + if (src.kind == OPK_IMM) { + int w = (sz == 8) ? 1 : 0; + x64_emit_load_imm(t->mc, w, X64_RAX, src.v.imm); + emit_mov_store(t->mc, sz, X64_RAX, X64_R11, 0); + return; + } + if (src.cls == RC_FP) { + u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, X64_R11, 0); + return; + } + emit_mov_store(t->mc, sz, src.v.reg & 0xFu, X64_R11, 0); + return; + } + + i32 off; + u32 base = addr_base(t, addr, &off); + + if (src.kind == OPK_IMM) { + int w = (sz == 8) ? 1 : 0; + x64_emit_load_imm(t->mc, w, X64_RAX, src.v.imm); + emit_mov_store(t->mc, sz, X64_RAX, base, off); + return; + } + if (src.cls == RC_FP) { + u8 prefix2 = (sz == 8) ? 
0xF2 : 0xF3; + emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, off); + return; + } + emit_mov_store(t->mc, sz, src.v.reg & 0xFu, base, off); +} + +static void x_addr_of(CGTarget* t, Operand dst, Operand lv) { + XImpl* a = impl_of(t); + if (lv.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, lv.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 addr_of: bad slot"); + emit_lea(t->mc, dst.v.reg & 0xFu, X64_RBP, -(i32)s->off); + return; + } + if (lv.kind == OPK_INDIRECT) { + emit_lea(t->mc, dst.v.reg & 0xFu, lv.v.ind.base & 0xFu, lv.v.ind.ofs); + return; + } + if (lv.kind == OPK_GLOBAL) { + emit_global_lea(t, dst.v.reg & 0xFu, lv.v.global.sym, lv.v.global.addend); + return; + } + x_panic(t, "addr_of: kind unsupported"); +} + +/* x86_64 TLS Local-Exec materialization. + * mov rd, fs:0 ; read thread pointer (FS base + 0) + * lea rd, [rd + sym@tpoff] ; add TP-relative offset + * The disp32 of the LEA carries an R_X64_TPOFF32 reloc; the linker fills + * in the signed TP-relative offset (negative under variant II — TLS image + * sits below the TCB that FS points at). 
*/ +static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + u32 rd = dst.v.reg & 0xFu; + + /* mov rd, qword ptr fs:[0] + * 64 [REX.W|REX.R] 8B mod=00/reg=rd/rm=100 sib(0,4,5) disp32=0 */ + u8 fs_prefix = 0x64; + mc->emit_bytes(mc, &fs_prefix, 1); + emit_rex(mc, 1, rd, 0, 0); + u8 op_mov = 0x8B; + mc->emit_bytes(mc, &op_mov, 1); + u8 mr1 = modrm(0u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr1, 1); + u8 s1 = sib(0u, 4u, 5u); + mc->emit_bytes(mc, &s1, 1); + emit_u32le(mc, 0); + + /* lea rd, [rd + disp32] + * [REX.W|REX.R|REX.B] 8D mod=10/reg=rd/rm=rd [SIB if rd&7==4] disp32 */ + emit_rex(mc, 1, rd, 0, rd); + u8 op_lea = 0x8D; + mc->emit_bytes(mc, &op_lea, 1); + u32 disp_pos; + if ((rd & 7u) == 4u) { + u8 mr2 = modrm(2u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr2, 1); + u8 s2 = sib(0u, 4u, rd & 7u); + mc->emit_bytes(mc, &s2, 1); + disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + } else { + u8 mr2 = modrm(2u, rd & 7u, rd & 7u); + mc->emit_bytes(mc, &mr2, 1); + disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + } + mc->emit_reloc_at(mc, sec, disp_pos, R_X64_TPOFF32, sym, addend, 0, 0); +} + +/* Aggregate ops — small unrolled memcpy/memset. */ +static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { + if (op.kind == OPK_REG) return op.v.reg & 0xFu; + if (op.kind == OPK_LOCAL) { + XImpl* a = impl_of(t); + XSlot* s = x64_slot_get(a, op.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 agg: bad slot"); + emit_lea(t->mc, scratch, X64_RBP, -(i32)s->off); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, + "x64 agg: address kind %d unsupported", (int)op.kind); +} + +static void x_copy_bytes(CGTarget* t, Operand da, Operand sa, + AggregateAccess g) { + u32 dr = agg_addr_reg(t, da, X64_R11); + u32 sr = agg_addr_reg(t, sa, (dr == X64_RAX) ? 
X64_RCX : X64_RAX); + u32 nbytes = g.size; + u32 i = 0; + while (i + 8 <= nbytes) { + emit_mov_load(t->mc, 8, 0, X64_RDX, sr, (i32)i); + emit_mov_store(t->mc, 8, X64_RDX, dr, (i32)i); + i += 8; + } + while (i + 4 <= nbytes) { + emit_mov_load(t->mc, 4, 0, X64_RDX, sr, (i32)i); + emit_mov_store(t->mc, 4, X64_RDX, dr, (i32)i); + i += 4; + } + while (i + 2 <= nbytes) { + emit_mov_load(t->mc, 2, 0, X64_RDX, sr, (i32)i); + emit_mov_store(t->mc, 2, X64_RDX, dr, (i32)i); + i += 2; + } + while (i < nbytes) { + emit_mov_load(t->mc, 1, 0, X64_RDX, sr, (i32)i); + emit_mov_store(t->mc, 1, X64_RDX, dr, (i32)i); + i += 1; + } +} + +static void x_set_bytes(CGTarget* t, Operand da, Operand bv, + AggregateAccess g) { + u32 dr = agg_addr_reg(t, da, X64_R11); + if (bv.kind != OPK_IMM) + compiler_panic(t->c, impl_of(t)->loc, + "x64 set_bytes: non-IMM byte not yet supported"); + u8 b = (u8)(bv.v.imm & 0xff); + u64 b64 = b; + b64 |= b64 << 8; + b64 |= b64 << 16; + b64 |= b64 << 32; + x64_emit_load_imm(t->mc, 1, X64_RAX, (i64)b64); + u32 nbytes = g.size; + u32 i = 0; + while (i + 8 <= nbytes) { + emit_mov_store(t->mc, 8, X64_RAX, dr, (i32)i); + i += 8; + } + while (i + 4 <= nbytes) { + emit_mov_store(t->mc, 4, X64_RAX, dr, (i32)i); + i += 4; + } + while (i + 2 <= nbytes) { + emit_mov_store(t->mc, 2, X64_RAX, dr, (i32)i); + i += 2; + } + while (i < nbytes) { + emit_mov_store(t->mc, 1, X64_RAX, dr, (i32)i); + i += 1; + } +} + +/* Load the storage unit, then extract the field by shifting it to the + * top of the register and shifting back. SAR for signed, SHR for unsigned. */ +static void x_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, + BitFieldAccess bf) { + u32 base = agg_addr_reg(t, record_addr, X64_R11); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + int w = (storage_bytes == 8u) ? 1 : 0; + u32 reg_size = w ? 64u : 32u; + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? 
bf.bit_width : 1u; + u32 rd = dst.v.reg & 0xFu; + + emit_mov_load(t->mc, storage_bytes, 0, rd, base, (i32)bf.storage_offset); + u8 left = (u8)(reg_size - lsb - width); + u8 right = (u8)(reg_size - width); + if (left) emit_shift_imm(t->mc, w, 4u, rd, left); + if (right) emit_shift_imm(t->mc, w, bf.signed_ ? 7u : 5u, rd, right); +} + +/* Read-modify-write: clear the field bits in the storage unit via AND ~mask, + * mask/shift the source into place, OR it in, write back. RAX holds the + * storage word; RCX is the staged value; RDX holds the source-side mask when + * needed. Avoids touching the base register. */ +static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src, + BitFieldAccess bf) { + u32 base = agg_addr_reg(t, record_addr, X64_R11); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + int w = (storage_bytes == 8u) ? 1 : 0; + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + u64 ones = (width >= 64u) ? ~(u64)0 : (((u64)1 << width) - 1u); + u64 mask = ones << lsb; + + emit_mov_load(t->mc, storage_bytes, 0, X64_RAX, base, (i32)bf.storage_offset); + x64_emit_load_imm(t->mc, w, X64_RCX, (i64)~mask); + emit_alu_rr(t->mc, w, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */ + + if (src.kind == OPK_IMM) { + u64 v = ((u64)src.v.imm & ones) << lsb; + x64_emit_load_imm(t->mc, w, X64_RCX, (i64)v); + } else if (src.kind == OPK_REG) { + emit_mov_rr(t->mc, w, X64_RCX, src.v.reg & 0xFu); + x64_emit_load_imm(t->mc, w, X64_RDX, (i64)ones); + emit_alu_rr(t->mc, w, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */ + if (lsb) emit_shift_imm(t->mc, w, 4u, X64_RCX, (u8)lsb); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "x64 bitfield_store: src kind %d unsupported", + (int)src.kind); + } + emit_alu_rr(t->mc, w, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */ + emit_mov_store(t->mc, storage_bytes, X64_RAX, base, (i32)bf.storage_offset); +} + +/* ============================================================ + * Arithmetic */ + +static void 
x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, + Operand b_op) { + MCEmitter* mc = t->mc; + + /* FP binops. */ + if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) { + u32 rd = dst.v.reg & 0xFu; + u32 ra = a_op.v.reg & 0xFu; + u32 rb = b_op.v.reg & 0xFu; + u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; + if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra); + u8 opcode; + switch (op) { + case BO_FADD: opcode = 0x58; break; + case BO_FSUB: opcode = 0x5C; break; + case BO_FMUL: opcode = 0x59; break; + case BO_FDIV: opcode = 0x5E; break; + default: opcode = 0x58; break; + } + emit_sse_rr(mc, prefix2, opcode, rd, rb); + return; + } + + int w = type_is_64(dst.type) ? 1 : 0; + u32 rd = dst.v.reg & 0xFu; + + /* Division: idiv/div uses rax/rdx implicitly. Route divisor through r11 + * if it would otherwise be rax/rdx. */ + if (op == BO_SDIV || op == BO_UDIV || op == BO_SREM || op == BO_UREM) { + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra); + u32 rb; + if (b_op.kind == OPK_REG) { + rb = b_op.v.reg & 0xFu; + if (rb == X64_RAX || rb == X64_RDX) { + emit_mov_rr(mc, w, X64_R11, rb); + rb = X64_R11; + } + } else if (b_op.kind == OPK_IMM) { + x64_emit_load_imm(mc, w, X64_R11, b_op.v.imm); + rb = X64_R11; + } else { + compiler_panic(t->c, impl_of(t)->loc, + "x64 div: divisor kind %d unsupported", (int)b_op.kind); + } + if (op == BO_SDIV || op == BO_SREM) { + emit_cqo_or_cdq(mc, w); + emit_f7_rm(mc, w, 7u, rb); /* idiv */ + } else { + emit_xor_self(mc, w, X64_RDX); + emit_f7_rm(mc, w, 6u, rb); /* div */ + } + u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX; + if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg); + return; + } + + /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1 + * /sub ib). Use the imm form when b is OPK_IMM and skip materializing + * into cl. 
*/ + if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) { + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u); + if (b_op.kind == OPK_IMM) { + u32 width = w ? 64u : 32u; + emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u))); + return; + } + if (b_op.kind == OPK_REG) { + u32 rb = b_op.v.reg & 0xFu; + if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "x64 shift: count kind %d unsupported", (int)b_op.kind); + } + emit_shift_cl(mc, w, sub, rd); + return; + } + + /* For commutative ops, canonicalize IMM to the RHS so the imm-form + * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS + * still materializes. */ + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: + case BO_IMUL: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding + * reads-and-writes a single reg — copy ra → dst first, then `dst OP= + * imm`. For IMUL the imm form is three-operand (`dst = src * imm`) + * and reads from `ra` directly without the prep copy. 
*/ + if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG && + (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR || + op == BO_XOR || op == BO_IMUL)) { + i64 imm = b_op.v.imm; + u32 ra = a_op.v.reg & 0xFu; + if (op == BO_IMUL) { + if (imm_fits_i8(imm)) { + emit_imul_imm8(mc, w, rd, ra, (i8)imm); + return; + } + if (imm_fits_i32(imm)) { + emit_imul_imm32(mc, w, rd, ra, (i32)imm); + return; + } + } else { + u32 sub; + switch (op) { + case BO_IADD: sub = 0u; break; + case BO_OR: sub = 1u; break; + case BO_AND: sub = 4u; break; + case BO_ISUB: sub = 5u; break; + case BO_XOR: sub = 6u; break; + default: sub = 0u; break; /* unreachable */ + } + if (imm_fits_i8(imm)) { + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_alu_imm8(mc, w, sub, rd, (i8)imm); + return; + } + if (imm_fits_i32(imm)) { + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_alu_imm32(mc, w, sub, rd, (i32)imm); + return; + } + } + /* Fall through to materialize for >32-bit literals. */ + } + + /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */ + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); + switch (op) { + case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break; + case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break; + case BO_AND: emit_alu_rr(mc, w, 0x21, rd, rb); break; + case BO_OR: emit_alu_rr(mc, w, 0x09, rd, rb); break; + case BO_XOR: emit_alu_rr(mc, w, 0x31, rd, rb); break; + case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break; + default: + compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl", + (int)op); + } +} + +static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { + MCEmitter* mc = t->mc; + int w = type_is_64(dst.type) ? 1 : 0; + u32 rd = dst.v.reg & 0xFu; + /* IMM operand is legal per the CGTarget contract (arch.h); materialize + * into a scratch register when not already a register. 
cg folds + * literal unops upstream (cg_fold_unop), so this path is reached only + * when opt's emit hands us an unfolded literal. */ + u32 ra = x64_force_reg_int(t, a_op, w, X64_R11); + switch (op) { + case UO_NEG: + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_f7_rm(mc, w, 3u, rd); + return; + case UO_BNOT: + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_f7_rm(mc, w, 2u, rd); + return; + case UO_NOT: + /* !x → (x == 0) materialized as 0/1 in dst. */ + emit_test_self(mc, w, ra); + emit_setcc(mc, X64_CC_E, rd); + emit_movzx_r32_r8(mc, rd, rd); + return; + default: + compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl", + (int)op); + } +} + +static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 rd = dst.v.reg & 0xFu; + u32 rs = src.v.reg & 0xFu; + switch (k) { + case CV_SEXT: { + u32 src_bytes = type_byte_size(src.type); + int w = type_is_64(dst.type) ? 1 : 0; + emit_extend_rr(mc, w, /*signed=*/1, src_bytes, rd, rs); + return; + } + case CV_ZEXT: { + u32 src_bytes = type_byte_size(src.type); + int w = type_is_64(dst.type) ? 1 : 0; + emit_extend_rr(mc, w, /*signed=*/0, src_bytes, rd, rs); + return; + } + case CV_TRUNC: { + /* In-reg truncation: `mov r32, r32` clears high 32. Narrower stores + * select width themselves. */ + emit_mov_rr(mc, 0, rd, rs); + return; + } + case CV_ITOF_S: + case CV_ITOF_U: { + int w_src = type_is_64(src.type) ? 1 : 0; + u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; + if (k == CV_ITOF_U && w_src == 1) { + compiler_panic(t->c, a->loc, + "x64 convert: u64→fp not yet implemented"); + } + if (k == CV_ITOF_U) { + /* u32→fp: zero-extend to 64-bit, then signed cvtsi2sd works. */ + emit_extend_rr(mc, 0, 0, 4, X64_R11, rs); + rs = X64_R11; + w_src = 1; + } + emit_sse_rr_w(mc, prefix2, 0x2A, w_src, rd, rs); + return; + } + case CV_FTOI_S: + case CV_FTOI_U: { + int w_dst = type_is_64(dst.type) ? 1 : 0; + u8 prefix2 = type_is_fp_double(src.type) ? 
0xF2 : 0xF3; + if (k == CV_FTOI_U && w_dst == 1) { + compiler_panic(t->c, a->loc, + "x64 convert: fp→u64 not yet implemented"); + } + emit_sse_rr_w(mc, prefix2, 0x2C, w_dst, rd, rs); + return; + } + case CV_FEXT: + emit_sse_rr(mc, 0xF3, 0x5A, rd, rs); + return; + case CV_FTRUNC: + emit_sse_rr(mc, 0xF2, 0x5A, rd, rs); + return; + case CV_BITCAST: { + /* movd/movq between xmm and GPR. */ + if (src.cls == RC_INT && dst.cls == RC_FP) { + int w = type_is_64(dst.type) ? 1 : 0; + emit_sse_rr_w(mc, 0x66, 0x6E, w, rd, rs); + } else if (src.cls == RC_FP && dst.cls == RC_INT) { + int w = type_is_64(src.type) ? 1 : 0; + emit_sse_rr_w(mc, 0x66, 0x7E, w, rs, rd); + } else { + compiler_panic(t->c, a->loc, + "x64 convert BITCAST: same-class not supported"); + } + return; + } + default: + compiler_panic(t->c, a->loc, "x64 convert kind %d unimpl", (int)k); + } +} + +/* ============================================================ + * Calls / return */ + +static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, + u32* next_fp, u32* stack_off) { + XImpl* a = impl_of(t); + /* Synthesize one-part DIRECT for variadic args (av->abi NULL). */ + ABIArgInfo va_ai; + ABIArgPart va_pt; + const ABIArgInfo* ai = av->abi; + if (!ai) { + u32 sz = type_byte_size(av->type); + memset(&va_ai, 0, sizeof va_ai); + memset(&va_pt, 0, sizeof va_pt); + va_ai.kind = ABI_ARG_DIRECT; + va_ai.parts = &va_pt; + va_ai.nparts = 1; + va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT; + va_pt.size = sz; + va_pt.align = sz; + va_pt.src_offset = 0; + ai = &va_ai; + } + if (ai->kind == ABI_ARG_IGNORE) return; + if (ai->kind == ABI_ARG_INDIRECT) { + /* Pass &av->storage_local in the next int arg reg. */ + u32 dst_reg = (*next_int < 6) ? g_int_arg_regs[(*next_int)++] : X64_RAX; + int to_stack = (*next_int > 6) || (dst_reg == X64_RAX && *next_int == 6); + /* Above is awkward — recompute clearly: */ + if (*next_int >= 6 + (a->has_sret ? 
0 : 0)) { + /* (next_int was already bumped past 6) — stack route */ + } + to_stack = (dst_reg == X64_RAX); + if (av->storage.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot"); + emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off); + } else if (av->storage.kind == OPK_INDIRECT) { + emit_lea(t->mc, dst_reg, av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs); + } else { + compiler_panic(t->c, a->loc, + "x64 call: INDIRECT arg storage kind %d unsupported", + (int)av->storage.kind); + } + if (to_stack) { + emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); + *stack_off += 8; + } + return; + } + + for (u16 i = 0; i < ai->nparts; ++i) { + const ABIArgPart* pt = &ai->parts[i]; + u32 sz = pt->size; + if (pt->cls == ABI_CLASS_INT) { + int to_stack = (*next_int >= 6); + u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++]; + switch (av->storage.kind) { + case OPK_IMM: { + int w = (sz == 8) ? 1 : 0; + x64_emit_load_imm(t->mc, w, dst_reg, av->storage.v.imm); + break; + } + case OPK_REG: { + int w = (sz == 8) ? 1 : 0; + u32 sr = av->storage.v.reg & 0xFu; + if (sr != dst_reg) emit_mov_rr(t->mc, w, dst_reg, sr); + break; + } + case OPK_LOCAL: { + XSlot* s = x64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 call: bad arg slot"); + emit_mov_load(t->mc, sz, 0, dst_reg, X64_RBP, + -(i32)s->off + (i32)pt->src_offset); + break; + } + case OPK_INDIRECT: { + /* cg holds INDIRECT base regs in {RBX, R10, R12..R15}, disjoint + * from arg regs (RDI/RSI/RDX/RCX/R8/R9) and the RAX scratch, so + * the base survives across the part loop. 
*/ + emit_mov_load(t->mc, sz, 0, dst_reg, av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); + break; + } + default: + compiler_panic(t->c, a->loc, + "x64 call: arg storage kind %d unsupported", + (int)av->storage.kind); + } + if (to_stack) { + emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); + *stack_off += 8; + } + } else if (pt->cls == ABI_CLASS_FP) { + int to_stack = (*next_fp >= 8); + u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; + if (!to_stack) { + u32 dst_x = (*next_fp)++; + if (av->storage.kind == OPK_REG) { + u32 sx = av->storage.v.reg & 0xFu; + if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx); + } else if (av->storage.kind == OPK_INDIRECT) { + emit_sse_load(t->mc, prefix2, 0x10, dst_x, + av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); + } else { + compiler_panic(t->c, a->loc, + "x64 call: FP arg storage kind %d unsupported", + (int)av->storage.kind); + } + } else { + if (av->storage.kind == OPK_REG) { + emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu, + X64_RSP, (i32)*stack_off); + } else if (av->storage.kind == OPK_INDIRECT) { + /* Load through xmm15 (scratch — last in g_fp_order so cg won't + * have it live mid-call) then store. */ + emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, + av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); + emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP, + (i32)*stack_off); + } else { + compiler_panic(t->c, a->loc, + "x64 call: FP stack-arg storage kind %d unsupported", + (int)av->storage.kind); + } + *stack_off += 8; + } + } else { + compiler_panic(t->c, a->loc, "x64 call: ABI class %d unimpl", + (int)pt->cls); + } + } +} + +static void x_call(CGTarget* t, const CGCallDesc* d) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + u32 next_int = 0, next_fp = 0, stack_off = 0; + + /* sret: caller puts destination pointer in rdi. 
*/ + if (d->abi && d->abi->has_sret) { + if (d->ret.storage.kind != OPK_LOCAL) { + compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL"); + } + XSlot* s = x64_slot_get(a, d->ret.storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot"); + emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off); + next_int = 1; + } + for (u32 i = 0; i < d->nargs; ++i) { + emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); + } + u32 needed = (stack_off + 15u) & ~15u; + if (needed > a->max_outgoing) a->max_outgoing = needed; + + /* Variadic calls: AL = number of XMM regs used. */ + if (d->abi && d->abi->variadic) { + x64_emit_load_imm(mc, 0, X64_RAX, (i64)next_fp); + } + + if (d->callee.kind == OPK_GLOBAL) { + /* call rel32: E8 + disp32 + R_X64_PLT32. */ + u8 op = 0xE8; + mc->emit_bytes(mc, &op, 1); + u32 disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, + d->callee.v.global.sym, + d->callee.v.global.addend - 4, 1, 0); + } else if (d->callee.kind == OPK_REG) { + u32 r = d->callee.v.reg & 0xFu; + emit_rex(mc, 0, 0, 0, r); + u8 buf[2] = {0xFF, modrm(3u, 2u, r)}; + mc->emit_bytes(mc, buf, 2); + } else { + compiler_panic(t->c, a->loc, "x64 call: callee kind %d unsupported", + (int)d->callee.kind); + } + + /* Receive return value. 
*/ + const ABIArgInfo* ri = &d->abi->ret; + if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return; + if (ri->nparts == 0) return; + + Operand rs = d->ret.storage; + u32 next_int_ret = 0, next_fp_ret = 0; + static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX}; + for (u16 i = 0; i < ri->nparts; ++i) { + const ABIArgPart* p = &ri->parts[i]; + u32 src_reg; + if (p->cls == ABI_CLASS_INT) src_reg = ret_int_regs[next_int_ret++]; + else if (p->cls == ABI_CLASS_FP) src_reg = (u32)(X64_XMM0 + next_fp_ret++); + else compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl", + (int)p->cls); + + if (rs.kind == OPK_REG) { + if (ri->nparts != 1) { + compiler_panic(t->c, a->loc, + "x64 call: REG ret_storage with %u parts", + (unsigned)ri->nparts); + } + if (p->cls == ABI_CLASS_INT) { + int w = (p->size == 8) ? 1 : 0; + u32 dr = rs.v.reg & 0xFu; + if (dr != src_reg) emit_mov_rr(mc, w, dr, src_reg); + } else { + u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; + u32 dr = rs.v.reg & 0xFu; + if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg); + } + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot"); + base_reg = X64_RBP; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0xFu; + base_off = rs.v.ind.ofs; + } + i32 off = base_off + (i32)p->src_offset; + if (p->cls == ABI_CLASS_INT) { + emit_mov_store(mc, p->size, src_reg, base_reg, off); + } else { + u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; + emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off); + } + } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { + /* void ret placeholder — nothing to do. 
*/ + } else { + compiler_panic(t->c, a->loc, + "x64 call: ret_storage kind %d unsupported", + (int)rs.kind); + } + } +} + +static void x_ret(CGTarget* t, const CGABIValue* val) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + + if (val) { + const ABIArgInfo* ri = val->abi; + if (ri && ri->kind == ABI_ARG_INDIRECT) { + /* sret: reload destination pointer into rdi, memcpy source into [rdi]. */ + u32 src_base; + i32 src_base_off; + u32 nbytes; + if (val->storage.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot"); + src_base = X64_RBP; + src_base_off = -(i32)s->off; + nbytes = s->size; + } else if (val->storage.kind == OPK_INDIRECT) { + src_base = val->storage.v.ind.base & 0xFu; + src_base_off = val->storage.v.ind.ofs; + nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "x64 ret indirect: missing aggregate size"); + } + } else { + compiler_panic(t->c, a->loc, + "x64 ret indirect: storage kind %d unsupported", + (int)val->storage.kind); + } + if (a->sret_ptr_slot != FRAME_SLOT_NONE) { + XSlot* sp = x64_slot_get(a, a->sret_ptr_slot); + if (sp) emit_mov_load(mc, 8, 0, X64_RDI, X64_RBP, -(i32)sp->off); + } + u32 i = 0; + while (i + 8 <= nbytes) { + emit_mov_load(mc, 8, 0, X64_RAX, src_base, src_base_off + (i32)i); + emit_mov_store(mc, 8, X64_RAX, X64_RDI, (i32)i); + i += 8; + } + while (i + 4 <= nbytes) { + emit_mov_load(mc, 4, 0, X64_RAX, src_base, src_base_off + (i32)i); + emit_mov_store(mc, 4, X64_RAX, X64_RDI, (i32)i); + i += 4; + } + while (i + 2 <= nbytes) { + emit_mov_load(mc, 2, 0, X64_RAX, src_base, src_base_off + (i32)i); + emit_mov_store(mc, 2, X64_RAX, X64_RDI, (i32)i); + i += 2; + } + while (i < nbytes) { + emit_mov_load(mc, 1, 0, X64_RAX, src_base, src_base_off + (i32)i); + emit_mov_store(mc, 1, X64_RAX, X64_RDI, (i32)i); + i += 1; + } + /* Convention: return sret pointer in rax. 
*/ + emit_mov_rr(mc, 1, X64_RAX, X64_RDI); + } else if (val->storage.kind == OPK_REG) { + if (val->storage.cls == RC_FP) { + u8 prefix2 = type_is_fp_double(val->storage.type) ? 0xF2 : 0xF3; + u32 sr = val->storage.v.reg & 0xFu; + if (sr != X64_XMM0) emit_sse_rr(mc, prefix2, 0x10, X64_XMM0, sr); + } else { + int w = type_is_64(val->storage.type) ? 1 : 0; + u32 sr = val->storage.v.reg & 0xFu; + if (sr != X64_RAX) emit_mov_rr(mc, w, X64_RAX, sr); + } + } else if (val->storage.kind == OPK_IMM) { + int w = type_is_64(val->storage.type) ? 1 : 0; + x64_emit_load_imm(mc, w, X64_RAX, val->storage.v.imm); + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { + /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */ + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + XSlot* s = x64_slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad local slot"); + base_reg = X64_RBP; + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0xFu; + base_off = val->storage.v.ind.ofs; + } + const ABIArgInfo* ri2 = val->abi; + u32 next_int_ret = 0, next_fp_ret = 0; + static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX}; + for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { + const ABIArgPart* pt = &ri2->parts[i]; + i32 off = base_off + (i32)pt->src_offset; + if (pt->cls == ABI_CLASS_INT) { + emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++], + base_reg, off); + } else if (pt->cls == ABI_CLASS_FP) { + u8 prefix2 = (pt->size == 8) ? 0xF2 : 0xF3; + emit_sse_load(mc, prefix2, 0x10, (u32)(X64_XMM0 + next_fp_ret++), + base_reg, off); + } else { + compiler_panic(t->c, a->loc, "x64 ret: ret part cls %d unimpl", + (int)pt->cls); + } + } + } + } + emit_jmp_label(mc, a->epilogue_label); +} + +/* ============================================================ + * Alloca / VLA. 
+ * + * Layout (low → high addresses, after a `sub rsp, aligned_size`): + * [rsp + 0, +max_outgoing): outgoing-arg area + * [rsp + max_outgoing, +max_outgoing +aligned_size): newly allocated block + * + * max_outgoing is only known at func_end (it is the max across all + * x_call sites in the function), so each alloca emits a placeholder + * `lea dst, [rsp + 0]` whose 4-byte disp is patched at func_end. The + * epilogue restores rsp via `leave` (mov rsp, rbp; pop rbp), so no + * extra dance is needed when alloca is present. */ + +static void emit_lea_rsp_disp32(MCEmitter* mc, u32 dst, u32* out_disp_pos) { + /* Force the disp32 form (mod=10, rm=SIB, base=rsp, no index, scale=0) + * regardless of the displacement value so func_end has a fixed-width + * field to patch. 8 bytes: REX.W [+R] | 0x8D | ModRM | SIB | disp32. */ + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, 1, dst, 0, X64_RSP); + u8 op = 0x8D; + mc->emit_bytes(mc, &op, 1); + u8 mr = modrm(2u, dst & 7u, 4u); + mc->emit_bytes(mc, &mr, 1); + u8 s = sib(0, 4u, X64_RSP); + mc->emit_bytes(mc, &s, 1); + *out_disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +static void x_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (d.kind != OPK_REG) + compiler_panic(t->c, a->loc, "x64 alloca: dst must be REG"); + if (align > 16) { + compiler_panic(t->c, a->loc, + "x64 alloca: align %u > 16 not yet supported", align); + } + + if (sz.kind == OPK_IMM) { + i64 v = sz.v.imm; + if (v < 0) compiler_panic(t->c, a->loc, "x64 alloca: negative size"); + u64 aligned = ((u64)v + 15u) & ~(u64)15u; + if (aligned == 0) aligned = 16; + /* sub rsp, imm32 : REX.W 0x81 /5 imm32 (7 bytes). 
*/ + emit_rex(mc, 1, 0, 0, X64_RSP); + u8 buf[2] = {0x81, modrm(3u, 5u, X64_RSP)}; + mc->emit_bytes(mc, buf, 2); + emit_u32le(mc, (u32)aligned); + } else if (sz.kind == OPK_REG) { + u32 sz_reg = sz.v.reg & 0xFu; + /* rax = (sz_reg + 15) & ~15 */ + emit_lea(mc, X64_RAX, sz_reg, 15); + /* and rax, -16 : REX.W 0x83 /4 imm8(0xF0). */ + emit_rex(mc, 1, 0, 0, X64_RAX); + u8 abuf[3] = {0x83, modrm(3u, 4u, X64_RAX), 0xF0}; + mc->emit_bytes(mc, abuf, 3); + /* sub rsp, rax */ + emit_alu_rr(mc, 1, 0x29, X64_RSP, X64_RAX); + } else { + compiler_panic(t->c, a->loc, "x64 alloca: size kind %d unsupported", + (int)sz.kind); + } + + /* lea dst, [rsp + max_outgoing] — placeholder, patched at func_end. */ + if (a->nalloca_patches == a->alloca_patches_cap) { + u32 ncap = a->alloca_patches_cap ? a->alloca_patches_cap * 2u : 4u; + XAllocaPatch* nb = arena_array(t->c->tu, XAllocaPatch, ncap); + if (a->alloca_patches) + memcpy(nb, a->alloca_patches, sizeof(XAllocaPatch) * a->nalloca_patches); + a->alloca_patches = nb; + a->alloca_patches_cap = ncap; + } + u32 disp_pos; + emit_lea_rsp_disp32(mc, d.v.reg & 0xFu, &disp_pos); + a->alloca_patches[a->nalloca_patches].disp_pos = disp_pos; + a->nalloca_patches++; + a->has_alloca = 1; +} + +/* SysV AMD64 __va_list_tag (24 bytes, 8-aligned): + * off 0 u32 gp_offset next free GP slot in reg_save_area (0..48) + * off 4 u32 fp_offset next free FP slot (48..176) + * off 8 ptr overflow_arg_area pointer to next stack-passed arg + * off 16 ptr reg_save_area pointer to the 176-byte save area + * + * The reg_save_area layout (filled in func_begin): + * +0..+40 : rdi, rsi, rdx, rcx, r8, r9 (8B each) + * +48..+168 : xmm0..xmm7 at 16B stride (low 8B written via movsd) + * + * va_arg dispatches on dst class. When the relevant offset reaches its + * max (48 for GP, 176 for FP), fall through to overflow_arg_area at + * 8-byte stride. 
*/ + +static void x_va_start_(CGTarget* t, Operand ap_op) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (!a->is_variadic) + compiler_panic(t->c, a->loc, "x64 va_start: function not variadic"); + u32 ap = ap_op.v.reg & 0xFu; + XSlot* rs = x64_slot_get(a, a->reg_save_slot); + if (!rs) compiler_panic(t->c, a->loc, "x64 va_start: no reg_save_slot"); + + /* gp_offset = next_param_int * 8 */ + x64_emit_load_imm(mc, 0, X64_RAX, (i64)(a->next_param_int * 8u)); + emit_mov_store(mc, 4, X64_RAX, ap, 0); + /* fp_offset = 48 + next_param_fp * 16 */ + x64_emit_load_imm(mc, 0, X64_RAX, (i64)(48u + a->next_param_fp * 16u)); + emit_mov_store(mc, 4, X64_RAX, ap, 4); + /* overflow_arg_area = rbp + 16 + next_param_stack */ + emit_lea(mc, X64_RAX, X64_RBP, (i32)(16u + a->next_param_stack)); + emit_mov_store(mc, 8, X64_RAX, ap, 8); + /* reg_save_area = rbp - reg_save_slot.off */ + emit_lea(mc, X64_RAX, X64_RBP, -(i32)rs->off); + emit_mov_store(mc, 8, X64_RAX, ap, 16); +} + +static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op, + const Type* ty) { + MCEmitter* mc = t->mc; + u32 ap = ap_op.v.reg & 0xFu; + u32 sz = type_byte_size(ty); + int is_fp = (dst.cls == RC_FP); + u32 offs_field = is_fp ? 4u : 0u; + u32 max_offs = is_fp ? 176u : 48u; + u32 stride = is_fp ? 16u : 8u; + u32 dr = dst.v.reg & 0xFu; + + MCLabel L_stack = mc->label_new(mc); + MCLabel L_done = mc->label_new(mc); + + /* eax = ap[offs_field]; cmp eax, max_offs; jae L_stack. */ + emit_mov_load(mc, 4, 0, X64_RAX, ap, (i32)offs_field); + if (max_offs <= 127u) { + emit_cmp_imm8(mc, 0, X64_RAX, (i8)max_offs); + } else { + /* cmp eax, imm32 : 0x3D imm32 (5 bytes, EAX-specific form). 
*/ + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 op = 0x3D; + mc->emit_bytes(mc, &op, 1); + emit_u32le(mc, max_offs); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); + } + emit_jcc_label(mc, X64_CC_AE, L_stack); + + /* Reg path: + * r11 = ap[16] (reg_save_area) + * r11 = r11 + rax + * load dst from [r11 + 0] + * eax += stride; ap[offs_field] = eax + * jmp L_done */ + emit_mov_load(mc, 8, 0, X64_R11, ap, 16); + emit_alu_rr(mc, 1, 0x01, X64_R11, X64_RAX); + if (is_fp) { + u8 prefix = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0); + } else { + int sx = type_is_signed(ty); + emit_mov_load(mc, sz, sx, dr, X64_R11, 0); + } + /* add eax, imm8 : 0x83 /0 imm8 (no REX needed for eax). */ + { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 buf[3] = {0x83, modrm(3u, 0u, X64_RAX), (u8)stride}; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); + } + emit_mov_store(mc, 4, X64_RAX, ap, (i32)offs_field); + emit_jmp_label(mc, L_done); + + /* L_stack: + * r11 = ap[8] (overflow_arg_area) + * load dst from [r11 + 0] + * r11 += 8; ap[8] = r11 */ + mc->label_place(mc, L_stack); + emit_mov_load(mc, 8, 0, X64_R11, ap, 8); + if (is_fp) { + u8 prefix = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0); + } else { + int sx = type_is_signed(ty); + emit_mov_load(mc, sz, sx, dr, X64_R11, 0); + } + /* add r11, 8 : REX.WB 0x83 /0 imm8. 
*/ + { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 rex = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B); + mc->emit_bytes(mc, &rex, 1); + u8 buf[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8}; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); + } + emit_mov_store(mc, 8, X64_R11, ap, 8); + + mc->label_place(mc, L_done); +} + +static void x_va_end_(CGTarget* t, Operand a) { + (void)t; + (void)a; +} + +static void x_va_copy_(CGTarget* t, Operand d, Operand s) { + MCEmitter* mc = t->mc; + u32 dr = d.v.reg & 0xFu; + u32 sr = s.v.reg & 0xFu; + /* va_list is 24 bytes; three 8B loads + stores via rax. */ + for (u32 i = 0; i < 24u; i += 8u) { + emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); + } +} + +/* ============================================================ + * Atomics (Group K). + * + * x86 has a strong memory model: plain MOV is acquire on loads and + * release on stores, so most MemOrders need no extra fence. The + * exception is SEQ_CST stores, which need a full StoreLoad barrier — + * realized either via XCHG (which has implicit LOCK) or MOV+MFENCE. + * All LOCK-prefixed RMWs (XADD/XCHG/CMPXCHG) act as full barriers, + * subsuming any MemOrder the front end requests. */ + +static void emit_lock_prefix(MCEmitter* mc) { + u8 b = 0xF0; + mc->emit_bytes(mc, &b, 1); +} + +static void emit_mfence(MCEmitter* mc) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 b[3] = {0x0F, 0xAE, 0xF0}; + mc->emit_bytes(mc, b, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +static void emit_ud2(MCEmitter* mc) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 b[2] = {0x0F, 0x0B}; + mc->emit_bytes(mc, b, 2); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* LOCK XADD [base+disp], src. Opcode 0F C1 /r (32/64-bit; sets src=prior, + * mem=mem+src). 
*/ +static void emit_lock_xadd(MCEmitter* mc, int w, u32 src, u32 base, i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_lock_prefix(mc); + emit_rex(mc, w, src, 0, base); + u8 op[2] = {0x0F, 0xC1}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, src, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* XCHG [base+disp], src. Opcode 87 /r. LOCK is implicit when the + * destination is memory, but we emit it explicitly for clarity. */ +static void emit_lock_xchg_mem(MCEmitter* mc, int w, u32 src, u32 base, + i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_lock_prefix(mc); + emit_rex(mc, w, src, 0, base); + u8 op = 0x87; + mc->emit_bytes(mc, &op, 1); + emit_mem_operand(mc, src, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* LOCK CMPXCHG [base+disp], src. Opcode 0F B1 /r. Compares RAX with [mem]; + * if equal, [mem]=src and ZF=1; else RAX=[mem] and ZF=0. */ +static void emit_lock_cmpxchg(MCEmitter* mc, int w, u32 src, u32 base, + i32 disp) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_lock_prefix(mc); + emit_rex(mc, w, src, 0, base); + u8 op[2] = {0x0F, 0xB1}; + mc->emit_bytes(mc, op, 2); + emit_mem_operand(mc, src, base, disp); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* POPCNT rd, rs. Encoding: F3 0F B8 /r. */ +static void emit_popcnt(MCEmitter* mc, int w, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 p = 0xF3; + mc->emit_bytes(mc, &p, 1); + emit_rex(mc, w, dst, 0, src); + u8 op[2] = {0x0F, 0xB8}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* BSF/BSR rd, rs. opcode2 = 0xBC (BSF) or 0xBD (BSR). 
*/ +static void emit_bs(MCEmitter* mc, int w, u8 opcode2, u32 dst, u32 src) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 op[2] = {0x0F, opcode2}; + mc->emit_bytes(mc, op, 2); + emit_rm_reg(mc, dst, src); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* BSWAP r32/r64. Opcode 0F C8+r; REX.W for r64; REX.B if reg>=8. */ +static void emit_bswap(MCEmitter* mc, int w, u32 reg) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 op[2] = {0x0F, (u8)(0xC8 + (reg & 7))}; + mc->emit_bytes(mc, op, 2); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* ROL r/m16, imm8. Used to swap bytes in a 16-bit value (ROL by 8). */ +static void emit_rol16_imm8(MCEmitter* mc, u32 reg, u8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 p = 0x66; + mc->emit_bytes(mc, &p, 1); + emit_rex(mc, 0, 0, 0, reg); + u8 buf[3]; + buf[0] = 0xC1; + buf[1] = modrm(3u, 0u, reg & 7u); + buf[2] = imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* XOR r/m, imm32 — opcode 81 /6. Used to compute (bits-1) - x via XOR. */ +static void emit_xor_imm32(MCEmitter* mc, int w, u32 reg, i32 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 op = 0x81; + mc->emit_bytes(mc, &op, 1); + emit_rm_reg(mc, 6u, reg); + emit_u32le(mc, (u32)imm); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* Resolve an atomic addr operand to (base, disp) for a memory operand. + * Accepts OPK_REG (pointer in reg, disp=0), OPK_LOCAL, or OPK_INDIRECT. 
*/ +static u32 atomic_addr_base(CGTarget* t, Operand addr, i32* out_disp) { + if (addr.kind == OPK_REG) { + *out_disp = 0; + return addr.v.reg & 0xFu; + } + return addr_base(t, addr, out_disp); +} + +static void x_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, + MemOrder ord) { + MCEmitter* mc = t->mc; + (void)ord; /* x86: plain MOV satisfies all orders for loads. */ + u32 sz = ma.size ? ma.size : type_byte_size(dst.type); + i32 disp; + u32 base = atomic_addr_base(t, addr, &disp); + int signed_ = type_is_signed(ma.type ? ma.type : dst.type); + emit_mov_load(mc, sz, signed_, dst.v.reg & 0xFu, base, disp); +} + +static void x_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess ma, + MemOrder ord) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 sz = ma.size ? ma.size : type_byte_size(src.type); + int w = (sz == 8) ? 1 : 0; + i32 disp; + u32 base = atomic_addr_base(t, addr, &disp); + + /* Materialize src into a register. */ + u32 sr; + if (src.kind == OPK_IMM) { + x64_emit_load_imm(mc, w, X64_R11, src.v.imm); + sr = X64_R11; + } else if (src.kind == OPK_REG) { + sr = src.v.reg & 0xFu; + } else { + compiler_panic(t->c, a->loc, "x64 atomic_store: src kind %d unsupported", + (int)src.kind); + } + + if (ord == MO_SEQ_CST) { + /* SEQ_CST store: XCHG implicitly fences. Move src into r11 so the + * caller's reg is unmodified, then xchg [mem], r11. */ + if (sr != X64_R11) emit_mov_rr(mc, w, X64_R11, sr); + emit_lock_xchg_mem(mc, w, X64_R11, base, disp); + return; + } + /* Plain store covers RELAXED / RELEASE. */ + emit_mov_store(mc, sz, sr, base, disp); +} + +static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, + Operand val, MemAccess ma, MemOrder ord) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */ + u32 sz = ma.size ? ma.size : type_byte_size(dst.type); + int w = (sz == 8) ? 
1 : 0; + i32 disp; + u32 base = atomic_addr_base(t, addr, &disp); + u32 dr = dst.v.reg & 0xFu; + + /* Materialize val into r11 (it's our working temp). For SUB we negate + * it so the XADD does the subtraction. */ + if (val.kind == OPK_IMM) { + i64 v = val.v.imm; + if (op == AO_SUB) v = -v; + x64_emit_load_imm(mc, w, X64_R11, v); + } else if (val.kind == OPK_REG) { + u32 vr = val.v.reg & 0xFu; + if (vr != X64_R11) emit_mov_rr(mc, w, X64_R11, vr); + if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */ + } else { + compiler_panic(t->c, a->loc, "x64 atomic_rmw: val kind %d unsupported", + (int)val.kind); + } + + if (op == AO_ADD || op == AO_SUB) { + /* LOCK XADD [base], r11 — afterwards r11 holds prior. */ + emit_lock_xadd(mc, w, X64_R11, base, disp); + if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11); + return; + } + if (op == AO_XCHG) { + emit_lock_xchg_mem(mc, w, X64_R11, base, disp); + if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11); + return; + } + + /* AND/OR/XOR/NAND: CMPXCHG retry loop. + * + * mov rax, [mem] + * .retry: + * mov rcx, rax ; new = prior + * <op> rcx, r11 ; combine with val + * [NAND: not rcx] + * lock cmpxchg [mem], rcx + * jne .retry + * mov dr, rax + * + * rax = prior (cmpxchg implicit), rcx = new (scratch), r11 = val. 
*/ + emit_mov_load(mc, sz, 0, X64_RAX, base, disp); + MCLabel L_retry = mc->label_new(mc); + mc->label_place(mc, L_retry); + emit_mov_rr(mc, w, X64_RCX, X64_RAX); + switch (op) { + case AO_AND: + emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11); + break; + case AO_OR: + emit_alu_rr(mc, w, 0x09, X64_RCX, X64_R11); + break; + case AO_XOR: + emit_alu_rr(mc, w, 0x31, X64_RCX, X64_R11); + break; + case AO_NAND: + emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11); + emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */ + break; + default: + compiler_panic(t->c, a->loc, "x64 atomic_rmw: op %d unimpl", (int)op); + } + emit_lock_cmpxchg(mc, w, X64_RCX, base, disp); + emit_jcc_label(mc, X64_CC_NE, L_retry); + if (dr != X64_RAX) emit_mov_rr(mc, w, dr, X64_RAX); +} + +static void x_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, + Operand expected, Operand desired, MemAccess ma, + MemOrder succ, MemOrder fail) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + (void)succ; + (void)fail; + u32 sz = ma.size ? ma.size : type_byte_size(prior.type); + int w = (sz == 8) ? 1 : 0; + i32 disp; + u32 base = atomic_addr_base(t, addr, &disp); + + /* RAX = expected. */ + if (expected.kind == OPK_IMM) { + x64_emit_load_imm(mc, w, X64_RAX, expected.v.imm); + } else if (expected.kind == OPK_REG) { + u32 er = expected.v.reg & 0xFu; + if (er != X64_RAX) emit_mov_rr(mc, w, X64_RAX, er); + } else { + compiler_panic(t->c, a->loc, "x64 atomic_cas: exp kind %d unsupported", + (int)expected.kind); + } + /* R11 = desired. */ + if (desired.kind == OPK_IMM) { + x64_emit_load_imm(mc, w, X64_R11, desired.v.imm); + } else if (desired.kind == OPK_REG) { + u32 dr2 = desired.v.reg & 0xFu; + if (dr2 != X64_R11) emit_mov_rr(mc, w, X64_R11, dr2); + } else { + compiler_panic(t->c, a->loc, "x64 atomic_cas: des kind %d unsupported", + (int)desired.kind); + } + + emit_lock_cmpxchg(mc, w, X64_R11, base, disp); + + /* ok = ZF (success). 
*/ + u32 ok_r = ok.v.reg & 0xFu; + emit_setcc(mc, X64_CC_E, ok_r); + emit_movzx_r32_r8(mc, ok_r, ok_r); + + /* prior = rax. */ + u32 pr = prior.v.reg & 0xFu; + if (pr != X64_RAX) emit_mov_rr(mc, w, pr, X64_RAX); +} + +static void x_fence(CGTarget* t, MemOrder o) { + /* x86: only SEQ_CST needs an explicit StoreLoad barrier. RELAXED is + * a no-op; ACQUIRE/RELEASE/ACQ_REL are satisfied by plain MOV. */ + if (o == MO_SEQ_CST) emit_mfence(t->mc); +} + +/* ============================================================ + * Intrinsics (Group L). */ + +static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, + const Operand* args, u32 na) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + (void)nd; + (void)na; + + switch (kind) { + case INTRIN_POPCOUNT: { + Operand src = args[0]; + Operand dst = dsts[0]; + int w = type_is_64(src.type) ? 1 : 0; + emit_popcnt(mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu); + return; + } + case INTRIN_CTZ: { + /* BSF gives the index of the lowest set bit (undefined for 0). */ + Operand src = args[0]; + Operand dst = dsts[0]; + int w = type_is_64(src.type) ? 1 : 0; + emit_bs(mc, w, 0xBC, dst.v.reg & 0xFu, src.v.reg & 0xFu); + return; + } + case INTRIN_CLZ: { + /* BSR gives the index of the highest set bit; clz = (bits-1) - bsr. + * XOR with (bits-1) computes the subtraction for in-range values. */ + Operand src = args[0]; + Operand dst = dsts[0]; + int w = type_is_64(src.type) ? 1 : 0; + u32 dr = dst.v.reg & 0xFu; + emit_bs(mc, w, 0xBD, dr, src.v.reg & 0xFu); + emit_xor_imm32(mc, w, dr, w ? 
63 : 31); + return; + } + case INTRIN_BSWAP16: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 dr = dst.v.reg & 0xFu; + u32 sr = src.v.reg & 0xFu; + if (dr != sr) emit_mov_rr(mc, 0, dr, sr); + emit_rol16_imm8(mc, dr, 8); + return; + } + case INTRIN_BSWAP32: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 dr = dst.v.reg & 0xFu; + u32 sr = src.v.reg & 0xFu; + if (dr != sr) emit_mov_rr(mc, 0, dr, sr); + emit_bswap(mc, 0, dr); + return; + } + case INTRIN_BSWAP64: { + Operand src = args[0]; + Operand dst = dsts[0]; + u32 dr = dst.v.reg & 0xFu; + u32 sr = src.v.reg & 0xFu; + if (dr != sr) emit_mov_rr(mc, 1, dr, sr); + emit_bswap(mc, 1, dr); + return; + } + case INTRIN_MEMCPY: + case INTRIN_MEMMOVE: { + /* args = (dst_addr, src_addr, n_bytes). v1: const n, REG ptrs. */ + Operand da = args[0], sa = args[1], nb = args[2]; + if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic(t->c, a->loc, + "x64 intrinsic: %s with non-const n or non-REG ptr", + kind == INTRIN_MEMCPY ? "memcpy" : "memmove"); + } + u32 dr = da.v.reg & 0xFu; + u32 sr = sa.v.reg & 0xFu; + u32 n = (u32)nb.v.imm; + if (kind == INTRIN_MEMCPY) { + u32 i = 0; + while (i + 8 <= n) { + emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); + i += 8; + } + while (i + 4 <= n) { + emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); + i += 4; + } + while (i + 2 <= n) { + emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); + i += 2; + } + while (i < n) { + emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); + i += 1; + } + } else { + /* memmove: copy backward so dst>src overlap is safe. 
*/ + u32 i = n; + while (i >= 8) { + i -= 8; + emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); + } + while (i >= 4) { + i -= 4; + emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); + } + while (i >= 2) { + i -= 2; + emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); + } + while (i >= 1) { + i -= 1; + emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i); + emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); + } + } + return; + } + case INTRIN_MEMSET: { + /* args = (dst_addr, byte, n). */ + Operand da = args[0], bv = args[1], nb = args[2]; + if (da.kind != OPK_REG || nb.kind != OPK_IMM) { + compiler_panic(t->c, a->loc, + "x64 intrinsic: memset with non-const n / non-REG ptr"); + } + u32 dr = da.v.reg & 0xFu; + u32 n = (u32)nb.v.imm; + /* Build a 64-bit value with the byte broadcast across all 8 bytes. */ + if (bv.kind == OPK_IMM) { + u8 byte = (u8)(bv.v.imm & 0xffu); + u64 b64 = byte; + b64 |= b64 << 8; + b64 |= b64 << 16; + b64 |= b64 << 32; + x64_emit_load_imm(mc, 1, X64_RAX, (i64)b64); + } else if (bv.kind == OPK_REG) { + /* Broadcast low byte of bv across 8 bytes: rax = bv * 0x0101010101010101. */ + x64_emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll); + emit_mov_rr(mc, 1, X64_RAX, bv.v.reg & 0xFu); + emit_imul_rr(mc, 1, X64_RAX, X64_R11); + } else { + compiler_panic(t->c, a->loc, + "x64 intrinsic: memset byte kind %d unsupported", + (int)bv.kind); + } + u32 i = 0; + while (i + 8 <= n) { + emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); + i += 8; + } + while (i + 4 <= n) { + emit_mov_store(mc, 4, X64_RAX, dr, (i32)i); + i += 4; + } + while (i + 2 <= n) { + emit_mov_store(mc, 2, X64_RAX, dr, (i32)i); + i += 2; + } + while (i < n) { + emit_mov_store(mc, 1, X64_RAX, dr, (i32)i); + i += 1; + } + return; + } + case INTRIN_PREFETCH: + /* Drop the hint. */ + return; + case INTRIN_ASSUME_ALIGNED: { + /* dst = src (alignment is a hint only). 
*/ + Operand src = args[0]; + Operand dst = dsts[0]; + u32 dr = dst.v.reg & 0xFu; + u32 sr = src.v.reg & 0xFu; + if (dr != sr) emit_mov_rr(mc, 1, dr, sr); + return; + } + case INTRIN_EXPECT: { + /* dst = val; expected hint dropped. */ + Operand val = args[0]; + Operand dst = dsts[0]; + int w = type_is_64(dst.type) ? 1 : 0; + u32 dr = dst.v.reg & 0xFu; + if (val.kind == OPK_REG) { + u32 sr = val.v.reg & 0xFu; + if (sr != dr) emit_mov_rr(mc, w, dr, sr); + } else if (val.kind == OPK_IMM) { + x64_emit_load_imm(mc, w, dr, val.v.imm); + } else { + compiler_panic(t->c, a->loc, + "x64 intrinsic: expect val kind %d unsupported", + (int)val.kind); + } + return; + } + case INTRIN_UNREACHABLE: + case INTRIN_TRAP: + emit_ud2(mc); + return; + case INTRIN_ADD_OVERFLOW: + case INTRIN_SUB_OVERFLOW: { + /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO captures. */ + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int w = type_is_64(dval.type) ? 1 : 0; + u32 rd = dval.v.reg & 0xFu; + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); + u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29; + emit_alu_rr(mc, w, op, rd, rb); + u32 dovf_r = dovf.v.reg & 0xFu; + emit_setcc(mc, X64_CC_O, dovf_r); + emit_movzx_r32_r8(mc, dovf_r, dovf_r); + return; + } + case INTRIN_MUL_OVERFLOW: { + /* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed + * two-operand form: low 32 bits of product go to dst, OF set if + * the result didn't fit. i64 not yet supported. */ + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int w = type_is_64(dval.type) ? 
1 : 0; + if (w) { + compiler_panic(t->c, a->loc, + "x64 intrinsic: mul_overflow on i64 not yet supported"); + } + u32 rd = dval.v.reg & 0xFu; + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); + emit_imul_rr(mc, w, rd, rb); + u32 dovf_r = dovf.v.reg & 0xFu; + emit_setcc(mc, X64_CC_O, dovf_r); + emit_movzx_r32_r8(mc, dovf_r, dovf_r); + return; + } + default: + compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported", + (int)kind); + } +} +static void x_asm_block(CGTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 no, Operand* oo, + const AsmConstraint* ins, u32 ni, const Operand* io, + const Sym* clobs, u32 nc) { + (void)tmpl; + (void)outs; + (void)no; + (void)oo; + (void)ins; + (void)ni; + (void)io; + (void)clobs; + (void)nc; + x_panic(t, "asm_block"); +} + +static void x_set_loc(CGTarget* t, SrcLoc l) { + ((XImpl*)t)->loc = l; + if (t->mc) t->mc->set_loc(t->mc, l); +} + +static void x_finalize(CGTarget* t) { (void)t; } +static void x_destroy(CGTarget* t) { (void)t; } + +static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } + +CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { + XImpl* x = arena_new(c->tu, XImpl); + memset(x, 0, sizeof *x); + + CGTarget* t = &x->base; + t->c = c; + t->obj = o; + t->mc = m; + + t->func_begin = x_func_begin; + t->func_end = x_func_end; + + t->alloc_reg = x_alloc_reg; + t->free_reg = x_free_reg; + t->frame_slot = x_frame_slot; + t->param = x_param; + t->clobbers = x_clobbers; + t->spill_reg = x_spill_reg; + t->reload_reg = x_reload_reg; + + t->label_new = x_label_new; + t->label_place = x_label_place; + t->jump = x_jump; + t->cmp_branch = x_cmp_branch; + + t->scope_begin = x_scope_begin; + t->scope_else = x_scope_else; + t->scope_end = x_scope_end; + t->break_to = x_break_to; + t->continue_to = x_continue_to; + + t->load_imm = x_load_imm; + t->load_const = x_load_const; + t->copy = 
x_copy; + t->load = x_load; + t->store = x_store; + t->addr_of = x_addr_of; + t->tls_addr_of = x_tls_addr_of; + t->copy_bytes = x_copy_bytes; + t->set_bytes = x_set_bytes; + t->bitfield_load = x_bitfield_load; + t->bitfield_store = x_bitfield_store; + + t->binop = x_binop; + t->unop = x_unop; + t->cmp = x_cmp; + t->convert = x_convert; + + t->call = x_call; + t->ret = x_ret; + + t->alloca_ = x_alloca_; + t->va_start_ = x_va_start_; + t->va_arg_ = x_va_arg_; + t->va_end_ = x_va_end_; + t->va_copy_ = x_va_copy_; + + t->setjmp_ = NULL; + t->longjmp_ = NULL; + + t->atomic_load = x_atomic_load; + t->atomic_store = x_atomic_store; + t->atomic_rmw = x_atomic_rmw; + t->atomic_cas = x_atomic_cas; + t->fence = x_fence; + + t->intrinsic = x_intrinsic; + t->asm_block = x_asm_block; + + t->set_loc = x_set_loc; + t->finalize = x_finalize; + t->destroy = x_destroy; + + compiler_defer(c, cgt_cleanup, t); + return t; +} diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -109,8 +109,87 @@ struct Linker { CompilerCleanup* deferred; /* registered by link_new */ }; -/* Defined in link_layout.c. */ +/* ---- GC liveness (link_resolve.c) ---------------------------------------- */ + +typedef struct GcLive { + u8** marks; /* marks[input_idx][obj_sec_id]; 0/1, sized to nsec_per_input[ii] + */ + u32* nsec; /* obj_section_count per input */ + u32 ninputs; +} GcLive; + +typedef struct GcQueue { + u64* items; /* (u64) packed: hi32 = input_idx, lo32 = obj_sec_id */ + u32 n; + u32 cap; +} GcQueue; + +/* ---- Cross-file helpers (link_layout.c → link_reloc_layout.c) ------------ */ + +/* Four-bucket segment partitioning by permission (defined in link_layout.c). */ +typedef enum SegBucket { + SEG_RX = 0, /* SF_ALLOC | SF_EXEC */ + SEG_R = 1, /* SF_ALLOC, no EXEC, no WRITE */ + SEG_RW = 2, /* SF_ALLOC | SF_WRITE (incl. 
BSS) */ + SEG_TLS = 3, /* SF_ALLOC | SF_TLS (.tdata + .tbss) */ + SEG_NBUCKETS = 4, +} SegBucket; + +/* section_kept: 1 for allocatable progbits/nobits sections (link_layout.c). */ +int link_section_kept(const Section* s); +/* bucket_for: map section flags to SegBucket (link_layout.c). */ +SegBucket link_bucket_for(u16 flags); +/* layout_page_size: page size for segment alignment (link_layout.c). */ +u64 link_layout_page_size(Linker* l); + +/* Append a fresh symbol slot and return its id (link_layout.c). */ +LinkSymId link_append_symbol(LinkImage* img, const LinkSymbol* tmpl); +/* Append a fresh reloc slot and return it (link_layout.c). */ +LinkRelocApply* link_append_reloc_slot(LinkImage* img); + +/* Emit or upsert a synthetic global boundary symbol (link_layout.c). */ +void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, + u64 vaddr); + +/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier. + * Defined in link_resolve.c; used by link_reloc_layout.c. */ +int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off, + size_t* out_len, int* out_is_start); + +/* GC liveness helpers (link_resolve.c). */ +int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j); + +/* Segment/section growth helpers for iplt (link_reloc_layout.c). 
*/ +u32 link_iplt_alloc_segments(LinkImage* img, u32 nseg); +u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec); + +/* ---- Public entries (link_resolve.c) -------------------------------------- */ void link_ingest_archives(struct Linker*); +void link_resolve_symbols(struct Linker*, LinkImage*); +void link_resolve_undefs(struct Linker*, LinkImage*); +void link_gc_compute(struct Linker*, LinkImage*, GcLive*); +void link_gc_live_alloc(GcLive* g, struct Linker* l, Heap* h); +void link_gc_live_free(GcLive* g, Heap* h); +void link_gc_drop_dead_globals(struct Linker*, LinkImage*, const GcLive*); + +/* ---- Public entries (link_layout.c) --------------------------------------- */ +void link_layout_sections(struct Linker*, LinkImage*, const GcLive*); +void link_layout_commons(struct Linker*, LinkImage*); +void link_emit_segment_bytes(struct Linker*, LinkImage*); + +/* ---- Public entries (link_reloc_layout.c) --------------------------------- */ +void link_assign_symbol_vaddrs(struct Linker*, LinkImage*); +void link_emit_array_boundaries(struct Linker*, LinkImage*); +void link_emit_tls_boundaries(struct Linker*, LinkImage*); +void link_emit_encoding_section_boundaries(struct Linker*, LinkImage*); +void link_layout_jit_stubs(struct Linker*, LinkImage*, u32 map_size, + LinkSymId** stub_map_out); +void link_layout_got(struct Linker*, LinkImage*, u32 map_size, + LinkSymId** got_map_out); +void link_layout_iplt(struct Linker*, LinkImage*); +void link_emit_relocations(struct Linker*, LinkImage*, const LinkSymId* got_map, + const LinkSymId* stub_map); +void link_resolve_entry(struct Linker*, LinkImage*); /* Defined in link.c. Walks the Linker's inputs and records each input's * ObjBuilder on the LinkImage so the JIT debug view can reach its diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -1,4 +1,6 @@ -/* link_resolve: builds a fresh LinkImage from the Linker's inputs. 
+/* link_layout.c — section bucketing, vaddr assignment, scripted layout, + * COMMON BSS allocation, segment-byte copying, and the top-level + * link_resolve orchestration function. * * Image-relative discipline: every vaddr / file_offset on the produced * image treats the image as based at 0. Consumers (link_emit_elf, @@ -32,26 +34,15 @@ static SrcLoc no_loc(void) { * back to 16 KiB otherwise — large enough for any current Linux/aarch64 * loader. A future cross-link with mismatched host/target page sizes * will need a target-derived value here instead. */ -static u64 layout_page_size(Linker* l) { +u64 link_layout_page_size(Linker* l) { const CfreeExecMem* m = (l && l->c && l->c->env) ? l->c->env->execmem : NULL; if (m && m->page_size) return (u64)m->page_size; return 0x4000u; } -/* Four-bucket segment partitioning by permission. TLS sections live - * in their own bucket: they're emitted as a PT_LOAD (so the kernel - * maps the .tdata template) and then referenced by a PT_TLS phdr; - * symbols in TLS sections need separate vaddr-to-offset arithmetic - * for TLSLE relocs. */ -typedef enum SegBucket { - SEG_RX = 0, /* SF_ALLOC | SF_EXEC */ - SEG_R = 1, /* SF_ALLOC, no EXEC, no WRITE */ - SEG_RW = 2, /* SF_ALLOC | SF_WRITE (incl. BSS) */ - SEG_TLS = 3, /* SF_ALLOC | SF_TLS (.tdata + .tbss) */ - SEG_NBUCKETS = 4, -} SegBucket; - -static int section_kept(const Section* s) { +/* Four-bucket segment partitioning: see SegBucket in link_internal.h. */ + +int link_section_kept(const Section* s) { /* This cut keeps allocatable progbits/nobits sections only. Debug, * symtab/strtab, group, and note sections are dropped — none of * them participate in a static ET_EXEC layout. 
*/ @@ -61,7 +52,7 @@ static int section_kept(const Section* s) { return 0; } -static SegBucket bucket_for(u16 flags) { +SegBucket link_bucket_for(u16 flags) { if (flags & SF_TLS) return SEG_TLS; if (flags & SF_EXEC) return SEG_RX; if (flags & SF_WRITE) return SEG_RW; @@ -82,7 +73,7 @@ static LinkSymbol* append_symbol_slot(LinkImage* img) { return s; } -static LinkSymId append_symbol(LinkImage* img, const LinkSymbol* tmpl) { +LinkSymId link_append_symbol(LinkImage* img, const LinkSymbol* tmpl) { LinkSymbol* s = append_symbol_slot(img); LinkSymId id = s->id; *s = *tmpl; @@ -90,593 +81,12 @@ static LinkSymId append_symbol(LinkImage* img, const LinkSymbol* tmpl) { return id; } -static LinkRelocApply* append_reloc_slot(LinkImage* img) { +LinkRelocApply* link_append_reloc_slot(LinkImage* img) { LinkRelocApply* r = LinkRelocs_push(&img->relocs, NULL); if (!r) compiler_panic(img->c, no_loc(), "link: oom growing relocs"); return r; } -/* ---- per-input symbol/section maps ---- */ - -static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) { - Heap* h = img->heap; - m->nsym = nsym; - m->sym = (LinkSymId*)h->alloc(h, sizeof(*m->sym) * nsym, _Alignof(LinkSymId)); - if (!m->sym) - compiler_panic(img->c, no_loc(), "link: oom on input symbol map"); - memset(m->sym, 0, sizeof(*m->sym) * nsym); - m->nsection = nsection; - m->section = (LinkSectionId*)h->alloc(h, sizeof(*m->section) * nsection, - _Alignof(LinkSectionId)); - if (!m->section) - compiler_panic(img->c, no_loc(), "link: oom on input section map"); - memset(m->section, 0, sizeof(*m->section) * nsection); -} - -/* ---- pass 1: collect symbols and pick section layout ---- */ - -/* Defined-symbol replacement policy: a stronger binding wins; a - * stronger binding seen second replaces the existing record in place. - * Two SB_GLOBAL definitions of the same name are a hard error. */ -static int bind_strength(u8 bind) { - /* Higher == stronger. 
*/ - switch (bind) { - case SB_GLOBAL: - return 3; - case SB_WEAK: - return 2; - case SB_LOCAL: - return 1; - default: - return 0; - } -} - -static void resolve_symbols(Linker* l, LinkImage* img) { - u32 ii; - /* Per-input pass: register every ObjSym (locals included), and - * insert defined globals/weaks into img->globals. Locals stay - * out of the hash. */ - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - LinkInput* in = LinkInputs_at(&l->inputs, ii); - ObjBuilder* ob = in->obj; - InputMap* m = &img->input_maps[ii]; - u32 nsym = obj_section_count(ob); - (void)nsym; - ObjSymIter* it; - ObjSymEntry e; - - /* DSO inputs do not contribute symbol definitions to the image — - * their exports satisfy undefs through resolve_undefs's - * DSO-search path, which marks the consuming LinkSymbols as - * imported. Skipping here keeps DSO names out of img->globals - * so a static-side defined symbol of the same name doesn't - * collide and a DSO export doesn't accidentally win. */ - if (in->kind == LINK_INPUT_DSO_BYTES) continue; - - /* obj.h: ObjSymId 0 is the "none" sentinel; the iterator skips - * it. We need an upper bound for the per-input symbol map, - * which is the builder's nsymbols (count incl. id-0 sentinel). - * Walk via the iterator to learn how many real entries there - * are; allocate the map to a safe upper bound by counting. */ - u32 nsyms_in_input = 0; - it = obj_symiter_new(ob); - while (obj_symiter_next(it, &e)) ++nsyms_in_input; - obj_symiter_free(it); - - map_alloc(img, m, nsyms_in_input + 1u /* +1 for id-0 slot */, - obj_section_count(ob)); - - it = obj_symiter_new(ob); - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - LinkSymbol rec; - LinkSymId existing; - /* Same prune as elf_emit / macho_emit: an extern declaration that - * the TU never relocated against is not a real linker input. The - * in-memory cc->link path skips the file emitter, so we apply the - * same filter here. 
The "logical undef" predicate (no section AND - * not SK_ABS/SK_COMMON) covers both `SK_UNDEF` (already-normalized - * by the readers) and the SK_FUNC/SK_OBJ-with-no-section shape the - * cgtarget mints for `extern` declarations. */ - { - int is_logical_undef = (s->section_id == OBJ_SEC_NONE) && - (s->kind != SK_ABS) && (s->kind != SK_COMMON); - if (is_logical_undef && !s->referenced && - (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { - continue; - } - } - /* "Defined" means: not SK_UNDEF AND has a backing storage — a - * containing section, an absolute value, or COMMON reservation. - * cgtarget paths emit SK_FUNC / SK_OBJ for an `extern` - * declaration's bookkeeping symbol with section_id = 0; those are - * still undefs from the linker's perspective. ELF's read_elf - * already normalizes those to SK_UNDEF; this check unifies the - * in-memory pipeline with that. SK_FILE (STT_FILE) is a defined - * local marker carrying a source filename at SHN_ABS — it has no - * section, but it is not undef. */ - int is_def = (s->kind != SK_UNDEF) && - (s->kind == SK_ABS || s->kind == SK_COMMON || - s->kind == SK_FILE || - s->section_id != OBJ_SEC_NONE); - - memset(&rec, 0, sizeof(rec)); - rec.name = s->name; - rec.input_id = in->id; - rec.obj_sym = e.id; - rec.section_id = LINK_SEC_NONE; /* filled later */ - rec.value = s->value; - rec.size = s->size; - rec.common_align = (s->kind == SK_COMMON) ? (u32)s->common_align : 0u; - rec.bind = (u8)s->bind; - rec.kind = (u8)s->kind; - rec.defined = (u8)is_def; - rec.vaddr = 0; - - if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && - s->name != 0) { - /* Try to insert. On collision, apply replacement - * policy in-place against the existing LinkSymbol. */ - LinkSymId fresh = (LinkSymId)(LinkSyms_count(&img->syms) + 1u); - if (symhash_insert(&img->globals, s->name, fresh, &existing)) { - /* No collision — append a new slot. 
*/ - m->sym[e.id] = append_symbol(img, &rec); - } else { - LinkSymbol* prev = LinkSyms_at(&img->syms, existing - 1); - int new_strength = bind_strength((u8)s->bind); - int old_strength = bind_strength(prev->bind); - /* COMMON symbols coalesce: largest size wins. */ - if (prev->kind == SK_COMMON && rec.kind == SK_COMMON) { - if (rec.size > prev->size) { - u32 new_align = (rec.common_align > prev->common_align) - ? rec.common_align - : prev->common_align; - rec.id = existing; - rec.common_align = new_align; - *prev = rec; - } - m->sym[e.id] = existing; - } else if (rec.kind == SK_COMMON) { - /* Strong def beats COMMON — keep existing. */ - m->sym[e.id] = existing; - } else if (prev->kind == SK_COMMON) { - /* Strong def beats COMMON — override. */ - rec.id = existing; - *prev = rec; - m->sym[e.id] = existing; - } else if (new_strength > old_strength) { - /* This def wins; keep the existing LinkSymId - * stable so prior references resolve, but - * update the contents. */ - rec.id = existing; - *prev = rec; - m->sym[e.id] = existing; - } else if (new_strength == old_strength && - new_strength == bind_strength(SB_GLOBAL)) { - /* Two strong defs — fatal. */ - size_t namelen; - const char* nm = pool_str(l->c->global, s->name, &namelen); - compiler_panic(l->c, no_loc(), - "link: duplicate definition of " - "global symbol '%.*s'", - (int)namelen, nm); - } else { - /* New def is weaker — keep existing, point - * the per-input map at the existing id so - * relocations from this input still resolve. */ - m->sym[e.id] = existing; - } - } - } else { - /* Locals + undefs each get their own LinkSymId. Globals - * with name == 0 (rare; unnamed temporaries promoted - * to global by some assemblers) also land here. */ - m->sym[e.id] = append_symbol(img, &rec); - } - } - obj_symiter_free(it); - } -} - -/* Search the DSO inputs for a defined exported symbol matching - * `name`. 
Returns the LinkInputId of the first DSO that exports - * `name` (with its name interned in the same global pool, so a Sym - * comparison is sufficient), or LINK_INPUT_NONE if no DSO matches. - * Walks DSOs in input order so a leftmost-wins rule applies — same - * behaviour as GNU ld for ambiguous DSO exports. */ -static LinkInputId find_dso_export(Linker* l, Sym name) { - u32 ii; - ObjSymIter* it; - ObjSymEntry e; - if (name == 0) return LINK_INPUT_NONE; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - LinkInput* in = LinkInputs_at(&l->inputs, ii); - if (in->kind != LINK_INPUT_DSO_BYTES) continue; - it = obj_symiter_new(in->obj); - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->name != name) continue; - if (s->kind == SK_UNDEF) continue; - if (s->bind == SB_LOCAL) continue; - obj_symiter_free(it); - return in->id; - } - obj_symiter_free(it); - } - return LINK_INPUT_NONE; -} - -static void resolve_undefs(Linker* l, LinkImage* img) { - u32 i; - /* For every symbol that's still SK_UNDEF and visible by name, look - * it up in the global hash. If still undef, try the resolver. If - * still undef, fatal. */ - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->defined) continue; - if (s->name != 0) { - LinkSymId hit = symhash_get(&img->globals, s->name); - if (hit != LINK_SYM_NONE && hit != s->id) { - LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); - if (def->defined) { - /* Re-point this undef at the global definition by - * copying the resolved fields. The id remains - * stable so per-input maps don't need to be - * rewritten — relocations just look up via this - * symbol's eventual vaddr. */ - s->section_id = def->section_id; - s->value = def->value; - s->vaddr = def->vaddr; - s->kind = def->kind; - s->bind = def->bind; - s->defined = 1; - continue; - } - } - } - /* Dynamic-link match: a DSO input exports this name. 
The symbol - * stays "structurally undefined" — the static linker never - * computes a vaddr for it — but we mark it imported so the panic - * path below leaves it alone, and so later phases (PLT/GOT slot - * synthesis, .rela.dyn emit) know to wire it through dynamic - * relocs. The DSO's input id ends up in DT_NEEDED via the - * input's `soname` field. The actual JUMP_SLOT / GLOB_DAT / - * needs_plt / needs_got decisions land in Phases 4–5 alongside - * the synthetic-section work. */ - if (s->name != 0) { - LinkInputId dso = find_dso_export(l, s->name); - if (dso != LINK_INPUT_NONE) { - s->imported = 1; - s->dso_input_id = dso; - continue; - } - } - if (l->resolver && s->name != 0) { - size_t namelen; - const char* nm = pool_str(l->c->global, s->name, &namelen); - /* The resolver expects a NUL-terminated C string; pool - * strings are NUL-terminated by pool_intern. */ - (void)namelen; - void* p = l->resolver(l->resolver_user, nm); - if (p) { - s->kind = SK_ABS; - s->vaddr = (u64)(uintptr_t)p; - s->defined = 1; - continue; - } - } - if (s->bind == SB_WEAK) { - /* Weak undef resolves to NULL — references that go through - * the GOT see a zero slot (case 16_weak_undef). Mark as - * SK_ABS with vaddr=0 so emit/JIT skip the relative-base - * adjustments. */ - s->kind = SK_ABS; - s->vaddr = 0; - s->defined = 1; - continue; - } - { - size_t namelen; - const char* nm = s->name ? pool_str(l->c->global, s->name, &namelen) - : (namelen = 0, ""); - /* Strip the format's C-mangle byte so the diagnostic shows the - * source-level name (matches decl.c's emit policy). */ - obj_format_demangle_c(l->c, &nm, &namelen); - compiler_panic(l->c, no_loc(), "link: undefined reference to '%.*s'", - (int)namelen, nm); - } - } -} - -/* ---- pass 1b: --gc-sections liveness ---- - * - * Granularity is the input section: pairs (input_idx, ObjSecId). - * Roots: - * 1. The section defining the entry symbol. - * 2. 
Every SSEM_INIT_ARRAY / SSEM_FINI_ARRAY / SSEM_PREINIT_ARRAY - * (these are KEEP() in standard linker scripts). - * 3. SF_RETAIN sections (SHF_GNU_RETAIN, i.e. clang's - * __attribute__((retain)) / used). - * Edges: - * For each live section, every reloc whose target sym has a - * defining section pulls that section live. References whose - * target name is __start_<X> / __stop_<X> with valid C-identifier - * <X> additionally pull every section named <X> live. - * Iterates to a fixed point. When l->gc_sections is 0, every kept - * section is marked live unconditionally so downstream passes share - * a single "is this section live?" predicate. - * - * The mark table is consumed by layout_sections (skips dead sections), - * by gc_drop_dead_globals (clears `defined` on syms whose section was - * dropped), and indirectly by emit_reloc_records / layout_got (which - * filter through m->section[j] == LINK_SEC_NONE since dead sections - * never get a LinkSectionId). */ - -typedef struct GcLive { - u8** marks; /* marks[input_idx][obj_sec_id]; 0/1, sized to nsec_per_input[ii] - */ - u32* nsec; /* obj_section_count per input */ - u32 ninputs; -} GcLive; - -typedef struct GcQueue { - u64* items; /* (u64) packed: hi32 = input_idx, lo32 = obj_sec_id */ - u32 n; - u32 cap; -} GcQueue; - -#define GC_PACK(ii, j) (((u64)(u32)(ii) << 32) | (u32)(j)) -#define GC_II(p) ((u32)((p) >> 32)) -#define GC_J(p) ((ObjSecId)((p) & 0xffffffffu)) - -static void gc_queue_push(GcQueue* q, Heap* h, u32 ii, ObjSecId j) { - if (VEC_GROW(h, q->items, q->cap, q->n + 1u)) - return; /* skip; caller iterates to fixed point */ - q->items[q->n++] = GC_PACK(ii, j); -} - -static void gc_live_alloc(GcLive* g, Linker* l, Heap* h) { - u32 ii; - g->ninputs = LinkInputs_count(&l->inputs); - g->marks = - LinkInputs_count(&l->inputs) - ? (u8**)h->alloc(h, sizeof(*g->marks) * LinkInputs_count(&l->inputs), - _Alignof(u8*)) - : NULL; - g->nsec = - LinkInputs_count(&l->inputs) - ? 
(u32*)h->alloc(h, sizeof(*g->nsec) * LinkInputs_count(&l->inputs), - _Alignof(u32)) - : NULL; - if (LinkInputs_count(&l->inputs) && (!g->marks || !g->nsec)) - compiler_panic(l->c, no_loc(), "link: oom on gc live map"); - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - u32 nsec = obj_section_count(LinkInputs_at(&l->inputs, ii)->obj); - g->nsec[ii] = nsec; - g->marks[ii] = (u8*)h->alloc(h, nsec ? nsec : 1u, 1); - if (!g->marks[ii]) compiler_panic(l->c, no_loc(), "link: oom on gc marks"); - memset(g->marks[ii], 0, nsec); - } -} - -static void gc_live_free(GcLive* g, Heap* h) { - u32 ii; - if (g->marks) { - for (ii = 0; ii < g->ninputs; ++ii) - if (g->marks[ii]) - h->free(h, g->marks[ii], g->nsec[ii] ? g->nsec[ii] : 1u); - h->free(h, g->marks, sizeof(*g->marks) * g->ninputs); - } - if (g->nsec) h->free(h, g->nsec, sizeof(*g->nsec) * g->ninputs); -} - -static int gc_live_get(const GcLive* g, u32 ii, ObjSecId j) { - if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return 0; - return g->marks[ii][j]; -} - -static void gc_mark(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjSecId j) { - if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return; - if (g->marks[ii][j]) return; - g->marks[ii][j] = 1; - gc_queue_push(q, h, ii, j); -} - -/* From a LinkSymId, find the (input_idx, obj_sec_id) of its defining - * section. Returns 1 on hit. Recurses one level through img->globals - * for undef symbols whose name resolves to a global definition. 
*/ -static int gc_def_site(LinkImage* img, Linker* l, LinkSymId id, u32* out_ii, - ObjSecId* out_sid) { - const LinkSymbol* s; - ObjBuilder* ob; - const ObjSym* osym; - if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return 0; - s = LinkSyms_at(&img->syms, id - 1); - if (!s->defined) { - LinkSymId hit; - if (s->name == 0) return 0; - hit = symhash_get(&img->globals, s->name); - if (hit == LINK_SYM_NONE || hit == s->id) return 0; - return gc_def_site(img, l, hit, out_ii, out_sid); - } - if (s->kind == SK_ABS || s->kind == SK_COMMON) return 0; - if (s->input_id == LINK_INPUT_NONE) return 0; /* synthesized */ - ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj; - osym = obj_symbol_get(ob, s->obj_sym); - if (!osym || osym->section_id == OBJ_SEC_NONE) return 0; - *out_ii = (u32)(s->input_id - 1u); - *out_sid = osym->section_id; - return 1; -} - -/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier. - * On hit, *out_off is the offset of <X> within `s`, *out_len its - * length, and *out_is_start is 1 for __start_ / 0 for __stop_. - * out_is_start may be NULL when the caller doesn't need to - * distinguish (e.g. GC, which retains for either prefix). 
*/ -static int gc_split_start_stop(const char* s, size_t n, size_t* out_off, - size_t* out_len, int* out_is_start) { - static const char START[] = "__start_"; - static const char STOP[] = "__stop_"; - size_t off, len, i; - int is_start; - if (n > sizeof(START) - 1u && memcmp(s, START, sizeof(START) - 1u) == 0) { - off = sizeof(START) - 1u; - is_start = 1; - } else if (n > sizeof(STOP) - 1u && memcmp(s, STOP, sizeof(STOP) - 1u) == 0) { - off = sizeof(STOP) - 1u; - is_start = 0; - } else { - return 0; - } - len = n - off; - if (len == 0) return 0; - { - char c = s[off]; - if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) - return 0; - } - for (i = 1; i < len; ++i) { - char c = s[off + i]; - if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9'))) - return 0; - } - *out_off = off; - *out_len = len; - if (out_is_start) *out_is_start = is_start; - return 1; -} - -static void gc_promote_by_section_name(Linker* l, GcLive* g, GcQueue* q, - Heap* h, Sym section_name) { - u32 ii, j; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - u32 nsec = obj_section_count(ob); - for (j = 1; j < nsec; ++j) { - const Section* s = obj_section_get(ob, j); - if (!s || !section_kept(s)) continue; - if (s->name != section_name) continue; - gc_mark(g, q, h, ii, j); - } - } -} - -static void gc_compute(Linker* l, LinkImage* img, GcLive* g) { - u32 ii, j, k; - GcQueue q; - Heap* h = img->heap; - - /* GC disabled: every kept section becomes live. Downstream passes - * use the same is-live predicate, so this keeps logic uniform. 
*/ - if (!l->gc_sections) { - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - u32 nsec = obj_section_count(ob); - for (j = 1; j < nsec; ++j) { - const Section* s = obj_section_get(ob, j); - if (s && section_kept(s)) g->marks[ii][j] = 1; - } - } - return; - } - - memset(&q, 0, sizeof(q)); - - /* Static roots: SF_RETAIN + init/fini/preinit_array. */ - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - u32 nsec = obj_section_count(ob); - for (j = 1; j < nsec; ++j) { - const Section* s = obj_section_get(ob, j); - int root; - if (!s || !section_kept(s)) continue; - root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY || - s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY; - if (root) gc_mark(g, &q, h, ii, j); - } - } - - /* Entry symbol's defining section. Linker default entry is "_start" - * (set in link_new); the JIT path overrides via link_set_entry. */ - if (l->entry_name != 0) { - LinkSymId id = symhash_get(&img->globals, l->entry_name); - u32 tii; - ObjSecId tsid; - if (gc_def_site(img, l, id, &tii, &tsid)) gc_mark(g, &q, h, tii, tsid); - } - - /* Worklist: pop a live section, mark every section reachable through - * its relocs. Each reloc's target may also be a __start_/__stop_ - * encoding-section reference, in which case sections of that name - * become live. 
*/ - while (q.n > 0) { - u64 v = q.items[--q.n]; - u32 cii = GC_II(v); - ObjSecId cj = GC_J(v); - ObjBuilder* ob = LinkInputs_at(&l->inputs, cii)->obj; - InputMap* m = &img->input_maps[cii]; - u32 total = obj_reloc_total(ob); - (void)obj_section_count; - if (!total) continue; - for (k = 0; k < total; ++k) { - const Reloc* r = obj_reloc_at(ob, k); - LinkSymId target; - const LinkSymbol* tsym; - u32 tii; - ObjSecId tsid; - if (r->section_id != cj) continue; - if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; - target = m->sym[r->sym]; - if (target == LINK_SYM_NONE) continue; - tsym = LinkSyms_at(&img->syms, target - 1); - - if (tsym->name != 0) { - size_t namelen, off, ilen; - const char* nm = pool_str(l->c->global, tsym->name, &namelen); - if (gc_split_start_stop(nm, namelen, &off, &ilen, NULL)) { - Sym secname = pool_intern(l->c->global, nm + off, ilen); - gc_promote_by_section_name(l, g, &q, h, secname); - } - } - - if (gc_def_site(img, l, target, &tii, &tsid)) - gc_mark(g, &q, h, tii, tsid); - } - } - - if (q.items) h->free(h, q.items, sizeof(*q.items) * q.cap); -} - -/* After layout, clear `defined` on every LinkSymbol whose contributing - * input section was dropped. The global hash entry stays — lookups - * (cfree_jit_lookup, link_symbol_lookup) gate on `defined`. 
*/ -static void gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) { - u32 i; - if (!l->gc_sections) return; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - ObjBuilder* ob; - const ObjSym* osym; - ObjSecId osid; - if (!s->defined) continue; - if (s->kind == SK_ABS || s->kind == SK_COMMON) continue; - if (s->input_id == LINK_INPUT_NONE) continue; - ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj; - osym = obj_symbol_get(ob, s->obj_sym); - if (!osym) continue; - osid = osym->section_id; - if (osid == OBJ_SEC_NONE) continue; - if (gc_live_get(g, (u32)(s->input_id - 1u), osid)) continue; - /* Section was dropped — sym vanishes from the output. */ - s->defined = 0; - s->vaddr = 0; - s->section_id = LINK_SEC_NONE; - } -} - /* ---- pass 2: section assignment + segment layout ---- */ typedef struct SecRef { @@ -709,12 +119,12 @@ typedef struct PlaceEntry { u8 pad[3]; } PlaceEntry; -static void layout_sections_scripted(Linker* l, LinkImage* img, - const GcLive* g); +static void link_layout_sections_scripted(Linker* l, LinkImage* img, + const GcLive* g); -static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) { +void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) { if (l->script) { - layout_sections_scripted(l, img, g); + link_layout_sections_scripted(l, img, g); return; } Heap* h = img->heap; @@ -726,7 +136,7 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; for (j = 1; j < obj_section_count(ob); ++j) { const Section* s = obj_section_get(ob, j); - if (s && section_kept(s) && gc_live_get(g, ii, j)) ++total_kept; + if (s && link_section_kept(s) && link_gc_live_get(g, ii, j)) ++total_kept; } } @@ -750,11 +160,11 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; for (j = 1; j < obj_section_count(ob); ++j) { const 
Section* s = obj_section_get(ob, j); - if (!s || !section_kept(s) || !gc_live_get(g, ii, j)) continue; + if (!s || !link_section_kept(s) || !link_gc_live_get(g, ii, j)) continue; entries[e].input_idx = ii; entries[e].obj_sec_id = j; entries[e].name = s->name; - entries[e].bucket = bucket_for(s->flags); + entries[e].bucket = link_bucket_for(s->flags); entries[e].placed = 0; ++e; } @@ -870,7 +280,7 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) { if (!seg_count[b]) continue; nat_align = seg_align[b] ? seg_align[b] : 1u; align = (u64)nat_align; - if (align < layout_page_size(l)) align = layout_page_size(l); + if (align < link_layout_page_size(l)) align = link_layout_page_size(l); cursor = ALIGN_UP(cursor, (u64)(align)); seg = &img->segments[img->nsegments]; @@ -939,12 +349,11 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) { * placing matched input sections at the dot location counter. One * LinkSegment per non-DISCARD output section maps 1:1 to a PT_LOAD on * emit. Symbol assignments (top-level and in-section) materialize as - * defined LinkSymbol globals via upsert_global_sym (the same upsert - * pattern emit_boundary_sym uses). + * defined LinkSymbol globals via link_emit_boundary_sym. * * Discard handling: `/DISCARD/` matches input sections by glob and * leaves their per-input m->section[id] entry as LINK_SEC_NONE — the - * downstream emit_reloc_records / link_symbols_to_sections passes + * downstream emit_reloc_records / link_assign_symbol_vaddrs passes * already treat that as "section dropped" so they're naturally * excluded from segments, gc, and reloc apply. */ @@ -1039,28 +448,60 @@ static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot, } } -/* Format-aware C-symbol mangling for linker-synthesized boundaries. - * Defers to obj_format_c_mangle so the boundary symbols match the - * mangling decl.c emits for the corresponding `extern` references. 
*/ +/* Format-aware C-symbol mangling for linker-synthesized boundaries. */ static Sym boundary_name(Linker* l, const char* name) { return obj_format_c_mangle(l->c, name); } -static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name, - u64 vaddr); /* defined below */ +/* Upsert a global symbol with the given absolute vaddr. Satisfies any + * prior undef ref in place; fans out to per-input duplicate name slots. */ +void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, + u64 vaddr) { + Sym sym = boundary_name(l, name); + LinkSymId id = symhash_get(&img->globals, sym); + LinkSymbol rec; + u32 i, n; + memset(&rec, 0, sizeof(rec)); + rec.name = sym; + rec.kind = SK_OBJ; + rec.defined = 1; + rec.vaddr = vaddr; + rec.bind = SB_GLOBAL; + if (id != LINK_SYM_NONE) { + *LinkSyms_at(&img->syms, id - 1) = rec; + LinkSyms_at(&img->syms, id - 1)->id = id; + } else { + LinkSymId fresh = link_append_symbol(img, &rec); + symhash_insert(&img->globals, sym, fresh, &id); + } + n = LinkSyms_count(&img->syms); + for (i = 0; i < n; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->name != sym) continue; + if (s->id == id) continue; + if (s->bind == SB_LOCAL) continue; + s->section_id = LINK_SEC_NONE; + s->value = 0; + s->vaddr = vaddr; + s->kind = SK_OBJ; + s->defined = 1; + s->imported = 0; + } +} + +/* link_define_boundary: public alias used by link_dyn.c. */ +void link_define_boundary(Linker* l, LinkImage* img, const char* name, + u64 vaddr) { + link_emit_boundary_sym(l, img, name, vaddr); +} -/* Upsert a global symbol with the given absolute vaddr. Mirrors the - * emit_boundary_sym pattern: satisfies any prior undef ref in place; - * fans out to per-input duplicate name slots. */ +/* Upsert a global symbol (mirror of emit_boundary_sym, used by apply_asn). 
*/ static void upsert_global_sym(Linker* l, LinkImage* img, const char* name, u64 vaddr) { - emit_boundary_sym(l, img, name, vaddr); + link_emit_boundary_sym(l, img, name, vaddr); } -/* Apply one CfreeLinkAssignment. CFREE_LAS_DOT updates *dot; SYM / - * PROVIDE upserts a global. PROVIDE only fires when the name isn't - * already strongly defined; for v1 we accept it as equivalent to SYM - * (no input to kernel.lds defines these names). */ +/* Apply one CfreeLinkAssignment. */ static void apply_asn(Linker* l, LinkImage* img, u64* dot, const CfreeLinkAssignment* asn) { int err = 0; @@ -1082,38 +523,28 @@ static void apply_asn(Linker* l, LinkImage* img, u64* dot, } static int input_match_section(const CfreeLinkInputMatch* m, const char* nm) { - /* file_pattern is ignored for v1 — kernel.lds uses `*(...)` only. */ return match_glob(m->section_pattern, nm); } -static void layout_sections_scripted(Linker* l, LinkImage* img, - const GcLive* g) { +static void link_layout_sections_scripted(Linker* l, LinkImage* img, + const GcLive* g) { Heap* h = img->heap; const CfreeLinkScript* script = l->script; u64 dot = 0; - /* Scripted layout: vaddrs are absolute (driven by `dot`), but file - * offsets follow a separate cursor packed sequentially after the - * eventual ehdr+phdrs. The writer adds headers_load to file_offsets - * (only) post-layout. */ u64 file_cursor = 0; u32 ii, j, k, si; u32 total_kept = 0; img->scripted = 1; - /* Pass 0: count GC-live, kept, allocatable input sections — the - * upper bound on placeable LinkSections. The actual count placed - * may be lower (DISCARD sinks, unmatched). 
*/ for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; for (j = 1; j < obj_section_count(ob); ++j) { const Section* s = obj_section_get(ob, j); - if (s && section_kept(s) && gc_live_get(g, ii, j)) ++total_kept; + if (s && link_section_kept(s) && link_gc_live_get(g, ii, j)) ++total_kept; } } - /* Pre-allocate img->sections at the upper bound; img->nsections - * tracks the actual count placed. */ img->sections = total_kept ? (LinkSection*)h->alloc( h, sizeof(*img->sections) * total_kept, _Alignof(LinkSection)) @@ -1121,8 +552,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, if (total_kept && !img->sections) compiler_panic(img->c, no_loc(), "link: oom on sections"); - /* Per-section "claimed" bitmap to enforce single-claim across the - * whole script. Indexed by [input_idx][obj_sec_id]. */ u8** claimed = NULL; if (LinkInputs_count(&l->inputs)) { u32 ni = LinkInputs_count(&l->inputs); @@ -1138,17 +567,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, } } - /* Pass 1: top-level dot assignments establish the base address - * before any placement. SYM/PROVIDE assignments at the top level - * are deferred to pass 3 so they capture the post-placement dot - * (e.g. `_end = .` at the script's tail). */ for (k = 0; k < script->ntop_asns; ++k) { const CfreeLinkAssignment* a = &script->top_asns[k]; if (a->kind == CFREE_LAS_DOT) apply_asn(l, img, &dot, a); } - /* Pre-allocate img->segments at the upper bound (one per non-DISCARD - * output section). */ u32 nseg_max = 0; for (si = 0; si < script->nsections; ++si) if (strcmp(script->sections[si].name, "/DISCARD/") != 0) ++nseg_max; @@ -1174,16 +597,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, sizeof(*img->segment_bytes_cap) * nseg_max); } - /* Pass 2: walk output sections in declaration order. 
*/ for (si = 0; si < script->nsections; ++si) { const CfreeLinkOutputSection* os = &script->sections[si]; int is_discard = (strcmp(os->name, "/DISCARD/") == 0); if (is_discard) { - /* Mark every matched input section as claimed. We don't add - * them to img->sections; their m->section[id] stays - * LINK_SEC_NONE so reloc-apply / link_symbols_to_sections - * naturally skip them. */ u32 mi; for (mi = 0; mi < os->ninputs; ++mi) { const CfreeLinkInputMatch* im = &os->inputs[mi]; @@ -1205,11 +623,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, continue; } - /* Non-DISCARD output section. Process all in-section asns first - * (header ALIGN encoded as the first dot-asn, plus any - * `__bss_start = .` style early captures), then walk inputs in - * declaration order, claiming matches across all inputs in input - * order. Each placed input section advances dot. */ u64 sec_start_dot; u32 perms = 0; LinkSegmentId seg_id = (LinkSegmentId)(img->nsegments + 1u); @@ -1220,13 +633,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, u32 nsec_in_seg = 0; u32 first_section_idx = img->nsections; - /* Apply in-section asns (pre-input). */ for (k = 0; k < os->nasns; ++k) { apply_asn(l, img, &dot, &os->asns[k]); } sec_start_dot = dot; - /* Walk input matches; for each, scan all inputs in input order. 
*/ { u32 mi; for (mi = 0; mi < os->ninputs; ++mi) { @@ -1243,9 +654,9 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, LinkSection* ls; LinkSectionId lsid; if (claimed[ii][j]) continue; - if (!gc_live_get(g, ii, j)) continue; + if (!link_gc_live_get(g, ii, j)) continue; s = obj_section_get(ob, j); - if (!s || !section_kept(s)) continue; + if (!s || !link_section_kept(s)) continue; nm = pool_str(l->c->global, s->name, &nl); if (!nm) continue; if (!input_match_section(im, nm)) continue; @@ -1268,8 +679,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, ls->align = align; ls->name = s->name; ls->sem = s->sem; - /* file_offset within the segment buffer: distance from - * sec_start_dot. NOBITS contributes no file bytes. */ ls->file_offset = ofs - sec_start_dot; ls->input_offset = ls->file_offset; m->section[j] = lsid; @@ -1285,11 +694,7 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, } } - /* Materialize the segment for this output section. Empty output - * sections (no input matched) are dropped — they'd produce an - * empty PT_LOAD which the loader rejects. */ if (nsec_in_seg == 0) { - /* Roll back nsections (no entries appended in the empty case). */ continue; } @@ -1298,8 +703,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, seg->id = seg_id; seg->flags = SF_ALLOC | perms; seg->vaddr = sec_start_dot; - /* Page-align each segment's file offset so the writer can keep file - * offset and vaddr congruent mod page size for the runtime loader. */ file_cursor = ALIGN_UP(file_cursor, (u64)PAGE_SIZE); seg->file_offset = file_cursor; seg->mem_size = mem_size_accum; @@ -1316,10 +719,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum); } - /* Shift each section's vaddr/file_offset onto the segment's - * absolute base. 
Sections were laid out with vaddr = absolute - * dot already, so vaddr is correct as-is; file_offset needs - * to become absolute (segment-base + relative). */ { u32 fi; for (fi = first_section_idx; fi < img->nsections; ++fi) { @@ -1331,15 +730,12 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, img->nsegments++; } - /* Pass 3: top-level SYM / PROVIDE assignments capture the - * post-placement dot (e.g. `_end = .`). */ for (k = 0; k < script->ntop_asns; ++k) { const CfreeLinkAssignment* a = &script->top_asns[k]; if (a->kind == CFREE_LAS_SYM || a->kind == CFREE_LAS_PROVIDE) apply_asn(l, img, &dot, a); } - /* Free claim map. */ if (claimed) { u32 ni = LinkInputs_count(&l->inputs); for (ii = 0; ii < ni; ++ii) { @@ -1351,14 +747,12 @@ static void layout_sections_scripted(Linker* l, LinkImage* img, } /* ---- pass 2b: COMMON symbol BSS allocation ---- */ -/* After segments are laid out, extend the RW segment's BSS area to - * accommodate any SK_COMMON symbols that have no section yet. */ -static void layout_commons(Linker* l, LinkImage* img) { + +void link_layout_commons(Linker* l, LinkImage* img) { u32 i; (void)l; LinkSegment* rw_seg = NULL; - /* Find the RW segment. */ for (i = 0; i < img->nsegments; ++i) { if (img->segments[i].flags & SF_WRITE) { rw_seg = &img->segments[i]; @@ -1366,7 +760,6 @@ static void layout_commons(Linker* l, LinkImage* img) { } } - /* First pass: check if we even have COMMON symbols. */ { int has_common = 0; for (i = 0; i < LinkSyms_count(&img->syms); ++i) @@ -1378,7 +771,6 @@ static void layout_commons(Linker* l, LinkImage* img) { if (!has_common) return; } - /* If no RW segment exists, create one. 
*/ if (!rw_seg) { u32 nseg = img->nsegments + 1u; LinkSegment* segs; @@ -1389,7 +781,7 @@ static void layout_commons(Linker* l, LinkImage* img) { u64 end = img->segments[i].vaddr + img->segments[i].mem_size; if (end > vaddr) vaddr = end; } - vaddr = ALIGN_UP(vaddr, (u64)(layout_page_size(l))); + vaddr = ALIGN_UP(vaddr, (u64)(link_layout_page_size(l))); segs = (LinkSegment*)img->heap->realloc( img->heap, img->segments, sizeof(*img->segments) * img->nsegments, sizeof(*img->segments) * nseg, _Alignof(LinkSegment)); @@ -1415,20 +807,12 @@ static void layout_commons(Linker* l, LinkImage* img) { rw_seg->file_offset = vaddr; rw_seg->file_size = 0; rw_seg->mem_size = 0; - rw_seg->align = (u32)layout_page_size(l); + rw_seg->align = (u32)link_layout_page_size(l); img->segment_bytes[img->nsegments] = NULL; img->segment_bytes_cap[img->nsegments] = 0; img->nsegments++; } - /* Synthesize a single NOBITS LinkSection that wraps every COMMON - * symbol. Without a backing section, COMMON symbols carry a vaddr - * but no section_id — Mach-O's shift_sections rebases by - * section_id and would leave their vaddrs at pre-shift coordinates, - * with the __got slot pointing into the wrong segment. Giving each - * common a section_id lets link_symbols_to_sections recompute vaddr - * from section.vaddr + value, and plan_layout / shift_sections - * picks it up like any other writable zerofill section. */ { Heap* h = img->heap; u64 bss_start = rw_seg->vaddr + rw_seg->mem_size; @@ -1437,7 +821,6 @@ static void layout_commons(Linker* l, LinkImage* img) { LinkSection* commsec; LinkSectionId comm_lsid; - /* First sweep computes layout + max alignment. */ for (i = 0; i < LinkSyms_count(&img->syms); ++i) { LinkSymbol* s = LinkSyms_at(&img->syms, i); u32 align; @@ -1449,7 +832,6 @@ static void layout_commons(Linker* l, LinkImage* img) { bss_cursor += s->size ? s->size : 1u; } - /* Append the synthetic NOBITS LinkSection. 
*/ { u32 new_nsec = img->nsections + 1u; LinkSection* nsec = (LinkSection*)h->realloc( @@ -1476,13 +858,12 @@ static void layout_commons(Linker* l, LinkImage* img) { commsec->sem = SSEM_NOBITS; img->nsections++; - /* Second sweep wires each common to the synthetic section. */ for (i = 0; i < LinkSyms_count(&img->syms); ++i) { LinkSymbol* s = LinkSyms_at(&img->syms, i); if (s->kind != SK_COMMON || !s->defined) continue; s->section_id = comm_lsid; s->vaddr = bss_start + s->value; - s->kind = SK_OBJ; /* no longer COMMON once placed */ + s->kind = SK_OBJ; } rw_seg->mem_size = bss_cursor - rw_seg->vaddr; @@ -1490,14 +871,13 @@ static void layout_commons(Linker* l, LinkImage* img) { } } -/* Copy each input section's bytes into its segment buffer. BSS - * sections contribute no file bytes. */ -static void emit_segment_bytes(Linker* l, LinkImage* img) { +/* Copy each input section's bytes into its segment buffer. */ +void link_emit_segment_bytes(Linker* l, LinkImage* img) { u32 j; for (j = 0; j < img->nsections; ++j) { LinkSection* ls = &img->sections[j]; ObjBuilder* ob; - if (ls->input_id == LINK_INPUT_NONE) continue; /* synthetic (e.g. .bss.common) */ + if (ls->input_id == LINK_INPUT_NONE) continue; ob = LinkInputs_at(&l->inputs, ls->input_id - 1)->obj; const Section* s = obj_section_get(ob, ls->obj_section_id); LinkSegment* seg = &img->segments[ls->segment_id - 1]; @@ -1510,1662 +890,18 @@ static void emit_segment_bytes(Linker* l, LinkImage* img) { } } -/* ---- pass 3: assign symbol vaddrs (now that section vaddrs are final) ---- - * - * Map per-input ObjSecId -> LinkSectionId on every defined symbol, then - * compute its final image-relative vaddr. Run after resolve_symbols and - * layout_sections so both the per-input maps and section vaddrs exist. 
*/ -static void link_symbols_to_sections(Linker* l, LinkImage* img) { - u32 ii; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - LinkInput* in = LinkInputs_at(&l->inputs, ii); - ObjBuilder* ob = in->obj; - InputMap* m = &img->input_maps[ii]; - ObjSymIter* it; - ObjSymEntry e; - /* DSO inputs were skipped in resolve_symbols — their per-input - * map is unallocated. They contribute no defined LinkSymbols - * either, so there's nothing to map to a section. */ - if (in->kind == LINK_INPUT_DSO_BYTES) continue; - it = obj_symiter_new(ob); - while (obj_symiter_next(it, &e)) { - LinkSymId lsid = m->sym[e.id]; - LinkSymbol* ls; - if (lsid == LINK_SYM_NONE) continue; - ls = LinkSyms_at(&img->syms, lsid - 1); - if (!ls->defined) continue; - if (ls->kind == SK_ABS && ls->vaddr != 0) continue; - if (e.sym->section_id == OBJ_SEC_NONE) continue; - /* Only update from this input if this is the input that - * contributed the winning definition. */ - if (ls->input_id != LinkInputs_at(&l->inputs, ii)->id) continue; - ls->section_id = m->section[e.sym->section_id]; - } - obj_symiter_free(it); - } - /* Now compute vaddrs. */ - { - u32 i; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->kind == SK_ABS && s->vaddr != 0) continue; - if (!s->defined) continue; - if (s->section_id == LINK_SEC_NONE) continue; - s->vaddr = img->sections[s->section_id - 1].vaddr + s->value; - } - } - /* Resolve undef-against-global once defs are addressed. 
*/ - { - u32 i; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->defined) continue; - if (s->name == 0) continue; - { - LinkSymId hit = symhash_get(&img->globals, s->name); - if (hit != LINK_SYM_NONE && hit != s->id) { - LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); - if (def->defined) { - s->section_id = def->section_id; - s->value = def->value; - s->vaddr = def->vaddr; - s->kind = def->kind; - s->defined = 1; - } - } - } - } - } -} - -/* ---- pass 3b: linker-synthesized boundary symbols ---- */ - -void link_define_boundary(Linker* l, LinkImage* img, const char* name, - u64 vaddr) { - emit_boundary_sym(l, img, name, vaddr); -} - -static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name, - u64 vaddr) { - Sym sym = boundary_name(l, name); - LinkSymId id = symhash_get(&img->globals, sym); - LinkSymbol rec; - u32 i, n; - memset(&rec, 0, sizeof(rec)); - rec.name = sym; - rec.kind = SK_OBJ; - rec.defined = 1; - rec.vaddr = vaddr; - rec.bind = SB_GLOBAL; - if (id != LINK_SYM_NONE) { - /* Satisfy any existing undef reference. */ - *LinkSyms_at(&img->syms, id - 1) = rec; - LinkSyms_at(&img->syms, id - 1)->id = id; - } else { - LinkSymId fresh = append_symbol(img, &rec); - symhash_insert(&img->globals, sym, fresh, &id); - } - /* Per-input undef LinkSymbols are stored in their own slots - * (resolve_symbols never folds undefs into the def's slot). When - * an emit_boundary_sym call runs after resolve_undefs (e.g. - * layout_iplt's __start_iplt_pairs / __stop_iplt_pairs), each - * undef ref already carries a stale vaddr (zero, from a - * weak-zero resolve, or whatever the prior def held). Walk - * img->syms by name and re-copy so downstream consumers - * (layout_got's GOT-slot ABS64 fills, emit_reloc_records) see - * the new vaddr. Locals never share names with globals so the - * bind check just guards the unusual case of a local with the - * same name. 
*/ - n = LinkSyms_count(&img->syms); - for (i = 0; i < n; ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->name != sym) continue; - if (s->id == id) continue; - if (s->bind == SB_LOCAL) continue; - s->section_id = LINK_SEC_NONE; - s->value = 0; - s->vaddr = vaddr; - s->kind = SK_OBJ; - s->defined = 1; - /* If resolve_undefs previously matched this name as an import - * from a DSO (e.g. libc.so exports _DYNAMIC for its own image), - * the linker-supplied definition wins — clear the import marker - * so apply_all_relocs treats it as a normal defined symbol. */ - s->imported = 0; - } -} - -static void emit_array_boundaries(Linker* l, LinkImage* img) { - u32 ii, j; - /* Per-semantic: track [min_vaddr, max_vaddr+size]. */ - u64 init_start = (u64)-1, init_end = 0; - u64 fini_start = (u64)-1, fini_end = 0; - u64 preinit_start = (u64)-1, preinit_end = 0; - - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - InputMap* m = &img->input_maps[ii]; - for (j = 1; j < obj_section_count(ob); ++j) { - const Section* s = obj_section_get(ob, j); - LinkSectionId ls_id; - const LinkSection* ls; - u64 start, end; - if (!s) continue; - if (s->sem != SSEM_INIT_ARRAY && s->sem != SSEM_FINI_ARRAY && - s->sem != SSEM_PREINIT_ARRAY) - continue; - ls_id = m->section[j]; - if (ls_id == LINK_SEC_NONE) continue; - ls = &img->sections[ls_id - 1]; - start = ls->vaddr; - end = ls->vaddr + ls->size; - if (s->sem == SSEM_INIT_ARRAY) { - if (start < init_start) init_start = start; - if (end > init_end) init_end = end; - } else if (s->sem == SSEM_FINI_ARRAY) { - if (start < fini_start) fini_start = start; - if (end > fini_end) fini_end = end; - } else { - if (start < preinit_start) preinit_start = start; - if (end > preinit_end) preinit_end = end; - } - } - } - - /* Synthetic init/fini/preinit sections (e.g. 
layout_iplt's - * .preinit_array entry pointing at __cfree_ifunc_init) carry - * input_id == LINK_INPUT_NONE and aren't visible through the - * input_maps loop above; fold them in here so the boundary - * symbols cover them too. */ - { - u32 i; - for (i = 0; i < img->nsections; ++i) { - const LinkSection* ls = &img->sections[i]; - u64 start, end; - if (ls->input_id != LINK_INPUT_NONE) continue; - if (ls->sem != SSEM_INIT_ARRAY && ls->sem != SSEM_FINI_ARRAY && - ls->sem != SSEM_PREINIT_ARRAY) - continue; - start = ls->vaddr; - end = ls->vaddr + ls->size; - if (ls->sem == SSEM_INIT_ARRAY) { - if (start < init_start) init_start = start; - if (end > init_end) init_end = end; - } else if (ls->sem == SSEM_FINI_ARRAY) { - if (start < fini_start) fini_start = start; - if (end > fini_end) fini_end = end; - } else { - if (start < preinit_start) preinit_start = start; - if (end > preinit_end) preinit_end = end; - } - } - } - - if (init_start == (u64)-1) { - init_start = 0; - init_end = 0; - } - if (fini_start == (u64)-1) { - fini_start = 0; - fini_end = 0; - } - if (preinit_start == (u64)-1) { - preinit_start = 0; - preinit_end = 0; - } - - emit_boundary_sym(l, img, "__init_array_start", init_start); - emit_boundary_sym(l, img, "__init_array_end", init_end); - emit_boundary_sym(l, img, "__fini_array_start", fini_start); - emit_boundary_sym(l, img, "__fini_array_end", fini_end); - emit_boundary_sym(l, img, "__preinit_array_start", preinit_start); - emit_boundary_sym(l, img, "__preinit_array_end", preinit_end); -} - -/* Synthesize TLS boundary symbols so the freestanding _start can size - * and seed the per-thread block: - * __tdata_start, __tdata_end : image vaddrs of the .tdata template - * (memcpy source for the new TLS block). - * __tbss_size : SK_ABS holding the .tbss byte count - * (memset target after the .tdata copy). - * All three are always emitted. When no TLS exists they resolve to - * zero, which makes the _start TLS prologue a no-op. 
The .tdata - * extent is the file portion of the TLS segment; the .tbss extent is - * the trailing memsz - filesz tail. */ -static void emit_tls_boundaries(Linker* l, LinkImage* img) { - u64 tdata_start = img->tls_vaddr; - u64 tdata_end = img->tls_vaddr + img->tls_filesz; - u64 tbss_size = img->tls_memsz - img->tls_filesz; - Sym sym_size = pool_intern_cstr(l->c->global, "__tbss_size"); - LinkSymId id; - LinkSymbol rec; - - emit_boundary_sym(l, img, "__tdata_start", tdata_start); - emit_boundary_sym(l, img, "__tdata_end", tdata_end); - - /* __tbss_size is an absolute count, not an address: SK_ABS so - * shift_image_addresses leaves it alone and the symbol's value - * IS the size when read as `(size_t)__tbss_size`. */ - id = symhash_get(&img->globals, sym_size); - memset(&rec, 0, sizeof(rec)); - rec.name = sym_size; - rec.kind = SK_ABS; - rec.bind = SB_GLOBAL; - rec.defined = 1; - rec.vaddr = tbss_size; - if (id != LINK_SYM_NONE) { - *LinkSyms_at(&img->syms, id - 1) = rec; - LinkSyms_at(&img->syms, id - 1)->id = id; - } else { - LinkSymId fresh = append_symbol(img, &rec); - symhash_insert(&img->globals, sym_size, fresh, &id); - } -} - -/* ---- pass 3c: __start_<X>/__stop_<X> encoding-section boundaries ---- - * - * For every undef LinkSymbol whose name is __start_<X>/__stop_<X> with - * <X> a valid C identifier, find the span of every output LinkSection - * sourced from an input section named <X>, and resolve the symbol to - * its low (start) or high (stop) vaddr. Sections that were dropped by - * GC don't contribute (m->section[j] == LINK_SEC_NONE). 
*/ -static void emit_encoding_section_boundaries(Linker* l, LinkImage* img) { - u32 i, ii, j; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* sym = LinkSyms_at(&img->syms, i); - const char* nm; - size_t namelen, off, ilen; - int is_start; - Sym secname; - u64 lo = (u64)-1; - u64 hi = 0; - int found = 0; - if (sym->defined) continue; - if (sym->name == 0) continue; - nm = pool_str(l->c->global, sym->name, &namelen); - if (!gc_split_start_stop(nm, namelen, &off, &ilen, &is_start)) continue; - secname = pool_intern(l->c->global, nm + off, ilen); - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - InputMap* m = &img->input_maps[ii]; - for (j = 1; j < obj_section_count(ob); ++j) { - const Section* s = obj_section_get(ob, j); - LinkSectionId ls_id; - const LinkSection* ls; - u64 start, end; - if (!s || s->name != secname) continue; - ls_id = m->section[j]; - if (ls_id == LINK_SEC_NONE) continue; - ls = &img->sections[ls_id - 1]; - start = ls->vaddr; - end = ls->vaddr + ls->size; - if (start < lo) lo = start; - if (end > hi) hi = end; - found = 1; - } - } - if (!found) continue; - sym->kind = SK_OBJ; - sym->bind = SB_GLOBAL; - sym->defined = 1; - sym->vaddr = is_start ? 
lo : hi; - } -} - -/* ---- pass 4: relocation records ---- */ - -static u8 reloc_width(RelocKind k) { - switch (k) { - case R_ABS32: - case R_REL32: - case R_PC32: - case R_GOT32: - case R_PLT32: - case R_X64_PLT32: - case R_X64_32S: - case R_X64_TPOFF32: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - case R_X64_GOTPC32: - return 4; - case R_ABS64: - case R_REL64: - case R_PC64: - case R_X64_TPOFF64: - return 8; - case R_AARCH64_ABS16: - case R_AARCH64_PREL16: - return 2; - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: - case R_AARCH64_CONDBR19: - case R_AARCH64_TSTBR14: - case R_AARCH64_LD_PREL_LO19: - case R_AARCH64_ADR_PREL_LO21: - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: - case R_AARCH64_ADD_ABS_LO12_NC: - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - case R_AARCH64_ADR_GOT_PAGE: - case R_AARCH64_LD64_GOT_LO12_NC: - case R_AARCH64_TLSLE_ADD_TPREL_HI12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - case R_AARCH64_TLVP_LOAD_PAGE21: - case R_AARCH64_TLVP_LOAD_PAGEOFF12: - return 4; - case R_RV_HI20: - case R_RV_LO12_I: - case R_RV_LO12_S: - case R_RV_BRANCH: - case R_RV_JAL: - case R_RV_PCREL_HI20: - case R_RV_PCREL_LO12_I: - case R_RV_PCREL_LO12_S: - case R_RV_GOT_HI20: - case R_RV_TPREL_HI20: - case R_RV_TPREL_LO12_I: - case R_RV_TPREL_LO12_S: - return 4; - case R_RV_CALL: - return 8; - case R_RV_RVC_BRANCH: - case R_RV_RVC_JUMP: - return 2; - /* Marker relocs that don't alter site bytes; width nonzero so the - * apply path treats them as recognized. */ - case R_RV_RELAX: - case R_RV_TPREL_ADD: - return 4; - /* RISC-V ADD/SUB/SET fixup relocs — modify the section bytes in - * place. Width is the byte count touched. SUB6/SET6 modify one - * byte (the low 6 bits) like SET8/SUB8. 
*/ - case R_RV_ADD8: - case R_RV_SUB8: - case R_RV_SUB6: - case R_RV_SET6: - case R_RV_SET8: - return 1; - case R_RV_ADD16: - case R_RV_SUB16: - case R_RV_SET16: - return 2; - case R_RV_ADD32: - case R_RV_SUB32: - case R_RV_SET32: - return 4; - case R_RV_ADD64: - case R_RV_SUB64: - return 8; - default: - return 0; - } -} - -static int reloc_uses_got(u16 kind) { - switch (kind) { - case R_AARCH64_ADR_GOT_PAGE: - case R_AARCH64_LD64_GOT_LO12_NC: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - case R_RV_GOT_HI20: - return 1; - default: - return 0; - } -} - -/* Forward decls — defined alongside layout_iplt below. */ -static u32 layout_iplt_alloc_segments(LinkImage* img, u32 nseg); -static u32 layout_iplt_alloc_sections(LinkImage* img, u32 nsec); - -/* ---- pass: JIT call stubs ---- - * - * For the JIT path on AArch64, route every CALL26/JUMP26 against a - * resolver-supplied or weak-undef symbol (SK_ABS) through a 12-byte - * stub colocated with .text inside the JIT mapping. The stub is - * ADRP x16, slot ; LDR x16,[x16,#:lo12:slot] ; BR x16 - * and the slot is an 8-byte GOT entry filled by a per-slot R_ABS64 - * reloc against a synthetic resolver-pointer LinkSymbol (whose vaddr - * is the original SK_ABS target's vaddr — a host pointer for - * resolver-supplied symbols, 0 for weak-undef). - * - * Rationale: without this routing, CALL26 to a resolver-supplied host - * function (e.g. libc `printf` from `cfree run`) trips link_reloc's - * ±128 MiB range check, since the JIT mapping is arbitrarily far from - * the host VA the resolver returned. - * - * The stub_map output is a sparse array indexed by LinkSymId - * (size = LinkSyms_count(&img->syms)+1 at pass entry; the new stub / - * slot / resolver_rec LinkSymbols are never themselves looked up - * through this map). emit_reloc_records consults it to redirect - * CALL26/JUMP26 targets. 
- * - * Runs after resolve_undefs (SK_ABS is set) and before - * emit_reloc_records (so the redirect takes effect). Only runs on - * AArch64 JIT (`!emit_static_exe`); the exe path covers the same - * shape via PLT (ELF) / stubs (Mach-O). - * - * Address-taking via GOT_LOAD still resolves to the original - * resolver-supplied vaddr (the GOT slot's R_ABS64 against the - * non-redirected symbol). Address-taking via direct PCREL would land - * on the stub instead, but clang does not emit non-GOT-routed - * pointer-to-extern on AArch64. */ -static void layout_jit_call_stubs(Linker* l, LinkImage* img, u32 map_size, - LinkSymId** stub_map_out) { - Heap* h = img->heap; - const LinkArchDesc* arch; - LinkSymId* stub_map; - LinkSymId* targets = NULL; - u32 ntarget = 0, tcap = 0; - u32 ii, k, i; - u64 page; - u64 base_vaddr = 0; - u64 stubs_vaddr, slots_vaddr; - u64 stubs_size, slots_size; - u32 stubs_seg_idx, slots_seg_idx; - u32 seg_base, sec_base; - LinkSegment* stubs_seg; - LinkSegment* slots_seg; - LinkSection* stubs_sec; - LinkSection* slots_sec; - u8* stubs_bytes; - - *stub_map_out = NULL; - if (l->emit_static_exe) return; - if (l->c->target.arch != CFREE_ARCH_ARM_64) return; - - arch = link_arch_desc_for(l->c); - if (!arch) return; - - stub_map = (LinkSymId*)h->alloc(h, sizeof(*stub_map) * map_size, - _Alignof(LinkSymId)); - if (!stub_map) compiler_panic(img->c, no_loc(), "link: oom on stub map"); - memset(stub_map, 0, sizeof(*stub_map) * map_size); - - /* Pass A: collect unique SK_ABS targets of CALL26/JUMP26. 
*/ - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - InputMap* m = &img->input_maps[ii]; - u32 total = obj_reloc_total(ob); - if (!total) continue; - for (k = 0; k < total; ++k) { - const Reloc* r = obj_reloc_at(ob, k); - const Section* s = obj_section_get(ob, r->section_id); - LinkSymId target; - const LinkSymbol* tgt; - if (!s || !section_kept(s)) continue; - if (m->section[r->section_id] == LINK_SEC_NONE) continue; - if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue; - if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; - target = m->sym[r->sym]; - if (target == LINK_SYM_NONE) continue; - tgt = LinkSyms_at(&img->syms, target - 1); - if (!tgt || tgt->kind != SK_ABS) continue; - if (stub_map[target] != LINK_SYM_NONE) continue; - if (VEC_GROW(h, targets, tcap, ntarget + 1u)) - compiler_panic(img->c, no_loc(), "link: oom on stub target list"); - targets[ntarget] = target; - /* Sentinel marker; replaced with the stub's LinkSymId in pass C. */ - stub_map[target] = (LinkSymId)(ntarget + 1u); - ntarget++; - } - } - - if (ntarget == 0) { - if (targets) h->free(h, targets, sizeof(*targets) * tcap); - h->free(h, stub_map, sizeof(*stub_map) * map_size); - return; - } - /* Reset sentinels — pass C writes real stub LinkSymIds. */ - for (i = 0; i < ntarget; ++i) stub_map[targets[i]] = LINK_SYM_NONE; - - /* Pass B: allocate RX stubs segment + RW slots segment. Both land - * page-aligned after the current image tail; layout_iplt may run - * before us (IFUNC), and layout_got after — none of those passes - * shift segments allocated here. 
*/ - page = layout_page_size(l); - for (i = 0; i < img->nsegments; ++i) { - u64 end = img->segments[i].vaddr + img->segments[i].mem_size; - if (end > base_vaddr) base_vaddr = end; - } - base_vaddr = ALIGN_UP(base_vaddr, (u64)page); - stubs_vaddr = base_vaddr; - stubs_size = (u64)ntarget * (u64)arch->iplt_stub_size; - slots_vaddr = ALIGN_UP(stubs_vaddr + stubs_size, (u64)page); - slots_size = (u64)ntarget * 8u; - - seg_base = layout_iplt_alloc_segments(img, 2u); - stubs_seg_idx = seg_base + 0u; - slots_seg_idx = seg_base + 1u; - - stubs_seg = &img->segments[stubs_seg_idx]; - memset(stubs_seg, 0, sizeof(*stubs_seg)); - stubs_seg->id = (LinkSegmentId)(stubs_seg_idx + 1u); - stubs_seg->flags = SF_ALLOC | SF_EXEC; - stubs_seg->file_offset = stubs_vaddr; - stubs_seg->vaddr = stubs_vaddr; - stubs_seg->file_size = stubs_size; - stubs_seg->mem_size = stubs_size; - stubs_seg->align = (u32)page; - stubs_seg->nsections = 1; - img->segment_bytes[stubs_seg_idx] = (u8*)h->alloc(h, (size_t)stubs_size, 16); - img->segment_bytes_cap[stubs_seg_idx] = (size_t)stubs_size; - if (!img->segment_bytes[stubs_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on jit stubs bytes"); - memset(img->segment_bytes[stubs_seg_idx], 0, (size_t)stubs_size); - - slots_seg = &img->segments[slots_seg_idx]; - memset(slots_seg, 0, sizeof(*slots_seg)); - slots_seg->id = (LinkSegmentId)(slots_seg_idx + 1u); - slots_seg->flags = SF_ALLOC | SF_WRITE; - slots_seg->file_offset = slots_vaddr; - slots_seg->vaddr = slots_vaddr; - slots_seg->file_size = slots_size; - slots_seg->mem_size = slots_size; - slots_seg->align = (u32)page; - slots_seg->nsections = 1; - img->segment_bytes[slots_seg_idx] = (u8*)h->alloc(h, (size_t)slots_size, 16); - img->segment_bytes_cap[slots_seg_idx] = (size_t)slots_size; - if (!img->segment_bytes[slots_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on jit stub slots bytes"); - memset(img->segment_bytes[slots_seg_idx], 0, (size_t)slots_size); - img->nsegments += 2u; - - 
sec_base = layout_iplt_alloc_sections(img, 2u); - stubs_sec = &img->sections[sec_base + 0u]; - memset(stubs_sec, 0, sizeof(*stubs_sec)); - stubs_sec->id = (LinkSectionId)(sec_base + 0u + 1u); - stubs_sec->input_id = LINK_INPUT_NONE; - stubs_sec->obj_section_id = OBJ_SEC_NONE; - stubs_sec->segment_id = stubs_seg->id; - stubs_sec->input_offset = 0; - stubs_sec->file_offset = stubs_vaddr; - stubs_sec->vaddr = stubs_vaddr; - stubs_sec->size = stubs_size; - stubs_sec->flags = SF_ALLOC | SF_EXEC; - stubs_sec->align = 4; - stubs_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_stubs"); - stubs_sec->sem = SSEM_PROGBITS; - - slots_sec = &img->sections[sec_base + 1u]; - memset(slots_sec, 0, sizeof(*slots_sec)); - slots_sec->id = (LinkSectionId)(sec_base + 1u + 1u); - slots_sec->input_id = LINK_INPUT_NONE; - slots_sec->obj_section_id = OBJ_SEC_NONE; - slots_sec->segment_id = slots_seg->id; - slots_sec->input_offset = 0; - slots_sec->file_offset = slots_vaddr; - slots_sec->vaddr = slots_vaddr; - slots_sec->size = slots_size; - slots_sec->flags = SF_ALLOC | SF_WRITE; - slots_sec->align = 8; - slots_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_slots"); - slots_sec->sem = SSEM_PROGBITS; - img->nsections += 2u; - - /* Pass C: per target, emit stub bytes, synthesize slot + resolver - * LinkSymbols, and queue the 3 relocs that wire them together. 
*/ - stubs_bytes = img->segment_bytes[stubs_seg_idx]; - for (i = 0; i < ntarget; ++i) { - LinkSymId orig = targets[i]; - LinkSymbol* orig_sym = LinkSyms_at(&img->syms, orig - 1); - u64 stub_vaddr = stubs_vaddr + (u64)i * (u64)arch->iplt_stub_size; - u64 slot_vaddr = slots_vaddr + (u64)i * 8u; - LinkSymbol slot_rec, resolver_rec, stub_rec; - LinkSymId slot_id, resolver_id, stub_id; - LinkArchIPltReloc stub_relocs[2]; - u32 nstub_relocs; - LinkRelocApply rrec; - u8* stub_dst = stubs_bytes + (size_t)i * (size_t)arch->iplt_stub_size; - u32 ri; - - nstub_relocs = - arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, stub_relocs); - - memset(&slot_rec, 0, sizeof(slot_rec)); - slot_rec.kind = SK_OBJ; - slot_rec.bind = SB_LOCAL; - slot_rec.defined = 1; - slot_rec.section_id = slots_sec->id; - slot_rec.vaddr = slot_vaddr; - slot_rec.size = 8; - slot_id = append_symbol(img, &slot_rec); - - /* Preserve the original SK_ABS vaddr (host pointer / NULL) for the - * slot's R_ABS64. Redirecting the original LinkSymbol would - * change semantics for non-call references (e.g. data loads). */ - memset(&resolver_rec, 0, sizeof(resolver_rec)); - resolver_rec.kind = SK_ABS; - resolver_rec.bind = SB_LOCAL; - resolver_rec.defined = 1; - resolver_rec.vaddr = orig_sym->vaddr; - resolver_id = append_symbol(img, &resolver_rec); - - memset(&stub_rec, 0, sizeof(stub_rec)); - stub_rec.kind = SK_FUNC; - stub_rec.bind = SB_LOCAL; - stub_rec.defined = 1; - stub_rec.section_id = stubs_sec->id; - stub_rec.vaddr = stub_vaddr; - stub_rec.size = arch->iplt_stub_size; - stub_id = append_symbol(img, &stub_rec); - stub_map[orig] = stub_id; - - /* Stub→slot relocs (ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC). 
*/ - for (ri = 0; ri < nstub_relocs; ++ri) { - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = stubs_sec->id; - rrec.offset = (u32)(i * arch->iplt_stub_size) + - stub_relocs[ri].offset_in_stub; - rrec.width = stub_relocs[ri].width; - rrec.write_vaddr = stub_vaddr + stub_relocs[ri].offset_in_stub; - rrec.write_file_offset = rrec.write_vaddr; - rrec.kind = stub_relocs[ri].kind; - rrec.target = slot_id; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - } - - /* Slot R_ABS64 against resolver_rec (preserves original vaddr). */ - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = slots_sec->id; - rrec.offset = (u32)(i * 8u); - rrec.width = 8; - rrec.write_vaddr = slot_vaddr; - rrec.write_file_offset = slot_vaddr; - rrec.kind = R_ABS64; - rrec.target = resolver_id; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - } - - if (targets) h->free(h, targets, sizeof(*targets) * tcap); - *stub_map_out = stub_map; -} - -static void emit_reloc_records(Linker* l, LinkImage* img, - const LinkSymId* got_map, - const LinkSymId* stub_map) { - u32 ii; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - InputMap* m = &img->input_maps[ii]; - u32 total = obj_reloc_total(ob); - u32 k; - if (total == 0) continue; - for (k = 0; k < total; ++k) { - const Reloc* r = obj_reloc_at(ob, k); - const Section* s = obj_section_get(ob, r->section_id); - LinkSymId target; - LinkSection* ls; - LinkRelocApply rec; - if (!s || !section_kept(s)) continue; - /* Skip relocs whose containing section was GC'd. */ - if (m->section[r->section_id] == LINK_SEC_NONE) continue; - /* RISC-V marker relocs (RELAX, TPREL_ADD, ALIGN) reference no - * symbol — they annotate the prior reloc for relaxation, TLS - * thread-pointer ADD folding, or alignment-aware code shrinking. 
- * We don't relax, so drop them entirely. */ - if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD || - r->kind == R_RV_ALIGN) - continue; - if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) - compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol"); - target = m->sym[r->sym]; - if (target == LINK_SYM_NONE) - compiler_panic(l->c, no_loc(), - "link: reloc references unmapped symbol"); - /* GOT-based relocs target the synthetic .got slot, not the - * symbol itself. The slot is filled by a per-slot R_ABS64 - * reloc emitted by layout_got. */ - if (got_map && reloc_uses_got(r->kind)) { - LinkSymId slot = got_map[target]; - if (slot == LINK_SYM_NONE) - compiler_panic(l->c, no_loc(), "link: GOT slot missing for symbol"); - target = slot; - } - /* JIT path: CALL26/JUMP26 against a resolver-supplied (or - * weak-undef) SK_ABS target is routed through a per-target stub - * synthesized by layout_jit_call_stubs. The stub is colocated - * with .text inside the JIT mapping so the branch displacement - * fits ±128 MiB even when the real target is a host pointer - * arbitrarily far away. stub_map is sparse — only entries for - * targets a CALL26/JUMP26 was actually emitted against are - * populated. 
*/ - if (stub_map && (r->kind == R_AARCH64_CALL26 || - r->kind == R_AARCH64_JUMP26)) { - LinkSymId stub = stub_map[target]; - if (stub != LINK_SYM_NONE) target = stub; - } - ls = &img->sections[m->section[r->section_id] - 1]; - memset(&rec, 0, sizeof(rec)); - rec.input_id = LinkInputs_at(&l->inputs, ii)->id; - rec.section_id = r->section_id; - rec.link_section_id = ls->id; - rec.offset = r->offset; - rec.width = reloc_width((RelocKind)r->kind); - rec.write_vaddr = ls->vaddr + r->offset; - rec.write_file_offset = ls->file_offset + r->offset; - rec.kind = (RelocKind)r->kind; - rec.target = target; - rec.addend = r->addend; - if (rec.width == 0) - compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u", - (unsigned)r->kind); - *append_reloc_slot(img) = rec; - } - } -} - -/* ---- pass 3c: GOT layout ---- - * - * Static-PIC GOT for cases where clang emits R_AARCH64_ADR_GOT_PAGE + - * R_AARCH64_LD64_GOT_LO12_NC (typical for weak-extern references). We - * append a fresh RW segment carrying one 8-byte slot per unique target - * symbol, synthesize a LinkSymbol per slot (so emit_reloc_records can - * redirect the GOT-page/LO12 reloc to the slot), and emit a per-slot - * R_ABS64 reloc that fills the slot with the symbol's resolved runtime - * vaddr at apply time. Weak-undef targets stay at vaddr 0 so the slot - * carries NULL. - * - * The returned `got_map_out` is a sparse array of size - * (LinkSyms_count(&img->syms)+1) indexed by LinkSymId, holding the slot's - * synthetic LinkSymId (or LINK_SYM_NONE for symbols that don't need a slot). - * Caller frees. 
*/ -static void layout_got(Linker* l, LinkImage* img, u32 map_size, - LinkSymId** got_map_out) { - Heap* h = img->heap; - LinkSymId* got_map; - LinkSymId* slot_targets = NULL; - u32 slot_cap = 0; - u32 nslot = 0; - u32 ii, j, k; - u64 page; - u64 base_vaddr = 0; - u64 got_size; - LinkSegment* gotseg; - LinkSection* gotsec; - u32 gotseg_idx; - u32 si; - - *got_map_out = NULL; - - /* map_size is the caller's pre-pass symbol count (+ 1 for the 1-based - * LinkSymId space). Synthetic syms appended below are never indexed - * through got_map, so the map is correctly sized despite further - * growth of img->syms. */ - got_map = (LinkSymId*)h->alloc(h, sizeof(*got_map) * map_size, - _Alignof(LinkSymId)); - if (!got_map) compiler_panic(img->c, no_loc(), "link: oom on got map"); - memset(got_map, 0, sizeof(*got_map) * map_size); - - /* Pass A: scan input relocs for GOT-using kinds. */ - - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - InputMap* m = &img->input_maps[ii]; - u32 total = obj_reloc_total(ob); - if (!total) continue; - for (k = 0; k < total; ++k) { - const Reloc* r = obj_reloc_at(ob, k); - const Section* s = obj_section_get(ob, r->section_id); - LinkSymId target; - if (!s || !section_kept(s)) continue; - if (m->section[r->section_id] == LINK_SEC_NONE) continue; - if (!reloc_uses_got(r->kind)) continue; - if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; - target = m->sym[r->sym]; - if (target == LINK_SYM_NONE) continue; - if (got_map[target] != LINK_SYM_NONE) continue; - if (VEC_GROW(h, slot_targets, slot_cap, nslot + 1u)) - compiler_panic(img->c, no_loc(), "link: oom on got slot list"); - slot_targets[nslot] = target; - /* Mark sentinel; replaced with real slot LinkSymId below. 
*/ - got_map[target] = (LinkSymId)(nslot + 1u); - nslot++; - } - } - - if (nslot == 0) { - if (slot_targets) - h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap); - h->free(h, got_map, sizeof(*got_map) * map_size); - return; - } - - /* Reset got_map markers — we'll fill in real slot ids in pass C. */ - for (si = 0; si < nslot; ++si) got_map[slot_targets[si]] = LINK_SYM_NONE; - - /* Pass B: append a new RW segment for .got, page-aligned after the - * existing segment span. */ - page = layout_page_size(l); - for (j = 0; j < img->nsegments; ++j) { - u64 end = img->segments[j].vaddr + img->segments[j].mem_size; - if (end > base_vaddr) base_vaddr = end; - } - base_vaddr = ALIGN_UP(base_vaddr, (u64)(page)); - got_size = (u64)nslot * 8u; - - { - u32 new_nseg = img->nsegments + 1u; - LinkSegment* nsegs = (LinkSegment*)h->realloc( - h, img->segments, sizeof(*img->segments) * img->nsegments, - sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); - u8** nsbufs = (u8**)h->realloc( - h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, - sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); - size_t* nscaps = (size_t*)h->realloc( - h, img->segment_bytes_cap, - sizeof(*img->segment_bytes_cap) * img->nsegments, - sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); - if (!nsegs || !nsbufs || !nscaps) - compiler_panic(img->c, no_loc(), "link: oom on got segment"); - img->segments = nsegs; - img->segment_bytes = nsbufs; - img->segment_bytes_cap = nscaps; - } - - gotseg_idx = img->nsegments; - gotseg = &img->segments[gotseg_idx]; - memset(gotseg, 0, sizeof(*gotseg)); - gotseg->id = (LinkSegmentId)(gotseg_idx + 1u); - gotseg->flags = SF_ALLOC | SF_WRITE; - gotseg->file_offset = base_vaddr; - gotseg->vaddr = base_vaddr; - gotseg->file_size = got_size; - gotseg->mem_size = got_size; - gotseg->align = (u32)page; - gotseg->nsections = 1; - - img->segment_bytes[gotseg_idx] = (u8*)h->alloc(h, (size_t)got_size, 16); - 
img->segment_bytes_cap[gotseg_idx] = (size_t)got_size; - if (!img->segment_bytes[gotseg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on got bytes"); - memset(img->segment_bytes[gotseg_idx], 0, (size_t)got_size); - img->nsegments++; - - /* Pass C: append the synthetic .got LinkSection. */ - { - u32 new_nsec = img->nsections + 1u; - LinkSection* nsections = (LinkSection*)h->realloc( - h, img->sections, sizeof(*img->sections) * img->nsections, - sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); - if (!nsections) - compiler_panic(img->c, no_loc(), "link: oom on got section"); - img->sections = nsections; - } - gotsec = &img->sections[img->nsections]; - memset(gotsec, 0, sizeof(*gotsec)); - gotsec->id = (LinkSectionId)(img->nsections + 1u); - gotsec->input_id = LINK_INPUT_NONE; - gotsec->obj_section_id = OBJ_SEC_NONE; - gotsec->segment_id = gotseg->id; - gotsec->input_offset = 0; - gotsec->file_offset = base_vaddr; - gotsec->vaddr = base_vaddr; - gotsec->size = got_size; - gotsec->flags = SF_ALLOC | SF_WRITE; - gotsec->align = 8; - gotsec->name = pool_intern_cstr(img->c->global, ".got"); - gotsec->sem = SSEM_PROGBITS; - img->nsections++; - - /* Pass D: per slot, synthesize a LinkSymbol and emit the R_ABS64 - * reloc that fills it at apply time. 
*/ - for (si = 0; si < nslot; ++si) { - LinkSymId orig = slot_targets[si]; - u64 slot_vaddr = base_vaddr + (u64)si * 8u; - LinkSymbol sym_rec; - LinkRelocApply rrec; - LinkSymId slot_id; - - memset(&sym_rec, 0, sizeof(sym_rec)); - sym_rec.name = 0; - sym_rec.kind = SK_OBJ; - sym_rec.bind = SB_LOCAL; - sym_rec.defined = 1; - sym_rec.section_id = gotsec->id; - sym_rec.vaddr = slot_vaddr; - sym_rec.size = 8; - slot_id = append_symbol(img, &sym_rec); - got_map[orig] = slot_id; - - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = gotsec->id; - rrec.offset = (u32)(si * 8u); - rrec.width = 8; - rrec.write_vaddr = slot_vaddr; - rrec.write_file_offset = base_vaddr + (u64)si * 8u; - rrec.kind = R_ABS64; - rrec.target = orig; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - } - - if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap); - - *got_map_out = got_map; -} - -/* ---- pass 3d: STT_GNU_IFUNC trampoline (.iplt + .igot.plt + .iplt.pairs) -- - * - * Per defined SK_IFUNC symbol we synthesize: - * - A 12-byte stub in a fresh RX segment (.iplt): three AArch64 - * instructions that load an 8-byte function pointer and tail-call - * it. Encoded as ADRP x16 / LDR x16,[x16] / BR x16 with the - * ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC immediates left zero — - * the existing reloc machinery patches them against a synthetic - * LinkSymbol whose vaddr is the matching slot. - * - An 8-byte slot in a fresh RW segment (.igot.plt), zero-initialized. - * - A 16-byte (resolver_ptr, slot_ptr) entry in a parallel RW - * section .iplt.pairs (also page-aligned segment for cleanliness), - * filled at apply time via two R_ABS64 relocs. The boundary - * symbols __start_iplt_pairs / __stop_iplt_pairs cover the span - * so the rt member ifunc_init.c can iterate it. - * - * The IFUNC LinkSymbol's vaddr is then redirected to the stub. 
The - * legacy in-image img->iplt_pairs[] table is also populated so the - * JIT path's pre-resolver can call each resolver and store its - * return value into the slot's runtime address — that path doesn't - * use the .iplt.pairs data section. - * - * When emit_static_exe is set (cfree_link_exe path), an additional - * 8-byte SSEM_PREINIT_ARRAY section is synthesized that holds one - * R_ABS64 reloc against __cfree_ifunc_init. Preinit runs strictly - * before .init_array, so user ctors that call IFUNCs see their - * .igot.plt slots already filled. - * - * Invariant: runs after link_symbols_to_sections so the resolver's - * vaddr is final; before emit_array_boundaries so the synthetic - * .init_array entry contributes to __init_array_start/end; before - * resolve_undefs so cross-TU undef references see the post-redirect - * (stub) vaddr. */ - -static u32 layout_iplt_alloc_segments(LinkImage* img, u32 nseg) { - Heap* h = img->heap; - u32 base = img->nsegments; - u32 new_nseg = base + nseg; - LinkSegment* nsegs = (LinkSegment*)h->realloc( - h, img->segments, sizeof(*img->segments) * img->nsegments, - sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); - u8** nsbufs = (u8**)h->realloc( - h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, - sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); - size_t* nscaps = (size_t*)h->realloc( - h, img->segment_bytes_cap, - sizeof(*img->segment_bytes_cap) * img->nsegments, - sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); - if (!nsegs || !nsbufs || !nscaps) - compiler_panic(img->c, no_loc(), "link: oom on iplt segments"); - img->segments = nsegs; - img->segment_bytes = nsbufs; - img->segment_bytes_cap = nscaps; - /* Caller fills slots [base..base+nseg). 
*/ - return base; -} - -static u32 layout_iplt_alloc_sections(LinkImage* img, u32 nsec) { - Heap* h = img->heap; - u32 base = img->nsections; - u32 new_nsec = base + nsec; - LinkSection* nsections = (LinkSection*)h->realloc( - h, img->sections, sizeof(*img->sections) * img->nsections, - sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); - if (!nsections) - compiler_panic(img->c, no_loc(), "link: oom on iplt sections"); - img->sections = nsections; - return base; -} - -static void layout_iplt(Linker* l, LinkImage* img) { - Heap* h = img->heap; - u32 i; - u32 nifunc = 0; - u64 page; - u64 base_vaddr = 0; - u64 iplt_vaddr, igot_vaddr, pairs_vaddr; - u64 iplt_size, igot_size, pairs_size; - u64 init_vaddr = 0, init_size = 0; - u32 iplt_seg_idx, igot_seg_idx, pairs_seg_idx; - u32 init_seg_idx = 0; - u32 seg_base, sec_base; - LinkSegment* iplt_seg; - LinkSegment* igot_seg; - LinkSegment* pairs_seg; - LinkSegment* init_seg = NULL; - LinkSection* iplt_sec; - LinkSection* igot_sec; - LinkSection* pairs_sec; - LinkSection* init_sec = NULL; - u8* iplt_bytes; - u32 slot_idx; - int emit_init_array = l->emit_static_exe; - LinkSymId ifunc_init_sym = LINK_SYM_NONE; - Sym ifunc_init_name = 0; - Sym pairs_section_name; - Sym init_section_name; - const LinkArchDesc* arch = link_arch_desc_for(l->c); - if (!arch) - compiler_panic(img->c, no_loc(), - "link: layout_iplt: no arch descriptor for arch %u", - (u32)l->c->target.arch); - - /* Pass A: count canonical IFUNC defs. resolve_undefs copies - * the def's kind into each cross-TU undef LinkSymbol of the - * same name, so we'd over-count without the symhash_get check - * (matches the dedup in pass B). 
*/ - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - const LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->kind != SK_IFUNC || !s->defined) continue; - if (s->name != 0) { - LinkSymId canonical = symhash_get(&img->globals, s->name); - if (canonical != LINK_SYM_NONE && canonical != s->id) continue; - } - ++nifunc; - } - if (nifunc == 0) return; - - page = layout_page_size(l); - - /* Pick a base vaddr after every existing segment. */ - for (i = 0; i < img->nsegments; ++i) { - u64 end = img->segments[i].vaddr + img->segments[i].mem_size; - if (end > base_vaddr) base_vaddr = end; - } - - base_vaddr = ALIGN_UP(base_vaddr, (u64)(page)); - iplt_vaddr = base_vaddr; - iplt_size = (u64)nifunc * (u64)arch->iplt_stub_size; - igot_vaddr = ALIGN_UP(iplt_vaddr + iplt_size, (u64)(page)); - igot_size = (u64)nifunc * 8u; - pairs_vaddr = ALIGN_UP(igot_vaddr + igot_size, (u64)(page)); - pairs_size = (u64)nifunc * 16u; - - /* When emitting a static ET_EXEC, locate (or fail-late on) the - * __cfree_ifunc_init symbol now and reserve a 1-entry - * .init_array section right after .iplt.pairs in its own - * page-aligned RW segment. The lookup must succeed: archive - * pre-seeding in link_ingest_archives ensured the rt member is - * pulled when any input defines an IFUNC. */ - if (emit_init_array) { - ifunc_init_name = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); - ifunc_init_sym = symhash_get(&img->globals, ifunc_init_name); - if (ifunc_init_sym == LINK_SYM_NONE || - !LinkSyms_at(&img->syms, ifunc_init_sym - 1)->defined) { - compiler_panic(img->c, no_loc(), - "link: STT_GNU_IFUNC requires '__cfree_ifunc_init' " - "to be defined (link in libcfree_rt.a or provide " - "your own implementation)"); - } - init_vaddr = ALIGN_UP(pairs_vaddr + pairs_size, (u64)(page)); - init_size = 8u; - } - - /* Allocate segments: [iplt RX, igot RW, pairs RW] + optional [init RW]. */ - { - u32 nseg = emit_init_array ? 
4u : 3u; - seg_base = layout_iplt_alloc_segments(img, nseg); - } - iplt_seg_idx = seg_base + 0u; - igot_seg_idx = seg_base + 1u; - pairs_seg_idx = seg_base + 2u; - if (emit_init_array) init_seg_idx = seg_base + 3u; - - iplt_seg = &img->segments[iplt_seg_idx]; - memset(iplt_seg, 0, sizeof(*iplt_seg)); - iplt_seg->id = (LinkSegmentId)(iplt_seg_idx + 1u); - iplt_seg->flags = SF_ALLOC | SF_EXEC; - iplt_seg->file_offset = iplt_vaddr; - iplt_seg->vaddr = iplt_vaddr; - iplt_seg->file_size = iplt_size; - iplt_seg->mem_size = iplt_size; - iplt_seg->align = (u32)page; - iplt_seg->nsections = 1; - img->segment_bytes[iplt_seg_idx] = (u8*)h->alloc(h, (size_t)iplt_size, 16); - img->segment_bytes_cap[iplt_seg_idx] = (size_t)iplt_size; - if (!img->segment_bytes[iplt_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on iplt bytes"); - memset(img->segment_bytes[iplt_seg_idx], 0, (size_t)iplt_size); - - igot_seg = &img->segments[igot_seg_idx]; - memset(igot_seg, 0, sizeof(*igot_seg)); - igot_seg->id = (LinkSegmentId)(igot_seg_idx + 1u); - igot_seg->flags = SF_ALLOC | SF_WRITE; - igot_seg->file_offset = igot_vaddr; - igot_seg->vaddr = igot_vaddr; - igot_seg->file_size = igot_size; - igot_seg->mem_size = igot_size; - igot_seg->align = (u32)page; - igot_seg->nsections = 1; - img->segment_bytes[igot_seg_idx] = (u8*)h->alloc(h, (size_t)igot_size, 16); - img->segment_bytes_cap[igot_seg_idx] = (size_t)igot_size; - if (!img->segment_bytes[igot_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on igot bytes"); - memset(img->segment_bytes[igot_seg_idx], 0, (size_t)igot_size); - - pairs_seg = &img->segments[pairs_seg_idx]; - memset(pairs_seg, 0, sizeof(*pairs_seg)); - pairs_seg->id = (LinkSegmentId)(pairs_seg_idx + 1u); - pairs_seg->flags = SF_ALLOC | SF_WRITE; - pairs_seg->file_offset = pairs_vaddr; - pairs_seg->vaddr = pairs_vaddr; - pairs_seg->file_size = pairs_size; - pairs_seg->mem_size = pairs_size; - pairs_seg->align = (u32)page; - pairs_seg->nsections = 1; - 
img->segment_bytes[pairs_seg_idx] = (u8*)h->alloc(h, (size_t)pairs_size, 16); - img->segment_bytes_cap[pairs_seg_idx] = (size_t)pairs_size; - if (!img->segment_bytes[pairs_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on iplt.pairs bytes"); - memset(img->segment_bytes[pairs_seg_idx], 0, (size_t)pairs_size); - - if (emit_init_array) { - init_seg = &img->segments[init_seg_idx]; - memset(init_seg, 0, sizeof(*init_seg)); - init_seg->id = (LinkSegmentId)(init_seg_idx + 1u); - init_seg->flags = SF_ALLOC | SF_WRITE; - init_seg->file_offset = init_vaddr; - init_seg->vaddr = init_vaddr; - init_seg->file_size = init_size; - init_seg->mem_size = init_size; - init_seg->align = (u32)page; - init_seg->nsections = 1; - img->segment_bytes[init_seg_idx] = (u8*)h->alloc(h, (size_t)init_size, 16); - img->segment_bytes_cap[init_seg_idx] = (size_t)init_size; - if (!img->segment_bytes[init_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on iplt init_array bytes"); - memset(img->segment_bytes[init_seg_idx], 0, (size_t)init_size); - } - img->nsegments += emit_init_array ? 4u : 3u; - - /* Allocate sections: same shape, one section per segment. */ - { - u32 nsec = emit_init_array ? 
4u : 3u; - sec_base = layout_iplt_alloc_sections(img, nsec); - } - - pairs_section_name = pool_intern_cstr(l->c->global, ".iplt.pairs"); - init_section_name = obj_secname_preinit_array(l->c); - - iplt_sec = &img->sections[sec_base + 0u]; - memset(iplt_sec, 0, sizeof(*iplt_sec)); - iplt_sec->id = (LinkSectionId)(sec_base + 0u + 1u); - iplt_sec->input_id = LINK_INPUT_NONE; - iplt_sec->obj_section_id = OBJ_SEC_NONE; - iplt_sec->segment_id = iplt_seg->id; - iplt_sec->input_offset = 0; - iplt_sec->file_offset = iplt_vaddr; - iplt_sec->vaddr = iplt_vaddr; - iplt_sec->size = iplt_size; - iplt_sec->flags = SF_ALLOC | SF_EXEC; - iplt_sec->align = 4; - iplt_sec->name = pool_intern_cstr(l->c->global, ".iplt"); - iplt_sec->sem = SSEM_PROGBITS; - - igot_sec = &img->sections[sec_base + 1u]; - memset(igot_sec, 0, sizeof(*igot_sec)); - igot_sec->id = (LinkSectionId)(sec_base + 1u + 1u); - igot_sec->input_id = LINK_INPUT_NONE; - igot_sec->obj_section_id = OBJ_SEC_NONE; - igot_sec->segment_id = igot_seg->id; - igot_sec->input_offset = 0; - igot_sec->file_offset = igot_vaddr; - igot_sec->vaddr = igot_vaddr; - igot_sec->size = igot_size; - igot_sec->flags = SF_ALLOC | SF_WRITE; - igot_sec->align = 8; - igot_sec->name = pool_intern_cstr(l->c->global, ".igot.plt"); - igot_sec->sem = SSEM_PROGBITS; - - pairs_sec = &img->sections[sec_base + 2u]; - memset(pairs_sec, 0, sizeof(*pairs_sec)); - pairs_sec->id = (LinkSectionId)(sec_base + 2u + 1u); - pairs_sec->input_id = LINK_INPUT_NONE; - pairs_sec->obj_section_id = OBJ_SEC_NONE; - pairs_sec->segment_id = pairs_seg->id; - pairs_sec->input_offset = 0; - pairs_sec->file_offset = pairs_vaddr; - pairs_sec->vaddr = pairs_vaddr; - pairs_sec->size = pairs_size; - pairs_sec->flags = SF_ALLOC | SF_WRITE; - pairs_sec->align = 8; - pairs_sec->name = pairs_section_name; - pairs_sec->sem = SSEM_PROGBITS; - - if (emit_init_array) { - init_sec = &img->sections[sec_base + 3u]; - memset(init_sec, 0, sizeof(*init_sec)); - init_sec->id = 
(LinkSectionId)(sec_base + 3u + 1u); - init_sec->input_id = LINK_INPUT_NONE; - init_sec->obj_section_id = OBJ_SEC_NONE; - init_sec->segment_id = init_seg->id; - init_sec->input_offset = 0; - init_sec->file_offset = init_vaddr; - init_sec->vaddr = init_vaddr; - init_sec->size = init_size; - init_sec->flags = SF_ALLOC | SF_WRITE; - init_sec->align = 8; - init_sec->name = init_section_name; - init_sec->sem = SSEM_PREINIT_ARRAY; - } - img->nsections += emit_init_array ? 4u : 3u; - - /* __start_iplt_pairs / __stop_iplt_pairs span the .iplt.pairs - * section (start inclusive, end exclusive). The rt member's - * __cfree_ifunc_init iterates this span. */ - emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr); - emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size); - - /* Allocate the in-image iplt_pairs table (resolver_vaddr, - * slot_vaddr) per IFUNC, in the same iteration order as the stub - * layout. Used by the JIT path's pre-resolution; the ELF emit - * path uses the .iplt.pairs data section instead. */ - img->iplt_pairs = (u64*)h->alloc( - h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64)); - if (!img->iplt_pairs) - compiler_panic(img->c, no_loc(), "link: oom on iplt pairs"); - img->niplt = nifunc; - - iplt_bytes = img->segment_bytes[iplt_seg_idx]; - slot_idx = 0; - - /* Pass B: per IFUNC def, write the stub bytes, synthesize a - * slot LinkSymbol + a synthetic resolver-pointer LinkSymbol, and - * emit the relocs. The IFUNC LinkSymbol is then redirected to - * the stub so external references call into the trampoline - * instead of the resolver directly. - * - * Per-name dedup: resolve_undefs copies the def's kind into each - * undef LinkSymbol of the same name, so a cross-TU undef of an - * IFUNC also reads as SK_IFUNC + defined here. Skip those by - * keeping only the canonical entry from img->globals — undef - * copies pick up the post-redirect fields in the propagation - * pass at the end of this function. 
*/ - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - u64 stub_vaddr; - u64 slot_vaddr; - u64 pair_vaddr; - u64 resolver_vaddr; - LinkSectionId resolver_section; - u64 resolver_value; - LinkSymbol slot_rec; - LinkSymbol resolver_rec; - LinkSymId slot_id; - LinkSymId resolver_id; - LinkRelocApply rrec; - u8* stub_dst; - - if (s->kind != SK_IFUNC || !s->defined) continue; - if (s->name != 0) { - LinkSymId canonical = symhash_get(&img->globals, s->name); - if (canonical != LINK_SYM_NONE && canonical != s->id) continue; - } - - stub_vaddr = iplt_vaddr + (u64)slot_idx * 12u; - slot_vaddr = igot_vaddr + (u64)slot_idx * 8u; - pair_vaddr = pairs_vaddr + (u64)slot_idx * 16u; - resolver_vaddr = s->vaddr; - resolver_section = s->section_id; - resolver_value = s->value; - - img->iplt_pairs[2u * slot_idx + 0] = resolver_vaddr; - img->iplt_pairs[2u * slot_idx + 1] = slot_vaddr; - - /* Stub bytes and any apply-time relocs are arch-specific; the - * descriptor's emit_iplt_stub returns the relocs (offset / width / - * kind within the stub) and the caller fills in the section / - * vaddr fields below. Arches that can encode the stub→slot - * displacement inline (x64, rv64) report 0 relocs; aa64 reports 2 - * (ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC). */ - stub_dst = iplt_bytes + (size_t)slot_idx * (size_t)arch->iplt_stub_size; - LinkArchIPltReloc iplt_relocs[2]; - u32 niplt_relocs = - arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, iplt_relocs); - - /* Synthetic local symbol for the .igot.plt slot. 
*/ - memset(&slot_rec, 0, sizeof(slot_rec)); - slot_rec.name = 0; - slot_rec.kind = SK_OBJ; - slot_rec.bind = SB_LOCAL; - slot_rec.defined = 1; - slot_rec.section_id = igot_sec->id; - slot_rec.vaddr = slot_vaddr; - slot_rec.size = 8; - slot_id = append_symbol(img, &slot_rec); - - /* Synthetic local symbol for the resolver address (captured - * pre-redirect so the .iplt.pairs ABS64 reloc can target - * something whose vaddr shifts with the image base alongside - * the section it lives in). */ - memset(&resolver_rec, 0, sizeof(resolver_rec)); - resolver_rec.name = 0; - resolver_rec.kind = SK_FUNC; - resolver_rec.bind = SB_LOCAL; - resolver_rec.defined = 1; - resolver_rec.section_id = resolver_section; - resolver_rec.value = resolver_value; - resolver_rec.vaddr = resolver_vaddr; - resolver_rec.size = 0; - resolver_id = append_symbol(img, &resolver_rec); - - /* Apply-time fixups for arches that can't encode the stub→slot - * displacement inline. The arch reported (offset_in_stub, width, - * kind) for each; everything else (section, target, vaddrs) is - * the linker's job. 
*/ - { - u32 ri; - for (ri = 0; ri < niplt_relocs; ++ri) { - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = iplt_sec->id; - rrec.offset = (u32)(slot_idx * arch->iplt_stub_size) + - iplt_relocs[ri].offset_in_stub; - rrec.width = iplt_relocs[ri].width; - rrec.write_vaddr = stub_vaddr + iplt_relocs[ri].offset_in_stub; - rrec.write_file_offset = rrec.write_vaddr; - rrec.kind = iplt_relocs[ri].kind; - rrec.target = slot_id; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - } - } - - /* .iplt.pairs[i].resolver = &resolver (R_ABS64) */ - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = pairs_sec->id; - rrec.offset = (u32)(slot_idx * 16u); - rrec.width = 8; - rrec.write_vaddr = pair_vaddr; - rrec.write_file_offset = pair_vaddr; - rrec.kind = R_ABS64; - rrec.target = resolver_id; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - - /* .iplt.pairs[i].slot = &slot (R_ABS64) */ - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = pairs_sec->id; - rrec.offset = (u32)(slot_idx * 16u + 8u); - rrec.width = 8; - rrec.write_vaddr = pair_vaddr + 8u; - rrec.write_file_offset = pair_vaddr + 8u; - rrec.kind = R_ABS64; - rrec.target = slot_id; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - - /* Redirect the IFUNC symbol to the stub. Keep its name + - * binding so cfree_jit_lookup and external relocs still find - * it; switch kind to SK_FUNC since the resolver indirection is - * hidden behind the stub. */ - s->kind = SK_FUNC; - s->section_id = iplt_sec->id; - s->value = (u64)slot_idx * (u64)arch->iplt_stub_size; - s->vaddr = stub_vaddr; - s->size = arch->iplt_stub_size; - - ++slot_idx; - } - - /* .preinit_array entry: one R_ABS64 reloc filling the 8-byte - * slot with __cfree_ifunc_init's resolved address. 
Preinit runs - * strictly before .init_array so user ctors that call IFUNCs see - * filled .igot.plt slots. */ - if (emit_init_array) { - LinkRelocApply rrec; - memset(&rrec, 0, sizeof(rrec)); - rrec.input_id = LINK_INPUT_NONE; - rrec.section_id = OBJ_SEC_NONE; - rrec.link_section_id = init_sec->id; - rrec.offset = 0; - rrec.width = 8; - rrec.write_vaddr = init_vaddr; - rrec.write_file_offset = init_vaddr; - rrec.kind = R_ABS64; - rrec.target = ifunc_init_sym; - rrec.addend = 0; - *append_reloc_slot(img) = rrec; - } - - /* Pass C: propagate the redirect to every per-input undef - * LinkSymbol that shares the IFUNC's name. resolve_undefs - * copied the pre-redirect (resolver) fields into each undef - * slot; without this fix-up, cross-TU references to the IFUNC - * (R_ABS64 / GOT-page / direct call) would resolve to the - * resolver's address, not the iplt stub. Identified by section - * matching the synthesized .iplt section, which only the - * canonical IFUNC defs land in (slot syms are LOCAL + nameless). 
*/ - { - u32 n = LinkSyms_count(&img->syms); - for (i = 0; i < n; ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - LinkSymId canonical; - LinkSymbol* def; - if (s->name == 0) continue; - canonical = symhash_get(&img->globals, s->name); - if (canonical == LINK_SYM_NONE || canonical == s->id) continue; - def = LinkSyms_at(&img->syms, canonical - 1); - if (def->section_id != iplt_sec->id) continue; - s->section_id = def->section_id; - s->value = def->value; - s->vaddr = def->vaddr; - s->kind = def->kind; - s->size = def->size; - s->defined = 1; - } - } -} - -/* ---- entry symbol ---- */ - -static void resolve_entry(Linker* l, LinkImage* img) { - LinkSymId id; - LinkSymbol* s; - if (l->entry_name == 0) return; - id = symhash_get(&img->globals, l->entry_name); - if (id == LINK_SYM_NONE) { - size_t namelen; - const char* nm = pool_str(l->c->global, l->entry_name, &namelen); - compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' not defined", - (int)namelen, nm); - } - s = LinkSyms_at(&img->syms, id - 1); - if (!s->defined) { - size_t namelen; - const char* nm = pool_str(l->c->global, l->entry_name, &namelen); - compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' is undefined", - (int)namelen, nm); - } - img->entry_sym = id; -} - -/* ---- archive ingestion ---- - * - * Members were parsed up-front by link_add_archive_bytes; this pass - * decides which ones get pulled into the link. --whole-archive - * archives include every member; demand archives include any member - * that defines a global symbol referenced (and not yet defined) by - * the current input set, iterated to a fixed point so a member that - * pulls in fresh undefs can drag in further members. 
*/ - -static void include_archive_member(Linker* l, LinkArchiveMember* mem) { - LinkInput* in; - LinkInputId id; - u32 idx; - if (mem->included) return; - in = LinkInputs_push(&l->inputs, &idx); - if (!in) - compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)"); - id = (LinkInputId)(idx + 1u); - in->id = id; - in->kind = LINK_INPUT_OBJ_BYTES; /* the input owns the ObjBuilder now */ - in->obj = mem->obj; - in->name = mem->name; - mem->included = 1; - mem->obj = NULL; /* ownership transferred */ -} - -/* Build presence sets across all currently-included inputs. The values - * stored in the SymHash are dummies (1) — only key presence matters. */ -static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) { - u32 ii; - ObjSymIter* it; - ObjSymEntry e; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - it = obj_symiter_new(ob); - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->name == 0) continue; - if (s->bind == SB_LOCAL) continue; - if (s->kind == SK_UNDEF) - symhash_set(undefs, s->name, 1u); - else - symhash_set(defined, s->name, 1u); - } - obj_symiter_free(it); - } -} - -/* True if any currently-included input defines at least one - * STT_GNU_IFUNC symbol. Used to seed __cfree_ifunc_init into the - * archive demand-load wanted set when emitting a static ET_EXEC: the - * synthesized .init_array entry pulls the rt member which carries the - * startup ctor that fills .igot.plt slots. 
*/ -static int inputs_have_defined_ifunc(Linker* l) { - u32 ii; - ObjSymIter* it; - ObjSymEntry e; - for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { - ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; - it = obj_symiter_new(ob); - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->kind == SK_IFUNC) { - obj_symiter_free(it); - return 1; - } - } - obj_symiter_free(it); - } - return 0; -} - -/* True if `mem` defines a non-undef SB_GLOBAL or SB_WEAK symbol that's - * in `wanted` and not already in `defined`. Both GNU ld and lld pull - * archive members on weak defs against an unresolved undef — the - * "weak doesn't drag" rule applies to weak *references*, not weak - * definitions. (musl's __init_tls is a weak def and must be pulled - * to satisfy __libc_start_main's hard ref.) */ -static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, - const SymHash* wanted) { - ObjSymIter* it; - ObjSymEntry e; - int hit = 0; - it = obj_symiter_new(mem->obj); - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->name == 0) continue; - if (s->kind == SK_UNDEF) continue; - if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; - if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue; - if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue; - hit = 1; - break; - } - obj_symiter_free(it); - return hit; -} - -void link_ingest_archives(Linker* l) { - u32 a, m; - if (LinkArchives_count(&l->archives) == 0) return; - - /* Pass 1: --whole-archive members are pulled unconditionally. */ - for (a = 0; a < LinkArchives_count(&l->archives); ++a) { - LinkArchive* ar = LinkArchives_at(&l->archives, a); - if (!ar->whole_archive) continue; - for (m = 0; m < ar->nmembers; ++m) - include_archive_member(l, &ar->members[m]); - } - - /* When emitting a static ET_EXEC and any input defines an IFUNC, - * seed __cfree_ifunc_init into the wanted set so demand-load pulls - * libcfree_rt's ifunc_init.c. 
Layout_iplt later synthesizes a - * .init_array entry referencing this symbol; the rt member's - * implementation walks .iplt.pairs and fills each slot at startup. - * Done once before the demand loop — the seed needs to be present - * on every iteration of the loop's local symhash, so we stash the - * Sym handle and inject it inside the loop body. */ - Sym want_ifunc_init = 0; - if (l->emit_static_exe && inputs_have_defined_ifunc(l)) { - want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); - } - - /* Pass 2: demand loop over the remaining archives. Pulling member A - * may introduce undefs satisfied by member B, so iterate to a - * fixed point. Bounded by total member count across archives. */ - for (;;) { - SymHash defined, undefs; - int changed = 0; - symhash_init(&defined, l->heap); - symhash_init(&undefs, l->heap); - scan_presence(l, &defined, &undefs); - if (want_ifunc_init != 0 && - symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) - symhash_set(&undefs, want_ifunc_init, 1u); - - for (a = 0; a < LinkArchives_count(&l->archives); ++a) { - LinkArchive* ar = LinkArchives_at(&l->archives, a); - if (ar->whole_archive) continue; - for (m = 0; m < ar->nmembers; ++m) { - LinkArchiveMember* mem = &ar->members[m]; - if (mem->included) continue; - if (!member_satisfies(mem, &defined, &undefs)) continue; - include_archive_member(l, mem); - changed = 1; - } - } - symhash_fini(&defined); - symhash_fini(&undefs); - if (!changed) break; - } -} - -/* ---- public ---- */ +/* ---- public orchestration ---- */ LinkImage* link_resolve(Linker* l) { LinkImage* img; Heap* h; - /* Expand archive members into Linker.inputs before any layout - * machinery runs — once that's done, the rest of the pipeline - * sees a single flat input list and doesn't care about archives. */ link_ingest_archives(l); img = link_image_alloc(l->c); h = img->heap; img->linker = l; - /* Per-input map storage. 
*/ img->ninput_maps = LinkInputs_count(&l->inputs); img->input_maps = LinkInputs_count(&l->inputs) @@ -3179,42 +915,21 @@ LinkImage* link_resolve(Linker* l) { memset(img->input_maps, 0, sizeof(*img->input_maps) * LinkInputs_count(&l->inputs)); - resolve_symbols(l, img); + link_resolve_symbols(l, img); { GcLive g = {0}; - gc_live_alloc(&g, l, h); - gc_compute(l, img, &g); - layout_sections(l, img, &g); - layout_commons(l, img); - emit_segment_bytes(l, img); - link_symbols_to_sections(l, img); - emit_array_boundaries(l, img); - emit_tls_boundaries(l, img); - emit_encoding_section_boundaries(l, img); - /* Linker-defined synthetic symbols that may be referenced as - * undefs (often hidden) by sysroot startfiles / nonshared archives. - * Pre-defining them here satisfies resolve_undefs' undef sweep so - * it doesn't panic on hidden-undef references that no object or - * DSO supplies. vaddr=0 is a placeholder; layout_dyn may refine - * _DYNAMIC to the actual .dynamic vaddr later. */ - emit_boundary_sym(l, img, "__dso_handle", 0); - emit_boundary_sym(l, img, "_DYNAMIC", 0); - /* _GLOBAL_OFFSET_TABLE_ is referenced as a SHN_UNDEF marker by - * any x86_64 input that uses the GOT (musl/glibc libc.a routinely - * do). GNU ld auto-defines it at the .got base; cfree-ld doesn't - * use the symbol for any actual reloc, so a placeholder vaddr=0 - * keeps the undef sweep happy without affecting code that - * computes GOT addresses through their own GOTPC32 relocs. */ - emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); - /* RISC-V startfiles use `__global_pointer$` to load gp in _start; - * the RISC-V psABI says it's defined as `.sdata + 0x800` so - * gp-relative addressing covers [.sdata - 2KiB, .sdata + 2KiB). 
- * We don't have .sdata as a distinct section, but any address in - * the writable data region is functionally adequate when the code - * doesn't actually use gp-relative addressing (cfree-cc doesn't - * emit `-mrelax`, and musl's static crt only loads gp without - * dereferencing through it). Pick the first RW segment base + - * 0x800. Only relevant for rv64; harmless on other arches. */ + link_gc_live_alloc(&g, l, h); + link_gc_compute(l, img, &g); + link_layout_sections(l, img, &g); + link_layout_commons(l, img); + link_emit_segment_bytes(l, img); + link_assign_symbol_vaddrs(l, img); + link_emit_array_boundaries(l, img); + link_emit_tls_boundaries(l, img); + link_emit_encoding_section_boundaries(l, img); + link_emit_boundary_sym(l, img, "__dso_handle", 0); + link_emit_boundary_sym(l, img, "_DYNAMIC", 0); + link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); if (l->c->target.arch == CFREE_ARCH_RV64) { u32 si; u64 gp_vaddr = 0; @@ -3224,67 +939,28 @@ LinkImage* link_resolve(Linker* l) { break; } } - emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr); + link_emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr); } - resolve_undefs(l, img); - gc_drop_dead_globals(l, img, &g); - /* layout_iplt runs last among the symbol-shaping passes: it - * redirects each defined IFUNC LinkSymbol from the resolver - * to its iplt stub and (under emit_static_exe) materializes a - * .init_array entry pointing at __cfree_ifunc_init. We then - * re-run emit_array_boundaries so __init_array_start/end span - * the synthetic entry. Cross-TU undefs may retain the - * pre-redirect (resolver) vaddr — only a concern for - * GOT-slot fills; not exercised by current tests. 
*/ - layout_iplt(l, img); - if (img->niplt) emit_array_boundaries(l, img); + link_resolve_undefs(l, img); + link_gc_drop_dead_globals(l, img, &g); + link_layout_iplt(l, img); + if (img->niplt) link_emit_array_boundaries(l, img); { LinkSymId* got_map = NULL; LinkSymId* stub_map = NULL; - /* Both maps are sparse arrays indexed by orig LinkSymId, sized - * to the symbol count BEFORE either pass appends synthetic - * entries (stub/slot/resolver_rec from layout_jit_call_stubs; - * GOT-slot syms from layout_got). Snapshot here so the free - * size matches the allocation. */ u32 map_size = LinkSyms_count(&img->syms) + 1u; - /* JIT-only: synthesize per-target stubs for CALL26/JUMP26 - * against resolver-supplied or weak-undef SK_ABS targets so the - * branch displacement stays within ±128 MiB of .text regardless - * of where the resolver-returned host pointer lives. Runs - * before layout_got (the stub's slot reloc is non-GOT) and - * before emit_reloc_records (which consults stub_map). */ - layout_jit_call_stubs(l, img, map_size, &stub_map); - /* layout_got synthesizes ELF-shaped .got slots and rewrites - * GOT-using reloc targets to point at them. Mach-O has its own - * __DATA_CONST,__got mechanism wired up in link_macho.c for the - * exe path (driven by collect_imports), so skip the ELF synthesis - * there. The JIT path has no equivalent — link_jit.c does not - * run collect_imports — so fall through to layout_got on Mach-O - * when emit_static_exe is off (cfree_link_jit). Without this, - * cross-TU GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC relocs would patch - * with S = symbol value instead of S = slot address (see - * doc/MACHO.md §3.1). 
*/ + link_layout_jit_stubs(l, img, map_size, &stub_map); if (l->c->target.obj != CFREE_OBJ_MACHO || !l->emit_static_exe) - layout_got(l, img, map_size, &got_map); - emit_reloc_records(l, img, got_map, stub_map); + link_layout_got(l, img, map_size, &got_map); + link_emit_relocations(l, img, got_map, stub_map); if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size); if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size); } - /* Phase 4 dynamic-link tables. Runs after every other layout - * pass: it depends on import resolution (resolve_undefs), every - * synthesized section already being on the image (layout_got / - * layout_iplt), and adds its own segments at the tail. The - * static-exe path early-outs in layout_dyn (l->emit_pie==0). */ layout_dyn(l, img); - resolve_entry(l, img); - gc_live_free(&g, h); + link_resolve_entry(l, img); + link_gc_live_free(&g, h); } - /* Hand the input ObjBuilders to the image so cfree_jit_view can - * surface .debug_* sections after link_free runs (layout/reloc are - * complete, so the builders are otherwise idle). Must be the last - * step before returning — any pass that walks LinkInputs.obj - * expecting a value would break otherwise. */ link_capture_debug_inputs(l, img); return img; diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -0,0 +1,1236 @@ +/* link_reloc_layout.c — post-section-placement passes: + * link_assign_symbol_vaddrs — symbol→vaddr binding (pass 3) + * link_emit_array_boundaries — __init_array_start/end etc. + * link_emit_tls_boundaries — __tdata_start/end, __tbss_size + * link_emit_encoding_section_boundaries — __start_<X>/__stop_<X> + * link_layout_jit_stubs — AArch64 JIT CALL26/JUMP26 stubs + * link_layout_got — static-PIC .got + * link_layout_iplt — STT_GNU_IFUNC trampoline (.iplt etc.) 
+ * link_emit_relocations — emit LinkRelocApply records (pass 4) + * link_resolve_entry — entry symbol lookup + */ + +#include <cfree.h> +#include <string.h> + +#include "core/buf.h" +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link.h" +#include "link/link_arch.h" +#include "link/link_internal.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- pass 3: assign symbol vaddrs ---- */ + +void link_assign_symbol_vaddrs(Linker* l, LinkImage* img) { + u32 ii; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + LinkInput* in = LinkInputs_at(&l->inputs, ii); + ObjBuilder* ob = in->obj; + InputMap* m = &img->input_maps[ii]; + ObjSymIter* it; + ObjSymEntry e; + if (in->kind == LINK_INPUT_DSO_BYTES) continue; + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) { + LinkSymId lsid = m->sym[e.id]; + LinkSymbol* ls; + if (lsid == LINK_SYM_NONE) continue; + ls = LinkSyms_at(&img->syms, lsid - 1); + if (!ls->defined) continue; + if (ls->kind == SK_ABS && ls->vaddr != 0) continue; + if (e.sym->section_id == OBJ_SEC_NONE) continue; + if (ls->input_id != LinkInputs_at(&l->inputs, ii)->id) continue; + ls->section_id = m->section[e.sym->section_id]; + } + obj_symiter_free(it); + } + { + u32 i; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->kind == SK_ABS && s->vaddr != 0) continue; + if (!s->defined) continue; + if (s->section_id == LINK_SEC_NONE) continue; + s->vaddr = img->sections[s->section_id - 1].vaddr + s->value; + } + } + { + u32 i; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->defined) continue; + if (s->name == 0) continue; + { + LinkSymId hit = symhash_get(&img->globals, s->name); + if (hit != LINK_SYM_NONE && hit != s->id) { + LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); + if (def->defined) { + s->section_id 
= def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->defined = 1; + } + } + } + } + } +} + +/* ---- pass 3b: boundary symbols ---- */ + +void link_emit_array_boundaries(Linker* l, LinkImage* img) { + u32 ii, j; + u64 init_start = (u64)-1, init_end = 0; + u64 fini_start = (u64)-1, fini_end = 0; + u64 preinit_start = (u64)-1, preinit_end = 0; + + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; + for (j = 1; j < obj_section_count(ob); ++j) { + const Section* s = obj_section_get(ob, j); + LinkSectionId ls_id; + const LinkSection* ls; + u64 start, end; + if (!s) continue; + if (s->sem != SSEM_INIT_ARRAY && s->sem != SSEM_FINI_ARRAY && + s->sem != SSEM_PREINIT_ARRAY) + continue; + ls_id = m->section[j]; + if (ls_id == LINK_SEC_NONE) continue; + ls = &img->sections[ls_id - 1]; + start = ls->vaddr; + end = ls->vaddr + ls->size; + if (s->sem == SSEM_INIT_ARRAY) { + if (start < init_start) init_start = start; + if (end > init_end) init_end = end; + } else if (s->sem == SSEM_FINI_ARRAY) { + if (start < fini_start) fini_start = start; + if (end > fini_end) fini_end = end; + } else { + if (start < preinit_start) preinit_start = start; + if (end > preinit_end) preinit_end = end; + } + } + } + + { + u32 i; + for (i = 0; i < img->nsections; ++i) { + const LinkSection* ls = &img->sections[i]; + u64 start, end; + if (ls->input_id != LINK_INPUT_NONE) continue; + if (ls->sem != SSEM_INIT_ARRAY && ls->sem != SSEM_FINI_ARRAY && + ls->sem != SSEM_PREINIT_ARRAY) + continue; + start = ls->vaddr; + end = ls->vaddr + ls->size; + if (ls->sem == SSEM_INIT_ARRAY) { + if (start < init_start) init_start = start; + if (end > init_end) init_end = end; + } else if (ls->sem == SSEM_FINI_ARRAY) { + if (start < fini_start) fini_start = start; + if (end > fini_end) fini_end = end; + } else { + if (start < preinit_start) preinit_start = start; + if (end > 
preinit_end) preinit_end = end; + } + } + } + + if (init_start == (u64)-1) { + init_start = 0; + init_end = 0; + } + if (fini_start == (u64)-1) { + fini_start = 0; + fini_end = 0; + } + if (preinit_start == (u64)-1) { + preinit_start = 0; + preinit_end = 0; + } + + link_emit_boundary_sym(l, img, "__init_array_start", init_start); + link_emit_boundary_sym(l, img, "__init_array_end", init_end); + link_emit_boundary_sym(l, img, "__fini_array_start", fini_start); + link_emit_boundary_sym(l, img, "__fini_array_end", fini_end); + link_emit_boundary_sym(l, img, "__preinit_array_start", preinit_start); + link_emit_boundary_sym(l, img, "__preinit_array_end", preinit_end); +} + +void link_emit_tls_boundaries(Linker* l, LinkImage* img) { + u64 tdata_start = img->tls_vaddr; + u64 tdata_end = img->tls_vaddr + img->tls_filesz; + u64 tbss_size = img->tls_memsz - img->tls_filesz; + Sym sym_size = pool_intern_cstr(l->c->global, "__tbss_size"); + LinkSymId id; + LinkSymbol rec; + + link_emit_boundary_sym(l, img, "__tdata_start", tdata_start); + link_emit_boundary_sym(l, img, "__tdata_end", tdata_end); + + id = symhash_get(&img->globals, sym_size); + memset(&rec, 0, sizeof(rec)); + rec.name = sym_size; + rec.kind = SK_ABS; + rec.bind = SB_GLOBAL; + rec.defined = 1; + rec.vaddr = tbss_size; + if (id != LINK_SYM_NONE) { + *LinkSyms_at(&img->syms, id - 1) = rec; + LinkSyms_at(&img->syms, id - 1)->id = id; + } else { + LinkSymId fresh = link_append_symbol(img, &rec); + symhash_insert(&img->globals, sym_size, fresh, &id); + } +} + +void link_emit_encoding_section_boundaries(Linker* l, LinkImage* img) { + u32 i, ii, j; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* sym = LinkSyms_at(&img->syms, i); + const char* nm; + size_t namelen, off, ilen; + int is_start; + Sym secname; + u64 lo = (u64)-1; + u64 hi = 0; + int found = 0; + if (sym->defined) continue; + if (sym->name == 0) continue; + nm = pool_str(l->c->global, sym->name, &namelen); + if 
(!link_gc_split_start_stop(nm, namelen, &off, &ilen, &is_start)) continue; + secname = pool_intern(l->c->global, nm + off, ilen); + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; + for (j = 1; j < obj_section_count(ob); ++j) { + const Section* s = obj_section_get(ob, j); + LinkSectionId ls_id; + const LinkSection* ls; + u64 start, end; + if (!s || s->name != secname) continue; + ls_id = m->section[j]; + if (ls_id == LINK_SEC_NONE) continue; + ls = &img->sections[ls_id - 1]; + start = ls->vaddr; + end = ls->vaddr + ls->size; + if (start < lo) lo = start; + if (end > hi) hi = end; + found = 1; + } + } + if (!found) continue; + sym->kind = SK_OBJ; + sym->bind = SB_GLOBAL; + sym->defined = 1; + sym->vaddr = is_start ? lo : hi; + } +} + +/* ---- pass 4: reloc records ---- */ + +static u8 reloc_width(RelocKind k) { + switch (k) { + case R_ABS32: + case R_REL32: + case R_PC32: + case R_GOT32: + case R_PLT32: + case R_X64_PLT32: + case R_X64_32S: + case R_X64_TPOFF32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_X64_GOTPC32: + return 4; + case R_ABS64: + case R_REL64: + case R_PC64: + case R_X64_TPOFF64: + return 8; + case R_AARCH64_ABS16: + case R_AARCH64_PREL16: + return 2; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: + case R_AARCH64_LD_PREL_LO19: + case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + case R_AARCH64_TLVP_LOAD_PAGE21: + case 
R_AARCH64_TLVP_LOAD_PAGEOFF12: + return 4; + case R_RV_HI20: + case R_RV_LO12_I: + case R_RV_LO12_S: + case R_RV_BRANCH: + case R_RV_JAL: + case R_RV_PCREL_HI20: + case R_RV_PCREL_LO12_I: + case R_RV_PCREL_LO12_S: + case R_RV_GOT_HI20: + case R_RV_TPREL_HI20: + case R_RV_TPREL_LO12_I: + case R_RV_TPREL_LO12_S: + return 4; + case R_RV_CALL: + return 8; + case R_RV_RVC_BRANCH: + case R_RV_RVC_JUMP: + return 2; + case R_RV_RELAX: + case R_RV_TPREL_ADD: + return 4; + case R_RV_ADD8: + case R_RV_SUB8: + case R_RV_SUB6: + case R_RV_SET6: + case R_RV_SET8: + return 1; + case R_RV_ADD16: + case R_RV_SUB16: + case R_RV_SET16: + return 2; + case R_RV_ADD32: + case R_RV_SUB32: + case R_RV_SET32: + return 4; + case R_RV_ADD64: + case R_RV_SUB64: + return 8; + default: + return 0; + } +} + +static int reloc_uses_got(u16 kind) { + switch (kind) { + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_RV_GOT_HI20: + return 1; + default: + return 0; + } +} + +/* ---- iplt alloc helpers (used by layout_jit_call_stubs too) ---- */ + +u32 link_iplt_alloc_segments(LinkImage* img, u32 nseg) { + Heap* h = img->heap; + u32 base = img->nsegments; + u32 new_nseg = base + nseg; + LinkSegment* nsegs = (LinkSegment*)h->realloc( + h, img->segments, sizeof(*img->segments) * img->nsegments, + sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); + u8** nsbufs = (u8**)h->realloc( + h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, + sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); + size_t* nscaps = (size_t*)h->realloc( + h, img->segment_bytes_cap, + sizeof(*img->segment_bytes_cap) * img->nsegments, + sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); + if (!nsegs || !nsbufs || !nscaps) + compiler_panic(img->c, no_loc(), "link: oom on iplt segments"); + img->segments = nsegs; + img->segment_bytes = nsbufs; + img->segment_bytes_cap = nscaps; + return base; +} + 
+u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec) { + Heap* h = img->heap; + u32 base = img->nsections; + u32 new_nsec = base + nsec; + LinkSection* nsections = (LinkSection*)h->realloc( + h, img->sections, sizeof(*img->sections) * img->nsections, + sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); + if (!nsections) + compiler_panic(img->c, no_loc(), "link: oom on iplt sections"); + img->sections = nsections; + return base; +} + +/* ---- pass: JIT call stubs ---- */ + +void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, + LinkSymId** stub_map_out) { + Heap* h = img->heap; + const LinkArchDesc* arch; + LinkSymId* stub_map; + LinkSymId* targets = NULL; + u32 ntarget = 0, tcap = 0; + u32 ii, k, i; + u64 page; + u64 base_vaddr = 0; + u64 stubs_vaddr, slots_vaddr; + u64 stubs_size, slots_size; + u32 stubs_seg_idx, slots_seg_idx; + u32 seg_base, sec_base; + LinkSegment* stubs_seg; + LinkSegment* slots_seg; + LinkSection* stubs_sec; + LinkSection* slots_sec; + u8* stubs_bytes; + + *stub_map_out = NULL; + if (l->emit_static_exe) return; + if (l->c->target.arch != CFREE_ARCH_ARM_64) return; + + arch = link_arch_desc_for(l->c); + if (!arch) return; + + stub_map = (LinkSymId*)h->alloc(h, sizeof(*stub_map) * map_size, + _Alignof(LinkSymId)); + if (!stub_map) compiler_panic(img->c, no_loc(), "link: oom on stub map"); + memset(stub_map, 0, sizeof(*stub_map) * map_size); + + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; + u32 total = obj_reloc_total(ob); + if (!total) continue; + for (k = 0; k < total; ++k) { + const Reloc* r = obj_reloc_at(ob, k); + const Section* s = obj_section_get(ob, r->section_id); + LinkSymId target; + const LinkSymbol* tgt; + if (!s || !link_section_kept(s)) continue; + if (m->section[r->section_id] == LINK_SEC_NONE) continue; + if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue; + if (r->sym == 
OBJ_SYM_NONE || r->sym >= m->nsym) continue; + target = m->sym[r->sym]; + if (target == LINK_SYM_NONE) continue; + tgt = LinkSyms_at(&img->syms, target - 1); + if (!tgt || tgt->kind != SK_ABS) continue; + if (stub_map[target] != LINK_SYM_NONE) continue; + if (VEC_GROW(h, targets, tcap, ntarget + 1u)) + compiler_panic(img->c, no_loc(), "link: oom on stub target list"); + targets[ntarget] = target; + stub_map[target] = (LinkSymId)(ntarget + 1u); + ntarget++; + } + } + + if (ntarget == 0) { + if (targets) h->free(h, targets, sizeof(*targets) * tcap); + h->free(h, stub_map, sizeof(*stub_map) * map_size); + return; + } + for (i = 0; i < ntarget; ++i) stub_map[targets[i]] = LINK_SYM_NONE; + + page = link_layout_page_size(l); + for (i = 0; i < img->nsegments; ++i) { + u64 end = img->segments[i].vaddr + img->segments[i].mem_size; + if (end > base_vaddr) base_vaddr = end; + } + base_vaddr = ALIGN_UP(base_vaddr, (u64)page); + stubs_vaddr = base_vaddr; + stubs_size = (u64)ntarget * (u64)arch->iplt_stub_size; + slots_vaddr = ALIGN_UP(stubs_vaddr + stubs_size, (u64)page); + slots_size = (u64)ntarget * 8u; + + seg_base = link_iplt_alloc_segments(img, 2u); + stubs_seg_idx = seg_base + 0u; + slots_seg_idx = seg_base + 1u; + + stubs_seg = &img->segments[stubs_seg_idx]; + memset(stubs_seg, 0, sizeof(*stubs_seg)); + stubs_seg->id = (LinkSegmentId)(stubs_seg_idx + 1u); + stubs_seg->flags = SF_ALLOC | SF_EXEC; + stubs_seg->file_offset = stubs_vaddr; + stubs_seg->vaddr = stubs_vaddr; + stubs_seg->file_size = stubs_size; + stubs_seg->mem_size = stubs_size; + stubs_seg->align = (u32)page; + stubs_seg->nsections = 1; + img->segment_bytes[stubs_seg_idx] = (u8*)h->alloc(h, (size_t)stubs_size, 16); + img->segment_bytes_cap[stubs_seg_idx] = (size_t)stubs_size; + if (!img->segment_bytes[stubs_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on jit stubs bytes"); + memset(img->segment_bytes[stubs_seg_idx], 0, (size_t)stubs_size); + + slots_seg = &img->segments[slots_seg_idx]; + 
memset(slots_seg, 0, sizeof(*slots_seg)); + slots_seg->id = (LinkSegmentId)(slots_seg_idx + 1u); + slots_seg->flags = SF_ALLOC | SF_WRITE; + slots_seg->file_offset = slots_vaddr; + slots_seg->vaddr = slots_vaddr; + slots_seg->file_size = slots_size; + slots_seg->mem_size = slots_size; + slots_seg->align = (u32)page; + slots_seg->nsections = 1; + img->segment_bytes[slots_seg_idx] = (u8*)h->alloc(h, (size_t)slots_size, 16); + img->segment_bytes_cap[slots_seg_idx] = (size_t)slots_size; + if (!img->segment_bytes[slots_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on jit stub slots bytes"); + memset(img->segment_bytes[slots_seg_idx], 0, (size_t)slots_size); + img->nsegments += 2u; + + sec_base = link_iplt_alloc_sections(img, 2u); + stubs_sec = &img->sections[sec_base + 0u]; + memset(stubs_sec, 0, sizeof(*stubs_sec)); + stubs_sec->id = (LinkSectionId)(sec_base + 0u + 1u); + stubs_sec->input_id = LINK_INPUT_NONE; + stubs_sec->obj_section_id = OBJ_SEC_NONE; + stubs_sec->segment_id = stubs_seg->id; + stubs_sec->input_offset = 0; + stubs_sec->file_offset = stubs_vaddr; + stubs_sec->vaddr = stubs_vaddr; + stubs_sec->size = stubs_size; + stubs_sec->flags = SF_ALLOC | SF_EXEC; + stubs_sec->align = 4; + stubs_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_stubs"); + stubs_sec->sem = SSEM_PROGBITS; + + slots_sec = &img->sections[sec_base + 1u]; + memset(slots_sec, 0, sizeof(*slots_sec)); + slots_sec->id = (LinkSectionId)(sec_base + 1u + 1u); + slots_sec->input_id = LINK_INPUT_NONE; + slots_sec->obj_section_id = OBJ_SEC_NONE; + slots_sec->segment_id = slots_seg->id; + slots_sec->input_offset = 0; + slots_sec->file_offset = slots_vaddr; + slots_sec->vaddr = slots_vaddr; + slots_sec->size = slots_size; + slots_sec->flags = SF_ALLOC | SF_WRITE; + slots_sec->align = 8; + slots_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_slots"); + slots_sec->sem = SSEM_PROGBITS; + img->nsections += 2u; + + stubs_bytes = img->segment_bytes[stubs_seg_idx]; + for (i 
= 0; i < ntarget; ++i) { + LinkSymId orig = targets[i]; + LinkSymbol* orig_sym = LinkSyms_at(&img->syms, orig - 1); + u64 stub_vaddr = stubs_vaddr + (u64)i * (u64)arch->iplt_stub_size; + u64 slot_vaddr = slots_vaddr + (u64)i * 8u; + LinkSymbol slot_rec, resolver_rec, stub_rec; + LinkSymId slot_id, resolver_id, stub_id; + LinkArchIPltReloc stub_relocs[2]; + u32 nstub_relocs; + LinkRelocApply rrec; + u8* stub_dst = stubs_bytes + (size_t)i * (size_t)arch->iplt_stub_size; + u32 ri; + + nstub_relocs = + arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, stub_relocs); + + memset(&slot_rec, 0, sizeof(slot_rec)); + slot_rec.kind = SK_OBJ; + slot_rec.bind = SB_LOCAL; + slot_rec.defined = 1; + slot_rec.section_id = slots_sec->id; + slot_rec.vaddr = slot_vaddr; + slot_rec.size = 8; + slot_id = link_append_symbol(img, &slot_rec); + + memset(&resolver_rec, 0, sizeof(resolver_rec)); + resolver_rec.kind = SK_ABS; + resolver_rec.bind = SB_LOCAL; + resolver_rec.defined = 1; + resolver_rec.vaddr = orig_sym->vaddr; + resolver_id = link_append_symbol(img, &resolver_rec); + + memset(&stub_rec, 0, sizeof(stub_rec)); + stub_rec.kind = SK_FUNC; + stub_rec.bind = SB_LOCAL; + stub_rec.defined = 1; + stub_rec.section_id = stubs_sec->id; + stub_rec.vaddr = stub_vaddr; + stub_rec.size = arch->iplt_stub_size; + stub_id = link_append_symbol(img, &stub_rec); + stub_map[orig] = stub_id; + + for (ri = 0; ri < nstub_relocs; ++ri) { + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = stubs_sec->id; + rrec.offset = (u32)(i * arch->iplt_stub_size) + + stub_relocs[ri].offset_in_stub; + rrec.width = stub_relocs[ri].width; + rrec.write_vaddr = stub_vaddr + stub_relocs[ri].offset_in_stub; + rrec.write_file_offset = rrec.write_vaddr; + rrec.kind = stub_relocs[ri].kind; + rrec.target = slot_id; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + } + + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + 
rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = slots_sec->id; + rrec.offset = (u32)(i * 8u); + rrec.width = 8; + rrec.write_vaddr = slot_vaddr; + rrec.write_file_offset = slot_vaddr; + rrec.kind = R_ABS64; + rrec.target = resolver_id; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + } + + if (targets) h->free(h, targets, sizeof(*targets) * tcap); + *stub_map_out = stub_map; +} + +/* ---- pass 3c: GOT layout ---- */ + +void link_layout_got(Linker* l, LinkImage* img, u32 map_size, + LinkSymId** got_map_out) { + Heap* h = img->heap; + LinkSymId* got_map; + LinkSymId* slot_targets = NULL; + u32 slot_cap = 0; + u32 nslot = 0; + u32 ii, j, k; + u64 page; + u64 base_vaddr = 0; + u64 got_size; + LinkSegment* gotseg; + LinkSection* gotsec; + u32 gotseg_idx; + u32 si; + + *got_map_out = NULL; + + got_map = (LinkSymId*)h->alloc(h, sizeof(*got_map) * map_size, + _Alignof(LinkSymId)); + if (!got_map) compiler_panic(img->c, no_loc(), "link: oom on got map"); + memset(got_map, 0, sizeof(*got_map) * map_size); + + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; + u32 total = obj_reloc_total(ob); + if (!total) continue; + for (k = 0; k < total; ++k) { + const Reloc* r = obj_reloc_at(ob, k); + const Section* s = obj_section_get(ob, r->section_id); + LinkSymId target; + if (!s || !link_section_kept(s)) continue; + if (m->section[r->section_id] == LINK_SEC_NONE) continue; + if (!reloc_uses_got(r->kind)) continue; + if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; + target = m->sym[r->sym]; + if (target == LINK_SYM_NONE) continue; + if (got_map[target] != LINK_SYM_NONE) continue; + if (VEC_GROW(h, slot_targets, slot_cap, nslot + 1u)) + compiler_panic(img->c, no_loc(), "link: oom on got slot list"); + slot_targets[nslot] = target; + got_map[target] = (LinkSymId)(nslot + 1u); + nslot++; + } + } + + if (nslot == 0) { + if (slot_targets) + h->free(h, 
slot_targets, sizeof(*slot_targets) * slot_cap); + h->free(h, got_map, sizeof(*got_map) * map_size); + return; + } + + for (si = 0; si < nslot; ++si) got_map[slot_targets[si]] = LINK_SYM_NONE; + + page = link_layout_page_size(l); + for (j = 0; j < img->nsegments; ++j) { + u64 end = img->segments[j].vaddr + img->segments[j].mem_size; + if (end > base_vaddr) base_vaddr = end; + } + base_vaddr = ALIGN_UP(base_vaddr, (u64)(page)); + got_size = (u64)nslot * 8u; + + { + u32 new_nseg = img->nsegments + 1u; + LinkSegment* nsegs = (LinkSegment*)h->realloc( + h, img->segments, sizeof(*img->segments) * img->nsegments, + sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); + u8** nsbufs = (u8**)h->realloc( + h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, + sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); + size_t* nscaps = (size_t*)h->realloc( + h, img->segment_bytes_cap, + sizeof(*img->segment_bytes_cap) * img->nsegments, + sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); + if (!nsegs || !nsbufs || !nscaps) + compiler_panic(img->c, no_loc(), "link: oom on got segment"); + img->segments = nsegs; + img->segment_bytes = nsbufs; + img->segment_bytes_cap = nscaps; + } + + gotseg_idx = img->nsegments; + gotseg = &img->segments[gotseg_idx]; + memset(gotseg, 0, sizeof(*gotseg)); + gotseg->id = (LinkSegmentId)(gotseg_idx + 1u); + gotseg->flags = SF_ALLOC | SF_WRITE; + gotseg->file_offset = base_vaddr; + gotseg->vaddr = base_vaddr; + gotseg->file_size = got_size; + gotseg->mem_size = got_size; + gotseg->align = (u32)page; + gotseg->nsections = 1; + + img->segment_bytes[gotseg_idx] = (u8*)h->alloc(h, (size_t)got_size, 16); + img->segment_bytes_cap[gotseg_idx] = (size_t)got_size; + if (!img->segment_bytes[gotseg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on got bytes"); + memset(img->segment_bytes[gotseg_idx], 0, (size_t)got_size); + img->nsegments++; + + { + u32 new_nsec = img->nsections + 1u; + LinkSection* nsections = 
(LinkSection*)h->realloc( + h, img->sections, sizeof(*img->sections) * img->nsections, + sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); + if (!nsections) + compiler_panic(img->c, no_loc(), "link: oom on got section"); + img->sections = nsections; + } + gotsec = &img->sections[img->nsections]; + memset(gotsec, 0, sizeof(*gotsec)); + gotsec->id = (LinkSectionId)(img->nsections + 1u); + gotsec->input_id = LINK_INPUT_NONE; + gotsec->obj_section_id = OBJ_SEC_NONE; + gotsec->segment_id = gotseg->id; + gotsec->input_offset = 0; + gotsec->file_offset = base_vaddr; + gotsec->vaddr = base_vaddr; + gotsec->size = got_size; + gotsec->flags = SF_ALLOC | SF_WRITE; + gotsec->align = 8; + gotsec->name = pool_intern_cstr(img->c->global, ".got"); + gotsec->sem = SSEM_PROGBITS; + img->nsections++; + + for (si = 0; si < nslot; ++si) { + LinkSymId orig = slot_targets[si]; + u64 slot_vaddr = base_vaddr + (u64)si * 8u; + LinkSymbol sym_rec; + LinkRelocApply rrec; + LinkSymId slot_id; + + memset(&sym_rec, 0, sizeof(sym_rec)); + sym_rec.name = 0; + sym_rec.kind = SK_OBJ; + sym_rec.bind = SB_LOCAL; + sym_rec.defined = 1; + sym_rec.section_id = gotsec->id; + sym_rec.vaddr = slot_vaddr; + sym_rec.size = 8; + slot_id = link_append_symbol(img, &sym_rec); + got_map[orig] = slot_id; + + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = gotsec->id; + rrec.offset = (u32)(si * 8u); + rrec.width = 8; + rrec.write_vaddr = slot_vaddr; + rrec.write_file_offset = base_vaddr + (u64)si * 8u; + rrec.kind = R_ABS64; + rrec.target = orig; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + } + + if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap); + + *got_map_out = got_map; +} + +/* ---- pass 3d: STT_GNU_IFUNC trampoline ---- */ + +void link_layout_iplt(Linker* l, LinkImage* img) { + Heap* h = img->heap; + u32 i; + u32 nifunc = 0; + u64 page; + u64 base_vaddr = 0; + u64 iplt_vaddr, 
igot_vaddr, pairs_vaddr; + u64 iplt_size, igot_size, pairs_size; + u64 init_vaddr = 0, init_size = 0; + u32 iplt_seg_idx, igot_seg_idx, pairs_seg_idx; + u32 init_seg_idx = 0; + u32 seg_base, sec_base; + LinkSegment* iplt_seg; + LinkSegment* igot_seg; + LinkSegment* pairs_seg; + LinkSegment* init_seg = NULL; + LinkSection* iplt_sec; + LinkSection* igot_sec; + LinkSection* pairs_sec; + LinkSection* init_sec = NULL; + u8* iplt_bytes; + u32 slot_idx; + int emit_init_array = l->emit_static_exe; + LinkSymId ifunc_init_sym = LINK_SYM_NONE; + Sym ifunc_init_name = 0; + Sym pairs_section_name; + Sym init_section_name; + const LinkArchDesc* arch = link_arch_desc_for(l->c); + if (!arch) + compiler_panic(img->c, no_loc(), + "link: layout_iplt: no arch descriptor for arch %u", + (u32)l->c->target.arch); + + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + const LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->kind != SK_IFUNC || !s->defined) continue; + if (s->name != 0) { + LinkSymId canonical = symhash_get(&img->globals, s->name); + if (canonical != LINK_SYM_NONE && canonical != s->id) continue; + } + ++nifunc; + } + if (nifunc == 0) return; + + page = link_layout_page_size(l); + + for (i = 0; i < img->nsegments; ++i) { + u64 end = img->segments[i].vaddr + img->segments[i].mem_size; + if (end > base_vaddr) base_vaddr = end; + } + + base_vaddr = ALIGN_UP(base_vaddr, (u64)(page)); + iplt_vaddr = base_vaddr; + iplt_size = (u64)nifunc * (u64)arch->iplt_stub_size; + igot_vaddr = ALIGN_UP(iplt_vaddr + iplt_size, (u64)(page)); + igot_size = (u64)nifunc * 8u; + pairs_vaddr = ALIGN_UP(igot_vaddr + igot_size, (u64)(page)); + pairs_size = (u64)nifunc * 16u; + + if (emit_init_array) { + ifunc_init_name = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); + ifunc_init_sym = symhash_get(&img->globals, ifunc_init_name); + if (ifunc_init_sym == LINK_SYM_NONE || + !LinkSyms_at(&img->syms, ifunc_init_sym - 1)->defined) { + compiler_panic(img->c, no_loc(), + "link: STT_GNU_IFUNC 
requires '__cfree_ifunc_init' " + "to be defined (link in libcfree_rt.a or provide " + "your own implementation)"); + } + init_vaddr = ALIGN_UP(pairs_vaddr + pairs_size, (u64)(page)); + init_size = 8u; + } + + { + u32 nseg = emit_init_array ? 4u : 3u; + seg_base = link_iplt_alloc_segments(img, nseg); + } + iplt_seg_idx = seg_base + 0u; + igot_seg_idx = seg_base + 1u; + pairs_seg_idx = seg_base + 2u; + if (emit_init_array) init_seg_idx = seg_base + 3u; + + iplt_seg = &img->segments[iplt_seg_idx]; + memset(iplt_seg, 0, sizeof(*iplt_seg)); + iplt_seg->id = (LinkSegmentId)(iplt_seg_idx + 1u); + iplt_seg->flags = SF_ALLOC | SF_EXEC; + iplt_seg->file_offset = iplt_vaddr; + iplt_seg->vaddr = iplt_vaddr; + iplt_seg->file_size = iplt_size; + iplt_seg->mem_size = iplt_size; + iplt_seg->align = (u32)page; + iplt_seg->nsections = 1; + img->segment_bytes[iplt_seg_idx] = (u8*)h->alloc(h, (size_t)iplt_size, 16); + img->segment_bytes_cap[iplt_seg_idx] = (size_t)iplt_size; + if (!img->segment_bytes[iplt_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on iplt bytes"); + memset(img->segment_bytes[iplt_seg_idx], 0, (size_t)iplt_size); + + igot_seg = &img->segments[igot_seg_idx]; + memset(igot_seg, 0, sizeof(*igot_seg)); + igot_seg->id = (LinkSegmentId)(igot_seg_idx + 1u); + igot_seg->flags = SF_ALLOC | SF_WRITE; + igot_seg->file_offset = igot_vaddr; + igot_seg->vaddr = igot_vaddr; + igot_seg->file_size = igot_size; + igot_seg->mem_size = igot_size; + igot_seg->align = (u32)page; + igot_seg->nsections = 1; + img->segment_bytes[igot_seg_idx] = (u8*)h->alloc(h, (size_t)igot_size, 16); + img->segment_bytes_cap[igot_seg_idx] = (size_t)igot_size; + if (!img->segment_bytes[igot_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on igot bytes"); + memset(img->segment_bytes[igot_seg_idx], 0, (size_t)igot_size); + + pairs_seg = &img->segments[pairs_seg_idx]; + memset(pairs_seg, 0, sizeof(*pairs_seg)); + pairs_seg->id = (LinkSegmentId)(pairs_seg_idx + 1u); + pairs_seg->flags = 
SF_ALLOC | SF_WRITE; + pairs_seg->file_offset = pairs_vaddr; + pairs_seg->vaddr = pairs_vaddr; + pairs_seg->file_size = pairs_size; + pairs_seg->mem_size = pairs_size; + pairs_seg->align = (u32)page; + pairs_seg->nsections = 1; + img->segment_bytes[pairs_seg_idx] = (u8*)h->alloc(h, (size_t)pairs_size, 16); + img->segment_bytes_cap[pairs_seg_idx] = (size_t)pairs_size; + if (!img->segment_bytes[pairs_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on iplt.pairs bytes"); + memset(img->segment_bytes[pairs_seg_idx], 0, (size_t)pairs_size); + + if (emit_init_array) { + init_seg = &img->segments[init_seg_idx]; + memset(init_seg, 0, sizeof(*init_seg)); + init_seg->id = (LinkSegmentId)(init_seg_idx + 1u); + init_seg->flags = SF_ALLOC | SF_WRITE; + init_seg->file_offset = init_vaddr; + init_seg->vaddr = init_vaddr; + init_seg->file_size = init_size; + init_seg->mem_size = init_size; + init_seg->align = (u32)page; + init_seg->nsections = 1; + img->segment_bytes[init_seg_idx] = (u8*)h->alloc(h, (size_t)init_size, 16); + img->segment_bytes_cap[init_seg_idx] = (size_t)init_size; + if (!img->segment_bytes[init_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on iplt init_array bytes"); + memset(img->segment_bytes[init_seg_idx], 0, (size_t)init_size); + } + img->nsegments += emit_init_array ? 4u : 3u; + + { + u32 nsec = emit_init_array ? 
4u : 3u; + sec_base = link_iplt_alloc_sections(img, nsec); + } + + pairs_section_name = pool_intern_cstr(l->c->global, ".iplt.pairs"); + init_section_name = obj_secname_preinit_array(l->c); + + iplt_sec = &img->sections[sec_base + 0u]; + memset(iplt_sec, 0, sizeof(*iplt_sec)); + iplt_sec->id = (LinkSectionId)(sec_base + 0u + 1u); + iplt_sec->input_id = LINK_INPUT_NONE; + iplt_sec->obj_section_id = OBJ_SEC_NONE; + iplt_sec->segment_id = iplt_seg->id; + iplt_sec->input_offset = 0; + iplt_sec->file_offset = iplt_vaddr; + iplt_sec->vaddr = iplt_vaddr; + iplt_sec->size = iplt_size; + iplt_sec->flags = SF_ALLOC | SF_EXEC; + iplt_sec->align = 4; + iplt_sec->name = pool_intern_cstr(l->c->global, ".iplt"); + iplt_sec->sem = SSEM_PROGBITS; + + igot_sec = &img->sections[sec_base + 1u]; + memset(igot_sec, 0, sizeof(*igot_sec)); + igot_sec->id = (LinkSectionId)(sec_base + 1u + 1u); + igot_sec->input_id = LINK_INPUT_NONE; + igot_sec->obj_section_id = OBJ_SEC_NONE; + igot_sec->segment_id = igot_seg->id; + igot_sec->input_offset = 0; + igot_sec->file_offset = igot_vaddr; + igot_sec->vaddr = igot_vaddr; + igot_sec->size = igot_size; + igot_sec->flags = SF_ALLOC | SF_WRITE; + igot_sec->align = 8; + igot_sec->name = pool_intern_cstr(l->c->global, ".igot.plt"); + igot_sec->sem = SSEM_PROGBITS; + + pairs_sec = &img->sections[sec_base + 2u]; + memset(pairs_sec, 0, sizeof(*pairs_sec)); + pairs_sec->id = (LinkSectionId)(sec_base + 2u + 1u); + pairs_sec->input_id = LINK_INPUT_NONE; + pairs_sec->obj_section_id = OBJ_SEC_NONE; + pairs_sec->segment_id = pairs_seg->id; + pairs_sec->input_offset = 0; + pairs_sec->file_offset = pairs_vaddr; + pairs_sec->vaddr = pairs_vaddr; + pairs_sec->size = pairs_size; + pairs_sec->flags = SF_ALLOC | SF_WRITE; + pairs_sec->align = 8; + pairs_sec->name = pairs_section_name; + pairs_sec->sem = SSEM_PROGBITS; + + if (emit_init_array) { + init_sec = &img->sections[sec_base + 3u]; + memset(init_sec, 0, sizeof(*init_sec)); + init_sec->id = (LinkSectionId)(sec_base 
+ 3u + 1u); + init_sec->input_id = LINK_INPUT_NONE; + init_sec->obj_section_id = OBJ_SEC_NONE; + init_sec->segment_id = init_seg->id; + init_sec->input_offset = 0; + init_sec->file_offset = init_vaddr; + init_sec->vaddr = init_vaddr; + init_sec->size = init_size; + init_sec->flags = SF_ALLOC | SF_WRITE; + init_sec->align = 8; + init_sec->name = init_section_name; + init_sec->sem = SSEM_PREINIT_ARRAY; + } + img->nsections += emit_init_array ? 4u : 3u; + + link_emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr); + link_emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size); + + img->iplt_pairs = (u64*)h->alloc( + h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64)); + if (!img->iplt_pairs) + compiler_panic(img->c, no_loc(), "link: oom on iplt pairs"); + img->niplt = nifunc; + + iplt_bytes = img->segment_bytes[iplt_seg_idx]; + slot_idx = 0; + + /* One stub / slot / pair per canonical, defined SK_IFUNC symbol, in + * symbol-table order (same filter as the counting loop above). + * NOTE(review): link_append_symbol is called inside this loop while `s` is + * still held and written through afterwards; confirm that LinkSyms_at + * pointers stay valid across appends. */ + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + u64 stub_vaddr; + u64 slot_vaddr; + u64 pair_vaddr; + u64 resolver_vaddr; + LinkSectionId resolver_section; + u64 resolver_value; + LinkSymbol slot_rec; + LinkSymbol resolver_rec; + LinkSymId slot_id; + LinkSymId resolver_id; + LinkRelocApply rrec; + u8* stub_dst; + + if (s->kind != SK_IFUNC || !s->defined) continue; + if (s->name != 0) { + LinkSymId canonical = symhash_get(&img->globals, s->name); + if (canonical != LINK_SYM_NONE && canonical != s->id) continue; + } + + /* The stub stride must be the arch's stub size (not a fixed 12 bytes) so + * that the address agrees with iplt_size, stub_dst, s->value and the + * reloc offsets, all of which are derived from arch->iplt_stub_size. */ + stub_vaddr = iplt_vaddr + (u64)slot_idx * (u64)arch->iplt_stub_size; + slot_vaddr = igot_vaddr + (u64)slot_idx * 8u; + pair_vaddr = pairs_vaddr + (u64)slot_idx * 16u; + resolver_vaddr = s->vaddr; + resolver_section = s->section_id; + resolver_value = s->value; + + img->iplt_pairs[2u * slot_idx + 0] = resolver_vaddr; + img->iplt_pairs[2u * slot_idx + 1] = slot_vaddr; + + stub_dst = iplt_bytes + (size_t)slot_idx * (size_t)arch->iplt_stub_size; + LinkArchIPltReloc iplt_relocs[2]; + u32 niplt_relocs = + arch->emit_iplt_stub(stub_dst, stub_vaddr, 
slot_vaddr, iplt_relocs); + + memset(&slot_rec, 0, sizeof(slot_rec)); + slot_rec.name = 0; + slot_rec.kind = SK_OBJ; + slot_rec.bind = SB_LOCAL; + slot_rec.defined = 1; + slot_rec.section_id = igot_sec->id; + slot_rec.vaddr = slot_vaddr; + slot_rec.size = 8; + slot_id = link_append_symbol(img, &slot_rec); + + memset(&resolver_rec, 0, sizeof(resolver_rec)); + resolver_rec.name = 0; + resolver_rec.kind = SK_FUNC; + resolver_rec.bind = SB_LOCAL; + resolver_rec.defined = 1; + resolver_rec.section_id = resolver_section; + resolver_rec.value = resolver_value; + resolver_rec.vaddr = resolver_vaddr; + resolver_rec.size = 0; + resolver_id = link_append_symbol(img, &resolver_rec); + + { + u32 ri; + for (ri = 0; ri < niplt_relocs; ++ri) { + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = iplt_sec->id; + rrec.offset = (u32)(slot_idx * arch->iplt_stub_size) + + iplt_relocs[ri].offset_in_stub; + rrec.width = iplt_relocs[ri].width; + rrec.write_vaddr = stub_vaddr + iplt_relocs[ri].offset_in_stub; + rrec.write_file_offset = rrec.write_vaddr; + rrec.kind = iplt_relocs[ri].kind; + rrec.target = slot_id; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + } + } + + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = pairs_sec->id; + rrec.offset = (u32)(slot_idx * 16u); + rrec.width = 8; + rrec.write_vaddr = pair_vaddr; + rrec.write_file_offset = pair_vaddr; + rrec.kind = R_ABS64; + rrec.target = resolver_id; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = pairs_sec->id; + rrec.offset = (u32)(slot_idx * 16u + 8u); + rrec.width = 8; + rrec.write_vaddr = pair_vaddr + 8u; + rrec.write_file_offset = pair_vaddr + 8u; + rrec.kind = R_ABS64; + rrec.target = slot_id; + rrec.addend = 0; + 
*link_append_reloc_slot(img) = rrec; + + s->kind = SK_FUNC; + s->section_id = iplt_sec->id; + s->value = (u64)slot_idx * (u64)arch->iplt_stub_size; + s->vaddr = stub_vaddr; + s->size = arch->iplt_stub_size; + + ++slot_idx; + } + + if (emit_init_array) { + LinkRelocApply rrec; + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = init_sec->id; + rrec.offset = 0; + rrec.width = 8; + rrec.write_vaddr = init_vaddr; + rrec.write_file_offset = init_vaddr; + rrec.kind = R_ABS64; + rrec.target = ifunc_init_sym; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; + } + + /* Named symbols whose canonical global now lives in the .iplt section are + * re-pointed at that global's stub (section, value, vaddr, kind, size). */ + { + u32 n = LinkSyms_count(&img->syms); + for (i = 0; i < n; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + LinkSymId canonical; + LinkSymbol* def; + if (s->name == 0) continue; + canonical = symhash_get(&img->globals, s->name); + if (canonical == LINK_SYM_NONE || canonical == s->id) continue; + def = LinkSyms_at(&img->syms, canonical - 1); + if (def->section_id != iplt_sec->id) continue; + s->section_id = def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->size = def->size; + s->defined = 1; + } + } +} + +/* ---- entry symbol ---- */ + +/* Look l->entry_name up in img->globals and record the hit in img->entry_sym. + * No-op when no entry name is set; panics when the name is absent from the + * table or the symbol found is not defined. */ +void link_resolve_entry(Linker* l, LinkImage* img) { + LinkSymId id; + LinkSymbol* s; + if (l->entry_name == 0) return; + id = symhash_get(&img->globals, l->entry_name); + if (id == LINK_SYM_NONE) { + size_t namelen; + const char* nm = pool_str(l->c->global, l->entry_name, &namelen); + compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' not defined", + (int)namelen, nm); + } + s = LinkSyms_at(&img->syms, id - 1); + if (!s->defined) { + size_t namelen; + const char* nm = pool_str(l->c->global, l->entry_name, &namelen); + compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' is undefined", + (int)namelen, nm); + } + img->entry_sym = id; +} + +/* ---- pass 4: emit reloc records ---- */ + +/* Turn every input relocation that sits in a kept, mapped section into a + * LinkRelocApply record. got_map and stub_map may each be NULL; when given, + * GOT-using relocs are redirected to their slot symbol (a missing slot is a + * panic) and CALL26/JUMP26 relocs to their stub symbol if one exists. + * R_RV_RELAX, R_RV_TPREL_ADD and R_RV_ALIGN relocs are skipped. */ +void link_emit_relocations(Linker* l, LinkImage* 
img, + const LinkSymId* got_map, + const LinkSymId* stub_map) { + u32 ii; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; + u32 total = obj_reloc_total(ob); + u32 k; + if (total == 0) continue; + for (k = 0; k < total; ++k) { + const Reloc* r = obj_reloc_at(ob, k); + const Section* s = obj_section_get(ob, r->section_id); + LinkSymId target; + LinkSection* ls; + LinkRelocApply rec; + if (!s || !link_section_kept(s)) continue; + if (m->section[r->section_id] == LINK_SEC_NONE) continue; + if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD || + r->kind == R_RV_ALIGN) + continue; + if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) + compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol"); + target = m->sym[r->sym]; + if (target == LINK_SYM_NONE) + compiler_panic(l->c, no_loc(), + "link: reloc references unmapped symbol"); + if (got_map && reloc_uses_got(r->kind)) { + LinkSymId slot = got_map[target]; + if (slot == LINK_SYM_NONE) + compiler_panic(l->c, no_loc(), "link: GOT slot missing for symbol"); + target = slot; + } + if (stub_map && (r->kind == R_AARCH64_CALL26 || + r->kind == R_AARCH64_JUMP26)) { + LinkSymId stub = stub_map[target]; + if (stub != LINK_SYM_NONE) target = stub; + } + ls = &img->sections[m->section[r->section_id] - 1]; + memset(&rec, 0, sizeof(rec)); + rec.input_id = LinkInputs_at(&l->inputs, ii)->id; + rec.section_id = r->section_id; + rec.link_section_id = ls->id; + rec.offset = r->offset; + rec.width = reloc_width((RelocKind)r->kind); + rec.write_vaddr = ls->vaddr + r->offset; + rec.write_file_offset = ls->file_offset + r->offset; + rec.kind = (RelocKind)r->kind; + rec.target = target; + rec.addend = r->addend; + if (rec.width == 0) + compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u", + (unsigned)r->kind); + *link_append_reloc_slot(img) = rec; + } + } +} diff --git a/src/link/link_resolve.c 
b/src/link/link_resolve.c @@ -0,0 +1,597 @@ +/* link_resolve.c — archive ingest, symbol resolution, --gc-sections liveness. + * + * Phase 1 of the link pipeline: + * link_ingest_archives — pull archive members into l->inputs + * link_resolve_symbols — register every ObjSym, build img->globals + * link_resolve_undefs — satisfy remaining undefs (globals/DSOs/resolver) + * link_gc_compute — mark live sections (or mark all live if disabled) + * link_gc_drop_dead_globals — clear `defined` on syms in dropped sections + */ + +#include <cfree.h> +#include <string.h> + +#include "core/buf.h" +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link.h" +#include "link/link_arch.h" +#include "link/link_internal.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- per-input symbol/section maps ---- */ + +static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) { + Heap* h = img->heap; + m->nsym = nsym; + m->sym = (LinkSymId*)h->alloc(h, sizeof(*m->sym) * nsym, _Alignof(LinkSymId)); + if (!m->sym) + compiler_panic(img->c, no_loc(), "link: oom on input symbol map"); + memset(m->sym, 0, sizeof(*m->sym) * nsym); + m->nsection = nsection; + m->section = (LinkSectionId*)h->alloc(h, sizeof(*m->section) * nsection, + _Alignof(LinkSectionId)); + if (!m->section) + compiler_panic(img->c, no_loc(), "link: oom on input section map"); + memset(m->section, 0, sizeof(*m->section) * nsection); +} + +/* ---- pass 1: collect symbols ---- */ + +/* Defined-symbol replacement policy: a stronger binding wins. 
*/ +static int bind_strength(u8 bind) { + switch (bind) { + case SB_GLOBAL: + return 3; + case SB_WEAK: + return 2; + case SB_LOCAL: + return 1; + default: + return 0; + } +} + +void link_resolve_symbols(Linker* l, LinkImage* img) { + u32 ii; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + LinkInput* in = LinkInputs_at(&l->inputs, ii); + ObjBuilder* ob = in->obj; + InputMap* m = &img->input_maps[ii]; + u32 nsym = obj_section_count(ob); + (void)nsym; + ObjSymIter* it; + ObjSymEntry e; + + if (in->kind == LINK_INPUT_DSO_BYTES) continue; + + u32 nsyms_in_input = 0; + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) ++nsyms_in_input; + obj_symiter_free(it); + + map_alloc(img, m, nsyms_in_input + 1u /* +1 for id-0 slot */, + obj_section_count(ob)); + + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + LinkSymbol rec; + LinkSymId existing; + { + int is_logical_undef = (s->section_id == OBJ_SEC_NONE) && + (s->kind != SK_ABS) && (s->kind != SK_COMMON); + if (is_logical_undef && !s->referenced && + (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { + continue; + } + } + int is_def = (s->kind != SK_UNDEF) && + (s->kind == SK_ABS || s->kind == SK_COMMON || + s->kind == SK_FILE || + s->section_id != OBJ_SEC_NONE); + + memset(&rec, 0, sizeof(rec)); + rec.name = s->name; + rec.input_id = in->id; + rec.obj_sym = e.id; + rec.section_id = LINK_SEC_NONE; + rec.value = s->value; + rec.size = s->size; + rec.common_align = (s->kind == SK_COMMON) ? 
(u32)s->common_align : 0u; + rec.bind = (u8)s->bind; + rec.kind = (u8)s->kind; + rec.defined = (u8)is_def; + rec.vaddr = 0; + + if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && + s->name != 0) { + LinkSymId fresh = (LinkSymId)(LinkSyms_count(&img->syms) + 1u); + if (symhash_insert(&img->globals, s->name, fresh, &existing)) { + m->sym[e.id] = link_append_symbol(img, &rec); + } else { + LinkSymbol* prev = LinkSyms_at(&img->syms, existing - 1); + int new_strength = bind_strength((u8)s->bind); + int old_strength = bind_strength(prev->bind); + if (prev->kind == SK_COMMON && rec.kind == SK_COMMON) { + if (rec.size > prev->size) { + u32 new_align = (rec.common_align > prev->common_align) + ? rec.common_align + : prev->common_align; + rec.id = existing; + rec.common_align = new_align; + *prev = rec; + } + m->sym[e.id] = existing; + } else if (rec.kind == SK_COMMON) { + m->sym[e.id] = existing; + } else if (prev->kind == SK_COMMON) { + rec.id = existing; + *prev = rec; + m->sym[e.id] = existing; + } else if (new_strength > old_strength) { + rec.id = existing; + *prev = rec; + m->sym[e.id] = existing; + } else if (new_strength == old_strength && + new_strength == bind_strength(SB_GLOBAL)) { + size_t namelen; + const char* nm = pool_str(l->c->global, s->name, &namelen); + compiler_panic(l->c, no_loc(), + "link: duplicate definition of " + "global symbol '%.*s'", + (int)namelen, nm); + } else { + m->sym[e.id] = existing; + } + } + } else { + m->sym[e.id] = link_append_symbol(img, &rec); + } + } + obj_symiter_free(it); + } +} + +/* Search DSO inputs for an exported symbol matching `name`. 
*/ +/* Returns the id of the first LINK_INPUT_DSO_BYTES input that carries a + * non-local, non-SK_UNDEF symbol called `name`, or LINK_INPUT_NONE when there + * is none (or `name` is 0). */ +static LinkInputId find_dso_export(Linker* l, Sym name) { + u32 ii; + ObjSymIter* it; + ObjSymEntry e; + if (name == 0) return LINK_INPUT_NONE; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + LinkInput* in = LinkInputs_at(&l->inputs, ii); + if (in->kind != LINK_INPUT_DSO_BYTES) continue; + it = obj_symiter_new(in->obj); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->name != name) continue; + if (s->kind == SK_UNDEF) continue; + if (s->bind == SB_LOCAL) continue; + obj_symiter_free(it); + return in->id; + } + obj_symiter_free(it); + } + return LINK_INPUT_NONE; +} + +/* Resolve every symbol that is still undefined. Tried in order: + * 1. a defined global of the same name in img->globals, + * 2. an export of a DSO input (symbol is marked imported), + * 3. the user-supplied l->resolver callback (symbol becomes SK_ABS), + * 4. weak binding, which resolves to SK_ABS at address 0. + * Anything left over is reported through compiler_panic as an undefined + * reference. */ +void link_resolve_undefs(Linker* l, LinkImage* img) { + u32 i; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->defined) continue; + if (s->name != 0) { + LinkSymId hit = symhash_get(&img->globals, s->name); + if (hit != LINK_SYM_NONE && hit != s->id) { + LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); + if (def->defined) { + s->section_id = def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->bind = def->bind; + s->defined = 1; + continue; + } + } + } + if (s->name != 0) { + LinkInputId dso = find_dso_export(l, s->name); + if (dso != LINK_INPUT_NONE) { + s->imported = 1; + s->dso_input_id = dso; + continue; + } + } + if (l->resolver && s->name != 0) { + size_t namelen; + const char* nm = pool_str(l->c->global, s->name, &namelen); + (void)namelen; + void* p = l->resolver(l->resolver_user, nm); + if (p) { + s->kind = SK_ABS; + s->vaddr = (u64)(uintptr_t)p; + s->defined = 1; + continue; + } + } + if (s->bind == SB_WEAK) { + s->kind = SK_ABS; + s->vaddr = 0; + s->defined = 1; + continue; + } + { + size_t namelen; + const char* nm = s->name ? 
pool_str(l->c->global, s->name, &namelen) + : (namelen = 0, ""); + obj_format_demangle_c(l->c, &nm, &namelen); + compiler_panic(l->c, no_loc(), "link: undefined reference to '%.*s'", + (int)namelen, nm); + } + } +} + +/* ---- pass 1b: --gc-sections liveness ---- */ + +/* Worklist entries pack the input index into the high 32 bits and the object + * section id into the low 32 bits of a u64. */ +#define GC_PACK(ii, j) (((u64)(u32)(ii) << 32) | (u32)(j)) +#define GC_II(p) ((u32)((p) >> 32)) +#define GC_J(p) ((ObjSecId)((p) & 0xffffffffu)) + +/* Append the (input index, section id) pair to the worklist. + * NOTE(review): when VEC_GROW reports failure the item is dropped without any + * diagnostic, whereas the other allocation sites in this file call + * compiler_panic. The caller has already set the mark by then, so the + * section would stay live but never be processed from the queue; confirm + * this is intended. */ +static void gc_queue_push(GcQueue* q, Heap* h, u32 ii, ObjSecId j) { + if (VEC_GROW(h, q->items, q->cap, q->n + 1u)) + return; + q->items[q->n++] = GC_PACK(ii, j); +} + +/* Set up the per-input mark arrays: one zero-initialised byte per object + * section of each input, plus the section count of every input in g->nsec. + * An input with no sections still gets a one-byte allocation. Panics on + * allocation failure. */ +void link_gc_live_alloc(GcLive* g, Linker* l, Heap* h) { + u32 ii; + g->ninputs = LinkInputs_count(&l->inputs); + g->marks = + LinkInputs_count(&l->inputs) + ? (u8**)h->alloc(h, sizeof(*g->marks) * LinkInputs_count(&l->inputs), + _Alignof(u8*)) + : NULL; + g->nsec = + LinkInputs_count(&l->inputs) + ? (u32*)h->alloc(h, sizeof(*g->nsec) * LinkInputs_count(&l->inputs), + _Alignof(u32)) + : NULL; + if (LinkInputs_count(&l->inputs) && (!g->marks || !g->nsec)) + compiler_panic(l->c, no_loc(), "link: oom on gc live map"); + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + u32 nsec = obj_section_count(LinkInputs_at(&l->inputs, ii)->obj); + g->nsec[ii] = nsec; + g->marks[ii] = (u8*)h->alloc(h, nsec ? nsec : 1u, 1); + if (!g->marks[ii]) compiler_panic(l->c, no_loc(), "link: oom on gc marks"); + memset(g->marks[ii], 0, nsec); + } +} + +/* Release what link_gc_live_alloc allocated, passing the same sizes back to + * the heap (including the one-byte minimum for empty inputs). */ +void link_gc_live_free(GcLive* g, Heap* h) { + u32 ii; + if (g->marks) { + for (ii = 0; ii < g->ninputs; ++ii) + if (g->marks[ii]) + h->free(h, g->marks[ii], g->nsec[ii] ? 
g->nsec[ii] : 1u); + h->free(h, g->marks, sizeof(*g->marks) * g->ninputs); + } + if (g->nsec) h->free(h, g->nsec, sizeof(*g->nsec) * g->ninputs); +} + +int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j) { + if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return 0; + return g->marks[ii][j]; +} + +static void gc_mark(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjSecId j) { + if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return; + if (g->marks[ii][j]) return; + g->marks[ii][j] = 1; + gc_queue_push(q, h, ii, j); +} + +/* From a LinkSymId, find the (input_idx, obj_sec_id) of its defining section. + * Returns 1 on hit. */ +static int gc_def_site(LinkImage* img, Linker* l, LinkSymId id, u32* out_ii, + ObjSecId* out_sid) { + const LinkSymbol* s; + ObjBuilder* ob; + const ObjSym* osym; + if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return 0; + s = LinkSyms_at(&img->syms, id - 1); + if (!s->defined) { + LinkSymId hit; + if (s->name == 0) return 0; + hit = symhash_get(&img->globals, s->name); + if (hit == LINK_SYM_NONE || hit == s->id) return 0; + return gc_def_site(img, l, hit, out_ii, out_sid); + } + if (s->kind == SK_ABS || s->kind == SK_COMMON) return 0; + if (s->input_id == LINK_INPUT_NONE) return 0; + ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj; + osym = obj_symbol_get(ob, s->obj_sym); + if (!osym || osym->section_id == OBJ_SEC_NONE) return 0; + *out_ii = (u32)(s->input_id - 1u); + *out_sid = osym->section_id; + return 1; +} + +/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier. 
*/ +int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off, + size_t* out_len, int* out_is_start) { + static const char START[] = "__start_"; + static const char STOP[] = "__stop_"; + size_t off, len, i; + int is_start; + if (n > sizeof(START) - 1u && memcmp(s, START, sizeof(START) - 1u) == 0) { + off = sizeof(START) - 1u; + is_start = 1; + } else if (n > sizeof(STOP) - 1u && memcmp(s, STOP, sizeof(STOP) - 1u) == 0) { + off = sizeof(STOP) - 1u; + is_start = 0; + } else { + return 0; + } + len = n - off; + if (len == 0) return 0; + { + char c = s[off]; + if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) + return 0; + } + for (i = 1; i < len; ++i) { + char c = s[off + i]; + if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9'))) + return 0; + } + *out_off = off; + *out_len = len; + if (out_is_start) *out_is_start = is_start; + return 1; +} + +static void gc_promote_by_section_name(Linker* l, GcLive* g, GcQueue* q, + Heap* h, Sym section_name) { + u32 ii, j; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + u32 nsec = obj_section_count(ob); + for (j = 1; j < nsec; ++j) { + const Section* s = obj_section_get(ob, j); + if (!s || !link_section_kept(s)) continue; + if (s->name != section_name) continue; + gc_mark(g, q, h, ii, j); + } + } +} + +void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) { + u32 ii, j, k; + GcQueue q; + Heap* h = img->heap; + + if (!l->gc_sections) { + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + u32 nsec = obj_section_count(ob); + for (j = 1; j < nsec; ++j) { + const Section* s = obj_section_get(ob, j); + if (s && link_section_kept(s)) g->marks[ii][j] = 1; + } + } + return; + } + + memset(&q, 0, sizeof(q)); + + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + u32 nsec = 
obj_section_count(ob); + for (j = 1; j < nsec; ++j) { + const Section* s = obj_section_get(ob, j); + int root; + if (!s || !link_section_kept(s)) continue; + root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY || + s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY; + if (root) gc_mark(g, &q, h, ii, j); + } + } + + if (l->entry_name != 0) { + LinkSymId id = symhash_get(&img->globals, l->entry_name); + u32 tii; + ObjSecId tsid; + if (gc_def_site(img, l, id, &tii, &tsid)) gc_mark(g, &q, h, tii, tsid); + } + + while (q.n > 0) { + u64 v = q.items[--q.n]; + u32 cii = GC_II(v); + ObjSecId cj = GC_J(v); + ObjBuilder* ob = LinkInputs_at(&l->inputs, cii)->obj; + InputMap* m = &img->input_maps[cii]; + u32 total = obj_reloc_total(ob); + (void)obj_section_count; + if (!total) continue; + for (k = 0; k < total; ++k) { + const Reloc* r = obj_reloc_at(ob, k); + LinkSymId target; + const LinkSymbol* tsym; + u32 tii; + ObjSecId tsid; + if (r->section_id != cj) continue; + if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; + target = m->sym[r->sym]; + if (target == LINK_SYM_NONE) continue; + tsym = LinkSyms_at(&img->syms, target - 1); + + if (tsym->name != 0) { + size_t namelen, off, ilen; + const char* nm = pool_str(l->c->global, tsym->name, &namelen); + if (link_gc_split_start_stop(nm, namelen, &off, &ilen, NULL)) { + Sym secname = pool_intern(l->c->global, nm + off, ilen); + gc_promote_by_section_name(l, g, &q, h, secname); + } + } + + if (gc_def_site(img, l, target, &tii, &tsid)) + gc_mark(g, &q, h, tii, tsid); + } + } + + if (q.items) h->free(h, q.items, sizeof(*q.items) * q.cap); +} + +void link_gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) { + u32 i; + if (!l->gc_sections) return; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + ObjBuilder* ob; + const ObjSym* osym; + ObjSecId osid; + if (!s->defined) continue; + if (s->kind == SK_ABS || s->kind == SK_COMMON) continue; + if 
(s->input_id == LINK_INPUT_NONE) continue; + ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj; + osym = obj_symbol_get(ob, s->obj_sym); + if (!osym) continue; + osid = osym->section_id; + if (osid == OBJ_SEC_NONE) continue; + if (link_gc_live_get(g, (u32)(s->input_id - 1u), osid)) continue; + s->defined = 0; + s->vaddr = 0; + s->section_id = LINK_SEC_NONE; + } +} + +/* ---- archive ingestion ---- */ + +static void include_archive_member(Linker* l, LinkArchiveMember* mem) { + LinkInput* in; + LinkInputId id; + u32 idx; + if (mem->included) return; + in = LinkInputs_push(&l->inputs, &idx); + if (!in) + compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)"); + id = (LinkInputId)(idx + 1u); + in->id = id; + in->kind = LINK_INPUT_OBJ_BYTES; + in->obj = mem->obj; + in->name = mem->name; + mem->included = 1; + mem->obj = NULL; +} + +static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) { + u32 ii; + ObjSymIter* it; + ObjSymEntry e; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->name == 0) continue; + if (s->bind == SB_LOCAL) continue; + if (s->kind == SK_UNDEF) + symhash_set(undefs, s->name, 1u); + else + symhash_set(defined, s->name, 1u); + } + obj_symiter_free(it); + } +} + +static int inputs_have_defined_ifunc(Linker* l) { + u32 ii; + ObjSymIter* it; + ObjSymEntry e; + for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { + ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->kind == SK_IFUNC) { + obj_symiter_free(it); + return 1; + } + } + obj_symiter_free(it); + } + return 0; +} + +static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, + const SymHash* wanted) { + ObjSymIter* it; + ObjSymEntry e; + int hit = 0; + it = 
obj_symiter_new(mem->obj); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->name == 0) continue; + if (s->kind == SK_UNDEF) continue; + if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; + if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue; + if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue; + hit = 1; + break; + } + obj_symiter_free(it); + return hit; +} + +void link_ingest_archives(Linker* l) { + u32 a, m; + if (LinkArchives_count(&l->archives) == 0) return; + + for (a = 0; a < LinkArchives_count(&l->archives); ++a) { + LinkArchive* ar = LinkArchives_at(&l->archives, a); + if (!ar->whole_archive) continue; + for (m = 0; m < ar->nmembers; ++m) + include_archive_member(l, &ar->members[m]); + } + + Sym want_ifunc_init = 0; + if (l->emit_static_exe && inputs_have_defined_ifunc(l)) { + want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init"); + } + + for (;;) { + SymHash defined, undefs; + int changed = 0; + symhash_init(&defined, l->heap); + symhash_init(&undefs, l->heap); + scan_presence(l, &defined, &undefs); + if (want_ifunc_init != 0 && + symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) + symhash_set(&undefs, want_ifunc_init, 1u); + + for (a = 0; a < LinkArchives_count(&l->archives); ++a) { + LinkArchive* ar = LinkArchives_at(&l->archives, a); + if (ar->whole_archive) continue; + for (m = 0; m < ar->nmembers; ++m) { + LinkArchiveMember* mem = &ar->members[m]; + if (mem->included) continue; + if (!member_satisfies(mem, &defined, &undefs)) continue; + include_archive_member(l, mem); + changed = 1; + } + } + symhash_fini(&defined); + symhash_fini(&undefs); + if (!changed) break; + } +} diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -1,119 +1,28 @@ -/* C11 recursive-descent parser. No AST; the parser drives DeclTable for C - * declaration semantics and CG for executable code in a single pass. +/* parse.c — residual C11 parser core. 
* - * Module shape (DESIGN §5): - * - lex / pp produce a token stream; we keep one token of lookahead. - * - The parser maintains its own scope stack (block/file scope) for - * identifier resolution. DeclTable owns DeclId/ObjSymId allocation. - * - Statements drive CG: cg_func_begin/end, cg_local, cg_set_loc, - * cg_label_*, cg_branch_*, cg_jump, cg_ret. Expressions drive CG's - * value stack: cg_push_*, cg_load, cg_store, cg_binop, cg_cmp. - * - One Tok of lookahead is enough for C11; at decision points we use - * the keyword/punctuator directly. + * Contains: + * - kw_names[] table (used by parse_c to intern keywords) + * - Diagnostics/token helpers (perr, advance, peek1, fetch_tok, ...) + * - Scope/tag operations + * - Type helpers (ty_int, ty_size_t) + * - Local-variable slot allocation (make_local, make_local_aligned) + * - Static-local symbol naming (mint_static_local_sym) + * - Declaration driver (parse_init_declarator, parse_local_decl) + * - TU-level driver (parse_param_list, declare_function, + * parse_function_body, parse_external_decl, parse_translation_unit, + * parse_c) * - * v1 slice: single-TU; functions returning int; int locals (with comma- - * separated initializers); compound, if/else, while, for, return, - * expression statements; expressions covering the §6.5 spine - * (additive/multiplicative/relational/equality, unary, parens, post/pre - * inc-dec, simple assignment + compound assignment). The grammar is - * organized so each higher-level production gets its own function — the - * full C grammar slots in the same shape, one production at a time. */ + * All expression, type, initializer, and statement code lives in + * parse_expr.c, parse_type.c, parse_init.c, and parse_stmt.c. 
*/ -#include "parse/parse.h" +#include "parse/parse_priv.h" #include <stdarg.h> #include <string.h> -#include "abi/abi.h" -#include "arch/arch.h" -#include "cg/cg.h" -#include "core/arena.h" -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "debug/debug.h" -#include "decl/decl.h" -#include "decl/decl_attrs.h" -#include "lex/lex.h" -#include "obj/obj.h" -#include "parse/attr.h" -#include "pp/pp.h" -#include "type/type.h" - -/* Type-aware push for locals — exposed by cg.c, not in cg.h. */ -extern void cg_push_local_typed(CG*, FrameSlot, const Type*); -/* Pop pointer rvalue, push INDIRECT lvalue of given pointee. */ -extern void cg_deref(CG*, const Type* pointee); -/* Read SValue.type at top of stack without popping. */ -extern const Type* cg_top_type(CG*); -/* Read SValue.type at second-from-top; used for pointer-arith dispatch when - * both operands are already on the stack. */ -extern const Type* cg_top2_type(CG*); -/* Replace the type tag on the top SValue without emitting code (used for - * pointer-to-pointer casts which are no-ops at the value level). */ -extern void cg_retag_top(CG*, const Type*); -/* Recycle the backend's scratch-register pool when no value-stack entry - * holds a live register. Called at statement boundaries to avoid - * exhausting the fixed scratch window over the course of a function. */ - /* ============================================================ * Keywords - * ============================================================ - * Lex emits TOK_IDENT; the parser bucketizes idents into keywords by - * comparing the interned Sym against a fixed table populated at parser - * init. The table covers C11 plus a handful of common GCC-style - * extensions the runtime headers use. Adding a new keyword is one entry - * here plus one parser branch; the lexer never changes. 
*/ -typedef enum CKw { - KW_NONE = 0, - KW_AUTO, - KW_BREAK, - KW_CASE, - KW_CHAR, - KW_CONST, - KW_CONTINUE, - KW_DEFAULT, - KW_DO, - KW_DOUBLE, - KW_ELSE, - KW_ENUM, - KW_EXTERN, - KW_FLOAT, - KW_FOR, - KW_GOTO, - KW_IF, - KW_INLINE, - KW_INT, - KW_LONG, - KW_REGISTER, - KW_RESTRICT, - KW_RETURN, - KW_SHORT, - KW_SIGNED, - KW_SIZEOF, - KW_STATIC, - KW_STRUCT, - KW_SWITCH, - KW_TYPEDEF, - KW_UNION, - KW_UNSIGNED, - KW_VOID, - KW_VOLATILE, - KW_WHILE, - KW_BOOL, /* _Bool */ - KW_COMPLEX, /* _Complex */ - KW_IMAGINARY, /* _Imaginary */ - KW_ALIGNAS, /* _Alignas */ - KW_ALIGNOF, /* _Alignof */ - KW_ATOMIC, /* _Atomic */ - KW_GENERIC, /* _Generic */ - KW_NORETURN, /* _Noreturn */ - KW_STATIC_ASSERT, /* _Static_assert */ - KW_THREAD_LOCAL, /* _Thread_local */ - KW_ASM, /* GNU `asm` */ - KW_BUILTIN_ASM, /* GNU `__asm__` */ - KW_COUNT -} CKw; + * ============================================================ */ static const char* const kw_names[KW_COUNT] = { NULL, "auto", "break", "case", "char", @@ -129,250 +38,12 @@ static const char* const kw_names[KW_COUNT] = { }; /* ============================================================ - * Scope stack - * ============================================================ - * One ScopeEntry per declared identifier; chained in declaration order - * within a Scope. Block scopes are pushed/popped around every compound - * statement, parameter list, and `for`-init. Lookup walks parent chains. 
*/ - -typedef enum SymEntryKind { - SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */ - SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */ - SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */ - SEK_TYPEDEF, /* typedef name */ - SEK_ENUM_CST, /* enumeration constant */ -} SymEntryKind; - -typedef struct SymEntry SymEntry; -struct SymEntry { - Sym name; - u8 kind; /* SymEntryKind */ - u8 pad[3]; - const Type* type; - union { - FrameSlot slot; - ObjSymId sym; - i64 enum_value; - } v; - /* For VLAs (SEK_LOCAL or SEK_TYPEDEF): a frame slot holding the array's - * byte size, captured at declaration / typedef site. FRAME_SLOT_NONE - * for non-VLA entries. Used by sizeof on VLA-bound IDENTs and by - * VLA-typedef variable declarations. */ - FrameSlot vla_byte_slot; - /* Phase 1: parsed __attribute__((...)) list attached to this entry. - * Populated for SEK_GLOBAL / SEK_FUNC declarators (used, section, - * noreturn, alias, weak, visibility, aligned). NULL otherwise. - * Phase 2 reads this; nothing in Phase 1 does. */ - struct Attr* attrs; - SymEntry* next; -}; - -/* Tag namespace (struct/union/enum). Lives parallel to the ordinary - * identifier scope on the same Scope chain — the spec puts them in - * separate namespaces (§6.2.3). The `type` field is a Type* (mutable so - * forward declarations can be completed in place); for enums it is the - * complete TY_ENUM type. `complete` mirrors `type->rec.incomplete` for - * struct/union and is set immediately for enums. */ -typedef struct TagEntry TagEntry; -struct TagEntry { - Sym name; - u8 kind; /* TagDeclKind */ - u8 complete; - u16 pad; - Type* type; - /* Phase 1: record-level __attribute__((...)) list (packed, aligned). - * Both leading-position (between keyword and tag/body) and trailing - * (after `}`) attrs are chained here. Phase 2 reads this; Phase 1 - * does not. 
*/ - struct Attr* attrs; - TagEntry* next; -}; - -typedef struct Scope Scope; -struct Scope { - SymEntry* entries; /* LIFO */ - TagEntry* tags; /* LIFO */ - Scope* parent; -}; - -/* ============================================================ - * Parser context - * ============================================================ */ - -/* Switch dispatch: each `case K:` records (value, label) into the innermost - * switch context. After the body, the dispatch chain at L_dispatch loads the - * saved switch value, compares against each entry, and branches to its label. */ -typedef struct CaseEntry CaseEntry; -struct CaseEntry { - i64 value; - CGLabel label; - CaseEntry* next; /* LIFO; reverse-walked at dispatch emit time */ -}; - -typedef struct SwitchCtx SwitchCtx; -struct SwitchCtx { - CaseEntry* cases; /* LIFO, nodes arena-allocated */ - CGLabel default_label; /* 0 if no `default:` seen */ - FrameSlot value_slot; /* holds the switch expression value */ - const Type* value_type; /* type of the switch expression */ - SwitchCtx* parent; -}; - -/* Labels live in a per-function namespace separate from ordinary identifiers - * (§6.2.3 ¶1). One entry per unique label name; CGLabel is allocated lazily - * on first reference (whether goto-forward or label-place comes first). */ -typedef struct GotoLabel GotoLabel; -struct GotoLabel { - Sym name; - CGLabel label; - u8 placed; /* the matching `name:` was seen */ - u8 pad[3]; - SrcLoc first_use; - GotoLabel* next; -}; - -typedef struct Parser { - Compiler* c; - Pp* pp; - DeclTable* decls; - CG* cg; - Debug* debug; - TargetABI* abi; - Pool* pool; - - Tok cur; /* one token of lookahead */ - Tok next; /* second slot, populated lazily by peek1() */ - int has_next; - - /* String-literal fusion (C11 §6.4.5 ¶5) is performed at the pp-pull - * boundary: a run of adjacent TOK_STR tokens collapses into one before - * landing in `cur`/`next`. 
To peek past the run we have to read the - * first non-TOK_STR from pp; `pending` parks it for the next pull. */ - Tok pending; - int has_pending; - - Sym kw_sym[KW_COUNT]; - - /* Interned spellings for the __builtin_* / __atomic_* family routed through - * try_parse_builtin_call (Phase 9). __builtin_va_list is recognized as a - * type-name in parse_decl_specs / starts_type_name. */ - Sym sym_b_alloca; - Sym sym_b_ctz; - Sym sym_b_expect; - Sym sym_b_offsetof; - Sym sym_b_va_list; - Sym sym_b_va_start; - Sym sym_b_va_arg; - Sym sym_b_va_end; - Sym sym_b_va_copy; - /* GNU `__attribute__` keyword spelling (Phase 1). Not a real C keyword, - * so it lives outside kw_names[] — matched by IDENT comparison just like - * the __builtin_* family. */ - Sym sym_attribute; - /* GNU `__volatile__` alias for `volatile` inside asm() qualifiers. - * `volatile`/`KW_VOLATILE` already lives in kw_names[]; the doubled- - * underscore spelling is sym-compared in parse_asm_stmt. */ - Sym sym_volatile_alias; - /* GNU `__alignof__` alias for `_Alignof`. Routed through `ident_kw` so - * every `KW_ALIGNOF` consumer accepts both spellings without per-site - * checks. */ - Sym sym_alignof_alias; - Sym sym_a_load_n; - Sym sym_a_store_n; - Sym sym_a_exchange_n; - Sym sym_a_fetch_add; - Sym sym_a_fetch_sub; - Sym sym_a_fetch_and; - Sym sym_a_fetch_or; - Sym sym_a_fetch_xor; - Sym sym_a_cas_n; - Sym sym_a_thread_fence; - Sym sym_a_signal_fence; - - Scope* scope; /* top of stack; file scope is the root */ - - ObjSecId text_sec; - - /* Loop/switch context for break/continue. CGLabel 0 means none. */ - CGLabel cur_break; - CGLabel cur_continue; - - /* Innermost switch (`case`/`default` bind here). NULL outside any switch. - * `break` still goes through `cur_break`, which the switch sets. */ - SwitchCtx* cur_switch; - - /* Per-function label chain. Reset across each function definition. */ - GotoLabel* goto_labels; - - /* VLA bookkeeping. 
parse_decl_suffix emits the size-expression code at - * suffix-parse time (because the tokens are about to vanish) and stashes - * the i64 count in `vla_pending_count_slot`; parse_init_declarator picks - * it up to drive cg_alloca. v1 supports only one VLA dimension per - * declarator; nested cases panic in apply_decl_suffix. */ - u8 vla_pending; - FrameSlot vla_pending_count_slot; - - /* Tracks the most recent IDENT-lvalue push that resolved to a VLA-bound - * SEK_LOCAL. Lets `sizeof IDENT` and `sizeof(IDENT)` swap the constant - * `abi_sizeof(ptr)` (8) for a runtime load of the array's byte-size - * slot. Cleared by sizeof before parse_unary; the IDENT handler sets - * it. FRAME_SLOT_NONE when the last push was not a VLA-bound IDENT. */ - FrameSlot last_pushed_vla_slot; - - /* Counter raised while parsing a function-prototype parameter declarator. - * Per §6.7.6.3 ¶7, an array parameter `T x[expr]` is adjusted to `T *x` - * regardless of `expr`, so the size expression's value is irrelevant — - * including `[*]` (§6.7.6.2 ¶4) and identifier-bearing forms like - * `int a[n]`. While >0, parse_decl_suffix consumes the bracket contents - * without evaluating them. Counter (not bool) so nested function-typed - * parameters re-enter cleanly. */ - u8 in_param_decl; - - /* Counter used to mint unique linker-visible names for static locals so - * that two functions can each have their own `static int s = ...`. */ - u32 static_local_counter; - - /* Counter used to mint anonymous local names for compound literals - * (`(T){...}`). Each compound literal becomes a hidden frame slot whose - * name is reserved here purely for diagnostics; the symbol is never - * visible to user code. */ - u32 compound_literal_counter; - - /* Replay buffer for two-pass scans of brace-enclosed initializers. - * Used when a compound literal or initializer needs to size an - * incomplete array (`(int[]){10, 32}`): we record tokens through the - * matching `}`, count items, then rewind to re-parse. 
While - * `replay_active`, advance()/peek1() pull from `replay` instead of pp; - * once exhausted, they fall back to the regular pp source so the - * post-brace token is fetched fresh. The buffer lives in arena - * storage. */ - Tok* replay; - u32 replay_cap; - u32 replay_len; - u32 replay_pos; - u8 replay_active; - - /* Pending relocations collected while parsing a static-storage - * initializer (pointer constants like `&g` or `arr + 3`). The - * caller (`define_static_object`) flushes these via obj_reloc after - * the section has been pinned. Reset before each top-level init. */ - struct { - u32 offset; - u32 size; /* 4 or 8 bytes */ - ObjSymId target; - i64 addend; - } *static_relocs; - u32 static_relocs_len; - u32 static_relocs_cap; -} Parser; - -/* ============================================================ * Diagnostics * ============================================================ */ static SrcLoc tok_loc(const Tok* t) { return t->loc; } -static _Noreturn void perr(Parser* p, const char* fmt, ...) { +_Noreturn void perr(Parser* p, const char* fmt, ...) { va_list ap; SrcLoc loc = tok_loc(&p->cur); va_start(ap, fmt); @@ -384,8 +55,7 @@ static _Noreturn void perr(Parser* p, const char* fmt, ...) { * ============================================================ */ /* Width of an encoding prefix on a string-literal spelling: 0 for ordinary, - * 1 for L/u/U, 2 for u8. Driven by the TF_STR_* flag bits set by the lexer - * so we don't re-scan the spelling. */ + * 1 for L/u/U, 2 for u8. 
*/ static size_t str_prefix_len(u16 flags) { if (flags & TF_STR_U8) return 2; if (flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) return 1; @@ -395,13 +65,7 @@ static size_t str_prefix_len(u16 flags) { #define STR_ENC_MASK \ (TF_STR_WIDE | TF_STR_U8 | TF_STR_U16 | TF_STR_U32) -/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5: - * - same encoding prefix (or both ordinary): keep that encoding; - * - one ordinary + one prefixed: the prefixed encoding wins; - * - two different non-ordinary prefixes: ill-formed. - * The combined spelling is `<prefix>"<content-of-a><content-of-b>"`, - * interned into the global pool; `loc` stays at the first token's loc - * so diagnostics still point at the start of the run. */ +/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5. */ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { u16 ae = (u16)(a.flags & STR_ENC_MASK); u16 be = (u16)(b.flags & STR_ENC_MASK); @@ -425,16 +89,12 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { fused_enc = ae ? ae : be; apfx = str_prefix_len(a.flags); bpfx = str_prefix_len(b.flags); - /* Each spelling is `<prefix>"...content..."`; strip prefix and the two - * delimiting quotes. lexer guarantees at least the prefix + 2 quotes. */ if (alen < apfx + 2 || as[apfx] != '"' || as[alen - 1] != '"' || blen < bpfx + 2 || bs[bpfx] != '"' || bs[blen - 1] != '"') { perr(p, "malformed string literal in concatenation"); } a_content_len = alen - apfx - 2; b_content_len = blen - bpfx - 2; - /* Output prefix: pick from whichever token contributed the surviving - * encoding (a if a was prefixed, else b — also covers both-ordinary). */ out_pfx_len = ae ? 
apfx : bpfx; out_len = out_pfx_len + 1 + a_content_len + b_content_len + 1; buf = (char*)h->alloc(h, out_len, 1); @@ -457,17 +117,12 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { out = a; out.spelling = pool_intern(p->pool, buf, k); out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc); - /* The fused token is freshly minted from the pool; LitId from the lexer - * pertained only to the first piece. Clear it so any future LitInfo - * lookups don't return stale per-token data. */ out.lit = LIT_NONE; h->free(h, buf, 0); return out; } -/* Pull one logical token from pp, collapsing adjacent TOK_STR runs into a - * single fused TOK_STR. The first non-TOK_STR token that terminates a run - * is parked in `pending` for the next call. */ +/* Pull one logical token from pp, collapsing adjacent TOK_STR runs. */ static Tok fetch_tok(Parser* p) { Tok t; if (p->has_pending) { @@ -488,15 +143,12 @@ static Tok fetch_tok(Parser* p) { } } -static void advance(Parser* p) { +void advance(Parser* p) { if (p->replay_active) { if (p->replay_pos < p->replay_len) { p->cur = p->replay[p->replay_pos++]; return; } - /* Replay exhausted; fall back to the underlying source. The pp stream - * sits exactly past the recorded `}` (record_braced_block left it - * there), so fetching the next token resumes parsing after the brace. */ p->replay_active = 0; } if (p->has_next) { @@ -507,8 +159,7 @@ static void advance(Parser* p) { } } -/* One-token lookahead beyond p->cur. Lazily populated. 
*/ -static Tok peek1(Parser* p) { +Tok peek1(Parser* p) { if (p->replay_active && p->replay_pos < p->replay_len) { return p->replay[p->replay_pos]; } @@ -519,30 +170,13 @@ static Tok peek1(Parser* p) { return p->next; } -static int is_punct(const Tok* t, u32 punct) { - return t->kind == TOK_PUNCT && t->v.punct == punct; -} - -static int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; } - -static int is_kw(const Parser* p, const Tok* t, CKw k) { - if (t->kind != TOK_IDENT) return 0; - if (t->v.ident == p->kw_sym[k]) return 1; - if (k == KW_ALIGNOF && t->v.ident == p->sym_alignof_alias) return 1; - return 0; -} - -static CKw ident_kw(const Parser* p, Sym name) { - /* Linear scan; KW_COUNT is small. */ - CKw i; - for (i = (CKw)1; i < KW_COUNT; ++i) { - if (p->kw_sym[i] == name) return i; +void expect_punct(Parser* p, u32 punct, const char* what) { + if (!accept_punct(p, punct)) { + perr(p, "expected %s", what); } - if (name == p->sym_alignof_alias) return KW_ALIGNOF; - return KW_NONE; } -static int accept_punct(Parser* p, u32 punct) { +int accept_punct(Parser* p, u32 punct) { if (is_punct(&p->cur, punct)) { advance(p); return 1; @@ -550,26 +184,9 @@ static int accept_punct(Parser* p, u32 punct) { return 0; } -static int accept_kw(Parser* p, CKw k) { - if (is_kw(p, &p->cur, k)) { - advance(p); - return 1; - } - return 0; -} - -static void expect_punct(Parser* p, u32 punct, const char* what) { - if (!accept_punct(p, punct)) { - perr(p, "expected %s", what); - } -} - /* Record tokens from the current `{` through the matching `}` into the - * parser's replay buffer. Pre: p->cur is `{`. Post: p->cur is the closing - * `}` (not yet advanced past); replay buffer holds [`{`, ..., `}`]. The - * caller must subsequently call replay_rewind() to re-scan, or simply - * advance() to skip past the brace. */ -static void record_braced_block(Parser* p) { + * parser's replay buffer. 
*/ +void record_braced_block(Parser* p) { int depth = 0; if (!is_punct(&p->cur, '{')) perr(p, "internal: record on non-'{'"); p->replay_len = 0; @@ -595,15 +212,10 @@ static void record_braced_block(Parser* p) { } advance(p); } - /* cur is the recorded closing `}`. Caller decides what to do next. */ } -/* After record_braced_block, rewind so subsequent advance()/peek1() pull - * tokens from the replay buffer starting at index 0. Discards any - * lazily-buffered `next` since tokens within the recorded range are now - * served from the buffer. The post-`}` token will be fetched via pp_next - * once the replay finishes draining. */ -static void replay_rewind(Parser* p) { +/* After record_braced_block, rewind to replay from the start. */ +void replay_rewind(Parser* p) { if (p->replay_len == 0) perr(p, "internal: replay_rewind with empty buffer"); p->cur = p->replay[0]; p->replay_pos = 1; @@ -611,12 +223,8 @@ static void replay_rewind(Parser* p) { p->has_next = 0; } -/* Count top-level items in a recorded brace list (positional or designator- - * led). The recording starts with `{` at index 0 and ends with the matching - * `}` at len-1. Top-level commas separate items; a trailing comma before - * the closing `}` does not introduce an extra item. Used to size incomplete - * arrays initialized with `{...}`. */ -static u32 count_recorded_top_level_items(const Tok* vec, u32 len) { +/* Count top-level items in a recorded brace list. */ +u32 count_recorded_top_level_items(const Tok* vec, u32 len) { u32 count; u32 i; int depth = 0; @@ -629,22 +237,15 @@ static u32 count_recorded_top_level_items(const Tok* vec, u32 len) { else if (is_punct(t, '}') || is_punct(t, ')') || is_punct(t, ']')) --depth; else if (depth == 0 && is_punct(t, ',')) ++count; } - /* If the last interior token is `,` it was a trailing separator; back off. 
*/ if (is_punct(&vec[len - 2], ',')) --count; return count; } -/* expect_kw is wired up but unused at this slice — `void` consumption - * goes through accept_kw already. Kept commented as a documentation hook - * for the next slice that needs it (e.g. `_Static_assert`). - * - * static void expect_kw(Parser*, CKw, const char* what); */ - /* ============================================================ * Scopes * ============================================================ */ -static Scope* scope_new(Parser* p, Scope* parent) { +Scope* scope_new(Parser* p, Scope* parent) { Scope* s = arena_new(p->c->tu, Scope); if (!s) perr(p, "out of memory in scope_new"); s->entries = NULL; @@ -653,14 +254,14 @@ static Scope* scope_new(Parser* p, Scope* parent) { return s; } -static void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); } +void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); } -static void scope_pop(Parser* p) { +void scope_pop(Parser* p) { if (p->scope) p->scope = p->scope->parent; } -static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, - const Type* type) { +SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, + const Type* type) { SymEntry* e = arena_new(p->c->tu, SymEntry); if (!e) perr(p, "out of memory in scope_define"); memset(e, 0, sizeof *e); @@ -672,7 +273,7 @@ static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, return e; } -static SymEntry* scope_lookup(Parser* p, Sym name) { +SymEntry* scope_lookup(Parser* p, Sym name) { Scope* s; for (s = p->scope; s; s = s->parent) { SymEntry* e; @@ -683,12 +284,8 @@ static SymEntry* scope_lookup(Parser* p, Sym name) { return NULL; } -/* Tag scope ops. Tag lookup walks parent chains; tag definition lives in the - * current scope so an inner scope can shadow an outer tag of the same name - * (§6.2.1 ¶4). `tag_lookup_local` is used to detect redeclaration in the - * same scope and to complete a forward-declared tag in place. 
*/ -static TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, - int complete) { +TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, + int complete) { TagEntry* e = arena_new(p->c->tu, TagEntry); if (!e) perr(p, "out of memory in tag_define"); memset(e, 0, sizeof *e); @@ -701,7 +298,7 @@ static TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, return e; } -static TagEntry* tag_lookup(Parser* p, Sym name) { +TagEntry* tag_lookup(Parser* p, Sym name) { Scope* s; for (s = p->scope; s; s = s->parent) { TagEntry* e; @@ -712,7 +309,7 @@ static TagEntry* tag_lookup(Parser* p, Sym name) { return NULL; } -static TagEntry* tag_lookup_local(Parser* p, Sym name) { +TagEntry* tag_lookup_local(Parser* p, Sym name) { TagEntry* e; for (e = p->scope->tags; e; e = e->next) { if (e->name == name) return e; @@ -724,4460 +321,44 @@ static TagEntry* tag_lookup_local(Parser* p, Sym name) { * Type helpers * ============================================================ */ -static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } static const Type* ty_size_t(Parser* p) { return abi_size_type(p->abi, p->pool); } /* ============================================================ - * GNU __attribute__ (Phase 1 — parse + carry; no semantic wire-up) - * ============================================================ - * Surface grammar: - * __attribute__ '(' '(' attr-list ')' ')' - * attr-list := attr (',' attr)* | <empty> - * attr := attr-name | attr-name '(' balanced-tokens ')' - * - * Both `name` and `__name__` map to the same attribute. Phase 1 stores - * the parsed list on DeclSpecs.attrs; other carrier sites consume tokens - * cleanly via parse_and_discard_attributes until Phase 2 wires them up. */ -/* AttrKind / AttrArgShape / Attr live in src/parse/attr.h so the Phase 2 - * decl consumers (src/decl/decl_attrs.c) can decode the same nodes. 
*/ - -static const struct { - const char* name; - AttrKind kind; - AttrArgShape shape; -} kAttrTable[] = { - {"packed", ATTR_PACKED, AS_NONE}, - {"aligned", ATTR_ALIGNED, AS_INT_OPT}, - {"section", ATTR_SECTION, AS_STRING}, - {"used", ATTR_USED, AS_NONE}, - {"noreturn", ATTR_NORETURN, AS_NONE}, - {"alias", ATTR_ALIAS, AS_STRING}, - {"weak", ATTR_WEAK, AS_NONE}, - {"visibility", ATTR_VISIBILITY, AS_STRING}, - {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE}, - {"noinline", ATTR_NOINLINE, AS_NONE}, - {"unused", ATTR_UNUSED, AS_NONE}, - {"deprecated", ATTR_DEPRECATED, AS_OPAQUE}, - {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE}, - {"format", ATTR_FORMAT, AS_FORMAT}, - {"nonnull", ATTR_NONNULL, AS_OPAQUE}, - {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE}, - {"pure", ATTR_PURE, AS_NONE}, - {"const", ATTR_CONST, AS_NONE}, - {"malloc", ATTR_MALLOC, AS_OPAQUE}, - {"nothrow", ATTR_NOTHROW, AS_NONE}, - {"leaf", ATTR_LEAF, AS_NONE}, - {"cold", ATTR_COLD, AS_NONE}, - {"hot", ATTR_HOT, AS_NONE}, - {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT}, - {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT}, - {"cleanup", ATTR_CLEANUP, AS_IDENT}, - {"mode", ATTR_MODE, AS_IDENT}, - {"vector_size", ATTR_VECTOR_SIZE, AS_INT}, - {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE}, - {"gnu_inline", ATTR_GNU_INLINE, AS_NONE}, - {"fallthrough", ATTR_FALLTHROUGH, AS_NONE}, - {"sentinel", ATTR_SENTINEL, AS_OPAQUE}, - {"no_instrument_function", ATTR_NO_INSTRUMENT_FUNCTION, AS_NONE}, - {"no_sanitize", ATTR_NO_SANITIZE, AS_OPAQUE}, -}; - -static int starts_attr(const Parser* p); -static Attr* parse_attribute_spec_list(Parser* p); -static void parse_and_discard_attributes(Parser* p); -static u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out); -/* Append `add` to the end of `*head` (linked via Attr.next). Both args - * are in source order; result preserves source order. 
*/ -static void attr_list_append(Attr** head, Attr* add) { - if (!add) return; - if (!*head) { *head = add; return; } - Attr* tail = *head; - while (tail->next) tail = tail->next; - tail->next = add; -} -/* If `starts_attr`, parse and append to `*sink`. No-op otherwise. */ -static void parse_attrs_into(Parser* p, Attr** sink) { - if (starts_attr(p)) attr_list_append(sink, parse_attribute_spec_list(p)); -} - -/* DeclSpecs and the matching parser landed up in the declaration section - * historically; we hoist it before expression parsing because - * sizeof / _Alignof / cast need to consume a type-name from inside - * parse_unary. */ -typedef struct DeclSpecs { - const Type* type; - DeclStorage storage; - u32 flags; /* DeclFlag */ - u16 quals; /* TypeQual bits seen in the decl-spec list */ - u32 align; /* explicit alignment from `_Alignas`; 0 if none */ - /* When `type` came from a VLA typedef-name, propagates the typedef's - * captured byte-size slot so init_declarator can alloca the right - * runtime size. FRAME_SLOT_NONE otherwise. */ - FrameSlot vla_byte_slot; - /* Phase 1: parsed __attribute__((...)) list attached to this decl. */ - Attr* attrs; -} DeclSpecs; - -static int parse_decl_specs(Parser* p, DeclSpecs* out); -/* `*anon_attrs_out` receives any record-level attrs when the record is - * anonymous (no TagEntry to attach to). For tagged records the attrs go - * on the TagEntry directly. May be NULL if caller doesn't care. 
*/ -static const Type* parse_struct_or_union(Parser* p, TypeKind kind, - Attr** anon_attrs_out); -static const Type* parse_enum(Parser* p, Attr** anon_attrs_out); -static void parse_assign_expr(Parser* p); -static i64 eval_const_int(Parser* p, SrcLoc loc); /* tiny constant evaluator */ -static const Type* parse_pointer_layer(Parser* p, const Type* base); -static const Type* parse_declarator_full(Parser* p, const Type* base, - int allow_abstract, Sym* name_out, - SrcLoc* loc_out); -/* Variant that also returns the attributes seen at the post-declarator-id - * position (after the IDENT, between/after suffixes). Callers that care - * about per-declarator attrs (struct members; ordinary declarators in - * decl-listings) pass an Attr** sink; pass NULL to drop them. */ -static const Type* parse_declarator_full_ex(Parser* p, const Type* base, - int allow_abstract, Sym* name_out, - SrcLoc* loc_out, - Attr** attrs_out); -static int starts_type_name(const Parser* p, const Tok* t); -static const Type* parse_type_name(Parser* p); -static i64 parse_int_literal(Parser* p, const Tok* t); -static i64 decode_char_literal(Parser* p, const Tok* t); - -/* Resolve the type implied by a multiset of type-specifier tokens - * (unsigned, signed, short, long, char, int, ...). C allows most orders - * (`unsigned long int` ≡ `int unsigned long`), so we collect everything - * first and pick the canonical TY_* tag at the end. Phase 1 covers the - * combinations the corpus needs; the float family (`long double`) is - * Phase 7's job and falls through to a "conflicting" diagnostic if - * combined with the integer keywords here. */ -typedef struct TypeSpecAccum { - u8 saw_void; - u8 saw_char; - u8 saw_int; - u8 saw_short; - u8 long_count; /* 0/1/2 */ - u8 saw_signed; - u8 saw_unsigned; - u8 saw_bool; - u8 saw_float; - u8 saw_double; - u8 saw_explicit_type; /* any of the above? 
*/ -} TypeSpecAccum; - -static const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, - SrcLoc loc) { - if (!a->saw_explicit_type) return NULL; - if (a->saw_void) { - if (a->saw_char || a->saw_int || a->saw_short || a->long_count || - a->saw_signed || a->saw_unsigned || a->saw_bool || a->saw_float || - a->saw_double) { - compiler_panic(p->c, loc, "conflicting type specifiers (void mixed)"); - } - return type_void(p->pool); - } - if (a->saw_bool) { - return type_prim(p->pool, TY_BOOL); - } - if (a->saw_char) { - if (a->saw_unsigned) return type_prim(p->pool, TY_UCHAR); - if (a->saw_signed) return type_prim(p->pool, TY_SCHAR); - return type_prim(p->pool, TY_CHAR); - } - if (a->saw_float) return type_prim(p->pool, TY_FLOAT); - if (a->saw_double) { - return type_prim(p->pool, a->long_count ? TY_LDOUBLE : TY_DOUBLE); - } - if (a->saw_short) { - return type_prim(p->pool, a->saw_unsigned ? TY_USHORT : TY_SHORT); - } - if (a->long_count == 2) { - return type_prim(p->pool, a->saw_unsigned ? TY_ULLONG : TY_LLONG); - } - if (a->long_count == 1) { - return type_prim(p->pool, a->saw_unsigned ? TY_ULONG : TY_LONG); - } - if (a->saw_unsigned) return type_prim(p->pool, TY_UINT); - if (a->saw_signed || a->saw_int) return type_prim(p->pool, TY_INT); - return type_prim(p->pool, TY_INT); -} - -static int parse_decl_specs(Parser* p, DeclSpecs* out) { - /* Tracks integer/void/char type specifiers in any order, plus the - * storage-class and qualifier keywords. Returns 0 if no specifier was - * consumed (caller treats that as "not a declaration"). 
*/ - TypeSpecAccum acc; - SrcLoc loc; - int seen = 0; - const Type* tagged_ty = NULL; /* set when struct/union/enum consumed */ - memset(&acc, 0, sizeof acc); - out->type = NULL; - out->storage = DS_AUTO; - out->flags = DF_NONE; - out->quals = 0; - out->align = 0; - out->vla_byte_slot = FRAME_SLOT_NONE; - out->attrs = NULL; - loc = tok_loc(&p->cur); - for (;;) { - Tok t = p->cur; - /* GNU __attribute__((...)) may appear anywhere among decl-specifiers - * and may repeat. Chain onto out->attrs (in source order). */ - if (starts_attr(p)) { - Attr* a = parse_attribute_spec_list(p); - if (a) { - Attr* tail = a; - while (tail->next) tail = tail->next; - tail->next = out->attrs; - out->attrs = a; - } - seen = 1; - continue; - } - if (is_kw(p, &t, KW_STRUCT) || is_kw(p, &t, KW_UNION)) { - TypeKind kind = is_kw(p, &t, KW_STRUCT) ? TY_STRUCT : TY_UNION; - Attr* anon_attrs = NULL; - if (tagged_ty || acc.saw_explicit_type) { - perr(p, "conflicting type specifiers (struct/union mixed)"); - } - advance(p); - tagged_ty = parse_struct_or_union(p, kind, &anon_attrs); - attr_list_append(&out->attrs, anon_attrs); - acc.saw_explicit_type = 1; - seen = 1; - continue; - } - if (is_kw(p, &t, KW_ENUM)) { - Attr* anon_attrs = NULL; - if (tagged_ty || acc.saw_explicit_type) { - perr(p, "conflicting type specifiers (enum mixed)"); - } - advance(p); - tagged_ty = parse_enum(p, &anon_attrs); - attr_list_append(&out->attrs, anon_attrs); - acc.saw_explicit_type = 1; - seen = 1; - continue; - } - if (is_kw(p, &t, KW_VOID)) { - acc.saw_void = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_CHAR)) { - acc.saw_char = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_INT)) { - acc.saw_int = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_SHORT)) { - acc.saw_short = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_LONG)) { - acc.long_count++; acc.saw_explicit_type = 1; 
advance(p); seen = 1; - } else if (is_kw(p, &t, KW_SIGNED)) { - acc.saw_signed = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_UNSIGNED)) { - acc.saw_unsigned = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_BOOL)) { - acc.saw_bool = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_FLOAT)) { - acc.saw_float = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_DOUBLE)) { - acc.saw_double = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_STATIC)) { - out->storage = DS_STATIC; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_EXTERN)) { - out->storage = DS_EXTERN; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_CONST)) { - out->quals |= Q_CONST; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_VOLATILE)) { - out->quals |= Q_VOLATILE; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_RESTRICT)) { - out->quals |= Q_RESTRICT; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_ATOMIC)) { - /* `_Atomic(type-name)` is a type specifier; bare `_Atomic` is a - * qualifier (§6.7.2.4). Disambiguate on the next token. */ - Tok n = peek1(p); - if (is_punct(&n, '(')) { - const Type* inner; - if (tagged_ty || acc.saw_explicit_type) { - perr(p, "conflicting type specifiers (_Atomic(T) mixed)"); - } - advance(p); /* `_Atomic` */ - advance(p); /* `(` */ - inner = parse_type_name(p); - expect_punct(p, ')', "')' after _Atomic type"); - tagged_ty = type_qualified(p->pool, inner, Q_ATOMIC); - acc.saw_explicit_type = 1; - seen = 1; - continue; - } - out->quals |= Q_ATOMIC; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_TYPEDEF)) { - out->storage = DS_TYPEDEF; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_ALIGNAS)) { - /* `_Alignas(N)` or `_Alignas(type-name)`. Either form yields a - * byte alignment that overrides the natural alignment of the - * declared object. Multiple specifiers take the strictest. 
*/ - u32 a = 0; - advance(p); /* `_Alignas` */ - expect_punct(p, '(', "'(' after _Alignas"); - if (starts_type_name(p, &p->cur)) { - const Type* tn = parse_type_name(p); - a = abi_alignof(p->abi, tn); - } else { - i64 v = eval_const_int(p, tok_loc(&p->cur)); - if (v < 0) perr(p, "_Alignas requires a non-negative alignment"); - /* §6.7.5 ¶6: `_Alignas(0)` is a no-op (use the natural - * alignment), so leave `a` at 0 and skip the bump below. */ - a = (u32)v; - } - expect_punct(p, ')', "')' after _Alignas argument"); - if (a > out->align) out->align = a; - seen = 1; - } else if (is_kw(p, &t, KW_INLINE)) { - out->flags |= DF_INLINE; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_THREAD_LOCAL)) { - out->flags |= DF_THREAD; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) || - is_kw(p, &t, KW_AUTO)) { - /* Recognized but currently no-op at this slice. */ - advance(p); seen = 1; - } else if (!acc.saw_explicit_type && !tagged_ty && - t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) { - /* `__builtin_va_list` resolves to the per-ABI va_list type. */ - if (t.v.ident == p->sym_b_va_list) { - tagged_ty = abi_va_list_type(p->abi, p->pool); - acc.saw_explicit_type = 1; - advance(p); - seen = 1; - continue; - } - /* Typedef-name as a type specifier. Only consumed when no other - * type specifier has been seen — otherwise this IDENT is the - * declarator name. */ - SymEntry* e = scope_lookup(p, t.v.ident); - if (e && e->kind == SEK_TYPEDEF) { - tagged_ty = e->type; - if (e->vla_byte_slot != FRAME_SLOT_NONE) { - out->vla_byte_slot = e->vla_byte_slot; - } - acc.saw_explicit_type = 1; - advance(p); - seen = 1; - continue; - } - break; - } else { - break; - } - } - if (seen) { - if (tagged_ty) { - out->type = tagged_ty; - } else { - out->type = resolve_type_specs(p, &acc, loc); - if (!out->type) { - /* Storage class without a type — default to int per pre-C99. 
*/ - out->type = ty_int(p); - } - } - } - return seen; -} - -/* ============================================================ - * struct / union / enum - * ============================================================ - * Recognized inside parse_decl_specs. The shapes are: - * struct-or-union-specifier = - * (`struct`|`union`) IDENT? (`{` member-decl+ `}`)? - * enum-specifier = - * `enum` IDENT? (`{` enumerator (`,` enumerator)* `,`? `}`)? - * - * Tag scope: an IDENT after `struct`/`union`/`enum` names a tag. Tags share - * the same scope chain as ordinary identifiers but live in a separate slot - * (TagEntry vs SymEntry). A `struct S` reference without `{...}` looks up - * `S` in the tag scope chain; if not found and the use is permissible - * (currently always — we don't yet distinguish struct-specifier-as-decl - * from struct-tag-only) we install an incomplete tag in the current scope. - * - * Forward decl + completion: `struct S;` followed later by `struct S { ... }` - * are joined by reusing the same Type* node (mutated in place by - * type_record_install). Self-referential pointers `struct N { struct N - * *next; }` work because the pointer type is constructed from the same - * (still-incomplete) Type* during member parsing — completion only changes - * the fields/incomplete bits, never the pointer's target identity. */ - -/* Tiny constant evaluator. Used by enum values and array sizes that may grow - * beyond bare TOK_NUM. Phase 3 only handles signed-int forms because the - * corpus never references float or pointer constants from these positions. - * Recognizes: - * integer literal | character literal | enumerator - * '+' cexpr | '-' cexpr | '~' cexpr | '!' cexpr - * '(' cexpr ')' - * plus '+ - * / % & | ^ << >>' between integer constants - * Returns the evaluated value; on parse-fail or non-constant operand it - * panics with `loc` as the diagnostic site. 
*/ -static i64 cexpr_unary(Parser* p, SrcLoc loc); -static void parse_unary(Parser* p); -static i64 cexpr_mul(Parser* p, SrcLoc loc) { - i64 v = cexpr_unary(p, loc); - for (;;) { - if (accept_punct(p, '*')) v = v * cexpr_unary(p, loc); - else if (accept_punct(p, '/')) { - i64 r = cexpr_unary(p, loc); - if (r == 0) compiler_panic(p->c, loc, "division by zero in constant"); - v = v / r; - } else if (accept_punct(p, '%')) { - i64 r = cexpr_unary(p, loc); - if (r == 0) compiler_panic(p->c, loc, "modulo by zero in constant"); - v = v % r; - } else break; - } - return v; -} -static i64 cexpr_add(Parser* p, SrcLoc loc) { - i64 v = cexpr_mul(p, loc); - for (;;) { - if (accept_punct(p, '+')) v = v + cexpr_mul(p, loc); - else if (accept_punct(p, '-')) v = v - cexpr_mul(p, loc); - else break; - } - return v; -} -static i64 cexpr_shift(Parser* p, SrcLoc loc) { - i64 v = cexpr_add(p, loc); - for (;;) { - if (accept_punct(p, P_SHL)) v = v << cexpr_add(p, loc); - else if (accept_punct(p, P_SHR)) v = v >> cexpr_add(p, loc); - else break; - } - return v; -} -static i64 cexpr_rel(Parser* p, SrcLoc loc) { - i64 v = cexpr_shift(p, loc); - for (;;) { - if (accept_punct(p, P_LE)) v = v <= cexpr_shift(p, loc); - else if (accept_punct(p, P_GE)) v = v >= cexpr_shift(p, loc); - else if (is_punct(&p->cur, '<')) { - advance(p); v = v < cexpr_shift(p, loc); - } else if (is_punct(&p->cur, '>')) { - advance(p); v = v > cexpr_shift(p, loc); - } else break; - } - return v; -} -static i64 cexpr_eq(Parser* p, SrcLoc loc) { - i64 v = cexpr_rel(p, loc); - for (;;) { - if (accept_punct(p, P_EQ)) v = (v == cexpr_rel(p, loc)); - else if (accept_punct(p, P_NE)) v = (v != cexpr_rel(p, loc)); - else break; - } - return v; -} -static i64 cexpr_band(Parser* p, SrcLoc loc) { - i64 v = cexpr_eq(p, loc); - while (is_punct(&p->cur, '&') && !is_punct(&p->cur, P_AND)) { - advance(p); - v = v & cexpr_eq(p, loc); - } - return v; -} -static i64 cexpr_bxor(Parser* p, SrcLoc loc) { - i64 v = cexpr_band(p, loc); - while 
(accept_punct(p, '^')) v = v ^ cexpr_band(p, loc); - return v; -} -static i64 cexpr_bor(Parser* p, SrcLoc loc) { - i64 v = cexpr_bxor(p, loc); - while (is_punct(&p->cur, '|') && !is_punct(&p->cur, P_OR)) { - advance(p); - v = v | cexpr_bxor(p, loc); - } - return v; -} -static i64 cexpr_unary(Parser* p, SrcLoc loc) { - if (accept_punct(p, '+')) return cexpr_unary(p, loc); - if (accept_punct(p, '-')) return -cexpr_unary(p, loc); - if (accept_punct(p, '~')) return ~cexpr_unary(p, loc); - if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 0 : 1; - if (accept_kw(p, KW_SIZEOF)) { - /* `sizeof ( type-name )` resolves to a pure type-name lookup. The - * `sizeof ( expression )` and `sizeof unary-expression` forms run the - * operand through parse_unary (operand is not evaluated per §6.5.3.4, - * matching parse_unary's handling) and read the resulting type off the - * cg stack, then drop. */ - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after sizeof type-name"); - return (i64)abi_sizeof(p->abi, t); - } - } - } - parse_unary(p); - { - const Type* ty = cg_top_type(p->cg); - i64 sz = (i64)abi_sizeof(p->abi, ty); - cg_drop(p->cg); - return sz; - } - } - if (accept_kw(p, KW_ALIGNOF)) { - /* `_Alignof` is type-name only per §6.5.3.4. The GNU `__alignof__` - * alias additionally accepts an expression, mirroring sizeof. We - * disambiguate at the `(`: type-name → parse_type_name, otherwise - * route through parse_unary and read the operand type. 
*/ - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after _Alignof type-name"); - return (i64)abi_alignof(p->abi, t); - } - } - } - parse_unary(p); - { - const Type* ty = cg_top_type(p->cg); - i64 al = (i64)abi_alignof(p->abi, ty); - cg_drop(p->cg); - return al; - } - } - if (accept_punct(p, '(')) { - /* `(type-name) cexpr` is an explicit cast in a constant context; for - * the §6.7.10 corpus the casts we see are integer→integer, so the - * mask-to-width is sufficient. Otherwise the parens enclose a - * sub-expression. */ - if (starts_type_name(p, &p->cur)) { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after cast type-name"); - { - i64 v = cexpr_unary(p, loc); - u32 sz = abi_sizeof(p->abi, t); - int is_signed = abi_type_info(p->abi, t).signed_; - if (sz < 8) { - u64 mask = (1ull << (sz * 8u)) - 1ull; - u64 uv = (u64)v & mask; - if (is_signed) { - u64 sign = 1ull << (sz * 8u - 1u); - v = (i64)((uv ^ sign) - sign); - } else { - v = (i64)uv; - } - } - return v; - } - } - { - i64 v = cexpr_bor(p, loc); - expect_punct(p, ')', "')' in constant expression"); - return v; - } - } - if (p->cur.kind == TOK_NUM) { - i64 v = parse_int_literal(p, &p->cur); - advance(p); - return v; - } - if (p->cur.kind == TOK_CHR) { - /* Character literals are integer constants per §6.4.4.4. 
*/ - i64 v = decode_char_literal(p, &p->cur); - advance(p); - return v; - } - if (p->cur.kind == TOK_IDENT) { - SymEntry* e = scope_lookup(p, p->cur.v.ident); - if (e && e->kind == SEK_ENUM_CST) { - advance(p); - return e->v.enum_value; - } - compiler_panic(p->c, loc, "non-constant identifier in constant expression"); - } - compiler_panic(p->c, loc, "expected constant expression"); -} -static i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } - -/* ============================================================ - * __attribute__ helpers + * Local-variable slot allocation * ============================================================ */ -static int starts_attr(const Parser* p) { - return p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_attribute; -} - -/* Canonicalize an attribute name spelling: strip exactly one leading `__` - * and one trailing `__` pair if both are present (GCC compat). Returns the - * (start, len) of the canonical substring within the original spelling. */ -static void attr_canon_range(const char* s, size_t len, - const char** out_p, size_t* out_len) { - if (len >= 4 && s[0] == '_' && s[1] == '_' && - s[len - 1] == '_' && s[len - 2] == '_') { - *out_p = s + 2; - *out_len = len - 4; - return; - } - *out_p = s; - *out_len = len; -} - -static AttrKind classify_attr(Parser* p, Sym name, AttrArgShape* shape_out) { - size_t len = 0; - const char* s = pool_str(p->pool, name, &len); - const char* cs; - size_t clen; - size_t i; - if (!s) { - *shape_out = AS_OPAQUE; - return ATTR_UNKNOWN; - } - attr_canon_range(s, len, &cs, &clen); - for (i = 0; i < sizeof(kAttrTable) / sizeof(kAttrTable[0]); ++i) { - const char* tn = kAttrTable[i].name; - size_t tlen = strlen(tn); - if (tlen == clen && memcmp(tn, cs, clen) == 0) { - *shape_out = kAttrTable[i].shape; - return kAttrTable[i].kind; - } - } - *shape_out = AS_OPAQUE; - return ATTR_UNKNOWN; -} - -/* Skip a balanced parenthesized token group. Pre: p->cur is `(`. 
Post: - * p->cur is the token immediately after the matching `)`. */ -static void skip_balanced_parens(Parser* p) { - int depth; - if (!is_punct(&p->cur, '(')) perr(p, "internal: skip_balanced_parens"); - depth = 1; - advance(p); - while (depth > 0) { - if (p->cur.kind == TOK_EOF) { - perr(p, "unexpected EOF inside attribute arguments"); - } - if (is_punct(&p->cur, '(')) ++depth; - else if (is_punct(&p->cur, ')')) { - --depth; - if (depth == 0) { advance(p); return; } - } - advance(p); - } -} - -/* Parse the body between `(` and `)` for one attribute according to its - * shape. Pre: p->cur is `(`. Post: p->cur is the token after the matching - * `)`. For unrecognized shapes or ATTR_UNKNOWN, swallows balanced tokens. */ -static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape, - const char* attr_diag_name) { - if (!is_punct(&p->cur, '(')) { - if (shape == AS_NONE || shape == AS_OPTIONAL || shape == AS_INT_OPT || - shape == AS_OPAQUE) { - return; - } - perr(p, "attribute '%s' expects '(' arguments", attr_diag_name); - } - switch (shape) { - case AS_NONE: { - /* Tolerate empty `()`. */ - advance(p); /* '(' */ - if (!accept_punct(p, ')')) { - perr(p, "attribute '%s' takes no arguments", attr_diag_name); - } - return; - } - case AS_OPTIONAL: { - skip_balanced_parens(p); - return; - } - case AS_INT: - case AS_INT_OPT: { - SrcLoc loc; - advance(p); /* '(' */ - if (is_punct(&p->cur, ')')) { - if (shape == AS_INT) { - perr(p, "attribute '%s' expects an integer argument", - attr_diag_name); - } - advance(p); - return; - } - loc = tok_loc(&p->cur); - a->v.i = eval_const_int(p, loc); - a->nargs = 1; - expect_punct(p, ')', "')' after attribute integer argument"); - return; - } - case AS_STRING: { - advance(p); /* '(' */ - if (p->cur.kind != TOK_STR) { - perr(p, "attribute '%s' expects a string literal", attr_diag_name); - } - /* Decode the literal so consumers (`section`, `alias`, `visibility`) - * see the content without surrounding quotes or escape sequences. 
*/ - { - Tok t = p->cur; - size_t nlen = 0; - u8* bytes = decode_string_literal(p, &t, &nlen); - /* nlen includes a trailing NUL — intern without it. */ - u32 ilen = (nlen > 0) ? (u32)(nlen - 1) : 0; - a->v.sym = pool_intern(p->c->global, (const char*)bytes, ilen); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - } - a->nargs = 1; - advance(p); - expect_punct(p, ')', "')' after attribute string argument"); - return; - } - case AS_IDENT: { - advance(p); /* '(' */ - if (p->cur.kind != TOK_IDENT) { - perr(p, "attribute '%s' expects an identifier", attr_diag_name); - } - a->v.sym = p->cur.v.ident; - a->nargs = 1; - advance(p); - expect_punct(p, ')', "')' after attribute identifier argument"); - return; - } - case AS_FORMAT: { - /* format(archetype, m, n) — archetype is an identifier, m and n are - * positive integers. */ - SrcLoc mloc, nloc; - i64 mv, nv; - advance(p); /* '(' */ - if (p->cur.kind != TOK_IDENT) { - perr(p, "attribute 'format' expects (archetype, m, n)"); - } - advance(p); - expect_punct(p, ',', "',' after format archetype"); - mloc = tok_loc(&p->cur); - mv = eval_const_int(p, mloc); - expect_punct(p, ',', "',' after format string-index"); - nloc = tok_loc(&p->cur); - nv = eval_const_int(p, nloc); - if (mv < 0 || mv > 0xFFFF || nv < 0 || nv > 0xFFFF) { - perr(p, "attribute 'format' indices out of range"); - } - a->v.format.fmt_idx = (u16)mv; - a->v.format.first = (u16)nv; - a->nargs = 3; - expect_punct(p, ')', "')' after format arguments"); - return; - } - case AS_OPAQUE: - default: { - skip_balanced_parens(p); - return; - } - } -} - -/* Parse one or more `__attribute__((...))` runs starting at p->cur (must - * satisfy starts_attr). Returns a linked list of Attr* in source order. 
*/ -static Attr* parse_attribute_spec_list(Parser* p) { - Attr* head = NULL; - Attr* tail = NULL; - while (starts_attr(p)) { - SrcLoc kw_loc = tok_loc(&p->cur); - advance(p); /* __attribute__ */ - expect_punct(p, '(', "'(' after __attribute__"); - expect_punct(p, '(', "'((' after __attribute__"); - /* attr-list: zero or more attr, comma-separated. Empty list and - * trailing/leading bare commas are accepted (GCC compat). */ - for (;;) { - Sym aname; - AttrArgShape shape; - Attr* a; - const char* diag_name; - size_t diag_len; - const char* canon; - size_t canon_len; - /* Accept stray commas and an empty list. */ - while (accept_punct(p, ',')) { /* skip */ } - if (is_punct(&p->cur, ')')) break; - if (p->cur.kind != TOK_IDENT) { - perr(p, "expected attribute name"); - } - aname = p->cur.v.ident; - a = arena_new(p->c->tu, Attr); - if (!a) perr(p, "out of memory in parse_attribute_spec_list"); - memset(a, 0, sizeof *a); - a->loc = tok_loc(&p->cur); - a->name = aname; - a->kind = (u16)classify_attr(p, aname, &shape); - advance(p); - diag_name = pool_str(p->pool, aname, &diag_len); - attr_canon_range(diag_name, diag_len, &canon, &canon_len); - (void)canon; (void)canon_len; - parse_attr_args(p, a, shape, diag_name ? diag_name : "<unknown>"); - if (tail) tail->next = a; else head = a; - tail = a; - if (!accept_punct(p, ',')) break; - } - expect_punct(p, ')', "')' after attribute list"); - expect_punct(p, ')', "'))' after attribute list"); - (void)kw_loc; - } - return head; -} - -static void parse_and_discard_attributes(Parser* p) { - (void)parse_attribute_spec_list(p); -} - -/* Bare `__attribute__((aligned))` (no argument) means "biggest scalar - * alignment". Same default as decl_attrs.c uses. */ -#define PARSE_ATTR_ALIGNED_DEFAULT 16u - -/* Scan an attribute chain and merge record-level packed / aligned(N) into - * the supplied TypeRecordOpts. 
*/ -static void attrs_to_record_opts(const Attr* a, TypeRecordOpts* opts) { - for (; a; a = a->next) { - if (a->kind == ATTR_PACKED) { - opts->packed = 1; - } else if (a->kind == ATTR_ALIGNED) { - u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; - if (v > opts->align_override) opts->align_override = (u16)v; - } - } -} - -/* Scan an attribute chain and merge per-member packed / aligned(N) into a - * Field's carriers. */ -static void attrs_to_field(const Attr* a, Field* f) { - for (; a; a = a->next) { - if (a->kind == ATTR_PACKED) { - f->packed = 1; - } else if (a->kind == ATTR_ALIGNED) { - u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; - if (v > f->align_override) f->align_override = (u16)v; - } - } -} - -/* Walk attrs looking for ATTR_ALIGNED; returns 0 if absent. */ -static u32 attrs_pick_aligned(const Attr* a) { - u32 best = 0; - for (; a; a = a->next) { - if (a->kind == ATTR_ALIGNED) { - u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; - if (v > best) best = v; - } - } - return best; -} - -/* Parse a struct/union member-declaration list. The `{` has already been - * consumed. Fills `b` with each member's Field; bumps anonymous flags as - * needed. Bitfields are diagnosed (cg lacks the codegen for them in this - * slice). */ -static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { - while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { - DeclSpecs specs; - if (!parse_decl_specs(p, &specs)) { - perr(p, "expected member declaration"); - } - /* Anonymous struct/union member: `struct { int y; };` or - * `union { int a, b; };` directly inside another aggregate (C11 - * §6.7.2.1 ¶13). The shape is decl-specs immediately followed by - * `;` with no declarator. 
*/ - if (is_punct(&p->cur, ';')) { - if (specs.type && (specs.type->kind == TY_STRUCT || - specs.type->kind == TY_UNION)) { - Field f; - memset(&f, 0, sizeof f); - f.name = 0; - f.type = specs.type; - f.flags = FIELD_ANON; - type_record_field(b, f); - advance(p); - continue; - } - perr(p, "declaration without declarator must be anonymous aggregate"); - } - /* One or more declarators separated by `,`. */ - for (;;) { - Sym mname = 0; - SrcLoc mloc = tok_loc(&p->cur); - const Type* mty; - Field f; - memset(&f, 0, sizeof f); - /* Anonymous bitfield: `unsigned : N;` — no declarator, just the - * width. Width 0 forces alignment to the next storage unit per - * §6.7.2.1 ¶12. We don't actually lay out the unit yet (the abi - * layout assumes named fields), but recording the entry keeps - * downstream layout/init consistent. */ - if (is_punct(&p->cur, ':')) { - advance(p); - i64 w = eval_const_int(p, mloc); - f.name = 0; - f.type = specs.type; - f.bitfield_width = (u16)w; - f.flags = FIELD_BITFIELD; - if (w == 0) f.flags |= FIELD_ZERO_WIDTH; - attrs_to_field(specs.attrs, &f); - type_record_field(b, f); - if (!accept_punct(p, ',')) break; - continue; - } - Attr* mattrs = NULL; - mty = parse_declarator_full_ex(p, specs.type, /*allow_abstract=*/0, - &mname, &mloc, &mattrs); - /* Bitfield form `: width` after the declarator name (or after the - * type with no name). Recognized to keep the parser unstuck on - * member lists with bitfields, but defers actual codegen — the - * field is still recorded and abi_record_layout treats it as a - * full storage unit, which is wrong for any cross-member reference - * but right enough for the bitfield row to land later (Phase 3 - * follow-up alongside cg_bitfield_load/store). 
*/ - if (accept_punct(p, ':')) { - i64 w = eval_const_int(p, mloc); - f.name = mname; - f.type = mty; - f.bitfield_width = (u16)w; - f.flags = FIELD_BITFIELD; - if (w == 0) f.flags |= FIELD_ZERO_WIDTH; - } else { - f.name = mname; - f.type = mty; - f.flags = FIELD_NONE; - } - /* Decl-spec attrs apply to each declarator in this declaration. - * In-declarator and trailing attrs attach to this field only. */ - attrs_to_field(specs.attrs, &f); - attrs_to_field(mattrs, &f); - { - Attr* trailing = NULL; - parse_attrs_into(p, &trailing); - attrs_to_field(trailing, &f); - } - type_record_field(b, f); - if (!accept_punct(p, ',')) break; - } - expect_punct(p, ';', "';' after struct member declaration"); - } -} - -/* Parse `struct/union [tag] [{ members }]` after the keyword has been - * consumed. Returns the (possibly incomplete) record type. */ -static const Type* parse_struct_or_union(Parser* p, TypeKind kind, - Attr** anon_attrs_out) { - Sym tag_name = 0; - SrcLoc tag_loc; - TagDeclKind tdk = (kind == TY_STRUCT) ? TAG_STRUCT : TAG_UNION; - Attr* rec_attrs = NULL; - /* Attributes between `struct`/`union` keyword and the tag/body. */ - parse_attrs_into(p, &rec_attrs); - tag_loc = tok_loc(&p->cur); - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - tag_name = p->cur.v.ident; - advance(p); - } - int has_body = is_punct(&p->cur, '{'); - if (!has_body && tag_name == 0) { - perr(p, "expected tag name or '{' after struct/union"); - } - if (!has_body) { - /* Tag reference: `struct S` used as a type in a declaration. Look up - * the tag in the chain; if not found, install an incomplete tag in - * the current scope (§6.7.2.3 ¶7 — a forward declaration). 
*/ - TagEntry* e = tag_lookup(p, tag_name); - if (e) { - if (e->kind != tdk) { - perr(p, "use of tag with wrong kind (struct vs union)"); - } - attr_list_append(&e->attrs, rec_attrs); - return e->type; - } - { - TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); - Type* t = type_record_forward(p->pool, kind, tid, tag_name); - TagEntry* te = tag_define(p, tag_name, tdk, t, /*complete=*/0); - attr_list_append(&te->attrs, rec_attrs); - return t; - } - } - /* Body: definition. If the tag was forward-declared in the same scope, - * complete that node in place; otherwise create a fresh forward node and - * complete it. The Type* identity is stable across completion so any - * pointer type built off the forward node automatically updates. */ - Type* target = NULL; - TagEntry* existing = tag_name ? tag_lookup_local(p, tag_name) : NULL; - TagEntry* te = NULL; - if (existing) { - if (existing->kind != tdk) { - perr(p, "tag redeclared with wrong kind"); - } - if (existing->complete) { - perr(p, "redefinition of tag"); - } - target = existing->type; - te = existing; - } else { - TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); - target = type_record_forward(p->pool, kind, tid, tag_name); - if (tag_name) { - te = tag_define(p, tag_name, tdk, target, /*complete=*/0); - } - } - expect_punct(p, '{', "'{' to start aggregate body"); - TypeRecordBuilder* b = - type_record_begin(p->pool, kind, target->rec.tag_id, tag_name); - parse_member_decls(p, b); - expect_punct(p, '}', "'}' after aggregate body"); - /* Trailing attributes (after `}`) attach to the record type. */ - parse_attrs_into(p, &rec_attrs); - if (te) { - attr_list_append(&te->attrs, rec_attrs); - } else if (anon_attrs_out) { - /* Anonymous record — let the caller hoist record attrs onto the - * surrounding declaration (e.g. into DeclSpecs.attrs). 
*/ - attr_list_append(anon_attrs_out, rec_attrs); - } - /* Pull the accumulated fields out of the builder and install them on the - * target node so any pre-existing pointer-to-target types see complete - * fields. The builder's Type* (returned by type_record_end) is discarded; - * we keep `target` as the canonical Type*. */ - { - /* type_record_end allocates a fresh Type and exposes only the public - * Type*. We need access to the builder's accumulated `fields/nfields`. - * Doing it via type_record_end and reading back through `Type` would - * produce two equivalent records; the harmless cost is one extra - * Type node in the pool (struct types aren't interned). */ - const Type* fresh = type_record_end(p->pool, b); - type_record_install(target, (Field*)fresh->rec.fields, - fresh->rec.nfields); - } - /* Honor record-level packed / aligned(N). target is the canonical Type* - * (forward node completed in place), so writing to its rec.* is what - * abi_record_layout will read. */ - { - TypeRecordOpts opts; - memset(&opts, 0, sizeof opts); - attrs_to_record_opts(rec_attrs, &opts); - if (opts.packed) target->rec.packed = 1; - if (opts.align_override > target->rec.align_override) - target->rec.align_override = opts.align_override; - } - if (existing) { - existing->complete = 1; - } - return target; -} - -/* Parse `enum [tag] [{ K [= cexpr] (, K [= cexpr])* [,] }]` after the - * `enum` keyword has been consumed. Returns the enum type (interned). */ -static const Type* parse_enum(Parser* p, Attr** anon_attrs_out) { - Sym tag_name = 0; - SrcLoc tag_loc; - Attr* rec_attrs = NULL; - /* Attributes between `enum` keyword and tag/body. 
*/ - parse_attrs_into(p, &rec_attrs); - tag_loc = tok_loc(&p->cur); - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - tag_name = p->cur.v.ident; - advance(p); - } - int has_body = is_punct(&p->cur, '{'); - if (!has_body && tag_name == 0) { - perr(p, "expected tag name or '{' after enum"); - } - if (!has_body) { - /* Tag reference. Per §6.7.2.3 ¶3 enum types must be defined where - * referenced; the tag lookup is mostly to keep the type identity - * consistent. If the tag isn't registered, treat the enum as - * synonymous with `int` — simplest behavior consistent with - * §6.7.2.2 ¶4 (enum compatible with int). */ - TagEntry* e = tag_lookup(p, tag_name); - if (e && e->kind == TAG_ENUM) { - attr_list_append(&e->attrs, rec_attrs); - return e->type; - } - /* Forward enum: install an incomplete enum-type at int width. */ - TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc); - const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p)); - { - TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, - /*complete=*/0); - attr_list_append(&te->attrs, rec_attrs); - } - return et; - } - /* Definition: parse enumerator list, bind each into the ordinary scope - * as SEK_ENUM_CST (§6.7.2.2 ¶3). Values default to 0 and increment by - * one; an `= cexpr` resets the running counter. 
*/ - TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc); - const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p)); - expect_punct(p, '{', "'{'"); - i64 next_val = 0; - for (;;) { - Sym name; - SrcLoc nloc = tok_loc(&p->cur); - SymEntry* e; - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected enumerator name"); - } - name = p->cur.v.ident; - advance(p); - i64 val = next_val; - if (accept_punct(p, '=')) { - val = eval_const_int(p, nloc); - } - e = scope_define(p, name, SEK_ENUM_CST, et); - e->v.enum_value = val; - next_val = val + 1; - if (!accept_punct(p, ',')) break; - if (is_punct(&p->cur, '}')) break; /* trailing comma */ - } - expect_punct(p, '}', "'}' after enumerator list"); - /* Trailing attributes after `}` attach to the enum type. */ - parse_attrs_into(p, &rec_attrs); - if (tag_name) { - /* Replace any incomplete forward entry; otherwise install fresh. */ - TagEntry* existing = tag_lookup_local(p, tag_name); - if (existing) { - if (existing->kind != TAG_ENUM) { - perr(p, "tag redeclared with wrong kind"); - } - existing->complete = 1; - attr_list_append(&existing->attrs, rec_attrs); - } else { - TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, - /*complete=*/1); - attr_list_append(&te->attrs, rec_attrs); - } - } else if (anon_attrs_out) { - attr_list_append(anon_attrs_out, rec_attrs); - } - return et; -} - -/* Member lookup with anonymous-aggregate flattening (C11 §6.7.2.1 ¶13). - * Walks the record's fields; on a hit returns the field type and the byte - * offset from the search root. On an anonymous struct/union member, recurses - * into that field's type, accumulating the field's own offset. - * - * Returns 1 on success (out_type/out_offset filled), 0 if the name is not a - * member. Bitfields are signaled via *out_bitfield (parser then panics — - * cg_bitfield_load/store are stubs). 
*/
static int find_field(TargetABI* abi, const Type* rec, Sym name,
                      const Type** out_type, u32* out_offset,
                      const Field** out_field) {
  const ABIRecordLayout* layout;
  u16 idx;

  /* Only struct/union types have members to search. */
  if (rec == NULL) return 0;
  if (rec->kind != TY_STRUCT && rec->kind != TY_UNION) return 0;

  layout = abi_record_layout(abi, rec);
  if (layout == NULL) return 0;

  for (idx = 0; idx < rec->rec.nfields; idx++) {
    const Field* member = &rec->rec.fields[idx];

    /* Direct hit. Name 0 marks an unnamed member and never matches. */
    if (name != 0 && member->name == name) {
      *out_type = member->type;
      *out_offset = layout->fields[idx].offset;
      *out_field = member;
      return 1;
    }

    /* Anything that is not an anonymous struct/union cannot hide the name
     * one level down. */
    if (!((member->flags & FIELD_ANON) &&
          (member->type->kind == TY_STRUCT ||
           member->type->kind == TY_UNION))) {
      continue;
    }

    /* Search the anonymous aggregate; a hit inside it is reported relative
     * to `rec` by adding this member's own offset. */
    {
      const Type* sub_type = NULL;
      u32 sub_offset = 0;
      const Field* sub_field = NULL;
      if (find_field(abi, member->type, name, &sub_type, &sub_offset,
                     &sub_field)) {
        *out_type = sub_type;
        *out_offset = layout->fields[idx].offset + sub_offset;
        *out_field = sub_field;
        return 1;
      }
    }
  }
  return 0;
}

/* True when the token `t` starts a declaration-specifier sequence: a
 * type keyword, a storage-class keyword, a qualifier, or a function
 * specifier. Used at lookahead points (cast vs. paren expr; sizeof's
 * inner form; for-init declarator vs. expression). The list mirrors
 * parse_decl_specs's accepted set so the two stay in sync.
 *
 * A non-keyword identifier also counts when it is `__builtin_va_list` or
 * when scope_lookup() resolves it to a SEK_TYPEDEF entry, i.e. a
 * typedef-name.
*/ -static int starts_type_name(const Parser* p, const Tok* t) { - if (t->kind != TOK_IDENT) return 0; - CKw k = ident_kw(p, t->v.ident); - switch (k) { - case KW_VOID: - case KW_CHAR: - case KW_SHORT: - case KW_INT: - case KW_LONG: - case KW_FLOAT: - case KW_DOUBLE: - case KW_SIGNED: - case KW_UNSIGNED: - case KW_BOOL: - case KW_STRUCT: - case KW_UNION: - case KW_ENUM: - case KW_CONST: - case KW_VOLATILE: - case KW_RESTRICT: - case KW_ATOMIC: - case KW_STATIC: - case KW_EXTERN: - case KW_INLINE: - case KW_NORETURN: - case KW_REGISTER: - case KW_AUTO: - case KW_TYPEDEF: - case KW_ALIGNAS: - case KW_THREAD_LOCAL: - return 1; - case KW_NONE: { - /* `__builtin_va_list` is a target-defined type-name (the va_list - * type produced by `abi_va_list_type`). Phase 9. */ - if (t->v.ident == p->sym_b_va_list) return 1; - /* Typedef-name. Cast away const for the lookup helper, which only - * reads scope state. */ - SymEntry* e = scope_lookup((Parser*)p, t->v.ident); - return e && e->kind == SEK_TYPEDEF; - } - default: - return 0; - } -} - -/* Walk a `*` chain at the front of a declarator (and optional qualifiers - * after each `*`), wrapping `base` in successive pointer types. Qualifiers - * after a `*` qualify the pointer just produced (`int *const p` → p is a - * const-qualified pointer to int). */ -static const Type* parse_pointer_layer(Parser* p, const Type* base) { - while (accept_punct(p, '*')) { - u16 q = 0; - base = type_ptr(p->pool, base); - for (;;) { - if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } - if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } - if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } - if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } - /* Pointer-layer attributes (e.g. `int * __attribute__((aligned(8))) p`). - * Phase 1 parses + drops. 
*/ - if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } - break; - } - if (q) base = type_qualified(p->pool, base, q); - } - return base; -} - -/* Type-name (§6.7.7): specifier-qualifier-list (abstract-declarator)? - * The abstract declarator at this slice is just a `*` chain — array and - * function suffixes land in Phase 2. Used by sizeof / _Alignof / cast. */ -static const Type* parse_type_name(Parser* p) { - DeclSpecs specs; - Sym dummy_name = 0; - SrcLoc dummy_loc = {0, 0, 0}; - if (!parse_decl_specs(p, &specs)) { - perr(p, "expected type-name"); - } - /* Type-name accepts a full abstract declarator (pointer prefix + array - * and/or function suffixes); compound literals like `(int[]){...}` and - * casts like `(int (*)[3])` rely on this. */ - return parse_declarator_full(p, specs.type, /*allow_abstract=*/1, - &dummy_name, &dummy_loc); -} - -/* ============================================================ - * Literal parsing - * ============================================================ - * Integer literals are parsed by parse_int_literal (returns the value); - * the §6.4.4.1 type-by-suffix selection lives in int_literal_type and - * runs from parse_primary so cexpr / array-size paths that only need - * the value can ignore typing. Float literals share parse_float_literal - * and float_literal_type. */ -static i64 parse_int_literal(Parser* p, const Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - i64 base = 10; - i64 acc = 0; - if (!s) perr(p, "bad numeric literal"); - if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - base = 16; - i = 2; - } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { - base = 2; - i = 2; - } else if (len >= 1 && s[0] == '0') { - base = 8; - i = 1; - } - for (; i < len; ++i) { - int c = (unsigned char)s[i]; - int dv; - /* Stop at suffix characters (u/U/l/L). 
*/ - if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; - if (c >= '0' && c <= '9') - dv = c - '0'; - else if (c >= 'a' && c <= 'f') - dv = c - 'a' + 10; - else if (c >= 'A' && c <= 'F') - dv = c - 'A' + 10; - else - perr(p, "bad digit in numeric literal"); - if (dv >= base) perr(p, "digit out of range for base"); - acc = acc * base + dv; - } - return acc; -} - -/* §6.4.4.1 ¶5 — pick a TY_* tag for an integer constant from its - * suffix flags. Promotion-by-magnitude (e.g. an unsuffixed decimal that - * doesn't fit in `int` widening to `long`) is not modelled; corpus - * literals fit in their suffix family. */ -static const Type* int_literal_type(Parser* p, const Tok* t) { - int u = (t->flags & TF_INT_U) != 0; - int l = (t->flags & TF_INT_L) != 0; - int ll = (t->flags & TF_INT_LL) != 0; - TypeKind k; - if (ll) k = u ? TY_ULLONG : TY_LLONG; - else if (l) k = u ? TY_ULONG : TY_LONG; - else if (u) k = TY_UINT; - else k = TY_INT; - return type_prim(p->pool, k); -} - -/* Decimal/hex float-literal parser. Allowed source forms (§6.4.4.2): - * decimal: digits[.digits][e[+-]digits] - * hex: 0x hexdigits[.hexdigits][p[+-]digits] - * Either part of a fractional pair may be empty (`1.`, `.5`); the - * exponent is required for hex floats per the standard but accepted - * without here for resilience. The result type is selected by the - * f/F/l/L suffix and returned via `*ty_out`. */ -static double parse_float_literal(Parser* p, const Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - int is_hex = 0; - double v = 0.0; - int exp = 0; /* additional power of base from fractional digits */ - int dec_exp = 0; /* explicit exponent (decimal: pow10; hex: pow2) */ - int frac_seen = 0; - if (!s) perr(p, "bad float literal"); - if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - is_hex = 1; - i = 2; - } - /* Integer part. */ - while (i < len) { - int c = (unsigned char)s[i]; - int dv; - if (c == '.' 
|| c == 'e' || c == 'E' || c == 'p' || c == 'P' || - c == 'f' || c == 'F' || c == 'l' || c == 'L') - break; - if (c >= '0' && c <= '9') dv = c - '0'; - else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; - else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; - else perr(p, "bad digit in float literal"); - v = v * (is_hex ? 16.0 : 10.0) + (double)dv; - i++; - } - /* Fractional part. */ - if (i < len && s[i] == '.') { - i++; - while (i < len) { - int c = (unsigned char)s[i]; - int dv; - if (c == 'e' || c == 'E' || c == 'p' || c == 'P' || - c == 'f' || c == 'F' || c == 'l' || c == 'L') - break; - if (c >= '0' && c <= '9') dv = c - '0'; - else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; - else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; - else perr(p, "bad digit in float literal"); - v = v * (is_hex ? 16.0 : 10.0) + (double)dv; - exp -= 1; - frac_seen = 1; - i++; - } - } - (void)frac_seen; - /* Explicit exponent. Decimal uses e/E and base 10; hex uses p/P and base 2 - * applied to the (already-scaled) hex significand. */ - if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) { - int neg = 0; - int n = 0; - int hex_exp = (s[i] == 'p' || s[i] == 'P'); - i++; - if (i < len && (s[i] == '+' || s[i] == '-')) { - if (s[i] == '-') neg = 1; - i++; - } - while (i < len) { - int c = (unsigned char)s[i]; - if (c < '0' || c > '9') break; - n = n * 10 + (c - '0'); - i++; - } - dec_exp = neg ? -n : n; - if (hex_exp) { - /* For hex floats the explicit exponent is in base 2 and applies to - * the significand interpreted as the hex digits without the - * fractional adjustment we accumulated in `exp` (which is base-16 - * digits). Convert the base-16 fractional adjustment to base-2 by - * multiplying by 4, then combine with the explicit base-2 exp. */ - dec_exp += exp * 4; - exp = 0; - } - } - /* Apply the implicit fractional-digit exponent (decimal only — for hex - * we already folded `exp*4` into dec_exp above). 
*/ - while (exp < 0) { v /= (is_hex ? 16.0 : 10.0); exp++; } - while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; } - /* Apply the explicit exponent (base 10 for decimal, base 2 for hex). */ - if (is_hex) { - while (dec_exp < 0) { v /= 2.0; dec_exp++; } - while (dec_exp > 0) { v *= 2.0; dec_exp--; } - } else { - while (dec_exp < 0) { v /= 10.0; dec_exp++; } - while (dec_exp > 0) { v *= 10.0; dec_exp--; } - } - return v; -} - -static const Type* float_literal_type(Parser* p, const Tok* t) { - if (t->flags & TF_FLT_F) return type_prim(p->pool, TY_FLOAT); - if (t->flags & TF_FLT_L) return type_prim(p->pool, TY_LDOUBLE); - return type_prim(p->pool, TY_DOUBLE); -} - -/* ============================================================ - * Expressions — precedence climbing - * ============================================================ - * Pratt-style climber: each level consumes its operators and recurses - * into the next-tighter level. Each function leaves the result on the CG - * stack. The grammar follows C11 §6.5 top-down; only the productions - * needed by the spine are wired today, with the remaining ones marked - * with TODO comments at the call sites where they'll slot in. - * - * Level (loose → tight): - * assignment = `=` `+=` `-=` `*=` `/=` `%=` `&=` `|=` `^=` `<<=` `>>=` - * conditional = `? :` (TODO) - * logical_or = `||` (TODO) - * logical_and = `&&` (TODO) - * bit_or = `|` - * bit_xor = `^` - * bit_and = `&` - * equality = `==` `!=` - * relational = `<` `<=` `>` `>=` - * shift = `<<` `>>` - * additive = `+` `-` - * multiplicative = `*` `/` `%` - * cast = `(type) cast` (TODO) - * unary = `+ - ! ~ * & ++ --` `sizeof` (partial) - * postfix = `[] () . 
-> ++ --` (partial) - * primary = ident | num | `(` expr `)` | strlit | charlit - */ - -static void parse_expr(Parser* p); -static void parse_assign_expr(Parser* p); -static void parse_unary(Parser* p); -static void parse_postfix(Parser* p); - -/* Initializer entry points used by compound-literal lowering in parse_unary; - * the bodies live next to the rest of the initializer machinery further - * down. */ -typedef struct DeclSpecs DeclSpecs; -static const Type* complete_incomplete_array(Parser* p, const Type* ty); -static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, - const Type* ty); - -/* If the rvalue on top of the stack and the lvalue beneath it (the store - * target the parser is about to drive into cg_store) are both arithmetic - * but of different C types, emit the implicit §6.5.16.1 conversion the - * standard requires for `=`. Pointer/aggregate cases are the caller's - * responsibility — they don't need an arithmetic convert. */ -static void coerce_top_to_lvalue(Parser* p) { - const Type* src = cg_top_type(p->cg); - const Type* dst = cg_top2_type(p->cg); - if (!src || !dst || src == dst) return; - if (type_is_arith(src) && type_is_arith(dst)) { - cg_convert(p->cg, dst); - } -} - -/* Produce an rvalue on the stack. Three cases beyond the trivial scalar: - * - array lvalue: §6.3.2.1 array-to-pointer decay → take address, retag the - * resulting `T(*)[N]` as `T*` so subsequent ops see a pointer. - * - function lvalue: §6.3.2.1 function-to-pointer decay → take address; the - * type becomes `T(*)()` automatically because cg_addr wraps the operand - * type in TY_PTR. - * - scalar lvalue (LOCAL/GLOBAL/INDIRECT): cg_load. Idempotent on rvalues. 
*/ -static void to_rvalue(Parser* p) { - const Type* t = cg_top_type(p->cg); - if (t) { - if (t->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem)); - return; - } - if (t->kind == TY_FUNC) { - cg_addr(p->cg); - return; - } - /* Aggregates do not load into a single scratch register — they are - * consumed by cg_call/cg_ret/struct-copy as addressable storage. The - * value stack already holds an lvalue (LOCAL/GLOBAL/INDIRECT) or the - * call return's hidden slot lvalue; leave it alone. */ - if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; - } - cg_load(p->cg); -} - -/* Decode one character (the first encoded code unit) from the token's - * spelling at offset `i`, advancing `*pi` past the consumed bytes. - * Handles the §6.4.4.4 escape sequences a freestanding compiler is - * required to recognize. */ -static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi, - SrcLoc loc) { - size_t i = *pi; - i64 v; - int c; - if (i >= len) compiler_panic(p->c, loc, "truncated character literal"); - if (s[i] != '\\') { - v = (unsigned char)s[i++]; - *pi = i; - return v; - } - /* Escape sequence. 
*/ - i++; - if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal"); - c = (unsigned char)s[i++]; - switch (c) { - case 'n': v = '\n'; break; - case 't': v = '\t'; break; - case 'r': v = '\r'; break; - case 'b': v = '\b'; break; - case 'f': v = '\f'; break; - case 'v': v = '\v'; break; - case 'a': v = '\a'; break; - case '\\': v = '\\'; break; - case '\'': v = '\''; break; - case '"': v = '"'; break; - case '?': v = '?'; break; - case 'x': { - i64 hex = 0; - int any = 0; - while (i < len) { - int d = (unsigned char)s[i]; - int dv; - if (d >= '0' && d <= '9') dv = d - '0'; - else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10; - else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10; - else break; - hex = hex * 16 + dv; - any = 1; - i++; - } - if (!any) compiler_panic(p->c, loc, "\\x with no hex digits"); - v = hex & 0xff; - break; - } - default: - if (c >= '0' && c <= '7') { - i64 oct = c - '0'; - int n = 1; - while (n < 3 && i < len && s[i] >= '0' && s[i] <= '7') { - oct = oct * 8 + (s[i] - '0'); - i++; - n++; - } - v = oct & 0xff; - } else { - /* Unknown escape: implementation-defined; keep the literal byte. */ - v = c; - } - break; - } - *pi = i; - return v; -} - -static i64 decode_char_literal(Parser* p, const Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - i64 v; - if (!s) perr(p, "bad char literal"); - /* Skip optional encoding prefix (`L`, `u`, `U`, `u8`). The flag bits - * tell us which one without re-parsing. */ - if (t->flags & TF_STR_U8) i = 2; - else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; - if (i >= len || s[i] != '\'') perr(p, "malformed character literal"); - i++; /* opening quote */ - if (i >= len || s[i] == '\'') perr(p, "empty character literal"); - v = decode_one_char(p, s, len, &i, t->loc); - /* Multi-character constants are valid C but undefined-implementation; - * the spine corpus only uses single-char constants. 
Diagnose extra - * source bytes before the closing quote conservatively. */ - if (i >= len || s[i] != '\'') { - perr(p, "multi-character constants are not supported"); - } - return v; -} - -/* Decode the content of a string-literal token (without the surrounding - * quotes / encoding prefix) into raw bytes. Returns a heap-allocated - * buffer of length `*nlen_out`; caller frees through the same heap. */ -static u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - Heap* h = p->c->env->heap; - u8* buf; - size_t k = 0; - if (!s) perr(p, "bad string literal"); - if (t->flags & TF_STR_U8) i = 2; - else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; - if (i >= len || s[i] != '"') perr(p, "malformed string literal"); - i++; - /* Conservative buffer: at most one byte per source byte, plus NUL. */ - buf = (u8*)h->alloc(h, len + 1, 1); - if (!buf) perr(p, "out of memory in string literal"); - while (i < len && s[i] != '"') { - i64 ch = decode_one_char(p, s, len, &i, t->loc); - buf[k++] = (u8)ch; - } - buf[k++] = 0; /* NUL terminator */ - *nlen_out = k; - return buf; -} - -/* Place decoded string bytes in .rodata and return an ObjSymId pointing at - * them. Used by string literals in primary. */ -static ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) { - ObjBuilder* ob = decl_obj(p->decls); - Sym secname = pool_intern_cstr(p->pool, ".rodata"); - ObjSecId sec = obj_section(ob, secname, SEC_RODATA, SF_ALLOC, 1u); - u32 base = obj_pos(ob, sec); - Sym lname; - ObjSymId sym; - char namebuf[32]; - static u32 counter; - /* Anonymous local symbol; the name is just for readability in objdump. */ - int wlen = 0; - u32 id = ++counter; - /* Tiny formatter — avoids stdio dependencies in the parser. 
*/ - namebuf[wlen++] = '.'; - namebuf[wlen++] = 'L'; - namebuf[wlen++] = 'C'; - { - char digits[12]; - int dn = 0; - if (id == 0) digits[dn++] = '0'; - while (id) { - digits[dn++] = (char)('0' + (id % 10)); - id /= 10; - } - while (dn) namebuf[wlen++] = digits[--dn]; - } - namebuf[wlen] = 0; - lname = pool_intern(p->pool, namebuf, (size_t)wlen); - sym = obj_symbol(ob, lname, SB_LOCAL, SK_OBJ, sec, base, n); - { - u8* dst = obj_reserve(ob, sec, n); - if (dst) memcpy(dst, bytes, n); - } - return sym; -} - -/* Phase 9 — Builtins. - * - * `__builtin_*` and `__atomic_*` calls are not ordinary function references: - * they don't go through scope_lookup / cg_call. Instead the parser dispatches - * the name to a per-builtin handler that consumes the argument list and - * emits the corresponding cg primitive (cg_alloca, cg_va_*, cg_atomic_*) or - * folds the call to a constant (`__builtin_offsetof`, `__builtin_expect`). - * - * Pre: p->cur is TOK_IDENT for `name`, and peek1() is `(`. (The caller's - * responsibility — try_parse_builtin_call assumes both checks have run.) - * Returns 1 if the name matched a known builtin (token stream advanced - * past the closing `)`); 0 if not (no tokens consumed). - * - * Stack discipline mirrors a normal call: a non-void builtin leaves its - * result rvalue on the stack; a void-returning one (va_start/end/copy, - * atomic_store) pushes the same int-0 sentinel parse_postfix uses for - * void calls so higher levels never underflow. */ -static int try_parse_builtin_call(Parser* p); - -/* Walk a `__builtin_offsetof` member-designator (`.field` / `[index]` chain) - * starting from `rec` (a struct/union/array). Adds offsets into `*off` and - * descends through nested aggregates. Returns the leaf type. The first - * step must be a field name (`.` is implicit per §7.18); subsequent steps - * may be either form. 
*/ -static const Type* offsetof_designator(Parser* p, const Type* base, - u32* off) { - const Type* cur = base; - /* First member name — required, no leading `.` per the macro contract - * but we accept the GCC form `,member-designator` directly here, which - * is `.name` written as just `name` for the leading element. */ - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name in __builtin_offsetof"); - } - for (;;) { - if (cur->kind == TY_STRUCT || cur->kind == TY_UNION) { - Sym mname = p->cur.v.ident; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - if (!find_field(p->abi, cur, mname, &mty, &moff, &mf)) { - perr(p, "no such member in __builtin_offsetof"); - } - advance(p); - *off += moff; - cur = mty; - } else if (cur->kind == TY_ARRAY) { - /* `[index]` step — fall through to the bracket branch below. */ - } else { - perr(p, "__builtin_offsetof step into non-aggregate"); - } - /* Optional continuation: `.field` or `[index]`. */ - if (is_punct(&p->cur, '.')) { - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '.'"); - } - continue; - } - if (is_punct(&p->cur, '[')) { - advance(p); - i64 idx = eval_const_int(p, p->cur.loc); - expect_punct(p, ']', "']' in __builtin_offsetof"); - if (cur->kind != TY_ARRAY) { - perr(p, "__builtin_offsetof '[' on non-array"); - } - *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx); - cur = cur->arr.elem; - continue; - } - break; - } - return cur; -} - -static int try_parse_builtin_call(Parser* p) { - Sym name = p->cur.v.ident; - SrcLoc loc = p->cur.loc; - - /* Common dispatch: only the names below match. Falling through means the - * IDENT is not a builtin and parse_primary should resolve it normally. 
*/ - if (name != p->sym_b_alloca && name != p->sym_b_ctz && - name != p->sym_b_expect && - name != p->sym_b_offsetof && name != p->sym_b_va_start && - name != p->sym_b_va_arg && name != p->sym_b_va_end && - name != p->sym_b_va_copy && name != p->sym_a_load_n && - name != p->sym_a_store_n && name != p->sym_a_exchange_n && - name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub && - name != p->sym_a_fetch_and && name != p->sym_a_fetch_or && - name != p->sym_a_fetch_xor && name != p->sym_a_cas_n && - name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) { - return 0; - } - advance(p); /* IDENT */ - expect_punct(p, '(', "'(' after builtin"); - - if (name == p->sym_b_offsetof) { - /* `__builtin_offsetof(type, member-designator)` — fold to a size_t - * constant. The type is the aggregate root; the designator chain - * accumulates field/element offsets. */ - const Type* root = parse_type_name(p); - expect_punct(p, ',', "',' in __builtin_offsetof"); - u32 off = 0; - (void)offsetof_designator(p, root, &off); - expect_punct(p, ')', "')' after __builtin_offsetof"); - cg_push_int(p->cg, (i64)off, ty_size_t(p)); - return 1; - } - - if (name == p->sym_b_expect) { - /* `__builtin_expect(expr, hint)` — value of the call is `expr`; the - * hint is evaluated for side effects (none in practice) then dropped. - * No backend-side intrinsic is needed for the corpus. */ - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __builtin_expect"); - parse_assign_expr(p); - cg_drop(p->cg); - expect_punct(p, ')', "')' after __builtin_expect"); - return 1; - } - - if (name == p->sym_b_alloca) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after __builtin_alloca"); - cg_set_loc(p->cg, loc); - cg_alloca(p->cg); - return 1; - } - - if (name == p->sym_b_ctz) { - /* __builtin_ctz(unsigned) — count trailing zeros, result `int`. 
- * UB when arg is 0; the inline lowering produces the arch's natural - * result for that case (typically the operand width). */ - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after __builtin_ctz"); - cg_set_loc(p->cg, loc); - cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ); - return 1; - } - - if (name == p->sym_b_va_start) { - /* `__builtin_va_start(ap, last)` — push &ap, parse-and-drop `last` - * (its name is required by C but the runtime impl ignores it). */ - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_start"); - parse_assign_expr(p); - cg_drop(p->cg); - expect_punct(p, ')', "')' after __builtin_va_start"); - cg_set_loc(p->cg, loc); - cg_va_start_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); /* void-call sentinel */ - return 1; - } - - if (name == p->sym_b_va_end) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ')', "')' after __builtin_va_end"); - cg_set_loc(p->cg, loc); - cg_va_end_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_copy) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_copy"); - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ')', "')' after __builtin_va_copy"); - cg_set_loc(p->cg, loc); - cg_va_copy_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_arg) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_arg"); - const Type* ty = parse_type_name(p); - expect_punct(p, ')', "')' after __builtin_va_arg"); - cg_set_loc(p->cg, loc); - cg_va_arg_(p->cg, ty); - return 1; - } - - if (name == p->sym_a_load_n) { - /* `__atomic_load_n(ptr, order)`. The order must be a constant - * matching one of the predefined `__ATOMIC_*` values (Phase 9 - * predefines 0–5 to align with the MemOrder enum). 
*/ - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_load_n"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_load_n"); - cg_set_loc(p->cg, loc); - cg_atomic_load(p->cg, (MemOrder)ord); - return 1; - } - - if (name == p->sym_a_store_n) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_store_n"); - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_store_n"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_store_n"); - cg_set_loc(p->cg, loc); - cg_atomic_store(p->cg, (MemOrder)ord); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_a_thread_fence || name == p->sym_a_signal_fence) { - /* `__atomic_thread_fence(order)` / `__atomic_signal_fence(order)`. - * Both consume an order constant. signal_fence is a compiler barrier - * only; on real arches we conservatively lower it the same as - * thread_fence (the backend's fence emits DMB ISH). */ - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after atomic fence"); - cg_set_loc(p->cg, loc); - cg_fence(p->cg, (MemOrder)ord); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_a_cas_n) { - /* `__atomic_compare_exchange_n(ptr, &expected, desired, weak, - * success_order, failure_order)`. - * On match: stores `desired` at *ptr; returns 1. - * On mismatch: stores *ptr (the prior value) at *expected; returns 0. - * - * Strategy: pin &expected to a local, lower the CAS to [prior, ok] - * via cg, save both to locals, conditionally store prior to *expected - * on the failure branch, then push ok as the i32 result. Routing - * through frame slots keeps the value stack balanced across the - * conditional. 
*/ - parse_assign_expr(p); to_rvalue(p); /* ptr */ - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - - parse_assign_expr(p); to_rvalue(p); /* &expected */ - const Type* eptr_ty = cg_top_type(p->cg); - if (!eptr_ty || eptr_ty->kind != TY_PTR) { - perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer"); - } - const Type* val_ty = eptr_ty->ptr.pointee; - - /* Stash &expected. */ - FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd); - fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL; - FrameSlot eslot = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_swap(p->cg); /* [ptr, eslot_lv, &expected] */ - cg_store(p->cg); cg_drop(p->cg); /* [ptr] */ - - /* Load expected_val = *expected. */ - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_load(p->cg); - cg_deref(p->cg, val_ty); - cg_load(p->cg); /* [ptr, expected_val] */ - - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - parse_assign_expr(p); to_rvalue(p); /* desired */ - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - - /* Stack: [ptr, expected_val, desired]. */ - (void)eval_const_int(p, p->cur.loc); /* weak (ignored — strong CAS) */ - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - i64 succ = eval_const_int(p, p->cur.loc); - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - i64 fail = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_compare_exchange_n"); - - cg_set_loc(p->cg, loc); - cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail); - /* Stack: [prior, ok]. */ - - /* Stash ok. */ - const Type* ok_ty = cg_top_type(p->cg); - FrameSlotDesc okd; memset(&okd, 0, sizeof okd); - okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL; - FrameSlot okslot = cg_local(p->cg, &okd); - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); /* [prior] */ - - /* Stash prior. 
*/ - FrameSlotDesc pd; memset(&pd, 0, sizeof pd); - pd.type = val_ty; - pd.size = abi_sizeof(p->abi, val_ty); - pd.align = abi_alignof(p->abi, val_ty); - pd.kind = FS_LOCAL; - FrameSlot pslot = cg_local(p->cg, &pd); - cg_push_local_typed(p->cg, pslot, val_ty); - cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); /* [] */ - - /* if (!ok) *expected = prior; */ - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_load(p->cg); - CGLabel L_done = cg_label_new(p->cg); - cg_branch_true(p->cg, L_done); - /* writeback */ - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_load(p->cg); - cg_deref(p->cg, val_ty); - cg_push_local_typed(p->cg, pslot, val_ty); - cg_load(p->cg); - cg_store(p->cg); cg_drop(p->cg); - cg_label_place(p->cg, L_done); - - /* Push ok as the i32 result. */ - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_load(p->cg); - return 1; - } - - /* The rmw family — exchange / fetch_{add,sub,and,or,xor} share the same - * (ptr, val, order) shape; map name → AtomicOp. */ - AtomicOp op; - if (name == p->sym_a_exchange_n) op = AO_XCHG; - else if (name == p->sym_a_fetch_add) op = AO_ADD; - else if (name == p->sym_a_fetch_sub) op = AO_SUB; - else if (name == p->sym_a_fetch_and) op = AO_AND; - else if (name == p->sym_a_fetch_or) op = AO_OR; - else if (name == p->sym_a_fetch_xor) op = AO_XOR; - else { perr(p, "internal: unhandled builtin"); } - - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in atomic builtin"); - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in atomic builtin"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after atomic builtin"); - cg_set_loc(p->cg, loc); - cg_atomic_rmw(p->cg, op, (MemOrder)ord); - return 1; -} - -static void parse_primary(Parser* p) { - Tok t = p->cur; - if (t.kind == TOK_NUM) { - i64 v = parse_int_literal(p, &t); - const Type* lty = int_literal_type(p, &t); - advance(p); - cg_push_int(p->cg, v, lty); - return; - } - if (t.kind == TOK_FLT) { - double v = 
parse_float_literal(p, &t); - const Type* lty = float_literal_type(p, &t); - advance(p); - cg_push_float(p->cg, v, lty); - return; - } - if (is_punct(&t, '(')) { - advance(p); - parse_expr(p); - expect_punct(p, ')', "')'"); - return; - } - if (t.kind == TOK_IDENT) { - SymEntry* e; - /* Reject keywords used as expressions. */ - if (ident_kw(p, t.v.ident) != KW_NONE) { - perr(p, "unexpected keyword in expression"); - } - /* Phase 9 — Builtins. Intercepted before scope_lookup because they - * have no SymEntry: `__builtin_*` and `__atomic_*` followed by `(` - * route to dedicated cg primitives (or fold to constants) instead - * of going through cg_call. */ - { - Tok n = peek1(p); - if (is_punct(&n, '(') && try_parse_builtin_call(p)) return; - } - e = scope_lookup(p, t.v.ident); - if (!e) { - size_t nlen = 0; - const char* nm = pool_str(p->pool, t.v.ident, &nlen); - compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen, - nm ? nm : "?"); - } - advance(p); - switch (e->kind) { - case SEK_LOCAL: - cg_push_local_typed(p->cg, e->v.slot, e->type); - if (e->vla_byte_slot != FRAME_SLOT_NONE) { - p->last_pushed_vla_slot = e->vla_byte_slot; - } - return; - case SEK_GLOBAL: - case SEK_FUNC: - cg_push_global(p->cg, e->v.sym, e->type); - return; - case SEK_ENUM_CST: - cg_push_int(p->cg, e->v.enum_value, e->type); - return; - case SEK_TYPEDEF: - default: - perr(p, "identifier is not a value"); - } - } - if (t.kind == TOK_CHR) { - i64 v = decode_char_literal(p, &t); - advance(p); - cg_push_int(p->cg, v, ty_int(p)); - return; - } - if (t.kind == TOK_STR) { - /* Per §6.4.5 ¶6 the literal has type `char[N]` (N includes the NUL). - * Push it as a GLOBAL lvalue of that array type so sizeof/_Alignof see - * the array, subscripting computes the right element offset, and the - * normal array-to-pointer decay in to_rvalue / parse_postfix '[' kicks - * in everywhere else. 
*/ - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - ObjSymId sym = emit_string_to_rodata(p, bytes, n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); - { - const Type* char_ty = type_prim(p->pool, TY_CHAR); - const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); - cg_push_global(p->cg, sym, arr_ty); - } - return; - } - perr(p, "expected expression"); -} - -static void parse_postfix(Parser* p) { - parse_primary(p); - for (;;) { - Tok t = p->cur; - if (is_punct(&t, P_INC)) { - advance(p); - cg_inc_dec(p->cg, BO_IADD, /*post=*/1); - continue; - } - if (is_punct(&t, P_DEC)) { - advance(p); - cg_inc_dec(p->cg, BO_ISUB, /*post=*/1); - continue; - } - if (is_punct(&t, '(')) { - /* Function call. The callee was pushed by parse_primary as a function - * lvalue (OPK_GLOBAL when SEK_FUNC); a function-pointer callee is also - * accepted (TY_PTR-to-TY_FUNC) — load it to a register and indirect. */ - const Type* top = cg_top_type(p->cg); - const Type* fn_type; - if (top && top->kind == TY_FUNC) { - fn_type = top; - } else if (top && top->kind == TY_PTR && top->ptr.pointee && - top->ptr.pointee->kind == TY_FUNC) { - fn_type = top->ptr.pointee; - /* Materialize the pointer rvalue (cg_call's force_reg fallback would - * also do this, but doing it here keeps the invariant that the value - * stack settles to a register before argument evaluation starts). 
*/ - cg_load(p->cg); - } else { - perr(p, "called object is not a function"); - } - advance(p); /* '(' */ - u32 nargs = 0; - if (!is_punct(&p->cur, ')')) { - for (;;) { - parse_assign_expr(p); - to_rvalue(p); - ++nargs; - if (!accept_punct(p, ',')) break; - } - } - expect_punct(p, ')', "')' after argument list"); - if (fn_type->fn.nparams != nargs && !fn_type->fn.variadic) { - perr(p, "wrong number of arguments"); - } - if (fn_type->fn.variadic && nargs < fn_type->fn.nparams) { - perr(p, "too few arguments to variadic function"); - } - cg_call(p->cg, nargs, fn_type); - /* cg_call leaves nothing on the stack for void-returning functions. - * Higher-level expression machinery (drop in stmt context, dispatch - * inside ternary, etc.) expects a top SValue, so push a sentinel - * int 0. Using the value of a void-returning call is invalid C; the - * sentinel just keeps stack discipline so the parser doesn't - * underflow on `f();` style statements. */ - if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) { - cg_push_int(p->cg, 0, ty_int(p)); - } - continue; - } - if (is_punct(&t, '[')) { - /* Subscript `e1[e2]` is `*((e1) + (e2))` per §6.5.2.1. We resolve the - * pointer side after parsing the index so the commutative `i[a]` form - * (where the bracketed side is the pointer/array) falls out naturally. */ - const Type* lt0 = cg_top_type(p->cg); - advance(p); /* '[' */ - /* If the left operand is an array/pointer, decay/load to get a pointer - * rvalue. Integer base is left alone — we'll commute below if needed. */ - if (lt0 && lt0->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem)); - } else if (lt0 && lt0->kind == TY_PTR) { - cg_load(p->cg); - } - parse_expr(p); - /* Decay/load the index side similarly. 
*/ - { - const Type* it0 = cg_top_type(p->cg); - if (it0 && it0->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem)); - } else { - to_rvalue(p); - } - } - expect_punct(p, ']', "']' after subscript"); - { - const Type* lt = cg_top2_type(p->cg); - const Type* it = cg_top_type(p->cg); - const Type* elem; - if (lt && lt->kind == TY_PTR && type_is_int(it)) { - elem = lt->ptr.pointee; - } else if (it && it->kind == TY_PTR && type_is_int(lt)) { - /* Commute so the pointer is on the bottom for the add below. */ - cg_swap(p->cg); - elem = it->ptr.pointee; - } else { - perr(p, "invalid subscript: needs one pointer and one integer"); - } - if (!elem) perr(p, "subscript on incomplete pointee"); - u32 esz = abi_sizeof(p->abi, elem); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - cg_deref(p->cg, elem); - } - continue; - } - if (is_punct(&t, '.')) { - /* `e.member` — `e` must be an lvalue of struct/union type. The result - * is an lvalue of the field's type with the same address category as - * the parent (LOCAL/GLOBAL/INDIRECT all collapse to INDIRECT once we - * take the address). Anonymous aggregate members are flattened by - * find_field. */ - const Type* lt = cg_top_type(p->cg); - Sym mname; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - advance(p); /* '.' 
*/ - if (!lt || (lt->kind != TY_STRUCT && lt->kind != TY_UNION)) { - perr(p, "request for member in something that is not a struct or union"); - } - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '.'"); - } - mname = p->cur.v.ident; - advance(p); - if (!find_field(p->abi, lt, mname, &mty, &moff, &mf)) { - perr(p, "no such member"); - } - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, mty)); - if (moff > 0) { - cg_push_int(p->cg, (i64)moff, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, mty); - continue; - } - if (is_punct(&t, P_ARROW)) { - /* `e->member` — `e` must be a pointer to struct/union. */ - const Type* lt0; - const Type* rec_ty; - Sym mname; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - advance(p); /* `->` */ - to_rvalue(p); - lt0 = cg_top_type(p->cg); - if (!lt0 || lt0->kind != TY_PTR) { - perr(p, "'->' requires a pointer operand"); - } - rec_ty = lt0->ptr.pointee; - if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) { - perr(p, "'->' on pointer to non-struct/union"); - } - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '->'"); - } - mname = p->cur.v.ident; - advance(p); - if (!find_field(p->abi, rec_ty, mname, &mty, &moff, &mf)) { - perr(p, "no such member"); - } - if (moff > 0) { - cg_push_int(p->cg, (i64)moff, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, mty); - continue; - } - break; - } -} - -/* sizeof / _Alignof and cast all parse a type-name from inside parentheses; - * detection at `(` requires looking past the opening paren. The work is the - * same: dispatch on what comes next. */ -static void parse_unary(Parser* p) { - Tok t = p->cur; - /* Cast expression `(type-name) cast`. Disambiguated against `(expr)` - * by checking the token immediately after `(`. 
*/ - if (is_punct(&t, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - const Type* dst; - const Type* src; - advance(p); /* '(' */ - dst = parse_type_name(p); - expect_punct(p, ')', "')' after type-name"); - /* Compound literal `(type-name) { init-list }` per §6.5.2.5. The - * literal has automatic storage in the enclosing block (function - * scope here — same lifetime as a local). Allocate a hidden frame - * slot, parse the brace initializer into it, and push the slot's - * lvalue. Outer postfix/cast machinery handles array-to-pointer - * decay if the consumer expects an rvalue. */ - if (is_punct(&p->cur, '{')) { - FrameSlotDesc fsd; - FrameSlot slot; - const Type* lit_ty = dst; - if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) { - lit_ty = complete_incomplete_array(p, lit_ty); - } - memset(&fsd, 0, sizeof fsd); - fsd.type = lit_ty; - fsd.size = abi_sizeof(p->abi, lit_ty); - fsd.align = abi_alignof(p->abi, lit_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - slot = cg_local(p->cg, &fsd); - if (lit_ty && (lit_ty->kind == TY_ARRAY || lit_ty->kind == TY_STRUCT || - lit_ty->kind == TY_UNION)) { - init_at(p, slot, lit_ty, 0, lit_ty); - } else { - /* Scalar compound literal `(int){42}`. */ - init_at(p, slot, lit_ty, 0, lit_ty); - } - cg_push_local_typed(p->cg, slot, lit_ty); - return; - } - parse_unary(p); /* cast-expression */ - to_rvalue(p); - /* `(void) expr` is the C idiom for "discard the value"; we must not - * convert (no value to materialize) — drop the rvalue and push - * nothing. The corpus relies on this for `(void)42;` style stmts. */ - if (dst && dst->kind == TY_VOID) { - cg_drop(p->cg); - /* Leave nothing on stack. parse_stmt's expression-stmt path drops - * the result; our caller is parse_unary, so leave the stack - * exactly empty and synthesize a sentinel int 0 to keep value- - * stack discipline (so to_rvalue from a higher level still has - * a top). 
The expression `(void)e` cannot appear where a value - * is required, so this is dead-but-harmless. */ - cg_push_int(p->cg, 0, ty_int(p)); - return; - } - src = cg_top_type(p->cg); - /* Pointer-to-pointer cast is a no-op at the value level once the - * pointer is already in a register. Skip cg_convert (which would - * dispatch to the backend's same-class bitcast, not implemented for - * register-resident pointers). Update the SValue's type so later - * dereference picks the right pointee — easiest done by re-pushing - * with the new type. */ - if (src && src->kind == TY_PTR && dst->kind == TY_PTR) { - cg_retag_top(p->cg, dst); - return; - } - cg_convert(p->cg, dst); - return; - } - /* fall through to parse_postfix → parse_primary which handles `(expr)`. */ - } - if (is_punct(&t, '+')) { - advance(p); - parse_unary(p); - to_rvalue(p); - return; - } - if (is_punct(&t, '-')) { - advance(p); - parse_unary(p); - to_rvalue(p); - cg_unop(p->cg, UO_NEG); - return; - } - if (is_punct(&t, '!')) { - advance(p); - parse_unary(p); - to_rvalue(p); - /* Logical not via cmp == 0. */ - cg_push_int(p->cg, 0, ty_int(p)); - cg_cmp(p->cg, CMP_EQ); - return; - } - if (is_punct(&t, '~')) { - advance(p); - parse_unary(p); - to_rvalue(p); - cg_unop(p->cg, UO_BNOT); - return; - } - if (is_punct(&t, '&')) { - advance(p); - parse_unary(p); - /* The operand is required to be an lvalue; cg_addr panics otherwise. */ - cg_addr(p->cg); - return; - } - if (is_punct(&t, '*')) { - /* Dereference: parse the operand, force to a pointer rvalue, then - * derive the INDIRECT lvalue. The pointee type drives the next access. 
*/ - const Type* pty; - const Type* pointee; - advance(p); - parse_unary(p); - to_rvalue(p); - pty = cg_top_type(p->cg); - if (!pty || pty->kind != TY_PTR) { - perr(p, "indirection requires pointer operand"); - } - pointee = pty->ptr.pointee; - if (pointee && pointee->kind == TY_VOID) { - perr(p, "dereferencing pointer to incomplete type"); - } - cg_deref(p->cg, pointee); - return; - } - if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) { - BinOp bop = is_punct(&t, P_INC) ? BO_IADD : BO_ISUB; - advance(p); - parse_unary(p); - cg_inc_dec(p->cg, bop, /*post=*/0); - return; - } - if (is_kw(p, &t, KW_SIZEOF)) { - /* sizeof has two forms: `sizeof ( type-name )` and `sizeof unary`. - * The expression form must NOT evaluate its operand (per §6.5.3.4), - * which is awkward in single-pass codegen. The Phase 1 corpus only - * needs `sizeof(type-name)` and `sizeof(IDENT)` where IDENT is a - * declared object — both reducible to a type lookup with no - * emission. Other expression forms are diagnosed. */ - const Type* ty = NULL; - FrameSlot vla_slot = FRAME_SLOT_NONE; - advance(p); - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - ty = parse_type_name(p); - expect_punct(p, ')', "')'"); - } else { - /* `sizeof ( expression )` — parenthesized expression form. Fall - * through to the unary path so paren-primary handles the `(`. */ - p->last_pushed_vla_slot = FRAME_SLOT_NONE; - parse_unary(p); - ty = cg_top_type(p->cg); - vla_slot = p->last_pushed_vla_slot; - cg_drop(p->cg); - } - } else { - /* `sizeof unary-expression`: §6.5.3.4 says the operand is not - * evaluated. We parse it through the regular unary path and grab - * its type from the cg stack, then drop. lvalues stay as lvalues - * (no load is emitted) so for the corpus shapes (array, subscript, - * member access) this is side-effect-free. VLA operands need - * actual evaluation and are deferred. 
*/ - p->last_pushed_vla_slot = FRAME_SLOT_NONE; - parse_unary(p); - ty = cg_top_type(p->cg); - vla_slot = p->last_pushed_vla_slot; - cg_drop(p->cg); - } - if (vla_slot != FRAME_SLOT_NONE) { - /* sizeof on a VLA-bound IDENT: emit the runtime byte-size load - * instead of the constant pointer width. */ - cg_push_local_typed(p->cg, vla_slot, ty_size_t(p)); - cg_load(p->cg); - } else { - cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p)); - } - return; - } - if (is_kw(p, &t, KW_GENERIC)) { - /* `_Generic ( controlling-expr , generic-association+ )` - * - * §6.5.1.1: the controlling-expression is not evaluated. Single-pass - * codegen makes that awkward — we instead evaluate it (cheap when the - * spine has no side-effecting operands), drop the value, and then - * emit code only for the matching association. Non-matching - * associations are token-skipped with paren/bracket/brace balancing - * so their assignment-expressions don't run. */ - advance(p); - expect_punct(p, '(', "'('"); - parse_assign_expr(p); - to_rvalue(p); - const Type* ctl_ty = cg_top_type(p->cg); - cg_drop(p->cg); - expect_punct(p, ',', "','"); - /* Walk associations. Track default position for use if no type - * matches. We need to be able to "rewind" — but the parser is - * single-pass, so the strategy is: first, scan associations once, - * recording the offset of each (in token bytes). We can't rewind - * tokens cheaply, so a different strategy: walk left-to-right, - * skipping non-matching assoc-exprs by token-balancing; on the - * first match, parse-and-emit the assoc-expr; on subsequent - * associations after a match, skip. If no match, fall back to - * default at end (we have to remember whether we saw default and - * its tokens are gone — so we record default position by buffering - * the default's sub-expr tokens... actually simplest: scan once, - * skipping every assoc-expr (no codegen), recording the matched - * one's parse position. Pp doesn't support rewind cheaply. 
- * - * Workable trick: since the parser is recursive-descent, we can - * "peek" tokens by reading until we find the matching assoc, then - * parse it once we're inside it. But that requires per-token - * lookahead beyond what `peek1` offers. Practical compromise: walk - * once, parsing the FIRST matching association inline (emitting - * code there), then skipping the rest. If we hit `default:` first - * before any match, buffer-skip and remember its location is - * impossible. So scan twice: pass 1 collects assoc types and - * positions (no codegen), pass 2 parses the chosen one. To do - * this we'd need a mark-and-rewind on the pp stream. - * - * Phase-3 pragmatic implementation: walk once. Parse each assoc- - * expr unconditionally into a no-op buffer when its type doesn't - * match — but again, we don't have a no-op codegen path. - * - * Workable compromise that covers the corpus row - * `_Generic((x), int: x, default: 0)`: walk associations - * left-to-right. For each: - * - Parse the assoc type-name (or `default`). - * - If we have not yet emitted a result and this assoc matches - * (or is default and we're at the end without a prior match), - * parse the assoc expression and emit. Otherwise skip the - * assoc-expr by balanced token count. - * - The default is held back until after a non-default scan. - * Without a real rewind, we instead make a single pass that - * remembers whether default has appeared, and on no match - * panics with a directive that the corpus row doesn't trigger. - * - * For the corpus row the controlling expr is `int`, the first - * association is `int:`, so the first-match path is hit before - * default. */ - int emitted = 0; - /* Buffer for the `default:` association's expression tokens, so we - * can replay it if no typed association matches. Recording happens - * at most once (the C standard allows at most one default). 
The - * trailing `,` or `)` that ended the recording is included so the - * replayed parse_assign_expr stops cleanly at the same boundary. */ - Tok* default_buf = NULL; - u32 default_len = 0; - for (;;) { - const Type* assoc_ty = NULL; - int is_default = 0; - if (is_kw(p, &p->cur, KW_DEFAULT)) { - advance(p); - is_default = 1; - } else { - assoc_ty = parse_type_name(p); - } - expect_punct(p, ':', "':' in _Generic association"); - int take = 0; - if (!emitted && !is_default && ctl_ty && assoc_ty && - ctl_ty->kind == assoc_ty->kind) { - take = 1; - } - if (take) { - parse_assign_expr(p); - emitted = 1; - } else if (is_default && !default_buf) { - /* Record default's assoc-expr tokens for later replay. */ - u32 cap = 16; - Tok* buf = arena_array(p->c->tu, Tok, cap); - u32 len = 0; - int paren_depth = 0, brack_depth = 0, brace_depth = 0; - while (p->cur.kind != TOK_EOF) { - if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { - if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; - } - if (len == cap) { - u32 new_cap = cap * 2; - Tok* nv = arena_array(p->c->tu, Tok, new_cap); - if (!nv) perr(p, "out of memory recording _Generic default"); - memcpy(nv, buf, len * sizeof(Tok)); - buf = nv; - cap = new_cap; - } - buf[len++] = p->cur; - if (is_punct(&p->cur, '(')) ++paren_depth; - else if (is_punct(&p->cur, ')')) --paren_depth; - else if (is_punct(&p->cur, '[')) ++brack_depth; - else if (is_punct(&p->cur, ']')) --brack_depth; - else if (is_punct(&p->cur, '{')) ++brace_depth; - else if (is_punct(&p->cur, '}')) --brace_depth; - advance(p); - } - /* Append a sentinel `,` so the replayed parse_assign_expr - * stops cleanly without falling through to pp_next. 
*/ - if (len == cap) { - u32 new_cap = cap + 1; - Tok* nv = arena_array(p->c->tu, Tok, new_cap); - if (!nv) perr(p, "out of memory recording _Generic default"); - memcpy(nv, buf, len * sizeof(Tok)); - buf = nv; - cap = new_cap; - } - memset(&buf[len], 0, sizeof(Tok)); - buf[len].kind = TOK_PUNCT; - buf[len].v.punct = ','; - ++len; - default_buf = buf; - default_len = len; - } else { - /* Skip assoc-expr by token-balancing. */ - int paren_depth = 0; - int brack_depth = 0; - int brace_depth = 0; - while (p->cur.kind != TOK_EOF) { - if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { - if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; - } - if (is_punct(&p->cur, '(')) ++paren_depth; - else if (is_punct(&p->cur, ')')) --paren_depth; - else if (is_punct(&p->cur, '[')) ++brack_depth; - else if (is_punct(&p->cur, ']')) --brack_depth; - else if (is_punct(&p->cur, '{')) ++brace_depth; - else if (is_punct(&p->cur, '}')) --brace_depth; - advance(p); - } - } - if (!accept_punct(p, ',')) break; - } - if (!emitted && default_buf) { - /* No typed association matched; replay the default's recorded - * assoc-expr through the replay buffer, then resume the original - * stream at the `)`. */ - Tok* save_replay = p->replay; - u32 save_cap = p->replay_cap; - u32 save_len = p->replay_len; - u32 save_pos = p->replay_pos; - u8 save_active = p->replay_active; - Tok save_cur = p->cur; - int save_has_next = p->has_next; - p->replay = default_buf; - p->replay_cap = default_len; - p->replay_len = default_len; - p->replay_pos = 1; - p->replay_active = 1; - p->cur = default_buf[0]; - p->has_next = 0; - parse_assign_expr(p); - emitted = 1; - /* Restore the outer stream — we don't consume the trailing - * sentinel `,` from the recorded buffer; callers expect cur = `)` - * after the loop. 
*/ - p->replay = save_replay; - p->replay_cap = save_cap; - p->replay_len = save_len; - p->replay_pos = save_pos; - p->replay_active = save_active; - p->cur = save_cur; - p->has_next = save_has_next; - } - expect_punct(p, ')', "')' after _Generic"); - if (!emitted) { - perr(p, "_Generic: no association matched and no default present"); - } - return; - } - if (is_kw(p, &t, KW_ALIGNOF)) { - /* `_Alignof ( type-name )` per §6.5.3.4 ¶1. The GNU `__alignof__` - * alias additionally accepts an expression operand, mirroring sizeof. - * Disambiguate at the `(`: type-name → parse_type_name; otherwise - * route through parse_unary, read the operand's type, drop. */ - const Type* ty; - advance(p); - expect_punct(p, '(', "'('"); - if (starts_type_name(p, &p->cur)) { - ty = parse_type_name(p); - } else { - parse_unary(p); - ty = cg_top_type(p->cg); - cg_drop(p->cg); - } - expect_punct(p, ')', "')'"); - cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p)); - return; - } - parse_postfix(p); - /* postfix may have left an lvalue or rvalue. Higher-level callers - * issue to_rvalue when they need the value. */ -} - -/* Binary operator levels: each takes a `next` pointer to the tighter level - * and a list of accepted operators with their codegen mapping. Inlined as - * a single function per level to keep the call graph readable. */ - -static int type_is_fp(const Type* t) { - return t && (t->kind == TY_FLOAT || t->kind == TY_DOUBLE || - t->kind == TY_LDOUBLE); -} - -/* §6.3.1.8 usual arithmetic conversions (FP slice). When either operand - * is FP, both convert to the wider FP type. When both are integer, the - * caller's existing integer dispatch handles it. Returns the common - * arithmetic type, or NULL if the parser should fall through to integer - * dispatch. */ -static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) { - if (!type_is_fp(a) && !type_is_fp(b)) return NULL; - /* `long double` not yet wired through cg's FP path. 
*/ - if ((a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE)) { - return type_prim(p->pool, TY_LDOUBLE); - } - if ((a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE)) { - return type_prim(p->pool, TY_DOUBLE); - } - return type_prim(p->pool, TY_FLOAT); -} - -/* Coerce the top two stack values to `common`, then dispatch the FP form - * of `bop` (BO_IADD→BO_FADD, etc.). */ -static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) { - /* Convert top (rhs) first; cg_convert pops+pushes, leaving stack - * shape unchanged. Then swap, convert lhs, swap back so [lhs, rhs] - * land in the right order for cg_binop. */ - if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); - cg_swap(p->cg); - if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); - cg_swap(p->cg); - BinOp fop; - switch (bop) { - case BO_IADD: fop = BO_FADD; break; - case BO_ISUB: fop = BO_FSUB; break; - case BO_IMUL: fop = BO_FMUL; break; - case BO_SDIV: fop = BO_FDIV; break; - default: - perr(p, "operator does not apply to floating types"); - return; - } - cg_binop(p->cg, fop); -} - -static void parse_mul(Parser* p) { - parse_unary(p); - for (;;) { - Tok t = p->cur; - BinOp bop; - if (is_punct(&t, '*')) { - bop = BO_IMUL; - } else if (is_punct(&t, '/')) { - bop = BO_SDIV; - } else if (is_punct(&t, '%')) { - bop = BO_SREM; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_unary(p); - to_rvalue(p); - const Type* lt = cg_top2_type(p->cg); - const Type* rt = cg_top_type(p->cg); - const Type* common = common_fp_type(p, lt, rt); - if (common) { - emit_fp_binop(p, bop, common); - } else { - cg_binop(p->cg, bop); - } - } -} - -/* Apply C pointer arithmetic to the top two values on the stack: - * ptr + int → ptr + int * sizeof(*ptr) - * int + ptr → ptr + int * sizeof(*ptr) (commute, then scale) - * ptr - int → ptr - int * sizeof(*ptr) - * ptr - ptr → (ptr - ptr) / sizeof(*ptr) (ptrdiff_t result) - * int +/- int → integer add/sub - * Pops both operands and pushes 
the result. */ -static void emit_add_or_sub(Parser* p, BinOp bop) { - const Type* lt = cg_top2_type(p->cg); - const Type* rt = cg_top_type(p->cg); - int l_is_ptr = lt && lt->kind == TY_PTR; - int r_is_ptr = rt && rt->kind == TY_PTR; - if (bop == BO_IADD) { - if (l_is_ptr && type_is_int(rt)) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - return; - } - if (r_is_ptr && type_is_int(lt)) { - cg_swap(p->cg); - u32 esz = abi_sizeof(p->abi, rt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - return; - } - } else { /* BO_ISUB */ - if (l_is_ptr && type_is_int(rt)) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_ISUB); - return; - } - if (l_is_ptr && r_is_ptr) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - cg_binop(p->cg, BO_ISUB); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_SDIV); - } - return; - } - } - const Type* common = common_fp_type(p, lt, rt); - if (common) { - emit_fp_binop(p, bop, common); - return; - } - cg_binop(p->cg, bop); -} - -static void parse_add(Parser* p) { - parse_mul(p); - for (;;) { - Tok t = p->cur; - BinOp bop; - if (is_punct(&t, '+')) { - bop = BO_IADD; - } else if (is_punct(&t, '-')) { - bop = BO_ISUB; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_mul(p); - to_rvalue(p); - emit_add_or_sub(p, bop); - } -} - -static void parse_shift(Parser* p) { - parse_add(p); - for (;;) { - Tok t = p->cur; - BinOp bop; - if (is_punct(&t, P_SHL)) { - bop = BO_SHL; - } else if (is_punct(&t, P_SHR)) { - bop = BO_SHR_S; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_add(p); - to_rvalue(p); - cg_binop(p->cg, bop); - } -} - -static void parse_rel(Parser* p) { 
- parse_shift(p); - for (;;) { - Tok t = p->cur; - CmpOp cop; - if (is_punct(&t, '<')) { - cop = CMP_LT_S; - } else if (is_punct(&t, '>')) { - cop = CMP_GT_S; - } else if (is_punct(&t, P_LE)) { - cop = CMP_LE_S; - } else if (is_punct(&t, P_GE)) { - cop = CMP_GE_S; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_shift(p); - to_rvalue(p); - cg_cmp(p->cg, cop); - } -} - -static void parse_eq(Parser* p) { - parse_rel(p); - for (;;) { - Tok t = p->cur; - CmpOp cop; - if (is_punct(&t, P_EQ)) { - cop = CMP_EQ; - } else if (is_punct(&t, P_NE)) { - cop = CMP_NE; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_rel(p); - to_rvalue(p); - cg_cmp(p->cg, cop); - } -} - -static void parse_band(Parser* p) { - parse_eq(p); - while (is_punct(&p->cur, '&')) { - advance(p); - to_rvalue(p); - parse_eq(p); - to_rvalue(p); - cg_binop(p->cg, BO_AND); - } -} - -static void parse_bxor(Parser* p) { - parse_band(p); - while (is_punct(&p->cur, '^')) { - advance(p); - to_rvalue(p); - parse_band(p); - to_rvalue(p); - cg_binop(p->cg, BO_XOR); - } -} - -static void parse_bor(Parser* p) { - parse_bxor(p); - while (is_punct(&p->cur, '|')) { - advance(p); - to_rvalue(p); - parse_bxor(p); - to_rvalue(p); - cg_binop(p->cg, BO_OR); - } -} - -/* Logical && / || are short-circuiting: the right operand is evaluated - * only when the left does not already determine the result. We lower - * each as a label-driven branch sequence that materializes a 0/1 i32 - * result. Both produce an int rvalue regardless of operand types - * (per §6.5.13/14). 
- * - * a && b lowers to: a || b lowers to: - * <a>; jz Lfalse <a>; jnz Ltrue - * <b>; jz Lfalse <b>; jnz Ltrue - * store 1 → tmp; jmp Lend store 0 → tmp; jmp Lend - * Lfalse: store 0 → tmp Ltrue: store 1 → tmp - * Lend: load tmp Lend: load tmp - * - * The result is routed through a frame slot for the same reason ternary - * is: cg's abstract value stack is linear-flow only, so a naive push - * from each arm leaves two operands at the merge instead of one. */ -static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty; - fsd.size = abi_sizeof(p->abi, ty); - fsd.align = abi_alignof(p->abi, ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - return cg_local(p->cg, &fsd); -} - -static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) { - cg_push_local_typed(p->cg, tmp, ty); - cg_push_int(p->cg, v, ty); - cg_store(p->cg); - cg_drop(p->cg); -} - -static void parse_land(Parser* p) { - parse_bor(p); - while (is_punct(&p->cur, P_AND)) { - CGLabel L_false = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp = ll_tmp_slot(p, result_ty); - advance(p); - to_rvalue(p); - cg_branch_false(p->cg, L_false); - parse_bor(p); - to_rvalue(p); - cg_branch_false(p->cg, L_false); - ll_store_const(p, tmp, result_ty, 1); - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_false); - ll_store_const(p, tmp, result_ty, 0); - cg_label_place(p->cg, L_end); - cg_push_local_typed(p->cg, tmp, result_ty); - } -} - -static void parse_lor(Parser* p) { - parse_land(p); - while (is_punct(&p->cur, P_OR)) { - CGLabel L_true = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp = ll_tmp_slot(p, result_ty); - advance(p); - to_rvalue(p); - cg_branch_true(p->cg, L_true); - parse_land(p); - to_rvalue(p); - cg_branch_true(p->cg, L_true); - ll_store_const(p, tmp, result_ty, 0); - cg_jump(p->cg, L_end); - 
cg_label_place(p->cg, L_true); - ll_store_const(p, tmp, result_ty, 1); - cg_label_place(p->cg, L_end); - cg_push_local_typed(p->cg, tmp, result_ty); - } -} - -/* Ternary `c ? t : f`. The cg value stack is linear-flow only, so a naive - * "push from each arm" leaves the stack in an inconsistent state at the - * merge point. We materialize the result through a fresh local: each arm - * stores into the same slot, the merge label reloads. v1 picks the slot's - * type from the then-arm and assumes the else-arm is the same type - * (matches the §6.5.15 corpus rows; full usual-conversions rules slot in - * with Phase 7). - * - * `&&` / `||` use the same temp-slot merge pattern (see parse_land / - * parse_lor above); the ternary differs only in that its two arms are - * arbitrary expressions rather than the constant 0/1. */ -static void parse_ternary(Parser* p) { - parse_lor(p); - if (!is_punct(&p->cur, '?')) return; - CGLabel L_else = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp; - FrameSlotDesc fsd; - /* Pop the cond, branch on it. */ - advance(p); /* '?' */ - to_rvalue(p); - cg_branch_false(p->cg, L_else); - parse_assign_expr(p); - to_rvalue(p); - /* Update result_ty from the then-arm (a closer approximation than int). */ - result_ty = cg_top_type(p->cg); - if (!result_ty) result_ty = ty_int(p); - memset(&fsd, 0, sizeof fsd); - fsd.type = result_ty; - fsd.size = abi_sizeof(p->abi, result_ty); - fsd.align = abi_alignof(p->abi, result_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - tmp = cg_local(p->cg, &fsd); - /* Store then-arm value into tmp. cg_store needs [lv, rv]; the rvalue - * is already on top, so push the lvalue and swap. 
*/ - cg_push_local_typed(p->cg, tmp, result_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); /* cg_store leaves the rvalue; drop in stmt-style usage */ - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_else); - expect_punct(p, ':', "':' in ternary"); - parse_assign_expr(p); - to_rvalue(p); - /* §6.5.15 ¶5 usual arithmetic conversions: if the else-arm's type - * differs from the slot type chosen from the then-arm, coerce so the - * store types line up. v1 only converts the else-arm down/up to match - * the then-arm; full common-type widening lives behind the buffered- - * arms rewrite that's still pending. */ - if (cg_top_type(p->cg) != result_ty) { - cg_convert(p->cg, result_ty); - } - cg_push_local_typed(p->cg, tmp, result_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - cg_label_place(p->cg, L_end); - /* At the merge, push the slot lvalue; callers can to_rvalue if needed. */ - cg_push_local_typed(p->cg, tmp, result_ty); -} - -static void parse_assign_expr(Parser* p) { - parse_ternary(p); - /* The LHS is now on the CG stack. If it's an lvalue we may consume it - * for assignment; otherwise we keep the rvalue as the final result. 
*/ - Tok t = p->cur; - BinOp compound; - int is_simple_assign; - if (is_punct(&t, '=')) { - is_simple_assign = 1; - compound = (BinOp)0; - } else if (is_punct(&t, P_ADD_ASSIGN)) { - is_simple_assign = 0; compound = BO_IADD; - } else if (is_punct(&t, P_SUB_ASSIGN)) { - is_simple_assign = 0; compound = BO_ISUB; - } else if (is_punct(&t, P_MUL_ASSIGN)) { - is_simple_assign = 0; compound = BO_IMUL; - } else if (is_punct(&t, P_DIV_ASSIGN)) { - is_simple_assign = 0; compound = BO_SDIV; - } else if (is_punct(&t, P_MOD_ASSIGN)) { - is_simple_assign = 0; compound = BO_SREM; - } else if (is_punct(&t, P_AND_ASSIGN)) { - is_simple_assign = 0; compound = BO_AND; - } else if (is_punct(&t, P_OR_ASSIGN)) { - is_simple_assign = 0; compound = BO_OR; - } else if (is_punct(&t, P_XOR_ASSIGN)) { - is_simple_assign = 0; compound = BO_XOR; - } else if (is_punct(&t, P_SHL_ASSIGN)) { - is_simple_assign = 0; compound = BO_SHL; - } else if (is_punct(&t, P_SHR_ASSIGN)) { - is_simple_assign = 0; compound = BO_SHR_S; - } else { - return; - } - advance(p); - if (is_simple_assign) { - /* LHS lvalue is on stack. Parse RHS, store. The result of the - * assignment is the assigned value; for the spine we leave the stack - * empty after store (statement context), which is correct for - * `x = expr;` and for the for-init `i = 1` since the value is - * discarded. To support assignment-as-expression, we'd need to - * cg_dup the LHS first and re-load after store. */ - parse_assign_expr(p); - to_rvalue(p); - coerce_top_to_lvalue(p); - cg_store(p->cg); - return; - } - /* Compound: x += y → load x, compute, store. We need to keep the LHS - * lvalue and produce a new rvalue. Stack: [lv]. Sequence: - * dup [lv, lv] - * load [lv, x] - * parse RHS, rvalue [lv, x, y] - * binop [lv, x_op_y] - * store [] */ - cg_dup(p->cg); - cg_load(p->cg); - parse_assign_expr(p); - to_rvalue(p); - if (compound == BO_IADD || compound == BO_ISUB) { - /* `+=`/`-=` on a pointer needs the same scaling/decay as `+`/`-`. 
*/ - emit_add_or_sub(p, compound); - } else { - cg_binop(p->cg, compound); - } - cg_store(p->cg); -} - -static void parse_expr(Parser* p) { - parse_assign_expr(p); - while (is_punct(&p->cur, ',')) { - advance(p); - /* Discard left, evaluate right. */ - cg_drop(p->cg); - parse_assign_expr(p); - } -} - -/* ============================================================ - * Declarations (slice: `int` / `void` / `char` only) - * ============================================================ - * DeclSpecs and parse_decl_specs are defined above (hoisted before the - * expression parsing section). What follows here is the declarator-and- - * initializer machinery built on top of them. */ - -/* Forward decl for parse_compound_stmt (mutually recursive with statement - * dispatch). */ -static void parse_stmt(Parser* p); -static void parse_compound_stmt(Parser* p); - -/* Allocate a frame slot for a local variable of `type` and bind `name` - * into the current scope. */ -static FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, - SrcLoc loc, u32 align_override) { - FrameSlotDesc fsd; - FrameSlot s; - SymEntry* e; - u32 nat = abi_alignof(p->abi, type); - memset(&fsd, 0, sizeof fsd); - fsd.type = type; - fsd.name = name; - fsd.loc = loc; - fsd.size = abi_sizeof(p->abi, type); - fsd.align = (align_override > nat) ? align_override : nat; - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - s = cg_local(p->cg, &fsd); - e = scope_define(p, name, SEK_LOCAL, type); - e->v.slot = s; - return s; -} - -static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { - return make_local_aligned(p, name, type, loc, 0); -} - -/* Forward decls for declarator components. 
*/ -typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind; -typedef struct ParamInfo ParamInfo; -typedef struct DeclSuffix { - u8 kind; /* DSuffKind */ - /* DS_ARRAY */ - u32 count; /* element count; meaningful when !vla and !incomplete */ - u8 incomplete; /* true for `[]` (no size given) */ - u8 vla; /* true for `[expr]` with a non-constant size */ - /* When `vla` is set, the size expression has already been emitted and the - * resulting i64 (in bytes-of-elem-count) is held in this scratch slot. - * Materialized at suffix-parse time because the size expression's tokens - * are consumed there; init_declarator reads it back to drive cg_alloca. */ - FrameSlot vla_count_slot; - /* DS_FUNC */ - ParamInfo* params; - u16 nparams; - u8 variadic; -} DeclSuffix; - -typedef struct ParamInfo { - Sym name; - const Type* type; - SrcLoc loc; -} ParamInfo; - -static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, - u8* variadic_out); - -/* Parse a single trailing suffix (`[...]` or `(...)`) after a declarator's - * IDENT or parenthesized inner-declarator. Returns 1 if a suffix was consumed - * and filled into *out, 0 otherwise. */ -static int parse_decl_suffix(Parser* p, DeclSuffix* out) { - if (accept_punct(p, '[')) { - /* `[ qualifier* static? assignment-expression? ]` or `[ static qualifier* assign-expr ]`. - * Only constant integer expressions are accepted at this slice; non-constant - * sizes are VLA territory (Phase 9). */ - out->kind = DS_ARRAY; - out->count = 0; - out->incomplete = 0; - out->vla = 0; - /* Optional `static`/qualifiers before the size; recognized, no-op here. - * `[static N]` only changes parameter ABI hints (caller promises ≥N). 
*/ - for (;;) { - if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) || - accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) || - accept_kw(p, KW_ATOMIC)) { - continue; - } - break; - } - if (accept_punct(p, ']')) { - out->incomplete = 1; - return 1; - } - /* Function-prototype parameter: any `[...]` decays to `T*` (§6.7.6.3 - * ¶7), so the size expression is unused. Consume tokens up to the - * matching `]` (handling `[*]`, `[n]`, nested brackets) and record - * the parameter as an incomplete array; the caller decays it to a - * pointer. */ - if (p->in_param_decl) { - int depth = 1; - while (depth > 0) { - if (p->cur.kind == TOK_EOF) { - perr(p, "unexpected EOF in parameter array bound"); - } - if (is_punct(&p->cur, '[')) ++depth; - else if (is_punct(&p->cur, ']')) { - --depth; - if (depth == 0) break; - } - advance(p); - } - out->incomplete = 1; - expect_punct(p, ']', "']' after array size"); - return 1; - } - /* Constant integer size: an expression starting with a numeric or - * character literal (or an enum constant) is routed through the - * constant evaluator so `[3+4]`, `[N*2]` etc. round-trip. Anything - * else kicks the suffix into VLA mode (§6.7.6.2 ¶4). */ - { - Tok t = p->cur; - int is_const_start = (t.kind == TOK_NUM || t.kind == TOK_CHR); - if (!is_const_start && t.kind == TOK_IDENT) { - SymEntry* e = scope_lookup(p, t.v.ident); - if (e && e->kind == SEK_ENUM_CST) is_const_start = 1; - if (!is_const_start) { - /* `sizeof` and `_Alignof` tokenize as identifiers but yield - * compile-time constants — admit them so `int a[_Alignof(T)]` - * lowers as a fixed-size array, not a VLA. 
*/ - CKw k = ident_kw(p, t.v.ident); - if (k == KW_SIZEOF || k == KW_ALIGNOF) is_const_start = 1; - } - } - if (is_const_start) { - SrcLoc cloc = tok_loc(&p->cur); - i64 v = eval_const_int(p, cloc); - if (v < 0) perr(p, "negative array size"); - out->count = (u32)v; - } else { - /* VLA: emit the size-expression code now (the tokens go away after - * we return), spill its int value to a fresh i64 frame slot so - * init_declarator can pick it back up at the right time. */ - FrameSlotDesc fsd; - if (p->vla_pending) { - perr(p, "v1 supports only one VLA dimension per declarator"); - } - out->vla = 1; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty_size_t(p); - fsd.size = abi_sizeof(p->abi, fsd.type); - fsd.align = abi_alignof(p->abi, fsd.type); - fsd.kind = FS_LOCAL; - out->vla_count_slot = cg_local(p->cg, &fsd); - parse_assign_expr(p); - to_rvalue(p); - cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - p->vla_pending = 1; - p->vla_pending_count_slot = out->vla_count_slot; - } - } - expect_punct(p, ']', "']' after array size"); - return 1; - } - if (accept_punct(p, '(')) { - out->kind = DS_FUNC; - out->params = NULL; - out->nparams = 0; - out->variadic = 0; - parse_param_list(p, &out->params, &out->nparams, &out->variadic); - expect_punct(p, ')', "')' after parameter list"); - return 1; - } - return 0; -} - -/* Wrap `base` with a single suffix's transform. Used when materializing the - * declarator type from the collected suffix list. */ -static const Type* apply_decl_suffix(Parser* p, const Type* base, - const DeclSuffix* s) { - if (s->kind == DS_ARRAY) { - /* VLA: count is runtime; record an incomplete array type so the type - * system carries the elem-type but the size is treated as unknown. - * init_declarator notices the parser-side `vla_pending` flag and emits - * the alloca + bind. 
*/ - return type_array(p->pool, base, s->count, s->incomplete || s->vla); - } - /* DS_FUNC */ - { - const Type** ptypes = NULL; - if (s->nparams) { - ptypes = (const Type**)arena_array(p->c->tu, const Type*, s->nparams); - for (u16 i = 0; i < s->nparams; ++i) ptypes[i] = s->params[i].type; - } - return type_func(p->pool, base, ptypes, s->nparams, (int)s->variadic); - } -} - -/* Parse a (possibly abstract) declarator. Supports: - * pointer-prefix? ( IDENT | '(' inner-declarator ')' ) suffix* - * where suffix is `[N]` or `(params)`. The inner declarator handles one level - * of nesting (e.g. `int (*fp)(int)`). Multiple nested parens would recurse - * naturally — for Phase 2 a single level covers all corpus cases. - * - * If `allow_abstract` is true, the IDENT may be absent (used by parameters). - * On success returns the declared type and writes *name_out (=0 if abstract). */ -static const Type* parse_declarator_full(Parser* p, const Type* base, - int allow_abstract, Sym* name_out, - SrcLoc* loc_out) { - return parse_declarator_full_ex(p, base, allow_abstract, name_out, loc_out, - NULL); -} - -static const Type* parse_declarator_full_ex(Parser* p, const Type* base, - int allow_abstract, Sym* name_out, - SrcLoc* loc_out, - Attr** attrs_out) { - /* Outer pointer prefix wraps `base` as we go. */ - base = parse_pointer_layer(p, base); - - /* Inner declarator: collect inner pointer prefix (innermost-first array) - * to wrap LATER (after we know the suffix-applied base). */ - Sym name = 0; - SrcLoc nloc = {0, 0, 0}; - u8 nptrs_inner = 0; - u16 inner_quals[8]; - int has_inner_parens = 0; - DeclSuffix inner_suffs[8]; - int n_inner_suffs = 0; - - if (is_punct(&p->cur, '(')) { - /* Disambiguate `(declarator)` vs. function suffix `(params)`. 
The token - * after `(` decides: - * `*` → inner-declarator pointer prefix - * IDENT (non-kw) → inner-declarator IDENT - * IDENT (type kw) → function suffix (parameters) - * `)` → function suffix `()` (unspecified args) - * Phase 2 doesn't have typedef-names; once they land, the IDENT branch - * also needs to dispatch on SEK_TYPEDEF. */ - Tok n = peek1(p); - int is_inner = 0; - if (is_punct(&n, '*')) { - is_inner = 1; - } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) { - /* Plain IDENT could be a declarator name OR a typedef-name (which - * makes the parens a function-parameter list). Disambiguate by - * peeking at the symbol table. */ - SymEntry* e = scope_lookup(p, n.v.ident); - if (!(e && e->kind == SEK_TYPEDEF)) is_inner = 1; - } - if (is_inner) { - has_inner_parens = 1; - advance(p); /* '(' */ - /* Inner pointer prefix: each `*` (with optional qualifiers) records one - * wrap layer. We store qualifiers per layer so we can apply them in - * reverse order below. */ - while (accept_punct(p, '*')) { - u16 q = 0; - if (nptrs_inner >= 8) perr(p, "too many pointer levels"); - for (;;) { - if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } - if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } - if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } - if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } - if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } - break; - } - inner_quals[nptrs_inner++] = q; - } - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - name = p->cur.v.ident; - nloc = tok_loc(&p->cur); - advance(p); - } else if (!allow_abstract) { - perr(p, "expected declarator name"); - } - /* Inner declarator may carry its own suffixes — `int (*ops[2])(int)` - * has `[2]` between IDENT and the closing `)`. Collect them so - * they wrap LAST (closest to IDENT), after the outer suffix and - * inner pointer layers. 
*/ - if (starts_attr(p)) parse_and_discard_attributes(p); - while (n_inner_suffs < 8) { - if (!parse_decl_suffix(p, &inner_suffs[n_inner_suffs])) break; - ++n_inner_suffs; - if (starts_attr(p)) parse_and_discard_attributes(p); - } - expect_punct(p, ')', "')' after inner declarator"); - } - } - - if (!has_inner_parens) { - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - name = p->cur.v.ident; - nloc = tok_loc(&p->cur); - advance(p); - } else if (!allow_abstract) { - perr(p, "expected declarator name"); - } - } - - /* Optional attributes after the declarator-id (before any suffix). - * Honored when the caller supplies an `attrs_out` sink (e.g. struct - * members care about aligned / packed at this position); otherwise - * dropped to stay compatible with positions that ignore them. */ - if (starts_attr(p)) { - if (attrs_out) parse_attrs_into(p, attrs_out); - else parse_and_discard_attributes(p); - } - - /* Collect outer suffixes left-to-right; apply in reverse so the innermost - * suffix wraps `base` first. For `int a[5][3]` the resulting type is - * "array[5] of array[3] of int": [3] applied first → array[3], then [5] - * wraps that → array[5] of array[3]. */ - DeclSuffix suffs[8]; - int nsuffs = 0; - while (nsuffs < 8) { - if (!parse_decl_suffix(p, &suffs[nsuffs])) break; - ++nsuffs; - /* Attributes between/after suffixes — most commonly after a function - * declarator's `)`. Same sink rule as the post-id position. */ - if (starts_attr(p)) { - if (attrs_out) parse_attrs_into(p, attrs_out); - else parse_and_discard_attributes(p); - } - } - if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) { - perr(p, "too many declarator suffixes (raise the cap if needed)"); - } - for (int i = nsuffs - 1; i >= 0; --i) { - base = apply_decl_suffix(p, base, &suffs[i]); - } - - /* Apply inner pointer wraps. 
inner_quals[0] is the FIRST `*` consumed (the - * outermost in the chain `**fp` reads as "fp is ptr to ptr"); the LAST `*` - * is the one nearest the IDENT. Wrap from nearest-IDENT outward, so we - * iterate inner_quals in reverse. */ - for (int i = (int)nptrs_inner - 1; i >= 0; --i) { - base = type_ptr(p->pool, base); - if (inner_quals[i]) { - base = type_qualified(p->pool, base, inner_quals[i]); - } - } - - /* Apply inner declarator suffixes last — they sit closest to IDENT, so - * for `int (*ops[2])(int)` `[2]` wraps the (already-built) function- - * pointer type to give "array[2] of pointer to function(int) → int". */ - for (int i = n_inner_suffs - 1; i >= 0; --i) { - base = apply_decl_suffix(p, base, &inner_suffs[i]); - } - - if (name_out) *name_out = name; - if (loc_out) *loc_out = nloc; - return base; -} - -/* Non-abstract entry point used by ordinary declarations. */ -static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, - SrcLoc* loc_out) { - return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out); -} - -/* True if `ty` is char/signed char/unsigned char (the three element types - * permitted as the target of a string-literal initializer per §6.7.9 ¶14). */ -static int is_char_kind(const Type* ty) { - if (!ty) return 0; - return ty->kind == TY_CHAR || ty->kind == TY_SCHAR || ty->kind == TY_UCHAR; -} - -/* Decode the string token at p->cur (must be TOK_STR) without advancing. - * Returns a heap-allocated byte buffer (caller frees) and writes the - * length (including the trailing NUL) to *nlen_out. Convenience wrapper - * around decode_string_literal, kept here so initializer code doesn't - * need to reach into the literal-parsing section. 
*/ -static u8* peek_string_bytes(Parser* p, size_t* nlen_out) { - Tok t = p->cur; - if (t.kind != TOK_STR) perr(p, "internal: peek_string_bytes on non-string"); - return decode_string_literal(p, &t, nlen_out); -} - -/* Push the lvalue of a sub-object at byte offset `offset` within the array - * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. The - * value stack ends with an OPK_INDIRECT lvalue ready for cg_store. */ -static void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* elem_ty) { - cg_push_local_typed(p->cg, slot, arr_ty); - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, elem_ty)); - if (offset > 0) { - cg_push_int(p->cg, (i64)offset, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, elem_ty); -} - -/* Emit a load+store for one scalar leaf from the source pointer - * (`src_ptr_slot`, holding a pointer rvalue) to a sub-object of the - * destination slot. `src_ptr_ty` is the slot's declared type so we read - * it back at the right width before retagging to the leaf's pointer - * type. */ -static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty, - u32 dst_off, FrameSlot src_ptr_slot, - const Type* src_ptr_ty, u32 src_off, - const Type* leaf_ty) { - push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty); - cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty); - cg_load(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, leaf_ty)); - if (src_off > 0) { - cg_push_int(p->cg, (i64)src_off, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, leaf_ty); - cg_load(p->cg); - cg_store(p->cg); - cg_drop(p->cg); -} - -/* Walk a (possibly nested) aggregate type, emitting a leaf load+store - * for each scalar member. Used to lower `struct s = expr;` and - * `struct s = (struct S){...};` after the source's address has been - * spilled into `src_ptr_slot`. Bitfields and flexible array members are - * not supported here yet. 
*/ -static void emit_walk_copy(Parser* p, FrameSlot dst_slot, - const Type* dst_arr_ty, u32 dst_off, - FrameSlot src_ptr_slot, const Type* src_ptr_ty, - u32 src_off, const Type* ty) { - if (ty->kind == TY_STRUCT) { - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - for (u16 i = 0; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - if (f->flags & FIELD_BITFIELD) continue; - u32 foff = L->fields[i].offset; - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + foff, - src_ptr_slot, src_ptr_ty, src_off + foff, f->type); - } - return; - } - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - for (u32 i = 0; i < ty->arr.count; ++i) { - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + i * esz, - src_ptr_slot, src_ptr_ty, src_off + i * esz, - ty->arr.elem); - } - return; - } - if (ty->kind == TY_UNION) { - /* Byte-wise copy preserves whichever member was active. */ - u32 sz = abi_sizeof(p->abi, ty); - const Type* uchar_ty = type_prim(p->pool, TY_UCHAR); - for (u32 i = 0; i < sz; ++i) { - emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + i, - src_ptr_slot, src_ptr_ty, src_off + i, uchar_ty); - } - return; - } - emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, src_ptr_ty, - src_off, ty); -} - -/* Source struct/union value is on top of the cg stack as an lvalue. - * Spill its address into a fresh pointer slot, then walk the type and - * copy each scalar leaf into the destination sub-object. 
*/ -static void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, - const Type* dst_arr_ty, u32 dst_off, - const Type* ty) { - const Type* ptr_ty = type_ptr(p->pool, ty); +FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, + SrcLoc loc, u32 align_override) { FrameSlotDesc fsd; - FrameSlot src_ptr_slot; - cg_addr(p->cg); - memset(&fsd, 0, sizeof fsd); - fsd.type = ptr_ty; - fsd.size = abi_sizeof(p->abi, ptr_ty); - fsd.align = abi_alignof(p->abi, ptr_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - src_ptr_slot = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, src_ptr_slot, ptr_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, ptr_ty, 0, ty); -} - -/* Recursively zero-initialize the sub-object at `offset` of type `ty`. */ -static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty) { - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - for (u32 i = 0; i < ty->arr.count; ++i) { - zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem); - } - return; - } - if (ty->kind == TY_STRUCT) { - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - for (u16 i = 0; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type); - } - return; - } - if (ty->kind == TY_UNION) { - /* Zero the union's storage by zeroing the first non-bitfield field - * sized to the union's storage requirement. v1 just zeroes the first - * non-bitfield member; storage outside it stays whatever the OS - * gives a fresh stack slot. Tightening to a memset-equivalent is a - * Phase 6 concern. 
*/ - if (ty->rec.nfields > 0) { - const Field* f = &ty->rec.fields[0]; - if (!(f->flags & FIELD_BITFIELD)) { - zero_init_at(p, slot, arr_ty, offset, f->type); - } - } - return; - } - push_subobject_lv(p, slot, arr_ty, offset, ty); - cg_push_int(p->cg, 0, ty); - cg_store(p->cg); - cg_drop(p->cg); -} - -/* Parse the initializer for the sub-object at `offset` of type `ty`. - * - * Aggregates (`{...}`) follow §6.7.9: - * - Designated initializers (`[i] = ...`, `.field = ...`, and chains - * such as `[i][j] = ...` or `.a.b = ...`) reset the cursor before - * each item; subsequent positional items continue from there. Gaps - * between the previous cursor and a forward designator are - * zero-filled. - * - Brace elision: a sub-aggregate without its own `{` consumes - * scalars from the parent's stream until its first scalar slot is - * filled. - * - String literals initialize char-arrays directly per §6.7.9 ¶14 - * (with or without surrounding braces). - * - * Scalars take a single assignment-expression, optionally wrapped in - * `{ x }` per §6.7.9 ¶11. */ -static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, - const Type* ty); - -/* Emit byte stores for a string literal initializing a char-array sub- - * object at `offset` whose declared element count is `count`. Bytes - * beyond the literal are zero-filled. Per §6.7.9 ¶14 it is well-formed - * to drop the terminating NUL when `count == strlen(s)`; longer arrays - * keep the NUL and zero-pad. 
*/ -static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* elem_ty, u32 count) { - size_t n = 0; - u8* bytes = peek_string_bytes(p, &n); - size_t copy = n; - size_t i; - if (copy > count) copy = count; /* §6.7.9 ¶14 truncation */ - for (i = 0; i < copy; ++i) { - push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); - cg_push_int(p->cg, (i64)bytes[i], elem_ty); - cg_store(p->cg); - cg_drop(p->cg); - } - for (; i < count; ++i) { - push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); - cg_push_int(p->cg, 0, elem_ty); - cg_store(p->cg); - cg_drop(p->cg); - } - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); /* consume TOK_STR */ -} - -/* Parse a designator chain (`[const]` and `.ident` repeats) starting at - * the current token and ending at `=`. The chain navigates from the outer - * type `outer_ty` (offset_in `outer_offset`) down to a sub-object; - * returns the sub-object's type via *sub_ty_out and absolute byte offset - * via *sub_offset_out. Also writes the index of the FIRST designator - * (which selects the cursor position in the immediately-enclosing brace - * list): for an array that's the [i] index, for a struct that's the - * field index of the named member. 
*/ -static void parse_designator_chain(Parser* p, const Type* outer_ty, - u32 outer_offset, const Type** sub_ty_out, - u32* sub_offset_out, u32* top_index_out) { - const Type* cur_ty = outer_ty; - u32 cur_off = outer_offset; - int first = 1; - for (;;) { - if (is_punct(&p->cur, '[')) { - i64 idx; - u32 esz; - SrcLoc cloc = tok_loc(&p->cur); - advance(p); - idx = eval_const_int(p, cloc); - expect_punct(p, ']', "']' after designator index"); - if (!cur_ty || cur_ty->kind != TY_ARRAY) { - perr(p, "array designator on non-array"); - } - if (idx < 0 || (u32)idx >= cur_ty->arr.count) { - perr(p, "array designator index out of range"); - } - esz = abi_sizeof(p->abi, cur_ty->arr.elem); - cur_off += (u32)idx * esz; - cur_ty = cur_ty->arr.elem; - if (first) *top_index_out = (u32)idx; - first = 0; - } else if (is_punct(&p->cur, '.')) { - Sym fname; - const Type* fty; - u32 foff; - const Field* ff; - u16 fi; - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected field name after '.'"); - } - fname = p->cur.v.ident; - advance(p); - if (!cur_ty || - (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) { - perr(p, "field designator on non-record type"); - } - if (!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) { - perr(p, "no such field in designator"); - } - cur_off += foff; - if (first) { - /* Find the field index for cursor advance in the parent loop. - * find_field returns the offset/type but not the index, so do a - * second linear scan here. Anonymous-member transparency: an - * IDENT inside a nested anonymous member belongs to the outer - * record's NTH visible position; we use the outer slot for - * cursor advance, scanning the outer record. 
*/ - for (fi = 0; fi < cur_ty->rec.nfields; ++fi) { - const Field* g = &cur_ty->rec.fields[fi]; - if (g->name == fname && fname != 0) { - *top_index_out = fi; - break; - } - if ((g->flags & FIELD_ANON) && - (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) { - const Type* tmp_ty; - u32 tmp_off; - const Field* tmp_f; - if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off, - &tmp_f)) { - *top_index_out = fi; - break; - } - } - } - } - cur_ty = fty; - first = 0; - } else { - break; - } - } - if (first) perr(p, "internal: empty designator chain"); - expect_punct(p, '=', "'=' after designator"); - *sub_ty_out = cur_ty; - *sub_offset_out = cur_off; -} - -/* Parse a brace-elided sequence of scalars filling sub-objects of `ty` - * starting at `offset`. `count_out` is set to the number of scalars - * consumed; the function returns when the parent's initializer stream - * is exhausted (next token is `}` or `,`) or when `ty`'s scalar slots - * are full. */ -static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty); - -static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty, u32 start_field, - int braced) { - /* Iterate over the struct's fields, consuming initializers from the parent - * stream. With `braced=1`, we are inside this struct's own `{ ... }` and - * stop on `}`; with `braced=0`, we are eliding into the parent's stream - * and return as soon as the first scalar slot is filled (caller manages - * outer field index). Returns the number of fields consumed. - * - * In braced mode, designated initializers (`.field = ...`) reset `i`; - * gaps between the previous cursor and the designator are zero-filled. 
*/ - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - u32 i = start_field; - u32 zero_lo = start_field; /* first not-yet-zero-filled field index */ - for (; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - u32 foff = offset + L->fields[i].offset; - if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break; - if (braced && is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - /* Zero-fill any fields the designator skipped over (or back-tracked - * past — duplicate inits are allowed but we just overwrite). */ - while (zero_lo < top_idx) { - const Field* zf = &ty->rec.fields[zero_lo]; - u32 zoff = offset + L->fields[zero_lo].offset; - zero_init_at(p, slot, arr_ty, zoff, zf->type); - ++zero_lo; - } - init_at(p, slot, arr_ty, sub_off, sub_ty); - i = top_idx; /* loop ++ advances past it */ - if (zero_lo <= top_idx) zero_lo = top_idx + 1; - goto next_item_struct; - } - init_at(p, slot, arr_ty, foff, f->type); - if (zero_lo <= i) zero_lo = i + 1; - if (!braced) { - /* Caller (parent's elision) only wanted us to consume one scalar's - * worth into our first non-bitfield slot. */ - ++i; - break; - } - next_item_struct: - if (!accept_punct(p, ',')) { - ++i; - break; - } - if (is_punct(&p->cur, '}')) { - ++i; - break; /* trailing comma */ - } - } - /* Zero-fill any unconsumed fields in braced mode. 
*/ - if (braced) { - u32 j; - for (j = zero_lo; j < ty->rec.nfields; ++j) { - const Field* f = &ty->rec.fields[j]; - u32 foff = offset + L->fields[j].offset; - zero_init_at(p, slot, arr_ty, foff, f->type); - } - } - return i; -} - -static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty) { - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - init_at(p, slot, arr_ty, offset, ty->arr.elem); - (void)esz; - return 1; - } - if (ty->kind == TY_STRUCT) { - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); - return 1; - } - /* Scalar / pointer / union: consume one assignment-expr. */ - int had_brace = accept_punct(p, '{'); - push_subobject_lv(p, slot, arr_ty, offset, ty); - parse_assign_expr(p); - to_rvalue(p); - cg_store(p->cg); - cg_drop(p->cg); - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } - return 1; -} - -static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, - const Type* ty) { - if (ty->kind == TY_ARRAY) { - const Type* elem_ty = ty->arr.elem; - u32 esz = abi_sizeof(p->abi, elem_ty); - /* String literal initializing a char-array (with or without braces) per - * §6.7.9 ¶14. Wide character types are deferred (Phase 7). */ - if (is_char_kind(elem_ty)) { - if (p->cur.kind == TOK_STR) { - init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); - return; - } - if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { - advance(p); - init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); - accept_punct(p, ','); - expect_punct(p, '}', "'}' after string initializer"); - return; - } - } - if (!is_punct(&p->cur, '{')) { - /* Brace elision: the array consumes scalars from the parent stream. - * A bare assignment-expression on entry only fills one scalar slot - * worth, then returns. 
*/ - init_elided(p, slot, arr_ty, offset, elem_ty); - return; - } - advance(p); /* '{' */ - { - u32 i = 0; - u32 zero_lo = 0; /* first index not yet zero-filled (after explicit init) */ - if (!is_punct(&p->cur, '}')) { - for (;;) { - if (is_punct(&p->cur, '[')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, - &top_idx); - while (zero_lo < top_idx) { - zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty); - ++zero_lo; - } - init_at(p, slot, arr_ty, sub_off, sub_ty); - i = top_idx + 1; - if (zero_lo < i) zero_lo = i; - } else { - if (i >= ty->arr.count) { - perr(p, "too many initializers for array"); - } - init_at(p, slot, arr_ty, offset + i * esz, elem_ty); - ++i; - if (zero_lo < i) zero_lo = i; - } - if (!accept_punct(p, ',')) break; - if (is_punct(&p->cur, '}')) break; - } - } - expect_punct(p, '}', "'}' after array initializer"); - { - u32 j; - for (j = zero_lo; j < ty->arr.count; ++j) { - zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty); - } - } - } - return; - } - if (ty->kind == TY_STRUCT) { - if (!is_punct(&p->cur, '{')) { - /* Brace elision into the parent's stream: take scalars for our first - * non-bitfield field, then return so the parent advances to its next - * sibling. */ - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); - return; - } - advance(p); /* '{' */ - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1); - expect_punct(p, '}', "'}' after struct initializer"); - return; - } - if (ty->kind == TY_UNION) { - /* `union U u = {.field = expr}` per §6.7.9 ¶7 names a specific - * member; without a designator the first non-bitfield member is - * initialized. Only one member can be active, so we honor the - * (optional) leading designator and ignore the rest. 
*/ - int had_brace = accept_punct(p, '{'); - if (ty->rec.nfields == 0) { - if (had_brace) expect_punct(p, '}', "'}'"); - return; - } - if (had_brace && is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - init_at(p, slot, arr_ty, sub_off, sub_ty); - } else { - const Field* f = &ty->rec.fields[0]; - if (!(f->flags & FIELD_BITFIELD)) { - init_at(p, slot, arr_ty, offset, f->type); - } - } - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after union initializer"); - } - return; - } - /* Scalar (incl. pointer). */ - int had_brace = accept_punct(p, '{'); - push_subobject_lv(p, slot, arr_ty, offset, ty); - parse_assign_expr(p); - to_rvalue(p); - coerce_top_to_lvalue(p); - cg_store(p->cg); - cg_drop(p->cg); - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } -} - -/* ============================================================ - * Static-storage initializers (file-scope objects + static locals) - * ============================================================ - * - * A static-storage object's initializer is a constant expression that the - * compiler must materialize as bytes in the object file. We allocate a - * working buffer of `abi_sizeof(ty)` bytes (zero-filled), recursively walk - * the (possibly braced) initializer, and write each scalar's encoding at its - * computed offset. The buffer is then handed to decl_define_object as a - * single INIT_BYTES item — obj_reserve hands back uninitialized chunk - * storage, so we always patch the entire range. - * - * v1 scope: integer scalars only (eval_const_int). Pointer relocations are - * deferred — none of the Phase 4 corpus rows need them. Aggregates are - * positional brace lists with brace-elision elsewhere; designators arrive - * with Phase 6. 
*/ - -static void encode_int_le(u8* dst, u32 size, i64 v) { - for (u32 i = 0; i < size; ++i) { - dst[i] = (u8)((v >> (8u * i)) & 0xffu); - } -} - -/* Encode a string literal at *buf+offset for a char-array sub-object of - * declared element count `count`. Bytes beyond the literal stay zero - * (buf is pre-zeroed by define_static_object). Truncation rules match - * §6.7.9 ¶14. */ -static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset, - u32 count) { - size_t n = 0; - u8* bytes = peek_string_bytes(p, &n); - size_t copy = n; - if (copy > count) copy = count; - if (offset + (u32)copy > buflen) perr(p, "string initializer overflows object"); - memcpy(buf + offset, bytes, copy); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); -} - -/* Append one pending relocation to the parser-side list, growing on - * demand. Flushed by `define_static_object` after the section is pinned. */ -static void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, - i64 addend) { - if (p->static_relocs_len == p->static_relocs_cap) { - u32 nc = p->static_relocs_cap ? p->static_relocs_cap * 2u : 4u; - void* nb = arena_array(p->c->tu, char, - nc * sizeof(*p->static_relocs)); - if (!nb) perr(p, "out of memory recording static relocs"); - if (p->static_relocs && p->static_relocs_len) { - memcpy(nb, p->static_relocs, - p->static_relocs_len * sizeof(*p->static_relocs)); - } - p->static_relocs = nb; - p->static_relocs_cap = nc; - } - p->static_relocs[p->static_relocs_len].offset = offset; - p->static_relocs[p->static_relocs_len].size = size; - p->static_relocs[p->static_relocs_len].target = target; - p->static_relocs[p->static_relocs_len].addend = addend; - ++p->static_relocs_len; -} - -/* Try to parse the current expression as an address constant of pointer - * type `ty`, recording it as a pending relocation. 
Forms supported: - * `&IDENT` — absolute reloc at offset, addend 0 - * `&IDENT [const]` — addend = const * sizeof(elem) - * `IDENT` — IDENT is an array; same as `&IDENT[0]` - * `IDENT [+|-] const` — pointer arithmetic; addend scaled by - * sizeof(*IDENT) - * Returns 1 on success (caller should not call eval_const_int), 0 if - * the current tokens don't look like an address constant (caller falls - * back to integer-constant evaluation, which handles `(T*)0`). */ -static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf, - u32 offset, u32 sz) { - Tok t = p->cur; - Sym name = 0; - SrcLoc nloc = tok_loc(&p->cur); - int saw_amp = 0; - i64 element_addend = 0; - i64 byte_addend = 0; + FrameSlot s; SymEntry* e; - const Type* tgt_ty; - ObjSymId tgt; - if (t.kind == TOK_STR) { - /* String literal as address constant (§6.6 ¶7). Mint a .rodata symbol - * and emit a reloc at the pointer slot. */ - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - ObjSymId str_sym = emit_string_to_rodata(p, bytes, n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); - (void)ty; - (void)buf; - srl_push(p, offset, sz, str_sym, 0); - return 1; - } - if (is_punct(&t, '&')) { - saw_amp = 1; - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - /* Not a recognized address-of form. Bail; caller will eval as int. */ - perr(p, "expected identifier after '&' in static initializer"); - } - name = p->cur.v.ident; - nloc = tok_loc(&p->cur); - advance(p); - } else if (t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) { - name = t.v.ident; - advance(p); - } else { - return 0; - } - e = scope_lookup(p, name); - if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) { - /* Address constants must reference an object with static storage - * duration / external-or-internal linkage. Functions also qualify - * (§6.7.9 ¶4 — addresses of objects of static storage duration; a - * function designator decays to such an address). 
*/ - perr(p, "static initializer is not a constant address expression"); - } - tgt = e->v.sym; - tgt_ty = e->type; - /* Optional `[const]` after `&IDENT`. */ - if (saw_amp && is_punct(&p->cur, '[')) { - SrcLoc cloc; - advance(p); - cloc = tok_loc(&p->cur); - element_addend = eval_const_int(p, cloc); - expect_punct(p, ']', "']' after array-subscript constant"); - if (tgt_ty && tgt_ty->kind == TY_ARRAY) { - byte_addend += - element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); - } else { - byte_addend += element_addend; - } - } - /* Optional `+`/`-` const for pointer arithmetic. Without `&`, the - * IDENT must be an array (which decays to a pointer) for arithmetic - * to make sense. */ - while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) { - int neg = is_punct(&p->cur, '-'); - SrcLoc cloc; - i64 v; - advance(p); - cloc = tok_loc(&p->cur); - v = eval_const_int(p, cloc); - if (neg) v = -v; - /* Scale by element size if the base is array/pointer. */ - if (tgt_ty && tgt_ty->kind == TY_ARRAY) { - byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); - } else if (tgt_ty && tgt_ty->kind == TY_PTR) { - byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee); - } else if (saw_amp) { - /* `&scalar + const` measured in bytes-of(scalar). */ - byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty); - } else { - byte_addend += v; - } - } - (void)nloc; - (void)ty; - (void)buf; - /* The reloc width (R_ABS32 vs R_ABS64) follows the destination - * pointer width. */ - srl_push(p, offset, sz, tgt, byte_addend); - return 1; -} - -static void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, - const Type* ty) { - if (ty->kind == TY_ARRAY) { - const Type* elem = ty->arr.elem; - u32 esz = abi_sizeof(p->abi, elem); - u32 i = 0; - int had_brace; - /* String literal initializer for char-arrays (with or without braces). 
*/ - if (is_char_kind(elem)) { - if (p->cur.kind == TOK_STR) { - parse_static_string_at(p, buf, buflen, offset, ty->arr.count); - return; - } - if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { - advance(p); - parse_static_string_at(p, buf, buflen, offset, ty->arr.count); - accept_punct(p, ','); - expect_punct(p, '}', "'}' after string initializer"); - return; - } - } - had_brace = accept_punct(p, '{'); - if (!had_brace) { - perr(p, "expected '{' for static-storage array initializer"); - } - if (!is_punct(&p->cur, '}')) { - for (;;) { - if (is_punct(&p->cur, '[')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - parse_static_init_at(p, buf, buflen, sub_off, sub_ty); - i = top_idx + 1; - } else { - if (i >= ty->arr.count) { - perr(p, "too many initializers for array"); - } - parse_static_init_at(p, buf, buflen, offset + i * esz, elem); - ++i; - } - if (!accept_punct(p, ',')) break; - if (is_punct(&p->cur, '}')) break; - } - } - expect_punct(p, '}', "'}' after array initializer"); - return; - } - if (ty->kind == TY_STRUCT) { - int had_brace = accept_punct(p, '{'); - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - u32 i = 0; - if (!had_brace) { - perr(p, "expected '{' for static-storage struct initializer"); - } - while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { - const Field* f = &ty->rec.fields[i]; - if (is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - parse_static_init_at(p, buf, buflen, sub_off, sub_ty); - i = top_idx + 1; - if (!accept_punct(p, ',')) break; - continue; - } - parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset, - f->type); - ++i; - if (!accept_punct(p, ',')) break; - } - expect_punct(p, '}', "'}' after struct initializer"); - return; - } - if (ty->kind == TY_UNION) { - perr(p, "static-storage union initializer not supported 
in Phase 4"); - } - /* Scalar / pointer: integer constant or address-constant per §6.6 ¶9 - * (pointer-typed only). Address constants are recorded as pending - * relocations and resolved when the section gets pinned. */ - { - int had_brace = accept_punct(p, '{'); - SrcLoc cloc = tok_loc(&p->cur); - u32 sz = abi_sizeof(p->abi, ty); - if (offset + sz > buflen) perr(p, "initializer overflows object"); - if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) { - /* Address constant recorded as a reloc. Buffer bytes stay zero; - * the reloc carries the addend. */ - } else { - i64 v = eval_const_int(p, cloc); - encode_int_le(buf + offset, sz, v); - } - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } - } + u32 nat = abi_alignof(p->abi, type); + memset(&fsd, 0, sizeof fsd); + fsd.type = type; + fsd.name = name; + fsd.loc = loc; + fsd.size = abi_sizeof(p->abi, type); + fsd.align = (align_override > nat) ? align_override : nat; + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + s = cg_local(p->cg, &fsd); + e = scope_define(p, name, SEK_LOCAL, type); + e->v.slot = s; + return s; } -/* Choose the section a defining object decl with `quals` and `storage` - * should land in: const → .rodata, otherwise let decl_define_object pick - * .data/.bss based on whether the init is all zero. Returns OBJ_SEC_NONE - * when the default is appropriate. 
*/ -static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) { - if ((quals & Q_CONST) != 0 && has_nonzero) { - Sym secname = pool_intern_cstr(p->pool, ".rodata"); - return obj_section(decl_obj(p->decls), secname, SEC_RODATA, SF_ALLOC, 1u); - } - return OBJ_SEC_NONE; +FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { + return make_local_aligned(p, name, type, loc, 0); } -/* Define a static-storage object: allocate the byte buffer, parse the - * (optional) initializer into it, route to .rodata / .data / .bss, and call - * obj_symbol_define. Used for both file-scope objects and static locals. - * `align_override` is the strictest `_Alignas` argument the declarator - * collected, or 0 for the natural type alignment. */ -static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, - u16 quals, int has_init, SrcLoc loc, - u32 align_override) { - ObjBuilder* ob = decl_obj(p->decls); - u32 size = abi_sizeof(p->abi, var_ty); - u32 align = abi_alignof(p->abi, var_ty); - if (align_override > align) align = align_override; - u8* buf = NULL; - int has_nonzero = 0; - ObjSecId override_sec; - /* TLS objects route to .tdata / .tbss with SF_TLS; decl_declare marked - * the symbol SK_TLS when the source carried `_Thread_local`. The - * .rodata override path is skipped — TLS storage is per-thread mutable - * even when declared `const`. */ - const ObjSym* os = obj_symbol_get(ob, sym); - int is_tls = (os && os->kind == SK_TLS); - - if (has_init) { - buf = (u8*)arena_array(p->c->tu, u8, size ? size : 1u); - memset(buf, 0, size); - /* Reset the pending-reloc list before parsing this initializer; the - * caller flushes the collected entries to obj_reloc once the section - * has been pinned. 
*/ - p->static_relocs_len = 0; - parse_static_init_at(p, buf, size, 0, var_ty); - for (u32 i = 0; i < size; ++i) { - if (buf[i]) { has_nonzero = 1; break; } - } - /* Pointer-typed initializers count as nonzero — the bytes are - * patched by the loader at runtime, so we must place the object in - * .data (or .rodata when const) rather than .bss. */ - if (p->static_relocs_len) has_nonzero = 1; - } - - if (is_tls) { - /* TLS path: .tbss for zero-init, .tdata otherwise. The section flags - * mirror what clang emits for `_Thread_local` globals so the linker's - * existing PT_TLS / TLV layout code applies unchanged. */ - Sym sname; - ObjSecId sec; - u32 a = align ? align : 1u; - u32 base; - if (!has_init || !has_nonzero) { - sname = obj_secname_tbss(p->c); - sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, - SF_ALLOC | SF_WRITE | SF_TLS, a, 0, OBJ_SEC_NONE, 0); - base = obj_align_to(ob, sec, a); - obj_reserve_bss(ob, sec, base + size, a); - obj_symbol_define(ob, sym, sec, base, size); - return; - } - sname = obj_secname_tdata(p->c); - sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, a); - base = obj_align_to(ob, sec, a); - { - u8* dst = obj_reserve(ob, sec, size); - if (dst) memcpy(dst, buf, size); - } - obj_symbol_define(ob, sym, sec, base, size); - for (u32 i = 0; i < p->static_relocs_len; ++i) { - RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; - obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk, - p->static_relocs[i].target, p->static_relocs[i].addend); - } - p->static_relocs_len = 0; - (void)loc; - return; - } - - override_sec = pick_object_section(p, quals, has_nonzero); - if (override_sec != OBJ_SEC_NONE) { - /* .rodata path: emit bytes directly here so we can pin the section. - * obj_section dedupes by name, so multiple const inits share one - * .rodata — align each placement to the object's own requirement. */ - u32 base = obj_align_to(ob, override_sec, align > 1u ? 
align : 1u); - { - u8* dst = obj_reserve(ob, override_sec, size); - if (dst && buf) memcpy(dst, buf, size); - } - obj_symbol_define(ob, sym, override_sec, base, size); - /* Flush pending pointer-init relocations against this section. */ - for (u32 i = 0; i < p->static_relocs_len; ++i) { - RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; - obj_reloc(ob, override_sec, base + p->static_relocs[i].offset, rk, - p->static_relocs[i].target, p->static_relocs[i].addend); - } - p->static_relocs_len = 0; - (void)loc; - return; - } - - if (!has_init || !has_nonzero) { - /* BSS path. SSEM_NOBITS keeps the bytes off-disk; the loader zeros - * the section image. The symbol's value is the offset within the - * accumulated bss_size — bumped after we record this object. */ - Sym sname = pool_intern_cstr(p->pool, ".bss"); - ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, - SF_ALLOC | SF_WRITE, - align ? align : 1u, 0, OBJ_SEC_NONE, 0); - u32 a = align ? align : 1u; - u32 base = obj_align_to(ob, sec, a); - obj_reserve_bss(ob, sec, base + size, a); - obj_symbol_define(ob, sym, sec, base, size); - return; - } - /* .data path. */ - { - Sym sname = pool_intern_cstr(p->pool, ".data"); - ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE, - align ? align : 1u); - u32 base = obj_align_to(ob, sec, align ? align : 1u); - u8* dst = obj_reserve(ob, sec, size); - if (dst) memcpy(dst, buf, size); - obj_symbol_define(ob, sym, sec, base, size); - /* Flush pointer-init relocations against the .data section. */ - for (u32 i = 0; i < p->static_relocs_len; ++i) { - RelocKind rk = (p->static_relocs[i].size == 8) ? 
R_ABS64 : R_ABS32; - obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk, - p->static_relocs[i].target, p->static_relocs[i].addend); - } - p->static_relocs_len = 0; - } -} +/* ============================================================ + * Static-local symbol naming + * ============================================================ */ -/* Mint a unique linker name for a static local: `<orig>.<counter>`. The - * static_local_counter never resets across the TU, so two static locals in - * different functions never collide even if they share the source name. */ -static Sym mint_static_local_sym(Parser* p, Sym orig) { +/* Mint a unique linker name for a static local: `<orig>.<counter>`. */ +Sym mint_static_local_sym(Parser* p, Sym orig) { size_t olen = 0; const char* on = pool_str(p->pool, orig, &olen); char buf[128]; @@ -5201,70 +382,22 @@ static Sym mint_static_local_sym(Parser* p, Sym orig) { return pool_intern(p->pool, buf, wlen); } -/* If `ty` is an incomplete array (`T[]`), peek the initializer at p->cur - * and complete the type by counting the items it provides. Three cases: - * - `T` is a char-kind and the initializer is a string literal: count = - * decoded length (including NUL). - * - `{...}` initializer: record the braced range and count top-level - * items; positional only, no designators (sufficient for the corpus). - * After completion the parser is rewound to the recorded `{`. - * - Otherwise: panic (incomplete array with non-list init). - * Returns the completed array type. The caller should use this as the - * declared variable type going forward. 
*/ -static const Type* complete_incomplete_array(Parser* p, const Type* ty) { - const Type* elem; - if (!ty || ty->kind != TY_ARRAY || !ty->arr.incomplete) return ty; - elem = ty->arr.elem; - if (is_char_kind(elem) && p->cur.kind == TOK_STR) { - Tok t = p->cur; - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - return type_array(p->pool, elem, (u32)n, /*incomplete=*/0); - } - if (is_punct(&p->cur, '{')) { - u32 cnt; - record_braced_block(p); - cnt = count_recorded_top_level_items(p->replay, p->replay_len); - /* String literal as the sole brace contents is also valid: `char s[] = - * {"hi"}`. Detect by replay[1] being TOK_STR; recompute count from the - * decoded length. */ - if (cnt == 1 && p->replay_len >= 3 && p->replay[1].kind == TOK_STR && - is_char_kind(elem)) { - Tok t = p->replay[1]; - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - cnt = (u32)n; - } - replay_rewind(p); - return type_array(p->pool, elem, cnt, /*incomplete=*/0); - } - perr(p, "initializer cannot complete incomplete array type"); -} +/* ============================================================ + * Declarations + * ============================================================ */ -/* Parse a single init-declarator after the decl-specs have been consumed. - * Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix* - * init = `=` (assign_expr | brace_init) */ +/* Parse a single init-declarator after the decl-specs have been consumed. */ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { SrcLoc loc; Sym name; const Type* var_ty = parse_declarator(p, specs->type, &name, &loc); - /* Typedef declaration: bind the name as SEK_TYPEDEF in the current - * scope so subsequent decl-spec sites can recognize it as a type - * specifier. No storage is allocated and an initializer is not - * permitted. 
*/ if (specs->storage == DS_TYPEDEF) { if (is_punct(&p->cur, '=')) { perr(p, "typedef declarator cannot have initializer"); } { SymEntry* e = scope_define(p, name, SEK_TYPEDEF, var_ty); - /* `typedef T B[n]` (or via a VLA-typedef chain): snapshot the - * VLA byte size at typedef site so subsequent `B v` declarations - * each alloca the same captured runtime size, even if the names - * referenced in the size expression have since changed. */ if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) { FrameSlot count_slot = p->vla_pending_count_slot; const Type* elem_ty = var_ty->arr.elem; @@ -5292,7 +425,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { p->vla_pending = 0; p->vla_pending_count_slot = FRAME_SLOT_NONE; } else if (specs->vla_byte_slot != FRAME_SLOT_NONE) { - /* Typedef of a typedef'd VLA: chain the captured slot. */ e->vla_byte_slot = specs->vla_byte_slot; } } @@ -5300,10 +432,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { return; } - /* Static-storage locals are promoted to a globally-visible symbol with - * internal linkage; the local scope binds to that symbol so subsequent - * uses load through cg_push_global. The variable's storage persists - * across calls (§6.2.4 ¶3). Initializer must be a constant expression. */ if (specs->storage == DS_STATIC) { Decl decl_in; DeclId did; @@ -5326,9 +454,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { e = scope_define(p, name, SEK_GLOBAL, var_ty); e->v.sym = sym; has_init = accept_punct(p, '='); - /* `static T name[] = {...}` at block scope: peek the initializer to - * deduce the element count before emitting, the same as the file-scope - * path above (search for the matching call in parse_global_decl). 
*/ if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { const Type* completed = complete_incomplete_array(p, var_ty); if (completed != var_ty) { @@ -5342,12 +467,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { return; } - /* `extern` block-scope declaration: declares the name but does not define - * storage. The matching defining declaration must appear elsewhere (file - * scope here, or another TU). Per §6.2.2 ¶4, if a prior file-scope - * declaration of this identifier is visible, the linkage of this extern - * decl matches it (so `static int g; ... { extern int g; }` resolves to - * the same internal-linkage symbol). */ if (specs->storage == DS_EXTERN) { Decl decl_in; DeclId did; @@ -5379,13 +498,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { return; } - /* VLA: the declarator type is `T[]` (incomplete array) with either a - * pending runtime count from an inline `T name[n]` suffix, or — when - * `name` was declared via a VLA typedef — a byte-size slot already - * captured at the typedef site (carried in specs->vla_byte_slot). The - * variable binds as `T*` (alloca's return) so subscript/pointer arith - * just work; the SymEntry's vla_byte_slot lets `sizeof(name)` emit a - * runtime load instead of the constant pointer width. */ if (var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete && (p->vla_pending || specs->vla_byte_slot != FRAME_SLOT_NONE)) { const Type* elem_ty = var_ty->arr.elem; @@ -5394,8 +506,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { FrameSlot ptr_slot; SymEntry* sym_entry; if (p->vla_pending) { - /* Inline VLA: derive byte size = count * sizeof(elem) and stash - * it in a fresh i64 slot. 
*/ FrameSlot count_slot = p->vla_pending_count_slot; u32 esz = abi_sizeof(p->abi, elem_ty); FrameSlotDesc bsd; @@ -5419,7 +529,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { cg_store(p->cg); cg_drop(p->cg); } else { - /* Typedef'd VLA: byte-size already captured at typedef site. */ byte_slot = specs->vla_byte_slot; } ptr_slot = make_local(p, name, ptr_ty, loc); @@ -5445,9 +554,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { int has_init = is_punct(&p->cur, '='); FrameSlot s; if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { - /* `T name[] = ...`: peek the initializer to deduce the count, then - * allocate the slot with the now-complete type. The slot allocation - * has to wait until after sizing, so move it inside this branch. */ advance(p); /* '=' */ var_ty = complete_incomplete_array(p, var_ty); s = make_local_aligned(p, name, var_ty, loc, specs->align); @@ -5460,13 +566,10 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { cg_set_loc(p->cg, loc); if ((var_ty->kind == TY_STRUCT || var_ty->kind == TY_UNION) && !is_punct(&p->cur, '{')) { - /* §6.7.9 ¶13: an aggregate initializer that is not a brace list - * must be a single expression of compatible type — copy it. */ parse_assign_expr(p); emit_struct_copy_into_slot(p, s, var_ty, 0, var_ty); } else if (var_ty->kind == TY_ARRAY || var_ty->kind == TY_STRUCT || var_ty->kind == TY_UNION) { - /* Brace initializer (or string literal — Phase 6). */ init_at(p, s, var_ty, 0, var_ty); } else { cg_push_local_typed(p->cg, s, var_ty); @@ -5474,19 +577,13 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { to_rvalue(p); coerce_top_to_lvalue(p); cg_store(p->cg); - /* cg_store leaves the assigned value on the stack (C semantics); - * an init-declarator is statement-context, so drop it. */ cg_drop(p->cg); } } } } -static void parse_local_decl(Parser* p, const DeclSpecs* specs) { - /* `struct S { ... 
};`, `struct S;`, `enum E { ... };` introduce only a - * tag and have no declarator — accept the bare `;` here. Without a - * declarator there is also no `(...)` (not a function), so falling - * through to parse_init_declarator would panic. */ +void parse_local_decl(Parser* p, const DeclSpecs* specs) { if (accept_punct(p, ';')) return; parse_init_declarator(p, specs); while (accept_punct(p, ',')) { @@ -5496,791 +593,11 @@ static void parse_local_decl(Parser* p, const DeclSpecs* specs) { } /* ============================================================ - * Statements - * ============================================================ */ - -static void parse_if_stmt(Parser* p) { - CGLabel L_else = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - expect_punct(p, '(', "'('"); - parse_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')'"); - cg_branch_false(p->cg, L_else); - parse_stmt(p); - if (accept_kw(p, KW_ELSE)) { - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_else); - parse_stmt(p); - cg_label_place(p->cg, L_end); - } else { - cg_label_place(p->cg, L_else); - } -} - -static void parse_while_stmt(Parser* p) { - CGLabel L_top = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - CGLabel saved_break = p->cur_break; - CGLabel saved_continue = p->cur_continue; - expect_punct(p, '(', "'('"); - cg_label_place(p->cg, L_top); - parse_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')'"); - cg_branch_false(p->cg, L_end); - p->cur_break = L_end; - p->cur_continue = L_top; - parse_stmt(p); - p->cur_break = saved_break; - p->cur_continue = saved_continue; - cg_jump(p->cg, L_top); - cg_label_place(p->cg, L_end); -} - -static void parse_for_stmt(Parser* p) { - CGLabel L_top = cg_label_new(p->cg); - CGLabel L_step = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - CGLabel saved_break = p->cur_break; - CGLabel saved_continue = p->cur_continue; - - /* `for` introduces a block scope spanning init/cond/step/body so that - * an init declaration 
`int i=...` is visible only inside the loop. */ - scope_push(p); - expect_punct(p, '(', "'('"); - - /* init: declaration | expr | ; */ - if (!accept_punct(p, ';')) { - DeclSpecs specs; - if (parse_decl_specs(p, &specs)) { - parse_local_decl(p, &specs); /* consumes the trailing ';' */ - } else { - parse_expr(p); - cg_drop(p->cg); - expect_punct(p, ';', "';'"); - } - } - - cg_label_place(p->cg, L_top); - /* cond: optional. Empty cond means "always true". */ - if (!is_punct(&p->cur, ';')) { - parse_expr(p); - to_rvalue(p); - cg_branch_false(p->cg, L_end); - } - expect_punct(p, ';', "';'"); - - /* Need to remember the step expression — but since this is single-pass, - * we can't replay tokens. Standard trick: emit body before step by - * jumping over the step on first entry, then placing the step where - * the back-edge arrives. */ - { - CGLabel L_body = cg_label_new(p->cg); - cg_jump(p->cg, L_body); - cg_label_place(p->cg, L_step); - /* step: optional. */ - if (!is_punct(&p->cur, ')')) { - parse_expr(p); - cg_drop(p->cg); - } - cg_jump(p->cg, L_top); - expect_punct(p, ')', "')'"); - cg_label_place(p->cg, L_body); - - p->cur_break = L_end; - p->cur_continue = L_step; - parse_stmt(p); - p->cur_break = saved_break; - p->cur_continue = saved_continue; - - cg_jump(p->cg, L_step); - cg_label_place(p->cg, L_end); - } - scope_pop(p); -} - -static void parse_return_stmt(Parser* p) { - if (accept_punct(p, ';')) { - cg_ret(p->cg, 0); - return; - } - parse_expr(p); - to_rvalue(p); - expect_punct(p, ';', "';' after return value"); - cg_ret(p->cg, 1); -} - -static void parse_break_stmt(Parser* p) { - if (p->cur_break == 0) perr(p, "'break' outside of loop or switch"); - cg_jump(p->cg, p->cur_break); - expect_punct(p, ';', "';' after break"); -} - -static void parse_continue_stmt(Parser* p) { - if (p->cur_continue == 0) perr(p, "'continue' outside of loop"); - cg_jump(p->cg, p->cur_continue); - expect_punct(p, ';', "';' after continue"); -} - -static void parse_do_stmt(Parser* p) { 
- CGLabel L_top = cg_label_new(p->cg); - CGLabel L_cond = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - CGLabel saved_break = p->cur_break; - CGLabel saved_continue = p->cur_continue; - cg_label_place(p->cg, L_top); - p->cur_break = L_end; - p->cur_continue = L_cond; - parse_stmt(p); - p->cur_break = saved_break; - p->cur_continue = saved_continue; - cg_label_place(p->cg, L_cond); - if (!is_kw(p, &p->cur, KW_WHILE)) perr(p, "expected 'while' after do-body"); - advance(p); /* while */ - expect_punct(p, '(', "'('"); - parse_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after do-while condition"); - expect_punct(p, ';', "';' after do-while"); - cg_branch_true(p->cg, L_top); - cg_label_place(p->cg, L_end); -} - -static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) { - GotoLabel* gl; - for (gl = p->goto_labels; gl; gl = gl->next) { - if (gl->name == name) return gl; - } - gl = arena_new(p->c->tu, GotoLabel); - if (!gl) perr(p, "out of memory in label_get_or_create"); - memset(gl, 0, sizeof *gl); - gl->name = name; - gl->label = cg_label_new(p->cg); - gl->placed = 0; - gl->first_use = loc; - gl->next = p->goto_labels; - p->goto_labels = gl; - return gl; -} - -static void parse_goto_stmt(Parser* p) { - Sym name; - SrcLoc loc; - GotoLabel* gl; - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected label name after 'goto'"); - } - name = p->cur.v.ident; - loc = tok_loc(&p->cur); - advance(p); - expect_punct(p, ';', "';' after goto"); - gl = label_get_or_create(p, name, loc); - cg_jump(p->cg, gl->label); -} - -/* `IDENT ':' STMT` — labeled statement. The IDENT lookup happens in the label - * namespace, not the ordinary identifier scope. Caller has already verified - * that cur is a non-keyword IDENT and the next token is ':'. 
*/ -static void parse_label_stmt(Parser* p) { - Sym name = p->cur.v.ident; - SrcLoc loc = tok_loc(&p->cur); - GotoLabel* gl; - advance(p); /* IDENT */ - advance(p); /* ':' */ - gl = label_get_or_create(p, name, loc); - if (gl->placed) perr(p, "duplicate label"); - gl->placed = 1; - cg_label_place(p->cg, gl->label); - parse_stmt(p); -} - -static void parse_case_stmt(Parser* p) { - i64 v; - CGLabel L; - CaseEntry* ce; - SrcLoc loc = tok_loc(&p->cur); - if (!p->cur_switch) perr(p, "'case' label not in switch statement"); - v = eval_const_int(p, loc); - expect_punct(p, ':', "':' after case constant"); - L = cg_label_new(p->cg); - cg_label_place(p->cg, L); - ce = arena_new(p->c->tu, CaseEntry); - if (!ce) perr(p, "out of memory in parse_case_stmt"); - ce->value = v; - ce->label = L; - ce->next = p->cur_switch->cases; - p->cur_switch->cases = ce; - parse_stmt(p); -} - -static void parse_default_stmt(Parser* p) { - CGLabel L; - if (!p->cur_switch) perr(p, "'default' label not in switch statement"); - expect_punct(p, ':', "':' after default"); - if (p->cur_switch->default_label != 0) perr(p, "duplicate 'default' label"); - L = cg_label_new(p->cg); - cg_label_place(p->cg, L); - p->cur_switch->default_label = L; - parse_stmt(p); -} - -static void parse_switch_stmt(Parser* p) { - /* Single-pass lowering: evaluate the controlling expression once into a - * temp, jump over the body to the dispatch chain, parse the body (which - * places case/default labels and records (value, label) pairs in - * cur_switch), then emit a compare-and-branch chain that selects the - * matching label. Falls through to L_end if no case/default matches. 
*/ - CGLabel L_dispatch = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - CGLabel saved_break = p->cur_break; - SwitchCtx ctx; - SwitchCtx* saved_switch = p->cur_switch; - FrameSlotDesc fsd; - const Type* vty; - CaseEntry* it; - CaseEntry* prev; - CaseEntry* head; - - expect_punct(p, '(', "'('"); - parse_expr(p); - to_rvalue(p); - vty = cg_top_type(p->cg); - if (!vty) vty = ty_int(p); - expect_punct(p, ')', "')' after switch expression"); - - memset(&ctx, 0, sizeof ctx); - memset(&fsd, 0, sizeof fsd); - fsd.type = vty; - fsd.size = abi_sizeof(p->abi, vty); - fsd.align = abi_alignof(p->abi, vty); - fsd.kind = FS_LOCAL; - ctx.value_slot = cg_local(p->cg, &fsd); - ctx.value_type = vty; - ctx.parent = saved_switch; - - /* Stash the value: stack has [rv]; want [lv, rv] then store. */ - cg_push_local_typed(p->cg, ctx.value_slot, vty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - - cg_jump(p->cg, L_dispatch); - - p->cur_switch = &ctx; - p->cur_break = L_end; - parse_stmt(p); - p->cur_break = saved_break; - p->cur_switch = saved_switch; - - /* Body fall-through exits the switch. */ - cg_jump(p->cg, L_end); - - /* Emit dispatch in source order — reverse the LIFO chain. */ - cg_label_place(p->cg, L_dispatch); - prev = NULL; - head = ctx.cases; - while (head) { - CaseEntry* nxt = head->next; - head->next = prev; - prev = head; - head = nxt; - } - for (it = prev; it; it = it->next) { - cg_push_local_typed(p->cg, ctx.value_slot, vty); - cg_load(p->cg); - cg_push_int(p->cg, it->value, vty); - cg_cmp(p->cg, CMP_EQ); - cg_branch_true(p->cg, it->label); - } - if (ctx.default_label) { - cg_jump(p->cg, ctx.default_label); - } - cg_label_place(p->cg, L_end); -} - -/* `_Static_assert ( constant-expression , string-literal ) ;` (§6.7.10). - * The expression is evaluated at compile time; failure aborts parsing - * with a diagnostic that includes the user's message. The C11 spec - * requires the message; C2x makes it optional, but we follow C11 here. 
*/ -static void parse_static_assert(Parser* p) { - SrcLoc loc = tok_loc(&p->cur); - i64 v; - if (!accept_kw(p, KW_STATIC_ASSERT)) { - perr(p, "expected _Static_assert"); - } - expect_punct(p, '(', "'(' after _Static_assert"); - v = eval_const_int(p, tok_loc(&p->cur)); - expect_punct(p, ',', "',' separating _Static_assert args"); - if (p->cur.kind != TOK_STR) { - perr(p, "expected string literal as _Static_assert message"); - } - { - Tok msg = p->cur; - advance(p); - expect_punct(p, ')', "')' after _Static_assert"); - expect_punct(p, ';', "';' after _Static_assert"); - if (!v) { - size_t mlen = 0; - const char* mstr = pool_str(p->pool, msg.spelling, &mlen); - compiler_panic(p->c, loc, "static assertion failed: %.*s", - (int)mlen, mstr ? mstr : ""); - } - } -} - -/* GNU inline-asm statement (Track A — frontend only). - * - * asm-stmt := ('asm'|'__asm__') ['volatile'|'__volatile__'] ['goto'] - * '(' template-string - * [':' outputs [':' inputs [':' clobbers [':' labels]]]] - * ')' ';' - * outputs := output (',' output)* - * output := ['[' name ']'] string-literal '(' lvalue-expr ')' - * inputs := input (',' input)* - * input := ['[' name ']'] string-literal '(' expr ')' - * clobbers := string-literal (',' string-literal)* - * labels := identifier (',' identifier)* - * - * The leading 'asm'/'__asm__' keyword has already been consumed by - * parse_stmt. This function: - * 1. Captures each output's lvalue address into a fresh frame slot, so - * after cg_inline_asm pushes the result SValues we can store them - * back into the user's lvalues. - * 2. Pushes each input's rvalue onto the CG value stack in declaration - * order — cg_inline_asm consumes them per its docstring contract. - * 3. Calls cg_inline_asm. The cg layer (Track B) is still a panic - * stub, so any test exercising this path will hit the panic until - * Track B lands. - * 4. After return, walks outputs in REVERSE order and stores the - * top-of-stack SValue into the captured lvalue address. 
- * - * `volatile` / `__volatile__` are accepted and ignored (informational — - * see ASM.md §9 / INLINEASM.md §9). `goto` is accepted at the keyword - * level; the labels list, if present, is parsed and discarded — Track B - * will reject `asm goto` inside cg_inline_asm per INLINEASM.md §1. */ -typedef struct AsmOutLValue { - FrameSlot addr_slot; /* holds &lvalue (pointer) */ - const Type* ptr_ty; /* pointer-to type (for the slot/load) */ - const Type* val_ty; /* the lvalue's value type (for cg_deref) */ -} AsmOutLValue; - -static Sym parse_asm_operand_name(Parser* p) { - /* `[ ident ]` already known to start with `[` per the caller. Returns - * the interned ident Sym, or 0 if absent. */ - Sym name = 0; - if (!is_punct(&p->cur, '[')) return 0; - advance(p); - if (p->cur.kind != TOK_IDENT) { - perr(p, "expected identifier inside '[name]' on asm operand"); - } - name = p->cur.v.ident; - advance(p); - expect_punct(p, ']', "']' after asm operand name"); - return name; -} - -static const char* parse_asm_str(Parser* p, const char* what) { - /* Adjacent string literals are already fused at the pp-pull boundary - * (see fuse_string_lits). Decode the fused token to raw bytes and - * intern the resulting C-string into the global pool. */ - u8* bytes; - size_t nlen = 0; - Sym s; - Tok t; - if (p->cur.kind != TOK_STR) { - perr(p, "expected string literal in %s", what); - } - t = p->cur; - advance(p); - bytes = decode_string_literal(p, &t, &nlen); - /* decode_string_literal includes a trailing NUL in nlen; strip it - * before interning so the C-string view round-trips. */ - if (nlen > 0) nlen -= 1; - s = pool_intern(p->pool, (const char*)bytes, nlen); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - return pool_str(p->pool, s, NULL); -} - -static void parse_asm_stmt(Parser* p) { - /* The 'asm'/'__asm__' keyword was just consumed by parse_stmt. 
*/ - const char* tmpl; - AsmConstraint* outs = NULL; - AsmConstraint* ins = NULL; - Sym* clobbers = NULL; - AsmOutLValue* out_lvs = NULL; - u32 nout = 0, nin = 0, nclob = 0; - u32 cap_out = 0, cap_in = 0, cap_clob = 0; - int saw_goto = 0; - SrcLoc loc = tok_loc(&p->cur); - - /* Optional `volatile` / `__volatile__` — informational, dropped. */ - for (;;) { - if (accept_kw(p, KW_VOLATILE)) continue; - if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) { - advance(p); - continue; - } - break; - } - /* Optional `goto`. */ - if (accept_kw(p, KW_GOTO)) saw_goto = 1; - - expect_punct(p, '(', "'(' after asm"); - - /* Template string. */ - tmpl = parse_asm_str(p, "asm template"); - - /* ---- outputs ---- */ - if (accept_punct(p, ':')) { - if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { - cap_out = 4; - outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out); - out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out); - for (;;) { - AsmConstraint c; - AsmOutLValue lv; - const Type* val_ty; - const Type* ptr_ty; - FrameSlotDesc fsd; - FrameSlot slot; - memset(&c, 0, sizeof c); - memset(&lv, 0, sizeof lv); - c.name = parse_asm_operand_name(p); /* 0 if absent */ - c.str = parse_asm_str(p, "asm output constraint"); - /* Per GCC: outputs require '=' or '+'. Track B will validate; - * the parser stays lenient here. */ - if (c.str && c.str[0] == '+') c.dir = ASM_INOUT; - else c.dir = ASM_OUT; - expect_punct(p, '(', "'(' before asm output lvalue"); - /* parse_assign_expr leaves an lvalue (or rvalue) on the stack. - * We need the address; cg_addr converts an lvalue → ptr rvalue. - * Then we stash that pointer in a scratch frame slot so we can - * store back into it after cg_inline_asm returns. 
*/ - parse_assign_expr(p); - val_ty = cg_top_type(p->cg); - if (!val_ty) perr(p, "asm output: cannot determine lvalue type"); - c.type = val_ty; - cg_addr(p->cg); - ptr_ty = cg_top_type(p->cg); - if (!ptr_ty) perr(p, "asm output: cannot take address"); - memset(&fsd, 0, sizeof fsd); - fsd.type = ptr_ty; - fsd.size = 8; - fsd.align = 8; - fsd.kind = FS_LOCAL; - slot = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, slot, ptr_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - lv.addr_slot = slot; - lv.ptr_ty = ptr_ty; - lv.val_ty = val_ty; - expect_punct(p, ')', "')' after asm output lvalue"); - if (nout == cap_out) { - u32 nc = cap_out * 2; - AsmConstraint* nb = - (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); - AsmOutLValue* nlv = - (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc); - memcpy(nb, outs, sizeof(AsmConstraint) * nout); - memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout); - outs = nb; - out_lvs = nlv; - cap_out = nc; - } - outs[nout] = c; - out_lvs[nout] = lv; - nout++; - if (!accept_punct(p, ',')) break; - } - } - - /* ---- inputs ---- */ - if (accept_punct(p, ':')) { - if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { - cap_in = 4; - ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in); - for (;;) { - AsmConstraint c; - memset(&c, 0, sizeof c); - c.name = parse_asm_operand_name(p); - c.str = parse_asm_str(p, "asm input constraint"); - c.dir = ASM_IN; - expect_punct(p, '(', "'(' before asm input expression"); - /* Push input value onto the CG stack in declaration order. - * cg_inline_asm consumes them per its docstring. */ - parse_assign_expr(p); - to_rvalue(p); - /* Capture the rvalue's C type for the binder. cg_top_type - * is valid after to_rvalue while the value is still on top - * of the CG stack. 
*/ - c.type = cg_top_type(p->cg); - expect_punct(p, ')', "')' after asm input expression"); - if (nin == cap_in) { - u32 nc = cap_in * 2; - AsmConstraint* nb = - (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); - memcpy(nb, ins, sizeof(AsmConstraint) * nin); - ins = nb; - cap_in = nc; - } - ins[nin++] = c; - if (!accept_punct(p, ',')) break; - } - } - - /* ---- clobbers ---- */ - if (accept_punct(p, ':')) { - if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { - cap_clob = 4; - clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob); - for (;;) { - const char* cstr; - Sym cs; - cstr = parse_asm_str(p, "asm clobber"); - cs = pool_intern_cstr(p->pool, cstr); - if (nclob == cap_clob) { - u32 nc = cap_clob * 2; - Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc); - memcpy(nb, clobbers, sizeof(Sym) * nclob); - clobbers = nb; - cap_clob = nc; - } - clobbers[nclob++] = cs; - if (!accept_punct(p, ',')) break; - } - } - - /* ---- labels (asm goto) ---- */ - if (accept_punct(p, ':')) { - /* Parse-only: consume identifier list. cg_inline_asm rejects - * asm-goto in v1 per INLINEASM.md §1. */ - if (!is_punct(&p->cur, ')')) { - for (;;) { - if (p->cur.kind != TOK_IDENT) { - perr(p, "expected label identifier in asm-goto label list"); - } - advance(p); - if (!accept_punct(p, ',')) break; - } - } - } - } - } - } - - expect_punct(p, ')', "')' to close asm"); - expect_punct(p, ';', "';' after asm statement"); - - (void)saw_goto; /* parsed; cg layer rejects asm-goto in v1 */ - - /* In-out ('+r') decomposition: for each ASM_INOUT output k, synthesize - * a matching input "<k>" carrying the current value of the bound - * lvalue. The binder's matching-constraint path copies the value into - * the output reg before the asm runs; the existing store-back loop - * below then writes the post-asm value back into the lvalue. GCC's - * matching-digit syntax tops out at "9", so v1 supports up to 10 - * +r constraints per asm statement. 
*/ - u32 ninout = 0; - for (u32 i = 0; i < nout; ++i) { - if (outs[i].dir == ASM_INOUT) ninout++; - } - if (ninout > 0) { - static const char* const k_match_strs[10] = { - "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; - /* Grow ins[] to fit synthesized entries. */ - u32 need = nin + ninout; - if (need > cap_in) { - u32 nc = cap_in ? cap_in : 4; - while (nc < need) nc *= 2; - AsmConstraint* nb = - (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); - if (nin) memcpy(nb, ins, sizeof(AsmConstraint) * nin); - ins = nb; - cap_in = nc; - } - for (u32 i = 0; i < nout; ++i) { - if (outs[i].dir != ASM_INOUT) continue; - if (i >= 10) { - perr(p, "asm: '+r' constraint at output index >9 exceeds " - "matching-digit syntax"); - } - AsmOutLValue* lv = &out_lvs[i]; - /* Load lvalue's current value onto the cg stack: - * [scratch ptr-slot lvalue] → [ptr rvalue] → [rc lvalue] → [rc value] */ - cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); - cg_load(p->cg); - cg_deref(p->cg, lv->val_ty); - cg_load(p->cg); - AsmConstraint mc; - memset(&mc, 0, sizeof mc); - mc.str = k_match_strs[i]; - mc.dir = ASM_IN; - mc.type = lv->val_ty; - ins[nin++] = mc; - } - } - - cg_set_loc(p->cg, loc); - cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob); - - /* For each output (in reverse so the last output is on top first), - * store the SValue cg_inline_asm pushed back into the captured lvalue - * via the address we stashed. cg_inline_asm is currently a panic stub - * (Track B), so this loop is unreachable until Track B lands. */ - if (nout > 0) { - u32 i; - for (i = nout; i-- > 0;) { - AsmOutLValue* lv = &out_lvs[i]; - /* Stack: [..., out_val]. Push the address (load from slot, then - * deref to make it an lvalue), swap, store, drop. 
*/ - cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); - cg_load(p->cg); - cg_deref(p->cg, lv->val_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - } - } -} - -static void parse_compound_stmt(Parser* p) { - expect_punct(p, '{', "'{'"); - scope_push(p); - while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { - /* Drain stray newlines & pp-hash artifacts. (PP normally consumes - * these, but we tolerate them here as a no-op safety net.) */ - if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { - advance(p); - continue; - } - if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) { - parse_static_assert(p); - continue; - } - { - DeclSpecs specs; - Tok save_tok = p->cur; /* nothing to roll back yet — accept reused below */ - (void)save_tok; - if (parse_decl_specs(p, &specs)) { - parse_local_decl(p, &specs); - } else { - parse_stmt(p); - } - } - } - expect_punct(p, '}', "'}'"); - scope_pop(p); -} - -static void parse_stmt(Parser* p) { - /* Each statement starts from an empty value stack; recycle scratch - * registers so a function body with many sequential reg-allocating - * operations isn't bounded by the backend's fixed scratch window. */ - cg_set_loc(p->cg, tok_loc(&p->cur)); - /* Labeled statement: `IDENT ':' STMT`. The IDENT must not be a keyword; - * peek1 disambiguates the label form from an expression statement that - * happens to start with an identifier. 
*/ - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - Tok n = peek1(p); - if (is_punct(&n, ':')) { - parse_label_stmt(p); - return; - } - } - if (is_punct(&p->cur, '{')) { - parse_compound_stmt(p); - return; - } - if (is_punct(&p->cur, ';')) { - advance(p); - return; - } - if (is_kw(p, &p->cur, KW_IF)) { - advance(p); - parse_if_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_WHILE)) { - advance(p); - parse_while_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_FOR)) { - advance(p); - parse_for_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_DO)) { - advance(p); - parse_do_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_RETURN)) { - advance(p); - parse_return_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_BREAK)) { - advance(p); - parse_break_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_CONTINUE)) { - advance(p); - parse_continue_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_GOTO)) { - advance(p); - parse_goto_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_SWITCH)) { - advance(p); - parse_switch_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_CASE)) { - advance(p); - parse_case_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_DEFAULT)) { - advance(p); - parse_default_stmt(p); - return; - } - if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) { - advance(p); - parse_asm_stmt(p); - return; - } - /* Expression statement. */ - parse_expr(p); - cg_drop(p->cg); - expect_punct(p, ';', "';' after expression"); -} - -/* ============================================================ * External (top-level) declarations * ============================================================ */ -/* Parse a parameter-type-list. Returns the parameter type array and counts - * via out-pointers; `*variadic_out` is set if the list ends in `, ...`. 
- * - * Forms accepted: - * `(void)` — zero named params - * `()` — old-style "unspecified args"; treated as zero - * `(T1, T2, ...)` — named or abstract params, possibly trailing ellipsis - * - * Per §6.7.6.3, a parameter declared as `T x[N]` is rewritten to `T *x` (and - * `T x()` to `T (*x)()`); the §6.7.6.3 ¶7 "[static N]" form is a hint to the - * caller that the pointer points at ≥N elements — semantically still `T*`. */ -static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, - u8* variadic_out) { +void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, + u8* variadic_out) { ParamInfo* infos; u32 cap = 4; u32 n = 0; @@ -6289,11 +606,11 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, *nparams_out = 0; if (is_punct(&p->cur, ')')) { - return; /* `()` — no params recorded */ + return; } if (is_kw(p, &p->cur, KW_VOID)) { - Tok n = peek1(p); - if (is_punct(&n, ')')) { + Tok n2 = peek1(p); + if (is_punct(&n2, ')')) { advance(p); /* `void` */ return; /* `(void)` */ } @@ -6316,7 +633,6 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname, &ploc); p->in_param_decl--; - /* Adjust array/function parameter to pointer per §6.7.6.3. */ if (pty && pty->kind == TY_ARRAY) { pty = type_ptr(p->pool, pty->arr.elem); } else if (pty && pty->kind == TY_FUNC) { @@ -6338,15 +654,6 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, *nparams_out = (u16)n; } -/* Resolve or mint the ObjSymId for a function declaration. If the same - * function name was seen before in file scope (forward prototype, prior - * definition), reuse its symbol so the linker sees one definition. 
- * - * `dattrs` is the per-declarator attribute list (between `)` and `{`/`;`); - * combined with `specs->attrs` it feeds attr_list_to_decl so DF_WEAK / - * visibility / section / noreturn / alias_target land on the Decl before - * decl_declare mints the ObjSym. The out-params let parse_function_body - * propagate section_id and noreturn into CGFuncDesc. */ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, const DeclSpecs* specs, SrcLoc fname_loc, const Attr* dattrs, @@ -6358,11 +665,6 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, if (out_alias_target) *out_alias_target = 0; SymEntry* existing = scope_lookup(p, fname); if (existing && existing->kind == SEK_FUNC) { - /* Compatible-types check is Phase 10 territory; for v1 we trust the - * declarations agree. Returning the existing entry lets the body - * defs reuse the prior obj_sym. Attributes on a redeclaration apply - * only via the per-call decode here; the existing ObjSym already has - * its bind/visibility chosen at first sight. */ Decl tmp; memset(&tmp, 0, sizeof tmp); attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp); @@ -6398,10 +700,6 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, } } -/* Drive cg through a full function definition: build CGFuncDesc with the - * already-resolved symbol and ABI info, open a parameter scope, allocate - * FS_PARAM slots for each named param, dispatch cg_param, then parse the - * compound body. The `infos` array is the parser's per-param state. */ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, const ABIFuncInfo* abi, const ParamInfo* infos, u16 nparams, SrcLoc fname_loc, @@ -6411,9 +709,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, memset(&fd, 0, sizeof fd); fd.sym = fsym; - /* Phase 2: __attribute__((section)) on a function overrides the default - * .text placement. 
Falls back to the parser's default text section when - * no attribute named one. */ fd.text_section_id = (section_id != OBJ_SEC_NONE) ? section_id : p->text_sec; fd.group_id = OBJ_GROUP_NONE; @@ -6422,8 +717,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, fd.params = NULL; fd.nparams = nparams; fd.loc = fname_loc; - /* Propagate _Noreturn / __attribute__((noreturn)) to CG. Backends may - * elide the trailing epilogue; v1 backends ignore the bit. */ if (decl_flags & DF_NORETURN) fd.flags |= CGFD_NORETURN; if (nparams) { @@ -6433,11 +726,8 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, pds[i].index = i; pds[i].name = infos[i].name; pds[i].type = infos[i].type; - pds[i].slot = FRAME_SLOT_NONE; /* filled below */ + pds[i].slot = FRAME_SLOT_NONE; pds[i].abi = &abi->params[i]; - /* The aarch64 backend reads parts from `pds[i].abi->parts` directly; - * `incoming` is the materialized CGABIPart slot used by ABIs that - * pre-stage values. Leave NULL until a backend wires it up. */ pds[i].incoming = NULL; pds[i].nincoming = 0; pds[i].loc = infos[i].loc; @@ -6446,8 +736,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, } scope_push(p); /* parameter scope */ - /* Per-function label namespace and switch context — both are saved here - * for hygiene even though C forbids nested function definitions. */ GotoLabel* saved_goto_labels = p->goto_labels; SwitchCtx* saved_switch = p->cur_switch; p->goto_labels = NULL; @@ -6455,7 +743,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, cg_set_loc(p->cg, fname_loc); cg_func_begin(p->cg, &fd); - /* Allocate FS_PARAM slots and dispatch cg_param in declaration order. 
*/ for (u16 i = 0; i < nparams; ++i) { FrameSlotDesc fsd; FrameSlot s; @@ -6478,14 +765,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, } parse_compound_stmt(p); - /* Implicit fall-through return: emit a return so the function's epilogue - * always has a tail to chain into. For non-void scalar returns this - * returns a zero value, which is undefined behavior at the language - * level but a useful safety belt against trailing-fall-through. - * Aggregate returns can't synthesize a typed zero rvalue (no scalar - * source), so emit a bare ret — the epilogue still runs and the - * return value is whatever was last written into the destination - * (UB by the same token). */ if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID && fn_ty->fn.ret->kind != TY_STRUCT && fn_ty->fn.ret->kind != TY_UNION) { cg_push_int(p->cg, 0, fn_ty->fn.ret); @@ -6493,7 +772,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, } else { cg_ret(p->cg, 0); } - /* All goto targets must have been placed by some `name:` in the body. */ for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) { if (!gl->placed) { compiler_panic(p->c, gl->first_use, "goto to undefined label"); @@ -6505,9 +783,7 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, scope_pop(p); } -/* Parse one external declaration: function definition, function prototype, - * or (deferred) global object declaration. The declarator is consumed by - * parse_declarator before we know whether a body or `;` follows. */ +/* Parse one external declaration. */ static void parse_external_decl(Parser* p) { DeclSpecs specs; Sym name; @@ -6518,14 +794,8 @@ static void parse_external_decl(Parser* p) { perr(p, "expected declaration"); } - /* Tag-only declarations at file scope: `struct S;`, `enum E { ... };`, - * etc. The decl-specs registered the tag; nothing else to do. 
*/ if (accept_punct(p, ';')) return; - /* `typedef` at file scope: bind one-or-more declarator names as - * SEK_TYPEDEF in the current (file) scope. Goes through - * parse_declarator_full so compound targets (`typedef int (*FP)(int)`, - * `typedef int A[3]`) lower correctly. */ if (specs.storage == DS_TYPEDEF) { for (;;) { Sym tname = 0; @@ -6544,27 +814,17 @@ static void parse_external_decl(Parser* p) { return; } - /* Parse the declarator's pointer prefix and IDENT. Function and array - * declarator suffixes are recognized inline below. */ base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { perr(p, "expected declarator"); } name = p->cur.v.ident; loc = tok_loc(&p->cur); advance(p); - /* Per-declarator attrs collected from positions between the - * declarator-id and `=`/`,`/`;` (plus, for functions, between `)` - * and `{`/`;`). Chained in source order; attached to the SymEntry - * below so Phase 2 can read used/section/noreturn/alias/weak/ - * visibility/aligned without revisiting decl-spec attrs. */ Attr* dattrs = NULL; parse_attrs_into(p, &dattrs); - /* Array suffix(es) after the name pin this as an object declaration — - * e.g. `static int g[3] = {...}`. Apply each suffix in order so the - * resulting type is the array-of-T we'll allocate storage for. */ while (is_punct(&p->cur, '[')) { DeclSuffix s; if (!parse_decl_suffix(p, &s)) break; @@ -6574,8 +834,6 @@ static void parse_external_decl(Parser* p) { parse_attrs_into(p, &dattrs); if (is_punct(&p->cur, '(')) { - /* Function declaration or definition: build the type from the param - * list, then dispatch on `{` (definition) vs `;` (prototype). 
*/ ParamInfo* infos = NULL; u16 nparams = 0; u8 variadic = 0; @@ -6587,7 +845,6 @@ static void parse_external_decl(Parser* p) { advance(p); /* '(' */ parse_param_list(p, &infos, &nparams, &variadic); expect_punct(p, ')', "')' after parameter list"); - /* Attributes between `)` and `{`/`;` (e.g. `noreturn`, `section`). */ parse_attrs_into(p, &dattrs); if (nparams) { @@ -6611,11 +868,6 @@ static void parse_external_decl(Parser* p) { return; } if (accept_punct(p, ';')) { - /* Function prototype. If it carries `__attribute__((alias("t")))`, - * resolve `t` now and define this symbol as a copy of t's binding. - * Cross-TU aliases aren't in scope: the target must already be - * defined in this TU (matches the §"Alias resolution" note in - * doc/ATTRIBUTE.md). */ if (fn_alias_target != 0) { SymEntry* te = scope_lookup(p, fn_alias_target); if (!te) { @@ -6637,13 +889,12 @@ static void parse_external_decl(Parser* p) { obj_symbol_define(ob, fent->v.sym, ts->section_id, ts->value, ts->size); } - return; /* prototype only */ + return; } perr(p, "expected '{' or ';' after function declarator"); } - /* Global object declaration: `int g;`, `int g = 7;`, `extern int g;`, - * `static T g = ...;`, `const T g = ...;`. */ + /* Global object declaration. */ for (;;) { int has_init = is_punct(&p->cur, '='); int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init; @@ -6652,12 +903,6 @@ static void parse_external_decl(Parser* p) { SymEntry* e = NULL; if (existing && existing->kind == SEK_GLOBAL) { - /* Redeclaration: reuse the prior ObjSymId so the linker sees one - * symbol. Compatible-types checks live in Phase 10. - * §6.2.7 composite type: if either declarator gives a complete array - * size where the other is incomplete, the composite is the complete - * one — propagate that to the SymEntry so later uses (e.g. sizeof) - * see the size. 
*/ sym = existing->v.sym; e = existing; if (e->type && base_ty && e->type->kind == TY_ARRAY && @@ -6677,10 +922,6 @@ static void parse_external_decl(Parser* p) { decl_in.storage = DS_STATIC; decl_in.linkage = DL_INTERNAL; } else { - /* File-scope objects without an explicit storage class still have - * static storage duration and external linkage (§6.2.2 ¶5, - * §6.2.4 ¶3). Storing DS_EXTERN drives decl_declare to mint an - * obj_sym; DS_AUTO is reserved for block-scope autos. */ decl_in.storage = DS_EXTERN; decl_in.linkage = DL_EXTERNAL; } @@ -6695,8 +936,6 @@ static void parse_external_decl(Parser* p) { } attr_list_append(&e->attrs, dattrs); - /* The effective alignment is the max of _Alignas and any - * __attribute__((aligned(N))) seen in decl-specs or per-declarator. */ u32 attr_align = attrs_pick_aligned(specs.attrs); { u32 a2 = attrs_pick_aligned(dattrs); @@ -6706,9 +945,6 @@ static void parse_external_decl(Parser* p) { if (has_init) { advance(p); /* '=' */ - /* `T name[] = {...}` at file scope: peek the initializer to deduce - * the element count, then carry the completed type into the static - * emit + SymEntry. Mirrors the block-scope path. */ if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) { const Type* completed = complete_incomplete_array(p, base_ty); if (completed != base_ty) { @@ -6719,27 +955,20 @@ static void parse_external_decl(Parser* p) { define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/1, loc, align_eff); } else if (!is_pure_extern) { - /* Tentative def: emit a BSS reservation now. End-of-TU coalescing of - * multiple tentative defs into one is a Phase 4 follow-up; the - * Phase 4 corpus only has a single tentative def per TU. */ define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/0, loc, align_eff); } if (!accept_punct(p, ',')) break; - /* Next declarator: parse pointer prefix + IDENT, then loop. 
*/ base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { perr(p, "expected declarator after ','"); } name = p->cur.v.ident; loc = tok_loc(&p->cur); advance(p); - /* Reset per-declarator attrs for this iteration; collect post-IDENT - * and post-suffix attrs (e.g. `int a, b __attribute__((aligned(4)))`). */ dattrs = NULL; parse_attrs_into(p, &dattrs); - /* Optional array suffix on a global declarator (e.g. `int g[3]`). */ while (is_punct(&p->cur, '[')) { DeclSuffix s; if (!parse_decl_suffix(p, &s)) break; @@ -6750,8 +979,6 @@ static void parse_external_decl(Parser* p) { expect_punct(p, ';', "';' after global declaration"); } -static void parse_static_assert(Parser* p); - static void parse_translation_unit(Parser* p) { while (p->cur.kind != TOK_EOF) { if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { @@ -6784,13 +1011,10 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { p.abi = c->abi; p.pool = c->global; - /* Intern keyword spellings. The lexer doesn't know about keywords; this - * is the canonical bucketization site (DESIGN §5.1 / lex.h §6.4). */ for (i = (CKw)1; i < KW_COUNT; ++i) { p.kw_sym[i] = pool_intern_cstr(p.pool, kw_names[i]); } - /* Builtin / atomic spellings — Phase 9. */ p.sym_b_alloca = pool_intern_cstr(p.pool, "__builtin_alloca"); p.sym_b_ctz = pool_intern_cstr(p.pool, "__builtin_ctz"); p.sym_b_expect = pool_intern_cstr(p.pool, "__builtin_expect"); @@ -6815,18 +1039,12 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { p.sym_a_thread_fence = pool_intern_cstr(p.pool, "__atomic_thread_fence"); p.sym_a_signal_fence = pool_intern_cstr(p.pool, "__atomic_signal_fence"); - /* File scope. */ p.scope = scope_new(&p, NULL); - /* Default text section. 
-ffunction-sections / explicit attribute(section) - * cases will replace this per-function; the spine uses ".text". */ text_name = pool_intern_cstr(p.pool, ".text"); p.text_sec = obj_section(decl_obj(decls), text_name, SEC_TEXT, SF_ALLOC | SF_EXEC, 4u); - /* Pull the first token. PP yields preprocessed C tokens; directives - * have already been consumed. fetch_tok performs adjacent-string-literal - * fusion (C11 §6.4.5 ¶5) before tokens reach the parse productions. */ p.cur = fetch_tok(&p); parse_translation_unit(&p); diff --git a/src/parse/parse_expr.c b/src/parse/parse_expr.c @@ -0,0 +1,1795 @@ +/* parse_expr.c — precedence climbing, unary/primary, literal decoding, + * constant evaluation. */ + +#include "parse/parse_priv.h" + +static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } +static const Type* ty_size_t(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + + +static CKw ident_kw(const Parser* p, Sym name) { + return ident_kw_inline(p, name); +} + +static int accept_kw(Parser* p, CKw k) { + if (is_kw(p, &p->cur, k)) { + advance(p); + return 1; + } + return 0; +} + +/* ============================================================ + * Literal parsing + * ============================================================ */ + +i64 parse_int_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + i64 base = 10; + i64 acc = 0; + if (!s) perr(p, "bad numeric literal"); + if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + base = 16; + i = 2; + } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { + base = 2; + i = 2; + } else if (len >= 1 && s[0] == '0') { + base = 8; + i = 1; + } + for (; i < len; ++i) { + int c = (unsigned char)s[i]; + int dv; + if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; + if (c >= '0' && c <= '9') + dv = c - '0'; + else if (c >= 'a' && c <= 'f') + dv = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + dv = c - 
'A' + 10; + else + perr(p, "bad digit in numeric literal"); + if (dv >= base) perr(p, "digit out of range for base"); + acc = acc * base + dv; + } + return acc; +} + +static const Type* int_literal_type(Parser* p, const Tok* t) { + int u = (t->flags & TF_INT_U) != 0; + int l = (t->flags & TF_INT_L) != 0; + int ll = (t->flags & TF_INT_LL) != 0; + TypeKind k; + if (ll) k = u ? TY_ULLONG : TY_LLONG; + else if (l) k = u ? TY_ULONG : TY_LONG; + else if (u) k = TY_UINT; + else k = TY_INT; + return type_prim(p->pool, k); +} + +static double parse_float_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + int is_hex = 0; + double v = 0.0; + int exp = 0; + int dec_exp = 0; + int frac_seen = 0; + if (!s) perr(p, "bad float literal"); + if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + is_hex = 1; + i = 2; + } + while (i < len) { + int c = (unsigned char)s[i]; + int dv; + if (c == '.' || c == 'e' || c == 'E' || c == 'p' || c == 'P' || + c == 'f' || c == 'F' || c == 'l' || c == 'L') + break; + if (c >= '0' && c <= '9') dv = c - '0'; + else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; + else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; + else perr(p, "bad digit in float literal"); + v = v * (is_hex ? 16.0 : 10.0) + (double)dv; + i++; + } + if (i < len && s[i] == '.') { + i++; + while (i < len) { + int c = (unsigned char)s[i]; + int dv; + if (c == 'e' || c == 'E' || c == 'p' || c == 'P' || + c == 'f' || c == 'F' || c == 'l' || c == 'L') + break; + if (c >= '0' && c <= '9') dv = c - '0'; + else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; + else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; + else perr(p, "bad digit in float literal"); + v = v * (is_hex ? 
16.0 : 10.0) + (double)dv; + exp -= 1; + frac_seen = 1; + i++; + } + } + (void)frac_seen; + if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) { + int neg = 0; + int n = 0; + int hex_exp = (s[i] == 'p' || s[i] == 'P'); + i++; + if (i < len && (s[i] == '+' || s[i] == '-')) { + if (s[i] == '-') neg = 1; + i++; + } + while (i < len) { + int c = (unsigned char)s[i]; + if (c < '0' || c > '9') break; + n = n * 10 + (c - '0'); + i++; + } + dec_exp = neg ? -n : n; + if (hex_exp) { + dec_exp += exp * 4; + exp = 0; + } + } + while (exp < 0) { v /= (is_hex ? 16.0 : 10.0); exp++; } + while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; } + if (is_hex) { + while (dec_exp < 0) { v /= 2.0; dec_exp++; } + while (dec_exp > 0) { v *= 2.0; dec_exp--; } + } else { + while (dec_exp < 0) { v /= 10.0; dec_exp++; } + while (dec_exp > 0) { v *= 10.0; dec_exp--; } + } + return v; +} + +static const Type* float_literal_type(Parser* p, const Tok* t) { + if (t->flags & TF_FLT_F) return type_prim(p->pool, TY_FLOAT); + if (t->flags & TF_FLT_L) return type_prim(p->pool, TY_LDOUBLE); + return type_prim(p->pool, TY_DOUBLE); +} + +static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi, + SrcLoc loc) { + size_t i = *pi; + i64 v; + int c; + if (i >= len) compiler_panic(p->c, loc, "truncated character literal"); + if (s[i] != '\\') { + v = (unsigned char)s[i++]; + *pi = i; + return v; + } + i++; + if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal"); + c = (unsigned char)s[i++]; + switch (c) { + case 'n': v = '\n'; break; + case 't': v = '\t'; break; + case 'r': v = '\r'; break; + case 'b': v = '\b'; break; + case 'f': v = '\f'; break; + case 'v': v = '\v'; break; + case 'a': v = '\a'; break; + case '\\': v = '\\'; break; + case '\'': v = '\''; break; + case '"': v = '"'; break; + case '?': v = '?'; break; + case 'x': { + i64 hex = 0; + int any = 0; + while (i < len) { + int d = (unsigned char)s[i]; + int dv; + if (d >= '0' && d <= '9') dv = 
d - '0'; + else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10; + else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10; + else break; + hex = hex * 16 + dv; + any = 1; + i++; + } + if (!any) compiler_panic(p->c, loc, "\\x with no hex digits"); + v = hex & 0xff; + break; + } + default: + if (c >= '0' && c <= '7') { + i64 oct = c - '0'; + int n = 1; + while (n < 3 && i < len && s[i] >= '0' && s[i] <= '7') { + oct = oct * 8 + (s[i] - '0'); + i++; + n++; + } + v = oct & 0xff; + } else { + v = c; + } + break; + } + *pi = i; + return v; +} + +i64 decode_char_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + i64 v; + if (!s) perr(p, "bad char literal"); + if (t->flags & TF_STR_U8) i = 2; + else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; + if (i >= len || s[i] != '\'') perr(p, "malformed character literal"); + i++; + if (i >= len || s[i] == '\'') perr(p, "empty character literal"); + v = decode_one_char(p, s, len, &i, t->loc); + if (i >= len || s[i] != '\'') { + perr(p, "multi-character constants are not supported"); + } + return v; +} + +u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + Heap* h = p->c->env->heap; + u8* buf; + size_t k = 0; + if (!s) perr(p, "bad string literal"); + if (t->flags & TF_STR_U8) i = 2; + else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; + if (i >= len || s[i] != '"') perr(p, "malformed string literal"); + i++; + buf = (u8*)h->alloc(h, len + 1, 1); + if (!buf) perr(p, "out of memory in string literal"); + while (i < len && s[i] != '"') { + i64 ch = decode_one_char(p, s, len, &i, t->loc); + buf[k++] = (u8)ch; + } + buf[k++] = 0; + *nlen_out = k; + return buf; +} + +ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) { + ObjBuilder* ob = decl_obj(p->decls); + Sym secname = pool_intern_cstr(p->pool, ".rodata"); + 
ObjSecId sec = obj_section(ob, secname, SEC_RODATA, SF_ALLOC, 1u); + u32 base = obj_pos(ob, sec); + Sym lname; + ObjSymId sym; + char namebuf[32]; + static u32 counter; + int wlen = 0; + u32 id = ++counter; + namebuf[wlen++] = '.'; + namebuf[wlen++] = 'L'; + namebuf[wlen++] = 'C'; + { + char digits[12]; + int dn = 0; + if (id == 0) digits[dn++] = '0'; + while (id) { + digits[dn++] = (char)('0' + (id % 10)); + id /= 10; + } + while (dn) namebuf[wlen++] = digits[--dn]; + } + namebuf[wlen] = 0; + lname = pool_intern(p->pool, namebuf, (size_t)wlen); + sym = obj_symbol(ob, lname, SB_LOCAL, SK_OBJ, sec, base, n); + { + u8* dst = obj_reserve(ob, sec, n); + if (dst) memcpy(dst, bytes, n); + } + return sym; +} + +/* ============================================================ + * Constant expression evaluator (cexpr_*) + * ============================================================ */ + +static i64 cexpr_unary(Parser* p, SrcLoc loc); + +static i64 cexpr_mul(Parser* p, SrcLoc loc) { + i64 v = cexpr_unary(p, loc); + for (;;) { + if (accept_punct(p, '*')) v = v * cexpr_unary(p, loc); + else if (accept_punct(p, '/')) { + i64 r = cexpr_unary(p, loc); + if (r == 0) compiler_panic(p->c, loc, "division by zero in constant"); + v = v / r; + } else if (accept_punct(p, '%')) { + i64 r = cexpr_unary(p, loc); + if (r == 0) compiler_panic(p->c, loc, "modulo by zero in constant"); + v = v % r; + } else break; + } + return v; +} +static i64 cexpr_add(Parser* p, SrcLoc loc) { + i64 v = cexpr_mul(p, loc); + for (;;) { + if (accept_punct(p, '+')) v = v + cexpr_mul(p, loc); + else if (accept_punct(p, '-')) v = v - cexpr_mul(p, loc); + else break; + } + return v; +} +static i64 cexpr_shift(Parser* p, SrcLoc loc) { + i64 v = cexpr_add(p, loc); + for (;;) { + if (accept_punct(p, P_SHL)) v = v << cexpr_add(p, loc); + else if (accept_punct(p, P_SHR)) v = v >> cexpr_add(p, loc); + else break; + } + return v; +} +static i64 cexpr_rel(Parser* p, SrcLoc loc) { + i64 v = cexpr_shift(p, loc); + for (;;) 
{ + if (accept_punct(p, P_LE)) v = v <= cexpr_shift(p, loc); + else if (accept_punct(p, P_GE)) v = v >= cexpr_shift(p, loc); + else if (is_punct(&p->cur, '<')) { + advance(p); v = v < cexpr_shift(p, loc); + } else if (is_punct(&p->cur, '>')) { + advance(p); v = v > cexpr_shift(p, loc); + } else break; + } + return v; +} +static i64 cexpr_eq(Parser* p, SrcLoc loc) { + i64 v = cexpr_rel(p, loc); + for (;;) { + if (accept_punct(p, P_EQ)) v = (v == cexpr_rel(p, loc)); + else if (accept_punct(p, P_NE)) v = (v != cexpr_rel(p, loc)); + else break; + } + return v; +} +static i64 cexpr_band(Parser* p, SrcLoc loc) { + i64 v = cexpr_eq(p, loc); + while (is_punct(&p->cur, '&') && !is_punct(&p->cur, P_AND)) { + advance(p); + v = v & cexpr_eq(p, loc); + } + return v; +} +static i64 cexpr_bxor(Parser* p, SrcLoc loc) { + i64 v = cexpr_band(p, loc); + while (accept_punct(p, '^')) v = v ^ cexpr_band(p, loc); + return v; +} +static i64 cexpr_bor(Parser* p, SrcLoc loc) { + i64 v = cexpr_bxor(p, loc); + while (is_punct(&p->cur, '|') && !is_punct(&p->cur, P_OR)) { + advance(p); + v = v | cexpr_bxor(p, loc); + } + return v; +} + +static i64 cexpr_unary(Parser* p, SrcLoc loc) { + if (accept_punct(p, '+')) return cexpr_unary(p, loc); + if (accept_punct(p, '-')) return -cexpr_unary(p, loc); + if (accept_punct(p, '~')) return ~cexpr_unary(p, loc); + if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 
0 : 1; + if (accept_kw(p, KW_SIZEOF)) { + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after sizeof type-name"); + return (i64)abi_sizeof(p->abi, t); + } + } + } + parse_unary(p); + { + const Type* ty = cg_top_type(p->cg); + i64 sz = (i64)abi_sizeof(p->abi, ty); + cg_drop(p->cg); + return sz; + } + } + if (accept_kw(p, KW_ALIGNOF)) { + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after _Alignof type-name"); + return (i64)abi_alignof(p->abi, t); + } + } + } + parse_unary(p); + { + const Type* ty = cg_top_type(p->cg); + i64 al = (i64)abi_alignof(p->abi, ty); + cg_drop(p->cg); + return al; + } + } + if (accept_punct(p, '(')) { + if (starts_type_name(p, &p->cur)) { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after cast type-name"); + { + i64 v = cexpr_unary(p, loc); + u32 sz = abi_sizeof(p->abi, t); + int is_signed = abi_type_info(p->abi, t).signed_; + if (sz < 8) { + u64 mask = (1ull << (sz * 8u)) - 1ull; + u64 uv = (u64)v & mask; + if (is_signed) { + u64 sign = 1ull << (sz * 8u - 1u); + v = (i64)((uv ^ sign) - sign); + } else { + v = (i64)uv; + } + } + return v; + } + } + { + i64 v = cexpr_bor(p, loc); + expect_punct(p, ')', "')' in constant expression"); + return v; + } + } + if (p->cur.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &p->cur); + advance(p); + return v; + } + if (p->cur.kind == TOK_CHR) { + i64 v = decode_char_literal(p, &p->cur); + advance(p); + return v; + } + if (p->cur.kind == TOK_IDENT) { + SymEntry* e = scope_lookup(p, p->cur.v.ident); + if (e && e->kind == SEK_ENUM_CST) { + advance(p); + return e->v.enum_value; + } + compiler_panic(p->c, loc, "non-constant identifier in constant expression"); + } + compiler_panic(p->c, loc, "expected constant expression"); +} + +i64 
eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } + +/* ============================================================ + * to_rvalue + * ============================================================ */ + +void to_rvalue(Parser* p) { + const Type* t = cg_top_type(p->cg); + if (t) { + if (t->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem)); + return; + } + if (t->kind == TY_FUNC) { + cg_addr(p->cg); + return; + } + if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; + } + cg_load(p->cg); +} + +/* ============================================================ + * coerce_top_to_lvalue (used by assignment / initializers) + * ============================================================ */ + +void coerce_top_to_lvalue(Parser* p) { + const Type* src = cg_top_type(p->cg); + const Type* dst = cg_top2_type(p->cg); + if (!src || !dst || src == dst) return; + if (type_is_arith(src) && type_is_arith(dst)) { + cg_convert(p->cg, dst); + } +} + +/* ============================================================ + * Builtin call handling + * ============================================================ */ + +static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) { + const Type* cur = base; + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name in __builtin_offsetof"); + } + for (;;) { + if (cur->kind == TY_STRUCT || cur->kind == TY_UNION) { + Sym mname = p->cur.v.ident; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + /* find_field is static in parse_type.c; we need it here. + * We call abi_record_layout directly inline. 
*/ + const ABIRecordLayout* L = abi_record_layout(p->abi, cur); + if (!L) perr(p, "no such member in __builtin_offsetof"); + int found = 0; + for (u16 i = 0; i < cur->rec.nfields; ++i) { + const Field* f = &cur->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + } + (void)mf; + if (!found) perr(p, "no such member in __builtin_offsetof"); + advance(p); + *off += moff; + cur = mty; + } else if (cur->kind == TY_ARRAY) { + /* fall through to bracket branch */ + } else { + perr(p, "__builtin_offsetof step into non-aggregate"); + } + if (is_punct(&p->cur, '.')) { + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '.'"); + } + continue; + } + if (is_punct(&p->cur, '[')) { + advance(p); + i64 idx = eval_const_int(p, p->cur.loc); + expect_punct(p, ']', "']' in __builtin_offsetof"); + if (cur->kind != TY_ARRAY) { + perr(p, "__builtin_offsetof '[' on non-array"); + } + *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx); + cur = cur->arr.elem; + continue; + } + break; + } + return cur; +} + +static int try_parse_builtin_call(Parser* p) { + Sym name = p->cur.v.ident; + SrcLoc loc = p->cur.loc; + + if (name != p->sym_b_alloca && name != p->sym_b_ctz && + name != p->sym_b_expect && + name != p->sym_b_offsetof && name != p->sym_b_va_start && + name != p->sym_b_va_arg && name != p->sym_b_va_end && + name != p->sym_b_va_copy && name != p->sym_a_load_n && + name != p->sym_a_store_n && name != p->sym_a_exchange_n && + name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub && + name != p->sym_a_fetch_and && name != p->sym_a_fetch_or && + name != p->sym_a_fetch_xor && name != p->sym_a_cas_n && + name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) { + return 0; + } + advance(p); /* IDENT */ + expect_punct(p, '(', "'(' after builtin"); + + if (name == p->sym_b_offsetof) { + const Type* root = 
parse_type_name(p); + expect_punct(p, ',', "',' in __builtin_offsetof"); + u32 off = 0; + (void)offsetof_designator(p, root, &off); + expect_punct(p, ')', "')' after __builtin_offsetof"); + cg_push_int(p->cg, (i64)off, ty_size_t(p)); + return 1; + } + + if (name == p->sym_b_expect) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __builtin_expect"); + parse_assign_expr(p); + cg_drop(p->cg); + expect_punct(p, ')', "')' after __builtin_expect"); + return 1; + } + + if (name == p->sym_b_alloca) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after __builtin_alloca"); + cg_set_loc(p->cg, loc); + cg_alloca(p->cg); + return 1; + } + + if (name == p->sym_b_ctz) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after __builtin_ctz"); + cg_set_loc(p->cg, loc); + cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ); + return 1; + } + + if (name == p->sym_b_va_start) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_start"); + parse_assign_expr(p); + cg_drop(p->cg); + expect_punct(p, ')', "')' after __builtin_va_start"); + cg_set_loc(p->cg, loc); + cg_va_start_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_end) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ')', "')' after __builtin_va_end"); + cg_set_loc(p->cg, loc); + cg_va_end_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_copy) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_copy"); + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ')', "')' after __builtin_va_copy"); + cg_set_loc(p->cg, loc); + cg_va_copy_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_arg) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_arg"); + const Type* ty = parse_type_name(p); + expect_punct(p, ')', "')' after 
__builtin_va_arg"); + cg_set_loc(p->cg, loc); + cg_va_arg_(p->cg, ty); + return 1; + } + + if (name == p->sym_a_load_n) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_load_n"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_load_n"); + cg_set_loc(p->cg, loc); + cg_atomic_load(p->cg, (MemOrder)ord); + return 1; + } + + if (name == p->sym_a_store_n) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_store_n"); + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_store_n"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_store_n"); + cg_set_loc(p->cg, loc); + cg_atomic_store(p->cg, (MemOrder)ord); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_a_thread_fence || name == p->sym_a_signal_fence) { + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after atomic fence"); + cg_set_loc(p->cg, loc); + cg_fence(p->cg, (MemOrder)ord); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_a_cas_n) { + parse_assign_expr(p); to_rvalue(p); /* ptr */ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + + parse_assign_expr(p); to_rvalue(p); /* &expected */ + const Type* eptr_ty = cg_top_type(p->cg); + if (!eptr_ty || eptr_ty->kind != TY_PTR) { + perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer"); + } + const Type* val_ty = eptr_ty->ptr.pointee; + + FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd); + fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL; + FrameSlot eslot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_swap(p->cg); + cg_store(p->cg); cg_drop(p->cg); + + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_load(p->cg); + cg_deref(p->cg, val_ty); + cg_load(p->cg); + + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + parse_assign_expr(p); to_rvalue(p); /* desired 
*/ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + + (void)eval_const_int(p, p->cur.loc); /* weak */ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + i64 succ = eval_const_int(p, p->cur.loc); + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + i64 fail = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_compare_exchange_n"); + + cg_set_loc(p->cg, loc); + cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail); + + const Type* ok_ty = cg_top_type(p->cg); + FrameSlotDesc okd; memset(&okd, 0, sizeof okd); + okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL; + FrameSlot okslot = cg_local(p->cg, &okd); + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); + + FrameSlotDesc pd; memset(&pd, 0, sizeof pd); + pd.type = val_ty; + pd.size = abi_sizeof(p->abi, val_ty); + pd.align = abi_alignof(p->abi, val_ty); + pd.kind = FS_LOCAL; + FrameSlot pslot = cg_local(p->cg, &pd); + cg_push_local_typed(p->cg, pslot, val_ty); + cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); + + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_load(p->cg); + CGLabel L_done = cg_label_new(p->cg); + cg_branch_true(p->cg, L_done); + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_load(p->cg); + cg_deref(p->cg, val_ty); + cg_push_local_typed(p->cg, pslot, val_ty); + cg_load(p->cg); + cg_store(p->cg); cg_drop(p->cg); + cg_label_place(p->cg, L_done); + + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_load(p->cg); + return 1; + } + + AtomicOp op; + if (name == p->sym_a_exchange_n) op = AO_XCHG; + else if (name == p->sym_a_fetch_add) op = AO_ADD; + else if (name == p->sym_a_fetch_sub) op = AO_SUB; + else if (name == p->sym_a_fetch_and) op = AO_AND; + else if (name == p->sym_a_fetch_or) op = AO_OR; + else if (name == p->sym_a_fetch_xor) op = AO_XOR; + else { perr(p, "internal: unhandled builtin"); } + + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in atomic builtin"); 
+ parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in atomic builtin"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after atomic builtin"); + cg_set_loc(p->cg, loc); + cg_atomic_rmw(p->cg, op, (MemOrder)ord); + return 1; +} + +/* ============================================================ + * parse_primary, parse_postfix, parse_unary + * ============================================================ */ + +static void parse_primary(Parser* p) { + Tok t = p->cur; + if (t.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &t); + const Type* lty = int_literal_type(p, &t); + advance(p); + cg_push_int(p->cg, v, lty); + return; + } + if (t.kind == TOK_FLT) { + double v = parse_float_literal(p, &t); + const Type* lty = float_literal_type(p, &t); + advance(p); + cg_push_float(p->cg, v, lty); + return; + } + if (is_punct(&t, '(')) { + advance(p); + parse_expr(p); + expect_punct(p, ')', "')'"); + return; + } + if (t.kind == TOK_IDENT) { + SymEntry* e; + if (ident_kw(p, t.v.ident) != KW_NONE) { + perr(p, "unexpected keyword in expression"); + } + { + Tok n = peek1(p); + if (is_punct(&n, '(') && try_parse_builtin_call(p)) return; + } + e = scope_lookup(p, t.v.ident); + if (!e) { + size_t nlen = 0; + const char* nm = pool_str(p->pool, t.v.ident, &nlen); + compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen, + nm ? 
nm : "?"); + } + advance(p); + switch (e->kind) { + case SEK_LOCAL: + cg_push_local_typed(p->cg, e->v.slot, e->type); + if (e->vla_byte_slot != FRAME_SLOT_NONE) { + p->last_pushed_vla_slot = e->vla_byte_slot; + } + return; + case SEK_GLOBAL: + case SEK_FUNC: + cg_push_global(p->cg, e->v.sym, e->type); + return; + case SEK_ENUM_CST: + cg_push_int(p->cg, e->v.enum_value, e->type); + return; + case SEK_TYPEDEF: + default: + perr(p, "identifier is not a value"); + } + } + if (t.kind == TOK_CHR) { + i64 v = decode_char_literal(p, &t); + advance(p); + cg_push_int(p->cg, v, ty_int(p)); + return; + } + if (t.kind == TOK_STR) { + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + ObjSymId sym = emit_string_to_rodata(p, bytes, n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); + { + const Type* char_ty = type_prim(p->pool, TY_CHAR); + const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); + cg_push_global(p->cg, sym, arr_ty); + } + return; + } + perr(p, "expected expression"); +} + +static void parse_postfix(Parser* p) { + parse_primary(p); + for (;;) { + Tok t = p->cur; + if (is_punct(&t, P_INC)) { + advance(p); + cg_inc_dec(p->cg, BO_IADD, /*post=*/1); + continue; + } + if (is_punct(&t, P_DEC)) { + advance(p); + cg_inc_dec(p->cg, BO_ISUB, /*post=*/1); + continue; + } + if (is_punct(&t, '(')) { + const Type* top = cg_top_type(p->cg); + const Type* fn_type; + if (top && top->kind == TY_FUNC) { + fn_type = top; + } else if (top && top->kind == TY_PTR && top->ptr.pointee && + top->ptr.pointee->kind == TY_FUNC) { + fn_type = top->ptr.pointee; + cg_load(p->cg); + } else { + perr(p, "called object is not a function"); + } + advance(p); /* '(' */ + u32 nargs = 0; + if (!is_punct(&p->cur, ')')) { + for (;;) { + parse_assign_expr(p); + to_rvalue(p); + ++nargs; + if (!accept_punct(p, ',')) break; + } + } + expect_punct(p, ')', "')' after argument list"); + if (fn_type->fn.nparams != nargs && !fn_type->fn.variadic) { + perr(p, "wrong number of 
arguments"); + } + if (fn_type->fn.variadic && nargs < fn_type->fn.nparams) { + perr(p, "too few arguments to variadic function"); + } + cg_call(p->cg, nargs, fn_type); + if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) { + cg_push_int(p->cg, 0, ty_int(p)); + } + continue; + } + if (is_punct(&t, '[')) { + const Type* lt0 = cg_top_type(p->cg); + advance(p); /* '[' */ + if (lt0 && lt0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem)); + } else if (lt0 && lt0->kind == TY_PTR) { + cg_load(p->cg); + } + parse_expr(p); + { + const Type* it0 = cg_top_type(p->cg); + if (it0 && it0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem)); + } else { + to_rvalue(p); + } + } + expect_punct(p, ']', "']' after subscript"); + { + const Type* lt = cg_top2_type(p->cg); + const Type* it = cg_top_type(p->cg); + const Type* elem; + if (lt && lt->kind == TY_PTR && type_is_int(it)) { + elem = lt->ptr.pointee; + } else if (it && it->kind == TY_PTR && type_is_int(lt)) { + cg_swap(p->cg); + elem = it->ptr.pointee; + } else { + perr(p, "invalid subscript: needs one pointer and one integer"); + } + if (!elem) perr(p, "subscript on incomplete pointee"); + u32 esz = abi_sizeof(p->abi, elem); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + cg_deref(p->cg, elem); + } + continue; + } + if (is_punct(&t, '.')) { + const Type* lt = cg_top_type(p->cg); + Sym mname; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + advance(p); /* '.' 
*/ + if (!lt || (lt->kind != TY_STRUCT && lt->kind != TY_UNION)) { + perr(p, "request for member in something that is not a struct or union"); + } + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '.'"); + } + mname = p->cur.v.ident; + advance(p); + { + const ABIRecordLayout* L = abi_record_layout(p->abi, lt); + if (!L) perr(p, "no such member"); + int found = 0; + for (u16 i = 0; i < lt->rec.nfields; ++i) { + const Field* f = &lt->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + /* anonymous member flattening */ + if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || + f->type->kind == TY_UNION)) { + const Type* inner_ty = NULL; + u32 inner_off = 0; + const Field* inner_f = NULL; + const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); + if (IL) { + for (u16 j = 0; j < f->type->rec.nfields; ++j) { + const Field* ff = &f->type->rec.fields[j]; + if (ff->name == mname && mname != 0) { + inner_ty = ff->type; + inner_off = IL->fields[j].offset; + inner_f = ff; + break; + } + } + } + if (inner_ty) { + mty = inner_ty; + moff = L->fields[i].offset + inner_off; + mf = inner_f; + found = 1; + break; + } + } + } + if (!found) perr(p, "no such member"); + } + (void)mf; + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, mty)); + if (moff > 0) { + cg_push_int(p->cg, (i64)moff, ty_size_t(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, mty); + continue; + } + if (is_punct(&t, P_ARROW)) { + const Type* lt0; + const Type* rec_ty; + Sym mname; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + advance(p); /* `->` */ + to_rvalue(p); + lt0 = cg_top_type(p->cg); + if (!lt0 || lt0->kind != TY_PTR) { + perr(p, "'->' requires a pointer operand"); + } + rec_ty = lt0->ptr.pointee; + if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) { + perr(p, "'->' on pointer to 
non-struct/union"); + } + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '->'"); + } + mname = p->cur.v.ident; + advance(p); + { + const ABIRecordLayout* L = abi_record_layout(p->abi, rec_ty); + if (!L) perr(p, "no such member"); + int found = 0; + for (u16 i = 0; i < rec_ty->rec.nfields; ++i) { + const Field* f = &rec_ty->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || + f->type->kind == TY_UNION)) { + const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); + if (IL) { + for (u16 j = 0; j < f->type->rec.nfields; ++j) { + const Field* ff = &f->type->rec.fields[j]; + if (ff->name == mname && mname != 0) { + mty = ff->type; + moff = L->fields[i].offset + IL->fields[j].offset; + mf = ff; + found = 1; + break; + } + } + } + if (found) break; + } + } + if (!found) perr(p, "no such member"); + } + (void)mf; + if (moff > 0) { + cg_push_int(p->cg, (i64)moff, ty_size_t(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, mty); + continue; + } + break; + } +} + +void parse_unary(Parser* p) { + Tok t = p->cur; + if (is_punct(&t, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + const Type* dst; + const Type* src; + advance(p); /* '(' */ + dst = parse_type_name(p); + expect_punct(p, ')', "')' after type-name"); + if (is_punct(&p->cur, '{')) { + FrameSlotDesc fsd; + FrameSlot slot; + const Type* lit_ty = dst; + if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) { + lit_ty = complete_incomplete_array(p, lit_ty); + } + memset(&fsd, 0, sizeof fsd); + fsd.type = lit_ty; + fsd.size = abi_sizeof(p->abi, lit_ty); + fsd.align = abi_alignof(p->abi, lit_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + slot = cg_local(p->cg, &fsd); + if (lit_ty && (lit_ty->kind == TY_ARRAY || lit_ty->kind == TY_STRUCT || + lit_ty->kind == 
TY_UNION)) { + init_at(p, slot, lit_ty, 0, lit_ty); + } else { + init_at(p, slot, lit_ty, 0, lit_ty); + } + cg_push_local_typed(p->cg, slot, lit_ty); + return; + } + parse_unary(p); + to_rvalue(p); + if (dst && dst->kind == TY_VOID) { + cg_drop(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return; + } + src = cg_top_type(p->cg); + if (src && src->kind == TY_PTR && dst->kind == TY_PTR) { + cg_retag_top(p->cg, dst); + return; + } + cg_convert(p->cg, dst); + return; + } + } + if (is_punct(&t, '+')) { + advance(p); + parse_unary(p); + to_rvalue(p); + return; + } + if (is_punct(&t, '-')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_NEG); + return; + } + if (is_punct(&t, '!')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_push_int(p->cg, 0, ty_int(p)); + cg_cmp(p->cg, CMP_EQ); + return; + } + if (is_punct(&t, '~')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_BNOT); + return; + } + if (is_punct(&t, '&')) { + advance(p); + parse_unary(p); + cg_addr(p->cg); + return; + } + if (is_punct(&t, '*')) { + const Type* pty; + const Type* pointee; + advance(p); + parse_unary(p); + to_rvalue(p); + pty = cg_top_type(p->cg); + if (!pty || pty->kind != TY_PTR) { + perr(p, "indirection requires pointer operand"); + } + pointee = pty->ptr.pointee; + if (pointee && pointee->kind == TY_VOID) { + perr(p, "dereferencing pointer to incomplete type"); + } + cg_deref(p->cg, pointee); + return; + } + if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) { + BinOp bop = is_punct(&t, P_INC) ? 
BO_IADD : BO_ISUB; + advance(p); + parse_unary(p); + cg_inc_dec(p->cg, bop, /*post=*/0); + return; + } + if (is_kw(p, &t, KW_SIZEOF)) { + const Type* ty = NULL; + FrameSlot vla_slot = FRAME_SLOT_NONE; + advance(p); + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + ty = parse_type_name(p); + expect_punct(p, ')', "')'"); + } else { + p->last_pushed_vla_slot = FRAME_SLOT_NONE; + parse_unary(p); + ty = cg_top_type(p->cg); + vla_slot = p->last_pushed_vla_slot; + cg_drop(p->cg); + } + } else { + p->last_pushed_vla_slot = FRAME_SLOT_NONE; + parse_unary(p); + ty = cg_top_type(p->cg); + vla_slot = p->last_pushed_vla_slot; + cg_drop(p->cg); + } + if (vla_slot != FRAME_SLOT_NONE) { + cg_push_local_typed(p->cg, vla_slot, ty_size_t(p)); + cg_load(p->cg); + } else { + cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p)); + } + return; + } + if (is_kw(p, &t, KW_GENERIC)) { + advance(p); + expect_punct(p, '(', "'('"); + parse_assign_expr(p); + to_rvalue(p); + const Type* ctl_ty = cg_top_type(p->cg); + cg_drop(p->cg); + expect_punct(p, ',', "','"); + int emitted = 0; + Tok* default_buf = NULL; + u32 default_len = 0; + for (;;) { + const Type* assoc_ty = NULL; + int is_default = 0; + if (is_kw(p, &p->cur, KW_DEFAULT)) { + advance(p); + is_default = 1; + } else { + assoc_ty = parse_type_name(p); + } + expect_punct(p, ':', "':' in _Generic association"); + int take = 0; + if (!emitted && !is_default && ctl_ty && assoc_ty && + ctl_ty->kind == assoc_ty->kind) { + take = 1; + } + if (take) { + parse_assign_expr(p); + emitted = 1; + } else if (is_default && !default_buf) { + u32 cap = 16; + Tok* buf = arena_array(p->c->tu, Tok, cap); + u32 len = 0; + int paren_depth = 0, brack_depth = 0, brace_depth = 0; + while (p->cur.kind != TOK_EOF) { + if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { + if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; + } + if (len == cap) { + u32 new_cap = cap * 2; + Tok* nv = 
arena_array(p->c->tu, Tok, new_cap); + if (!nv) perr(p, "out of memory recording _Generic default"); + memcpy(nv, buf, len * sizeof(Tok)); + buf = nv; + cap = new_cap; + } + buf[len++] = p->cur; + if (is_punct(&p->cur, '(')) ++paren_depth; + else if (is_punct(&p->cur, ')')) --paren_depth; + else if (is_punct(&p->cur, '[')) ++brack_depth; + else if (is_punct(&p->cur, ']')) --brack_depth; + else if (is_punct(&p->cur, '{')) ++brace_depth; + else if (is_punct(&p->cur, '}')) --brace_depth; + advance(p); + } + if (len == cap) { + u32 new_cap = cap + 1; + Tok* nv = arena_array(p->c->tu, Tok, new_cap); + if (!nv) perr(p, "out of memory recording _Generic default"); + memcpy(nv, buf, len * sizeof(Tok)); + buf = nv; + cap = new_cap; + } + memset(&buf[len], 0, sizeof(Tok)); + buf[len].kind = TOK_PUNCT; + buf[len].v.punct = ','; + ++len; + default_buf = buf; + default_len = len; + } else { + int paren_depth = 0; + int brack_depth = 0; + int brace_depth = 0; + while (p->cur.kind != TOK_EOF) { + if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { + if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; + } + if (is_punct(&p->cur, '(')) ++paren_depth; + else if (is_punct(&p->cur, ')')) --paren_depth; + else if (is_punct(&p->cur, '[')) ++brack_depth; + else if (is_punct(&p->cur, ']')) --brack_depth; + else if (is_punct(&p->cur, '{')) ++brace_depth; + else if (is_punct(&p->cur, '}')) --brace_depth; + advance(p); + } + } + if (!accept_punct(p, ',')) break; + } + if (!emitted && default_buf) { + Tok* save_replay = p->replay; + u32 save_cap = p->replay_cap; + u32 save_len = p->replay_len; + u32 save_pos = p->replay_pos; + u8 save_active = p->replay_active; + Tok save_cur = p->cur; + int save_has_next = p->has_next; + p->replay = default_buf; + p->replay_cap = default_len; + p->replay_len = default_len; + p->replay_pos = 1; + p->replay_active = 1; + p->cur = default_buf[0]; + p->has_next = 0; + parse_assign_expr(p); + emitted = 1; + p->replay = save_replay; + 
p->replay_cap = save_cap; + p->replay_len = save_len; + p->replay_pos = save_pos; + p->replay_active = save_active; + p->cur = save_cur; + p->has_next = save_has_next; + } + expect_punct(p, ')', "')' after _Generic"); + if (!emitted) { + perr(p, "_Generic: no association matched and no default present"); + } + return; + } + if (is_kw(p, &t, KW_ALIGNOF)) { + const Type* ty; + advance(p); + expect_punct(p, '(', "'('"); + if (starts_type_name(p, &p->cur)) { + ty = parse_type_name(p); + } else { + parse_unary(p); + ty = cg_top_type(p->cg); + cg_drop(p->cg); + } + expect_punct(p, ')', "')'"); + cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p)); + return; + } + parse_postfix(p); +} + +/* ============================================================ + * Binary operator levels + * ============================================================ */ + +static int type_is_fp(const Type* t) { + return t && (t->kind == TY_FLOAT || t->kind == TY_DOUBLE || + t->kind == TY_LDOUBLE); +} + +static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) { + if (!type_is_fp(a) && !type_is_fp(b)) return NULL; + if ((a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE)) { + return type_prim(p->pool, TY_LDOUBLE); + } + if ((a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE)) { + return type_prim(p->pool, TY_DOUBLE); + } + return type_prim(p->pool, TY_FLOAT); +} + +static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) { + if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); + cg_swap(p->cg); + if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); + cg_swap(p->cg); + BinOp fop; + switch (bop) { + case BO_IADD: fop = BO_FADD; break; + case BO_ISUB: fop = BO_FSUB; break; + case BO_IMUL: fop = BO_FMUL; break; + case BO_SDIV: fop = BO_FDIV; break; + default: + perr(p, "operator does not apply to floating types"); + return; + } + cg_binop(p->cg, fop); +} + +static void parse_mul(Parser* p) { + parse_unary(p); + for (;;) { + Tok t = 
p->cur; + BinOp bop; + if (is_punct(&t, '*')) { + bop = BO_IMUL; + } else if (is_punct(&t, '/')) { + bop = BO_SDIV; + } else if (is_punct(&t, '%')) { + bop = BO_SREM; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_unary(p); + to_rvalue(p); + const Type* lt = cg_top2_type(p->cg); + const Type* rt = cg_top_type(p->cg); + const Type* common = common_fp_type(p, lt, rt); + if (common) { + emit_fp_binop(p, bop, common); + } else { + cg_binop(p->cg, bop); + } + } +} + +static void emit_add_or_sub(Parser* p, BinOp bop) { + const Type* lt = cg_top2_type(p->cg); + const Type* rt = cg_top_type(p->cg); + int l_is_ptr = lt && lt->kind == TY_PTR; + int r_is_ptr = rt && rt->kind == TY_PTR; + if (bop == BO_IADD) { + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + if (r_is_ptr && type_is_int(lt)) { + cg_swap(p->cg); + u32 esz = abi_sizeof(p->abi, rt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + } else { /* BO_ISUB */ + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_ISUB); + return; + } + if (l_is_ptr && r_is_ptr) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + cg_binop(p->cg, BO_ISUB); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_SDIV); + } + return; + } + } + const Type* common = common_fp_type(p, lt, rt); + if (common) { + emit_fp_binop(p, bop, common); + return; + } + cg_binop(p->cg, bop); +} + +static void parse_add(Parser* p) { + parse_mul(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, '+')) { + bop = BO_IADD; + } else if (is_punct(&t, '-')) { + bop = BO_ISUB; + } 
else { + break; + } + advance(p); + to_rvalue(p); + parse_mul(p); + to_rvalue(p); + emit_add_or_sub(p, bop); + } +} + +static void parse_shift(Parser* p) { + parse_add(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, P_SHL)) { + bop = BO_SHL; + } else if (is_punct(&t, P_SHR)) { + bop = BO_SHR_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_add(p); + to_rvalue(p); + cg_binop(p->cg, bop); + } +} + +static void parse_rel(Parser* p) { + parse_shift(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, '<')) { + cop = CMP_LT_S; + } else if (is_punct(&t, '>')) { + cop = CMP_GT_S; + } else if (is_punct(&t, P_LE)) { + cop = CMP_LE_S; + } else if (is_punct(&t, P_GE)) { + cop = CMP_GE_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_shift(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_eq(Parser* p) { + parse_rel(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, P_EQ)) { + cop = CMP_EQ; + } else if (is_punct(&t, P_NE)) { + cop = CMP_NE; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_rel(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_band(Parser* p) { + parse_eq(p); + while (is_punct(&p->cur, '&')) { + advance(p); + to_rvalue(p); + parse_eq(p); + to_rvalue(p); + cg_binop(p->cg, BO_AND); + } +} + +static void parse_bxor(Parser* p) { + parse_band(p); + while (is_punct(&p->cur, '^')) { + advance(p); + to_rvalue(p); + parse_band(p); + to_rvalue(p); + cg_binop(p->cg, BO_XOR); + } +} + +static void parse_bor(Parser* p) { + parse_bxor(p); + while (is_punct(&p->cur, '|')) { + advance(p); + to_rvalue(p); + parse_bxor(p); + to_rvalue(p); + cg_binop(p->cg, BO_OR); + } +} + +static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ty; + fsd.size = abi_sizeof(p->abi, ty); + fsd.align = abi_alignof(p->abi, ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + return cg_local(p->cg, 
&fsd); +} + +static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) { + cg_push_local_typed(p->cg, tmp, ty); + cg_push_int(p->cg, v, ty); + cg_store(p->cg); + cg_drop(p->cg); +} + +static void parse_land(Parser* p) { + parse_bor(p); + while (is_punct(&p->cur, P_AND)) { + CGLabel L_false = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp = ll_tmp_slot(p, result_ty); + advance(p); + to_rvalue(p); + cg_branch_false(p->cg, L_false); + parse_bor(p); + to_rvalue(p); + cg_branch_false(p->cg, L_false); + ll_store_const(p, tmp, result_ty, 1); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_false); + ll_store_const(p, tmp, result_ty, 0); + cg_label_place(p->cg, L_end); + cg_push_local_typed(p->cg, tmp, result_ty); + } +} + +static void parse_lor(Parser* p) { + parse_land(p); + while (is_punct(&p->cur, P_OR)) { + CGLabel L_true = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp = ll_tmp_slot(p, result_ty); + advance(p); + to_rvalue(p); + cg_branch_true(p->cg, L_true); + parse_land(p); + to_rvalue(p); + cg_branch_true(p->cg, L_true); + ll_store_const(p, tmp, result_ty, 0); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_true); + ll_store_const(p, tmp, result_ty, 1); + cg_label_place(p->cg, L_end); + cg_push_local_typed(p->cg, tmp, result_ty); + } +} + +static void parse_ternary(Parser* p) { + parse_lor(p); + if (!is_punct(&p->cur, '?')) return; + CGLabel L_else = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp; + FrameSlotDesc fsd; + advance(p); /* '?' 
*/ + to_rvalue(p); + cg_branch_false(p->cg, L_else); + parse_assign_expr(p); + to_rvalue(p); + result_ty = cg_top_type(p->cg); + if (!result_ty) result_ty = ty_int(p); + memset(&fsd, 0, sizeof fsd); + fsd.type = result_ty; + fsd.size = abi_sizeof(p->abi, result_ty); + fsd.align = abi_alignof(p->abi, result_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + tmp = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, tmp, result_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_else); + expect_punct(p, ':', "':' in ternary"); + parse_assign_expr(p); + to_rvalue(p); + if (cg_top_type(p->cg) != result_ty) { + cg_convert(p->cg, result_ty); + } + cg_push_local_typed(p->cg, tmp, result_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_label_place(p->cg, L_end); + cg_push_local_typed(p->cg, tmp, result_ty); +} + +void parse_assign_expr(Parser* p) { + parse_ternary(p); + Tok t = p->cur; + BinOp compound; + int is_simple_assign; + if (is_punct(&t, '=')) { + is_simple_assign = 1; + compound = (BinOp)0; + } else if (is_punct(&t, P_ADD_ASSIGN)) { + is_simple_assign = 0; compound = BO_IADD; + } else if (is_punct(&t, P_SUB_ASSIGN)) { + is_simple_assign = 0; compound = BO_ISUB; + } else if (is_punct(&t, P_MUL_ASSIGN)) { + is_simple_assign = 0; compound = BO_IMUL; + } else if (is_punct(&t, P_DIV_ASSIGN)) { + is_simple_assign = 0; compound = BO_SDIV; + } else if (is_punct(&t, P_MOD_ASSIGN)) { + is_simple_assign = 0; compound = BO_SREM; + } else if (is_punct(&t, P_AND_ASSIGN)) { + is_simple_assign = 0; compound = BO_AND; + } else if (is_punct(&t, P_OR_ASSIGN)) { + is_simple_assign = 0; compound = BO_OR; + } else if (is_punct(&t, P_XOR_ASSIGN)) { + is_simple_assign = 0; compound = BO_XOR; + } else if (is_punct(&t, P_SHL_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHL; + } else if (is_punct(&t, P_SHR_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHR_S; + } else { + return; + } + advance(p); + if 
(is_simple_assign) { + parse_assign_expr(p); + to_rvalue(p); + coerce_top_to_lvalue(p); + cg_store(p->cg); + return; + } + cg_dup(p->cg); + cg_load(p->cg); + parse_assign_expr(p); + to_rvalue(p); + if (compound == BO_IADD || compound == BO_ISUB) { + emit_add_or_sub(p, compound); + } else { + cg_binop(p->cg, compound); + } + cg_store(p->cg); +} + +void parse_expr(Parser* p) { + parse_assign_expr(p); + while (is_punct(&p->cur, ',')) { + advance(p); + cg_drop(p->cg); + parse_assign_expr(p); + } +} + +/* parse_cond_expr is the ternary level, provided for completeness */ +void parse_cond_expr(Parser* p) { + parse_ternary(p); +} diff --git a/src/parse/parse_init.c b/src/parse/parse_init.c @@ -0,0 +1,808 @@ +/* parse_init.c — runtime and static-storage initializers. + * + * Covers §6.7.9 (initializers): + * - Runtime aggregate/scalar initializers (init_at, init_elided, + * init_struct_fields, init_string_at, parse_designator_chain, + * push_subobject_lv, emit_copy_leaf, emit_walk_copy, + * emit_struct_copy_into_slot, zero_init_at) + * - Static-storage object definition (parse_static_init_at, + * parse_static_string_at, try_parse_addr_const, encode_int_le, + * pick_object_section, define_static_object, srl_push) + */ + +#include "parse/parse_priv.h" + +/* ============================================================ + * File-local helpers + * ============================================================ */ + +static const Type* ty_size_t_init(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + +static SrcLoc tok_loc_init(const Tok* t) { return t->loc; } + +static CKw ident_kw_init(const Parser* p, Sym name) { + return ident_kw_inline(p, name); +} + +/* True if `ty` is char/signed char/unsigned char. */ +int is_char_kind(const Type* ty) { + if (!ty) return 0; + return ty->kind == TY_CHAR || ty->kind == TY_SCHAR || ty->kind == TY_UCHAR; +} + +/* Decode the string token at p->cur without advancing. 
Returns a heap- + * allocated byte buffer (caller frees) and writes length (including NUL) + * to *nlen_out. */ +static u8* peek_string_bytes(Parser* p, size_t* nlen_out) { + Tok t = p->cur; + if (t.kind != TOK_STR) perr(p, "internal: peek_string_bytes on non-string"); + return decode_string_literal(p, &t, nlen_out); +} + +/* ============================================================ + * Runtime initializers + * ============================================================ */ + +/* Forward declaration for mutual recursion. */ +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, + const Type* ty); +static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty); + +/* Push the lvalue of a sub-object at byte offset `offset` within the array + * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. */ +void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty) { + cg_push_local_typed(p->cg, slot, arr_ty); + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, elem_ty)); + if (offset > 0) { + cg_push_int(p->cg, (i64)offset, ty_size_t_init(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, elem_ty); +} + +/* Emit a load+store for one scalar leaf. */ +static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty, + u32 dst_off, FrameSlot src_ptr_slot, + const Type* src_ptr_ty, u32 src_off, + const Type* leaf_ty) { + push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty); + cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty); + cg_load(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, leaf_ty)); + if (src_off > 0) { + cg_push_int(p->cg, (i64)src_off, ty_size_t_init(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, leaf_ty); + cg_load(p->cg); + cg_store(p->cg); + cg_drop(p->cg); +} + +/* Walk a (possibly nested) aggregate, emitting a leaf load+store for each + * scalar member. 
*/
/* Dispatch on the kind of `ty`:
 *   - struct: recurse into every field that is not a bit-field, at the
 *     field's layout offset (bit-field members are skipped);
 *   - array:  recurse into each element, stepping by the element size;
 *   - union:  copy the object byte by byte as unsigned char;
 *   - other:  copy as a single leaf of type `ty`.
 * `dst_off` and `src_off` are byte offsets that advance in lockstep. */
static void emit_walk_copy(Parser* p, FrameSlot dst_slot,
                           const Type* dst_arr_ty, u32 dst_off,
                           FrameSlot src_ptr_slot, const Type* src_ptr_ty,
                           u32 src_off, const Type* ty) {
  switch (ty->kind) {
    case TY_STRUCT: {
      const ABIRecordLayout* layout = abi_record_layout(p->abi, ty);
      for (u16 fi = 0; fi < ty->rec.nfields; ++fi) {
        const Field* field = &ty->rec.fields[fi];
        if (!(field->flags & FIELD_BITFIELD)) {
          u32 field_off = layout->fields[fi].offset;
          emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + field_off,
                         src_ptr_slot, src_ptr_ty, src_off + field_off,
                         field->type);
        }
      }
      return;
    }
    case TY_ARRAY: {
      u32 elem_size = abi_sizeof(p->abi, ty->arr.elem);
      u32 delta = 0; /* byte offset of element k, i.e. k * elem_size */
      for (u32 k = 0; k < ty->arr.count; ++k, delta += elem_size) {
        emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + delta,
                       src_ptr_slot, src_ptr_ty, src_off + delta,
                       ty->arr.elem);
      }
      return;
    }
    case TY_UNION: {
      u32 nbytes = abi_sizeof(p->abi, ty);
      const Type* byte_ty = type_prim(p->pool, TY_UCHAR);
      for (u32 b = 0; b < nbytes; ++b) {
        emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + b,
                       src_ptr_slot, src_ptr_ty, src_off + b, byte_ty);
      }
      return;
    }
    default:
      break;
  }
  /* Not an aggregate: a single load+store covers it. */
  emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, src_ptr_ty,
                 src_off, ty);
}

/* Source struct/union value is on top of the cg stack as an lvalue.
 * Spill its address into a fresh pointer slot, then walk the type and
 * copy each scalar leaf into the destination sub-object.
*/ +void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, + const Type* dst_arr_ty, u32 dst_off, + const Type* ty) { + const Type* ptr_ty = type_ptr(p->pool, ty); + FrameSlotDesc fsd; + FrameSlot src_ptr_slot; + cg_addr(p->cg); + memset(&fsd, 0, sizeof fsd); + fsd.type = ptr_ty; + fsd.size = abi_sizeof(p->abi, ptr_ty); + fsd.align = abi_alignof(p->abi, ptr_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + src_ptr_slot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, src_ptr_slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, ptr_ty, 0, ty); +} + +/* Recursively zero-initialize the sub-object at `offset` of type `ty`. */ +static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + if (ty->kind == TY_ARRAY) { + u32 esz = abi_sizeof(p->abi, ty->arr.elem); + for (u32 i = 0; i < ty->arr.count; ++i) { + zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem); + } + return; + } + if (ty->kind == TY_STRUCT) { + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + for (u16 i = 0; i < ty->rec.nfields; ++i) { + const Field* f = &ty->rec.fields[i]; + zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type); + } + return; + } + if (ty->kind == TY_UNION) { + if (ty->rec.nfields > 0) { + const Field* f = &ty->rec.fields[0]; + if (!(f->flags & FIELD_BITFIELD)) { + zero_init_at(p, slot, arr_ty, offset, f->type); + } + } + return; + } + push_subobject_lv(p, slot, arr_ty, offset, ty); + cg_push_int(p->cg, 0, ty); + cg_store(p->cg); + cg_drop(p->cg); +} + +/* Emit byte stores for a string literal initializing a char-array sub-object. 
*/ +static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty, u32 count) { + size_t n = 0; + u8* bytes = peek_string_bytes(p, &n); + size_t copy = n; + size_t i; + if (copy > count) copy = count; + for (i = 0; i < copy; ++i) { + push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); + cg_push_int(p->cg, (i64)bytes[i], elem_ty); + cg_store(p->cg); + cg_drop(p->cg); + } + for (; i < count; ++i) { + push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); + cg_push_int(p->cg, 0, elem_ty); + cg_store(p->cg); + cg_drop(p->cg); + } + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); /* consume TOK_STR */ +} + +/* Parse a designator chain (`[const]` and `.ident` repeats) ending at `=`. */ +static void parse_designator_chain(Parser* p, const Type* outer_ty, + u32 outer_offset, const Type** sub_ty_out, + u32* sub_offset_out, u32* top_index_out) { + const Type* cur_ty = outer_ty; + u32 cur_off = outer_offset; + int first = 1; + for (;;) { + if (is_punct(&p->cur, '[')) { + i64 idx; + u32 esz; + SrcLoc cloc = tok_loc_init(&p->cur); + advance(p); + idx = eval_const_int(p, cloc); + expect_punct(p, ']', "']' after designator index"); + if (!cur_ty || cur_ty->kind != TY_ARRAY) { + perr(p, "array designator on non-array"); + } + if (idx < 0 || (u32)idx >= cur_ty->arr.count) { + perr(p, "array designator index out of range"); + } + esz = abi_sizeof(p->abi, cur_ty->arr.elem); + cur_off += (u32)idx * esz; + cur_ty = cur_ty->arr.elem; + if (first) *top_index_out = (u32)idx; + first = 0; + } else if (is_punct(&p->cur, '.')) { + Sym fname; + const Type* fty; + u32 foff; + const Field* ff; + u16 fi; + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected field name after '.'"); + } + fname = p->cur.v.ident; + advance(p); + if (!cur_ty || + (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) { + perr(p, "field designator on non-record type"); + } + if 
(!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) { + perr(p, "no such field in designator"); + } + cur_off += foff; + if (first) { + for (fi = 0; fi < cur_ty->rec.nfields; ++fi) { + const Field* g = &cur_ty->rec.fields[fi]; + if (g->name == fname && fname != 0) { + *top_index_out = fi; + break; + } + if ((g->flags & FIELD_ANON) && + (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) { + const Type* tmp_ty; + u32 tmp_off; + const Field* tmp_f; + if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off, + &tmp_f)) { + *top_index_out = fi; + break; + } + } + } + } + cur_ty = fty; + first = 0; + } else { + break; + } + } + if (first) perr(p, "internal: empty designator chain"); + expect_punct(p, '=', "'=' after designator"); + *sub_ty_out = cur_ty; + *sub_offset_out = cur_off; +} + +static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty, u32 start_field, + int braced) { + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + u32 i = start_field; + u32 zero_lo = start_field; + for (; i < ty->rec.nfields; ++i) { + const Field* f = &ty->rec.fields[i]; + u32 foff = offset + L->fields[i].offset; + if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break; + if (braced && is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + while (zero_lo < top_idx) { + const Field* zf = &ty->rec.fields[zero_lo]; + u32 zoff = offset + L->fields[zero_lo].offset; + zero_init_at(p, slot, arr_ty, zoff, zf->type); + ++zero_lo; + } + init_at(p, slot, arr_ty, sub_off, sub_ty); + i = top_idx; + if (zero_lo <= top_idx) zero_lo = top_idx + 1; + goto next_item_struct; + } + init_at(p, slot, arr_ty, foff, f->type); + if (zero_lo <= i) zero_lo = i + 1; + if (!braced) { + ++i; + break; + } + next_item_struct: + if (!accept_punct(p, ',')) { + ++i; + break; + } + if (is_punct(&p->cur, '}')) { + ++i; + break; + } + } + if 
(braced) { + u32 j; + for (j = zero_lo; j < ty->rec.nfields; ++j) { + const Field* f = &ty->rec.fields[j]; + u32 foff = offset + L->fields[j].offset; + zero_init_at(p, slot, arr_ty, foff, f->type); + } + } + return i; +} + +static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + if (ty->kind == TY_ARRAY) { + u32 esz = abi_sizeof(p->abi, ty->arr.elem); + init_at(p, slot, arr_ty, offset, ty->arr.elem); + (void)esz; + return 1; + } + if (ty->kind == TY_STRUCT) { + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); + return 1; + } + /* Scalar / pointer / union: consume one assignment-expr. */ + int had_brace = accept_punct(p, '{'); + push_subobject_lv(p, slot, arr_ty, offset, ty); + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + cg_drop(p->cg); + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } + return 1; +} + +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + const Type* elem_ty = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem_ty); + if (is_char_kind(elem_ty)) { + if (p->cur.kind == TOK_STR) { + init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); + return; + } + if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { + advance(p); + init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); + accept_punct(p, ','); + expect_punct(p, '}', "'}' after string initializer"); + return; + } + } + if (!is_punct(&p->cur, '{')) { + init_elided(p, slot, arr_ty, offset, elem_ty); + return; + } + advance(p); /* '{' */ + { + u32 i = 0; + u32 zero_lo = 0; + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (is_punct(&p->cur, '[')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, + &top_idx); + while (zero_lo < top_idx) { + zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty); + 
++zero_lo; + } + init_at(p, slot, arr_ty, sub_off, sub_ty); + i = top_idx + 1; + if (zero_lo < i) zero_lo = i; + } else { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + init_at(p, slot, arr_ty, offset + i * esz, elem_ty); + ++i; + if (zero_lo < i) zero_lo = i; + } + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + } + expect_punct(p, '}', "'}' after array initializer"); + { + u32 j; + for (j = zero_lo; j < ty->arr.count; ++j) { + zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty); + } + } + } + return; + } + if (ty->kind == TY_STRUCT) { + if (!is_punct(&p->cur, '{')) { + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); + return; + } + advance(p); /* '{' */ + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1); + expect_punct(p, '}', "'}' after struct initializer"); + return; + } + if (ty->kind == TY_UNION) { + int had_brace = accept_punct(p, '{'); + if (ty->rec.nfields == 0) { + if (had_brace) expect_punct(p, '}', "'}'"); + return; + } + if (had_brace && is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + init_at(p, slot, arr_ty, sub_off, sub_ty); + } else { + const Field* f = &ty->rec.fields[0]; + if (!(f->flags & FIELD_BITFIELD)) { + init_at(p, slot, arr_ty, offset, f->type); + } + } + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after union initializer"); + } + return; + } + /* Scalar (incl. pointer). 
/* Store the low `size` bytes of `v` at `dst` in little-endian byte order.
 *
 * An i64 only carries 8 bytes; for `size` > 8 the remaining bytes are filled
 * with the sign extension of `v` (0x00 or 0xff) instead of shifting by 64 or
 * more bits, which would be undefined behavior.
 */
void encode_int_le(u8* dst, u32 size, i64 v) {
  /* Shift an unsigned copy so the result never depends on how the
   * implementation right-shifts negative values. */
  const unsigned long long bits = (unsigned long long)v;
  const u8 fill = (v < 0) ? (u8)0xffu : (u8)0x00u;
  for (u32 i = 0; i < size; ++i) {
    dst[i] = (i < 8u) ? (u8)((bits >> (8u * i)) & 0xffu) : fill;
  }
}
*/ +static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf, + u32 offset, u32 sz) { + Tok t = p->cur; + Sym name = 0; + SrcLoc nloc = tok_loc_init(&p->cur); + int saw_amp = 0; + i64 element_addend = 0; + i64 byte_addend = 0; + SymEntry* e; + const Type* tgt_ty; + ObjSymId tgt; + if (t.kind == TOK_STR) { + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + ObjSymId str_sym = emit_string_to_rodata(p, bytes, n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); + (void)ty; + (void)buf; + srl_push(p, offset, sz, str_sym, 0); + return 1; + } + if (is_punct(&t, '&')) { + saw_amp = 1; + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected identifier after '&' in static initializer"); + } + name = p->cur.v.ident; + nloc = tok_loc_init(&p->cur); + advance(p); + } else if (t.kind == TOK_IDENT && ident_kw_init(p, t.v.ident) == KW_NONE) { + name = t.v.ident; + advance(p); + } else { + return 0; + } + e = scope_lookup(p, name); + if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) { + perr(p, "static initializer is not a constant address expression"); + } + tgt = e->v.sym; + tgt_ty = e->type; + if (saw_amp && is_punct(&p->cur, '[')) { + SrcLoc cloc; + advance(p); + cloc = tok_loc_init(&p->cur); + element_addend = eval_const_int(p, cloc); + expect_punct(p, ']', "']' after array-subscript constant"); + if (tgt_ty && tgt_ty->kind == TY_ARRAY) { + byte_addend += + element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); + } else { + byte_addend += element_addend; + } + } + while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) { + int neg = is_punct(&p->cur, '-'); + SrcLoc cloc; + i64 v; + advance(p); + cloc = tok_loc_init(&p->cur); + v = eval_const_int(p, cloc); + if (neg) v = -v; + if (tgt_ty && tgt_ty->kind == TY_ARRAY) { + byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); + } else if (tgt_ty && tgt_ty->kind == TY_PTR) { + byte_addend += v * 
(i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee); + } else if (saw_amp) { + byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty); + } else { + byte_addend += v; + } + } + (void)nloc; + (void)ty; + (void)buf; + srl_push(p, offset, sz, tgt, byte_addend); + return 1; +} + +void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + const Type* elem = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem); + u32 i = 0; + int had_brace; + if (is_char_kind(elem)) { + if (p->cur.kind == TOK_STR) { + parse_static_string_at(p, buf, buflen, offset, ty->arr.count); + return; + } + if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { + advance(p); + parse_static_string_at(p, buf, buflen, offset, ty->arr.count); + accept_punct(p, ','); + expect_punct(p, '}', "'}' after string initializer"); + return; + } + } + had_brace = accept_punct(p, '{'); + if (!had_brace) { + perr(p, "expected '{' for static-storage array initializer"); + } + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (is_punct(&p->cur, '[')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + parse_static_init_at(p, buf, buflen, sub_off, sub_ty); + i = top_idx + 1; + } else { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + parse_static_init_at(p, buf, buflen, offset + i * esz, elem); + ++i; + } + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + } + expect_punct(p, '}', "'}' after array initializer"); + return; + } + if (ty->kind == TY_STRUCT) { + int had_brace = accept_punct(p, '{'); + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + u32 i = 0; + if (!had_brace) { + perr(p, "expected '{' for static-storage struct initializer"); + } + while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { + const Field* f = &ty->rec.fields[i]; + if (is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 
0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + parse_static_init_at(p, buf, buflen, sub_off, sub_ty); + i = top_idx + 1; + if (!accept_punct(p, ',')) break; + continue; + } + parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset, + f->type); + ++i; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, '}', "'}' after struct initializer"); + return; + } + if (ty->kind == TY_UNION) { + perr(p, "static-storage union initializer not supported in Phase 4"); + } + /* Scalar / pointer. */ + { + int had_brace = accept_punct(p, '{'); + SrcLoc cloc = tok_loc_init(&p->cur); + u32 sz = abi_sizeof(p->abi, ty); + if (offset + sz > buflen) perr(p, "initializer overflows object"); + if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) { + /* Address constant recorded as a reloc. */ + } else { + i64 v = eval_const_int(p, cloc); + encode_int_le(buf + offset, sz, v); + } + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } + } +} + +/* Choose the section a defining object decl should land in. */ +static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) { + if ((quals & Q_CONST) != 0 && has_nonzero) { + Sym secname = pool_intern_cstr(p->pool, ".rodata"); + return obj_section(decl_obj(p->decls), secname, SEC_RODATA, SF_ALLOC, 1u); + } + return OBJ_SEC_NONE; +} + +/* Define a static-storage object. */ +void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, + u16 quals, int has_init, SrcLoc loc, + u32 align_override) { + ObjBuilder* ob = decl_obj(p->decls); + u32 size = abi_sizeof(p->abi, var_ty); + u32 align = abi_alignof(p->abi, var_ty); + if (align_override > align) align = align_override; + u8* buf = NULL; + int has_nonzero = 0; + ObjSecId override_sec; + const ObjSym* os = obj_symbol_get(ob, sym); + int is_tls = (os && os->kind == SK_TLS); + + if (has_init) { + buf = (u8*)arena_array(p->c->tu, u8, size ? 
size : 1u); + memset(buf, 0, size); + p->static_relocs_len = 0; + parse_static_init_at(p, buf, size, 0, var_ty); + for (u32 i = 0; i < size; ++i) { + if (buf[i]) { has_nonzero = 1; break; } + } + if (p->static_relocs_len) has_nonzero = 1; + } + + if (is_tls) { + Sym sname; + ObjSecId sec; + u32 a = align ? align : 1u; + u32 base; + if (!has_init || !has_nonzero) { + sname = obj_secname_tbss(p->c); + sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, + SF_ALLOC | SF_WRITE | SF_TLS, a, 0, OBJ_SEC_NONE, 0); + base = obj_align_to(ob, sec, a); + obj_reserve_bss(ob, sec, base + size, a); + obj_symbol_define(ob, sym, sec, base, size); + return; + } + sname = obj_secname_tdata(p->c); + sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, a); + base = obj_align_to(ob, sec, a); + { + u8* dst = obj_reserve(ob, sec, size); + if (dst) memcpy(dst, buf, size); + } + obj_symbol_define(ob, sym, sec, base, size); + for (u32 i = 0; i < p->static_relocs_len; ++i) { + RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; + obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk, + p->static_relocs[i].target, p->static_relocs[i].addend); + } + p->static_relocs_len = 0; + (void)loc; + return; + } + + override_sec = pick_object_section(p, quals, has_nonzero); + if (override_sec != OBJ_SEC_NONE) { + u32 base = obj_align_to(ob, override_sec, align > 1u ? align : 1u); + { + u8* dst = obj_reserve(ob, override_sec, size); + if (dst && buf) memcpy(dst, buf, size); + } + obj_symbol_define(ob, sym, override_sec, base, size); + for (u32 i = 0; i < p->static_relocs_len; ++i) { + RelocKind rk = (p->static_relocs[i].size == 8) ? 
R_ABS64 : R_ABS32; + obj_reloc(ob, override_sec, base + p->static_relocs[i].offset, rk, + p->static_relocs[i].target, p->static_relocs[i].addend); + } + p->static_relocs_len = 0; + (void)loc; + return; + } + + if (!has_init || !has_nonzero) { + Sym sname = pool_intern_cstr(p->pool, ".bss"); + ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, + SF_ALLOC | SF_WRITE, + align ? align : 1u, 0, OBJ_SEC_NONE, 0); + u32 a = align ? align : 1u; + u32 base = obj_align_to(ob, sec, a); + obj_reserve_bss(ob, sec, base + size, a); + obj_symbol_define(ob, sym, sec, base, size); + return; + } + /* .data path. */ + { + Sym sname = pool_intern_cstr(p->pool, ".data"); + ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE, + align ? align : 1u); + u32 base = obj_align_to(ob, sec, align ? align : 1u); + u8* dst = obj_reserve(ob, sec, size); + if (dst) memcpy(dst, buf, size); + obj_symbol_define(ob, sym, sec, base, size); + for (u32 i = 0; i < p->static_relocs_len; ++i) { + RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; + obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk, + p->static_relocs[i].target, p->static_relocs[i].addend); + } + p->static_relocs_len = 0; + } +} diff --git a/src/parse/parse_priv.h b/src/parse/parse_priv.h @@ -0,0 +1,431 @@ +/* parse_priv.h — private header shared across parse_*.c modules. + * + * Declares: Parser struct, shared types (Scope, SymEntry, TagEntry, + * DeclSpecs, TypeSpecAccum, CKw, TagDeclKind), forward decls of + * cross-module functions, and inline/shared helpers. 
/* C keyword identifiers.
 *
 * KW_NONE (0) means "not a keyword". KW_COUNT is the number of enumerators
 * and sizes Parser.kw_sym[], which ident_kw_inline scans from index 1 up to
 * KW_COUNT. Do not reorder entries without checking users of that table
 * (NOTE(review): how kw_sym[] is populated is not visible in this header).
 */
typedef enum CKw {
  KW_NONE = 0,
  KW_AUTO,
  KW_BREAK,
  KW_CASE,
  KW_CHAR,
  KW_CONST,
  KW_CONTINUE,
  KW_DEFAULT,
  KW_DO,
  KW_DOUBLE,
  KW_ELSE,
  KW_ENUM,
  KW_EXTERN,
  KW_FLOAT,
  KW_FOR,
  KW_GOTO,
  KW_IF,
  KW_INLINE,
  KW_INT,
  KW_LONG,
  KW_REGISTER,
  KW_RESTRICT,
  KW_RETURN,
  KW_SHORT,
  KW_SIGNED,
  KW_SIZEOF,
  KW_STATIC,
  KW_STRUCT,
  KW_SWITCH,
  KW_TYPEDEF,
  KW_UNION,
  KW_UNSIGNED,
  KW_VOID,
  KW_VOLATILE,
  KW_WHILE,
  KW_BOOL,          /* _Bool */
  KW_COMPLEX,       /* _Complex */
  KW_IMAGINARY,     /* _Imaginary */
  KW_ALIGNAS,       /* _Alignas */
  KW_ALIGNOF,       /* _Alignof */
  KW_ATOMIC,        /* _Atomic */
  KW_GENERIC,       /* _Generic */
  KW_NORETURN,      /* _Noreturn */
  KW_STATIC_ASSERT, /* _Static_assert */
  KW_THREAD_LOCAL,  /* _Thread_local */
  KW_ASM,           /* GNU `asm` */
  KW_BUILTIN_ASM,   /* GNU `__asm__` */
  KW_COUNT
} CKw;
============================================================ + * Scope stack types + * ============================================================ */ + +typedef enum SymEntryKind { + SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */ + SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */ + SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */ + SEK_TYPEDEF, /* typedef name */ + SEK_ENUM_CST, /* enumeration constant */ +} SymEntryKind; + +typedef struct SymEntry SymEntry; +struct SymEntry { + Sym name; + u8 kind; /* SymEntryKind */ + u8 pad[3]; + const Type* type; + union { + FrameSlot slot; + ObjSymId sym; + i64 enum_value; + } v; + FrameSlot vla_byte_slot; + struct Attr* attrs; + SymEntry* next; +}; + +typedef struct TagEntry TagEntry; +struct TagEntry { + Sym name; + u8 kind; /* TagDeclKind */ + u8 complete; + u16 pad; + Type* type; + struct Attr* attrs; + TagEntry* next; +}; + +typedef struct Scope Scope; +struct Scope { + SymEntry* entries; /* LIFO */ + TagEntry* tags; /* LIFO */ + Scope* parent; +}; + +/* ============================================================ + * Switch/goto control-flow types + * ============================================================ */ + +typedef struct CaseEntry CaseEntry; +struct CaseEntry { + i64 value; + CGLabel label; + CaseEntry* next; +}; + +typedef struct SwitchCtx SwitchCtx; +struct SwitchCtx { + CaseEntry* cases; + CGLabel default_label; + FrameSlot value_slot; + const Type* value_type; + SwitchCtx* parent; +}; + +typedef struct GotoLabel GotoLabel; +struct GotoLabel { + Sym name; + CGLabel label; + u8 placed; + u8 pad[3]; + SrcLoc first_use; + GotoLabel* next; +}; + +/* ============================================================ + * Parser context + * ============================================================ */ + +typedef struct Parser { + Compiler* c; + Pp* pp; + DeclTable* decls; + CG* cg; + Debug* debug; + TargetABI* abi; + Pool* pool; + + Tok cur; + Tok next; + int has_next; + + Tok pending; + int 
has_pending; + + Sym kw_sym[KW_COUNT]; + + Sym sym_b_alloca; + Sym sym_b_ctz; + Sym sym_b_expect; + Sym sym_b_offsetof; + Sym sym_b_va_list; + Sym sym_b_va_start; + Sym sym_b_va_arg; + Sym sym_b_va_end; + Sym sym_b_va_copy; + Sym sym_attribute; + Sym sym_volatile_alias; + Sym sym_alignof_alias; + Sym sym_a_load_n; + Sym sym_a_store_n; + Sym sym_a_exchange_n; + Sym sym_a_fetch_add; + Sym sym_a_fetch_sub; + Sym sym_a_fetch_and; + Sym sym_a_fetch_or; + Sym sym_a_fetch_xor; + Sym sym_a_cas_n; + Sym sym_a_thread_fence; + Sym sym_a_signal_fence; + + Scope* scope; + + ObjSecId text_sec; + + CGLabel cur_break; + CGLabel cur_continue; + + SwitchCtx* cur_switch; + + GotoLabel* goto_labels; + + u8 vla_pending; + FrameSlot vla_pending_count_slot; + + FrameSlot last_pushed_vla_slot; + + u8 in_param_decl; + + u32 static_local_counter; + + u32 compound_literal_counter; + + Tok* replay; + u32 replay_cap; + u32 replay_len; + u32 replay_pos; + u8 replay_active; + + struct { + u32 offset; + u32 size; + ObjSymId target; + i64 addend; + } *static_relocs; + u32 static_relocs_len; + u32 static_relocs_cap; +} Parser; + +/* ============================================================ + * DeclSpecs and TypeSpecAccum + * ============================================================ */ + +typedef struct DeclSpecs { + const Type* type; + DeclStorage storage; + u32 flags; /* DeclFlag */ + u16 quals; + u32 align; + FrameSlot vla_byte_slot; + Attr* attrs; +} DeclSpecs; + +typedef struct TypeSpecAccum { + u8 saw_void; + u8 saw_char; + u8 saw_int; + u8 saw_short; + u8 long_count; + u8 saw_signed; + u8 saw_unsigned; + u8 saw_bool; + u8 saw_float; + u8 saw_double; + u8 saw_explicit_type; +} TypeSpecAccum; + +/* ============================================================ + * Shared token/diagnostic helpers (defined in parse.c) + * ============================================================ */ + +_Noreturn void perr(Parser* p, const char* fmt, ...); +void advance(Parser* p); +Tok peek1(Parser* p); 
+void expect_punct(Parser* p, u32 punct, const char* what); +int accept_punct(Parser* p, u32 punct); + +/* ============================================================ + * Scope/tag ops (defined in parse.c) + * ============================================================ */ + +Scope* scope_new(Parser* p, Scope* parent); +void scope_push(Parser* p); +void scope_pop(Parser* p); +SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, const Type* type); +SymEntry* scope_lookup(Parser* p, Sym name); +TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, int complete); +TagEntry* tag_lookup(Parser* p, Sym name); +TagEntry* tag_lookup_local(Parser* p, Sym name); + +/* ============================================================ + * Token predicate helpers (defined in parse.c — file-scope static, + * exposed here as inline equivalents; each .c file sees its own copy) + * ============================================================ */ + +static inline int is_punct(const Tok* t, u32 punct) { + return t->kind == TOK_PUNCT && t->v.punct == punct; +} + +static inline int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; } + +static inline CKw ident_kw_inline(const Parser* p, Sym name) { + CKw i; + for (i = (CKw)1; i < KW_COUNT; ++i) { + if (p->kw_sym[i] == name) return i; + } + if (name == p->sym_alignof_alias) return KW_ALIGNOF; + return KW_NONE; +} + +static inline int is_kw(const Parser* p, const Tok* t, CKw k) { + if (t->kind != TOK_IDENT) return 0; + if (t->v.ident == p->kw_sym[k]) return 1; + if (k == KW_ALIGNOF && t->v.ident == p->sym_alignof_alias) return 1; + return 0; +} + +/* ============================================================ + * Shared types (needed across multiple modules) + * ============================================================ */ + +typedef struct ParamInfo { + Sym name; + const Type* type; + SrcLoc loc; +} ParamInfo; + +/* ============================================================ + * Declarator suffix types 
/* Which kind of declarator suffix a DeclSuffix describes. */
typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind;
/* One parsed declarator suffix, produced by parse_decl_suffix and consumed
 * by apply_decl_suffix. `kind` selects which of the remaining fields are
 * meaningful (NOTE(review): the array/function split of the fields below is
 * inferred from their names — confirm against parse_type.c). */
typedef struct DeclSuffix {
  u8 kind;        /* DSuffKind */
  u32 count;      /* element count; meaningful when !vla and !incomplete */
  u8 incomplete;  /* true for `[]` (no size given) */
  u8 vla;         /* true for `[expr]` with a non-constant size */
  FrameSlot vla_count_slot; /* presumably the slot holding the runtime count
                               when `vla` is set — TODO confirm */
  ParamInfo* params;        /* presumably the parameter list for DS_FUNC */
  u16 nparams;              /* number of entries in `params` */
  u8 variadic;              /* presumably set when the list ends in `...` */
} DeclSuffix;
Type* apply_decl_suffix(Parser* p, const Type* base, const DeclSuffix* s); + +/* parse_expr.c */ +void parse_expr(Parser* p); +void parse_assign_expr(Parser* p); +void parse_cond_expr(Parser* p); +void parse_unary(Parser* p); +i64 eval_const_int(Parser* p, SrcLoc loc); +i64 parse_int_literal(Parser* p, const Tok* t); +i64 decode_char_literal(Parser* p, const Tok* t); +u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out); +void to_rvalue(Parser* p); +void coerce_top_to_lvalue(Parser* p); +ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n); + +/* parse_init.c */ +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* ty); +void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, const Type* ty); +void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, u16 quals, + int has_init, SrcLoc loc, u32 align_override); +void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend); +void encode_int_le(u8* dst, u32 size, i64 v); +void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty); +void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, + const Type* dst_arr_ty, u32 dst_off, + const Type* ty); +int is_char_kind(const Type* ty); + +/* parse_stmt.c */ +void parse_stmt(Parser* p); +void parse_compound_stmt(Parser* p); +void parse_static_assert(Parser* p); + +/* parse.c (residual — TU driver) */ +void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, + u8* variadic_out); +void parse_local_decl(Parser* p, const DeclSpecs* specs); +FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc); +FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, SrcLoc loc, + u32 align_override); +Sym mint_static_local_sym(Parser* p, Sym orig); +void record_braced_block(Parser* p); +void replay_rewind(Parser* p); +u32 count_recorded_top_level_items(const Tok* vec, u32 len); diff --git 
a/src/parse/parse_stmt.c b/src/parse/parse_stmt.c @@ -0,0 +1,689 @@ +/* parse_stmt.c — statement parsers. + * + * Covers §6.8: if, while, for, do-while, return, break, continue, goto, + * labeled, case, default, switch, _Static_assert, asm, compound, + * and the top-level parse_stmt dispatcher. + */ + +#include "parse/parse_priv.h" + +/* ============================================================ + * File-local helpers + * ============================================================ */ + +static CKw ident_kw_stmt(const Parser* p, Sym name) { + return ident_kw_inline(p, name); +} + +static SrcLoc tok_loc_stmt(const Tok* t) { return t->loc; } + +static int accept_kw_stmt(Parser* p, CKw k) { + if (!is_kw(p, &p->cur, k)) return 0; + advance(p); + return 1; +} + +/* ============================================================ + * Statement parsers + * ============================================================ */ + +static void parse_if_stmt(Parser* p) { + CGLabel L_else = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')'"); + cg_branch_false(p->cg, L_else); + parse_stmt(p); + if (accept_kw_stmt(p, KW_ELSE)) { + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_else); + parse_stmt(p); + cg_label_place(p->cg, L_end); + } else { + cg_label_place(p->cg, L_else); + } +} + +static void parse_while_stmt(Parser* p) { + CGLabel L_top = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + expect_punct(p, '(', "'('"); + cg_label_place(p->cg, L_top); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')'"); + cg_branch_false(p->cg, L_end); + p->cur_break = L_end; + p->cur_continue = L_top; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + cg_jump(p->cg, L_top); + cg_label_place(p->cg, L_end); +} + +static void parse_for_stmt(Parser* p) { + CGLabel 
L_top = cg_label_new(p->cg); + CGLabel L_step = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + + scope_push(p); + expect_punct(p, '(', "'('"); + + /* init: declaration | expr | ; */ + if (!accept_punct(p, ';')) { + DeclSpecs specs; + if (parse_decl_specs(p, &specs)) { + parse_local_decl(p, &specs); + } else { + parse_expr(p); + cg_drop(p->cg); + expect_punct(p, ';', "';'"); + } + } + + cg_label_place(p->cg, L_top); + if (!is_punct(&p->cur, ';')) { + parse_expr(p); + to_rvalue(p); + cg_branch_false(p->cg, L_end); + } + expect_punct(p, ';', "';'"); + + { + CGLabel L_body = cg_label_new(p->cg); + cg_jump(p->cg, L_body); + cg_label_place(p->cg, L_step); + if (!is_punct(&p->cur, ')')) { + parse_expr(p); + cg_drop(p->cg); + } + cg_jump(p->cg, L_top); + expect_punct(p, ')', "')'"); + cg_label_place(p->cg, L_body); + + p->cur_break = L_end; + p->cur_continue = L_step; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + + cg_jump(p->cg, L_step); + cg_label_place(p->cg, L_end); + } + scope_pop(p); +} + +static void parse_return_stmt(Parser* p) { + if (accept_punct(p, ';')) { + cg_ret(p->cg, 0); + return; + } + parse_expr(p); + to_rvalue(p); + expect_punct(p, ';', "';' after return value"); + cg_ret(p->cg, 1); +} + +static void parse_break_stmt(Parser* p) { + if (p->cur_break == 0) perr(p, "'break' outside of loop or switch"); + cg_jump(p->cg, p->cur_break); + expect_punct(p, ';', "';' after break"); +} + +static void parse_continue_stmt(Parser* p) { + if (p->cur_continue == 0) perr(p, "'continue' outside of loop"); + cg_jump(p->cg, p->cur_continue); + expect_punct(p, ';', "';' after continue"); +} + +static void parse_do_stmt(Parser* p) { + CGLabel L_top = cg_label_new(p->cg); + CGLabel L_cond = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + 
cg_label_place(p->cg, L_top); + p->cur_break = L_end; + p->cur_continue = L_cond; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + cg_label_place(p->cg, L_cond); + if (!is_kw(p, &p->cur, KW_WHILE)) perr(p, "expected 'while' after do-body"); + advance(p); /* while */ + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after do-while condition"); + expect_punct(p, ';', "';' after do-while"); + cg_branch_true(p->cg, L_top); + cg_label_place(p->cg, L_end); +} + +static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) { + GotoLabel* gl; + for (gl = p->goto_labels; gl; gl = gl->next) { + if (gl->name == name) return gl; + } + gl = arena_new(p->c->tu, GotoLabel); + if (!gl) perr(p, "out of memory in label_get_or_create"); + memset(gl, 0, sizeof *gl); + gl->name = name; + gl->label = cg_label_new(p->cg); + gl->placed = 0; + gl->first_use = loc; + gl->next = p->goto_labels; + p->goto_labels = gl; + return gl; +} + +static void parse_goto_stmt(Parser* p) { + Sym name; + SrcLoc loc; + GotoLabel* gl; + if (p->cur.kind != TOK_IDENT || ident_kw_stmt(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected label name after 'goto'"); + } + name = p->cur.v.ident; + loc = tok_loc_stmt(&p->cur); + advance(p); + expect_punct(p, ';', "';' after goto"); + gl = label_get_or_create(p, name, loc); + cg_jump(p->cg, gl->label); +} + +static void parse_label_stmt(Parser* p) { + Sym name = p->cur.v.ident; + SrcLoc loc = tok_loc_stmt(&p->cur); + GotoLabel* gl; + advance(p); /* IDENT */ + advance(p); /* ':' */ + gl = label_get_or_create(p, name, loc); + if (gl->placed) perr(p, "duplicate label"); + gl->placed = 1; + cg_label_place(p->cg, gl->label); + parse_stmt(p); +} + +static void parse_case_stmt(Parser* p) { + i64 v; + CGLabel L; + CaseEntry* ce; + SrcLoc loc = tok_loc_stmt(&p->cur); + if (!p->cur_switch) perr(p, "'case' label not in switch statement"); + v = eval_const_int(p, loc); + expect_punct(p, ':', 
"':' after case constant"); + L = cg_label_new(p->cg); + cg_label_place(p->cg, L); + ce = arena_new(p->c->tu, CaseEntry); + if (!ce) perr(p, "out of memory in parse_case_stmt"); + ce->value = v; + ce->label = L; + ce->next = p->cur_switch->cases; + p->cur_switch->cases = ce; + parse_stmt(p); +} + +static void parse_default_stmt(Parser* p) { + CGLabel L; + if (!p->cur_switch) perr(p, "'default' label not in switch statement"); + expect_punct(p, ':', "':' after default"); + if (p->cur_switch->default_label != 0) perr(p, "duplicate 'default' label"); + L = cg_label_new(p->cg); + cg_label_place(p->cg, L); + p->cur_switch->default_label = L; + parse_stmt(p); +} + +static void parse_switch_stmt(Parser* p) { + CGLabel L_dispatch = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + SwitchCtx ctx; + SwitchCtx* saved_switch = p->cur_switch; + FrameSlotDesc fsd; + const Type* vty; + CaseEntry* it; + CaseEntry* prev; + CaseEntry* head; + + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + vty = cg_top_type(p->cg); + if (!vty) vty = type_prim(p->pool, TY_INT); + expect_punct(p, ')', "')' after switch expression"); + + memset(&ctx, 0, sizeof ctx); + memset(&fsd, 0, sizeof fsd); + fsd.type = vty; + fsd.size = abi_sizeof(p->abi, vty); + fsd.align = abi_alignof(p->abi, vty); + fsd.kind = FS_LOCAL; + ctx.value_slot = cg_local(p->cg, &fsd); + ctx.value_type = vty; + ctx.parent = saved_switch; + + cg_push_local_typed(p->cg, ctx.value_slot, vty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + + cg_jump(p->cg, L_dispatch); + + p->cur_switch = &ctx; + p->cur_break = L_end; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_switch = saved_switch; + + cg_jump(p->cg, L_end); + + cg_label_place(p->cg, L_dispatch); + prev = NULL; + head = ctx.cases; + while (head) { + CaseEntry* nxt = head->next; + head->next = prev; + prev = head; + head = nxt; + } + for (it = prev; it; it = it->next) { + cg_push_local_typed(p->cg, 
ctx.value_slot, vty); + cg_load(p->cg); + cg_push_int(p->cg, it->value, vty); + cg_cmp(p->cg, CMP_EQ); + cg_branch_true(p->cg, it->label); + } + if (ctx.default_label) { + cg_jump(p->cg, ctx.default_label); + } + cg_label_place(p->cg, L_end); +} + +void parse_static_assert(Parser* p) { + SrcLoc loc = tok_loc_stmt(&p->cur); + i64 v; + if (!accept_kw_stmt(p, KW_STATIC_ASSERT)) { + perr(p, "expected _Static_assert"); + } + expect_punct(p, '(', "'(' after _Static_assert"); + v = eval_const_int(p, tok_loc_stmt(&p->cur)); + expect_punct(p, ',', "',' separating _Static_assert args"); + if (p->cur.kind != TOK_STR) { + perr(p, "expected string literal as _Static_assert message"); + } + { + Tok msg = p->cur; + advance(p); + expect_punct(p, ')', "')' after _Static_assert"); + expect_punct(p, ';', "';' after _Static_assert"); + if (!v) { + size_t mlen = 0; + const char* mstr = pool_str(p->pool, msg.spelling, &mlen); + compiler_panic(p->c, loc, "static assertion failed: %.*s", + (int)mlen, mstr ? mstr : ""); + } + } +} + +/* GNU inline-asm statement. The leading 'asm'/'__asm__' keyword has + * already been consumed by parse_stmt. 
*/ +typedef struct AsmOutLValue { + FrameSlot addr_slot; + const Type* ptr_ty; + const Type* val_ty; +} AsmOutLValue; + +static Sym parse_asm_operand_name(Parser* p) { + Sym name = 0; + if (!is_punct(&p->cur, '[')) return 0; + advance(p); + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected identifier inside '[name]' on asm operand"); + } + name = p->cur.v.ident; + advance(p); + expect_punct(p, ']', "']' after asm operand name"); + return name; +} + +static const char* parse_asm_str(Parser* p, const char* what) { + u8* bytes; + size_t nlen = 0; + Sym s; + Tok t; + if (p->cur.kind != TOK_STR) { + perr(p, "expected string literal in %s", what); + } + t = p->cur; + advance(p); + bytes = decode_string_literal(p, &t, &nlen); + if (nlen > 0) nlen -= 1; + s = pool_intern(p->pool, (const char*)bytes, nlen); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + return pool_str(p->pool, s, NULL); +} + +static void parse_asm_stmt(Parser* p) { + const char* tmpl; + AsmConstraint* outs = NULL; + AsmConstraint* ins = NULL; + Sym* clobbers = NULL; + AsmOutLValue* out_lvs = NULL; + u32 nout = 0, nin = 0, nclob = 0; + u32 cap_out = 0, cap_in = 0, cap_clob = 0; + int saw_goto = 0; + SrcLoc loc = tok_loc_stmt(&p->cur); + + for (;;) { + if (accept_kw_stmt(p, KW_VOLATILE)) continue; + if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) { + advance(p); + continue; + } + break; + } + if (accept_kw_stmt(p, KW_GOTO)) saw_goto = 1; + + expect_punct(p, '(', "'(' after asm"); + tmpl = parse_asm_str(p, "asm template"); + + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_out = 4; + outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out); + out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out); + for (;;) { + AsmConstraint c; + AsmOutLValue lv; + const Type* val_ty; + const Type* ptr_ty; + FrameSlotDesc fsd; + FrameSlot slot; + memset(&c, 0, sizeof c); + memset(&lv, 0, sizeof lv); + c.name = 
parse_asm_operand_name(p); + c.str = parse_asm_str(p, "asm output constraint"); + if (c.str && c.str[0] == '+') c.dir = ASM_INOUT; + else c.dir = ASM_OUT; + expect_punct(p, '(', "'(' before asm output lvalue"); + parse_assign_expr(p); + val_ty = cg_top_type(p->cg); + if (!val_ty) perr(p, "asm output: cannot determine lvalue type"); + c.type = val_ty; + cg_addr(p->cg); + ptr_ty = cg_top_type(p->cg); + if (!ptr_ty) perr(p, "asm output: cannot take address"); + memset(&fsd, 0, sizeof fsd); + fsd.type = ptr_ty; + fsd.size = 8; + fsd.align = 8; + fsd.kind = FS_LOCAL; + slot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + lv.addr_slot = slot; + lv.ptr_ty = ptr_ty; + lv.val_ty = val_ty; + expect_punct(p, ')', "')' after asm output lvalue"); + if (nout == cap_out) { + u32 nc = cap_out * 2; + AsmConstraint* nb = + (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); + AsmOutLValue* nlv = + (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc); + memcpy(nb, outs, sizeof(AsmConstraint) * nout); + memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout); + outs = nb; + out_lvs = nlv; + cap_out = nc; + } + outs[nout] = c; + out_lvs[nout] = lv; + nout++; + if (!accept_punct(p, ',')) break; + } + } + + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_in = 4; + ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in); + for (;;) { + AsmConstraint c; + memset(&c, 0, sizeof c); + c.name = parse_asm_operand_name(p); + c.str = parse_asm_str(p, "asm input constraint"); + c.dir = ASM_IN; + expect_punct(p, '(', "'(' before asm input expression"); + parse_assign_expr(p); + to_rvalue(p); + c.type = cg_top_type(p->cg); + expect_punct(p, ')', "')' after asm input expression"); + if (nin == cap_in) { + u32 nc = cap_in * 2; + AsmConstraint* nb = + (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); + memcpy(nb, ins, sizeof(AsmConstraint) * nin); + ins = nb; 
+ cap_in = nc; + } + ins[nin++] = c; + if (!accept_punct(p, ',')) break; + } + } + + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_clob = 4; + clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob); + for (;;) { + const char* cstr; + Sym cs; + cstr = parse_asm_str(p, "asm clobber"); + cs = pool_intern_cstr(p->pool, cstr); + if (nclob == cap_clob) { + u32 nc = cap_clob * 2; + Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc); + memcpy(nb, clobbers, sizeof(Sym) * nclob); + clobbers = nb; + cap_clob = nc; + } + clobbers[nclob++] = cs; + if (!accept_punct(p, ',')) break; + } + } + + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ')')) { + for (;;) { + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected label identifier in asm-goto label list"); + } + advance(p); + if (!accept_punct(p, ',')) break; + } + } + } + } + } + } + + expect_punct(p, ')', "')' to close asm"); + expect_punct(p, ';', "';' after asm statement"); + + (void)saw_goto; + + u32 ninout = 0; + for (u32 i = 0; i < nout; ++i) { + if (outs[i].dir == ASM_INOUT) ninout++; + } + if (ninout > 0) { + static const char* const k_match_strs[10] = { + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; + u32 need = nin + ninout; + if (need > cap_in) { + u32 nc = cap_in ? 
cap_in : 4; + while (nc < need) nc *= 2; + AsmConstraint* nb = + (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); + if (nin) memcpy(nb, ins, sizeof(AsmConstraint) * nin); + ins = nb; + cap_in = nc; + } + for (u32 i = 0; i < nout; ++i) { + if (outs[i].dir != ASM_INOUT) continue; + if (i >= 10) { + perr(p, "asm: '+r' constraint at output index >9 exceeds " + "matching-digit syntax"); + } + AsmOutLValue* lv = &out_lvs[i]; + cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); + cg_load(p->cg); + cg_deref(p->cg, lv->val_ty); + cg_load(p->cg); + AsmConstraint mc; + memset(&mc, 0, sizeof mc); + mc.str = k_match_strs[i]; + mc.dir = ASM_IN; + mc.type = lv->val_ty; + ins[nin++] = mc; + } + } + + cg_set_loc(p->cg, loc); + cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob); + + if (nout > 0) { + u32 i; + for (i = nout; i-- > 0;) { + AsmOutLValue* lv = &out_lvs[i]; + cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); + cg_load(p->cg); + cg_deref(p->cg, lv->val_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + } + } +} + +void parse_compound_stmt(Parser* p) { + expect_punct(p, '{', "'{'"); + scope_push(p); + while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { + if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { + advance(p); + continue; + } + if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) { + parse_static_assert(p); + continue; + } + { + DeclSpecs specs; + Tok save_tok = p->cur; + (void)save_tok; + if (parse_decl_specs(p, &specs)) { + parse_local_decl(p, &specs); + } else { + parse_stmt(p); + } + } + } + expect_punct(p, '}', "'}'"); + scope_pop(p); +} + +void parse_stmt(Parser* p) { + cg_set_loc(p->cg, tok_loc_stmt(&p->cur)); + if (p->cur.kind == TOK_IDENT && ident_kw_stmt(p, p->cur.v.ident) == KW_NONE) { + Tok n = peek1(p); + if (is_punct(&n, ':')) { + parse_label_stmt(p); + return; + } + } + if (is_punct(&p->cur, '{')) { + parse_compound_stmt(p); + return; + } + if (is_punct(&p->cur, ';')) { + advance(p); + return; + } + if 
(is_kw(p, &p->cur, KW_IF)) { + advance(p); + parse_if_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_WHILE)) { + advance(p); + parse_while_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_FOR)) { + advance(p); + parse_for_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_DO)) { + advance(p); + parse_do_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_RETURN)) { + advance(p); + parse_return_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_BREAK)) { + advance(p); + parse_break_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_CONTINUE)) { + advance(p); + parse_continue_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_GOTO)) { + advance(p); + parse_goto_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_SWITCH)) { + advance(p); + parse_switch_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_CASE)) { + advance(p); + parse_case_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_DEFAULT)) { + advance(p); + parse_default_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) { + advance(p); + parse_asm_stmt(p); + return; + } + /* Expression statement. */ + parse_expr(p); + cg_drop(p->cg); + expect_punct(p, ';', "';' after expression"); +} diff --git a/src/parse/parse_type.c b/src/parse/parse_type.c @@ -0,0 +1,1121 @@ +/* parse_type.c — decl-specs, struct/union/enum, declarators, + * __attribute__ parsing. 
*/ + +#include "parse/parse_priv.h" + +/* ============================================================ + * Type helpers + * ============================================================ */ + +static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } +static const Type* ty_size_t(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + +/* ============================================================ + * GNU __attribute__ (Phase 1 — parse + carry; no semantic wire-up) + * ============================================================ */ + +static const struct { + const char* name; + AttrKind kind; + AttrArgShape shape; +} kAttrTable[] = { + {"packed", ATTR_PACKED, AS_NONE}, + {"aligned", ATTR_ALIGNED, AS_INT_OPT}, + {"section", ATTR_SECTION, AS_STRING}, + {"used", ATTR_USED, AS_NONE}, + {"noreturn", ATTR_NORETURN, AS_NONE}, + {"alias", ATTR_ALIAS, AS_STRING}, + {"weak", ATTR_WEAK, AS_NONE}, + {"visibility", ATTR_VISIBILITY, AS_STRING}, + {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE}, + {"noinline", ATTR_NOINLINE, AS_NONE}, + {"unused", ATTR_UNUSED, AS_NONE}, + {"deprecated", ATTR_DEPRECATED, AS_OPAQUE}, + {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE}, + {"format", ATTR_FORMAT, AS_FORMAT}, + {"nonnull", ATTR_NONNULL, AS_OPAQUE}, + {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE}, + {"pure", ATTR_PURE, AS_NONE}, + {"const", ATTR_CONST, AS_NONE}, + {"malloc", ATTR_MALLOC, AS_OPAQUE}, + {"nothrow", ATTR_NOTHROW, AS_NONE}, + {"leaf", ATTR_LEAF, AS_NONE}, + {"cold", ATTR_COLD, AS_NONE}, + {"hot", ATTR_HOT, AS_NONE}, + {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT}, + {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT}, + {"cleanup", ATTR_CLEANUP, AS_IDENT}, + {"mode", ATTR_MODE, AS_IDENT}, + {"vector_size", ATTR_VECTOR_SIZE, AS_INT}, + {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE}, + {"gnu_inline", ATTR_GNU_INLINE, AS_NONE}, + {"fallthrough", ATTR_FALLTHROUGH, AS_NONE}, + {"sentinel", ATTR_SENTINEL, AS_OPAQUE}, + {"no_instrument_function", 
/* Canonicalise an attribute spelling without copying it.
 *
 * A name of the form "__x__" (at least four characters, two leading and two
 * trailing underscores) is narrowed to the inner "x"; any other spelling is
 * passed through untouched. The result is a (pointer, length) view into `s`
 * and is NOT NUL-terminated at *out_len.
 *
 * `s` must be readable for `len` bytes whenever len >= 4. */
static void attr_canon_range(const char* s, size_t len,
                             const char** out_p, size_t* out_len) {
  int wrapped = 0;

  if (len >= 4) {
    wrapped = s[0] == '_' && s[1] == '_' &&
              s[len - 2] == '_' && s[len - 1] == '_';
  }

  *out_p = wrapped ? s + 2 : s;
  *out_len = wrapped ? len - 4 : len;
}
|| + shape == AS_OPAQUE) { + return; + } + perr(p, "attribute '%s' expects '(' arguments", attr_diag_name); + } + switch (shape) { + case AS_NONE: { + advance(p); /* '(' */ + if (!accept_punct(p, ')')) { + perr(p, "attribute '%s' takes no arguments", attr_diag_name); + } + return; + } + case AS_OPTIONAL: { + skip_balanced_parens(p); + return; + } + case AS_INT: + case AS_INT_OPT: { + SrcLoc loc; + advance(p); /* '(' */ + if (is_punct(&p->cur, ')')) { + if (shape == AS_INT) { + perr(p, "attribute '%s' expects an integer argument", + attr_diag_name); + } + advance(p); + return; + } + loc = tok_loc(&p->cur); + a->v.i = eval_const_int(p, loc); + a->nargs = 1; + expect_punct(p, ')', "')' after attribute integer argument"); + return; + } + case AS_STRING: { + advance(p); /* '(' */ + if (p->cur.kind != TOK_STR) { + perr(p, "attribute '%s' expects a string literal", attr_diag_name); + } + { + Tok t = p->cur; + size_t nlen = 0; + u8* bytes = decode_string_literal(p, &t, &nlen); + u32 ilen = (nlen > 0) ? 
(u32)(nlen - 1) : 0; + a->v.sym = pool_intern(p->c->global, (const char*)bytes, ilen); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + } + a->nargs = 1; + advance(p); + expect_punct(p, ')', "')' after attribute string argument"); + return; + } + case AS_IDENT: { + advance(p); /* '(' */ + if (p->cur.kind != TOK_IDENT) { + perr(p, "attribute '%s' expects an identifier", attr_diag_name); + } + a->v.sym = p->cur.v.ident; + a->nargs = 1; + advance(p); + expect_punct(p, ')', "')' after attribute identifier argument"); + return; + } + case AS_FORMAT: { + SrcLoc mloc, nloc; + i64 mv, nv; + advance(p); /* '(' */ + if (p->cur.kind != TOK_IDENT) { + perr(p, "attribute 'format' expects (archetype, m, n)"); + } + advance(p); + expect_punct(p, ',', "',' after format archetype"); + mloc = tok_loc(&p->cur); + mv = eval_const_int(p, mloc); + expect_punct(p, ',', "',' after format string-index"); + nloc = tok_loc(&p->cur); + nv = eval_const_int(p, nloc); + if (mv < 0 || mv > 0xFFFF || nv < 0 || nv > 0xFFFF) { + perr(p, "attribute 'format' indices out of range"); + } + a->v.format.fmt_idx = (u16)mv; + a->v.format.first = (u16)nv; + a->nargs = 3; + expect_punct(p, ')', "')' after format arguments"); + return; + } + case AS_OPAQUE: + default: { + skip_balanced_parens(p); + return; + } + } +} + +Attr* parse_attribute_spec_list(Parser* p) { + Attr* head = NULL; + Attr* tail = NULL; + while (starts_attr(p)) { + SrcLoc kw_loc = tok_loc(&p->cur); + advance(p); /* __attribute__ */ + expect_punct(p, '(', "'(' after __attribute__"); + expect_punct(p, '(', "'((' after __attribute__"); + for (;;) { + Sym aname; + AttrArgShape shape; + Attr* a; + const char* diag_name; + size_t diag_len; + const char* canon; + size_t canon_len; + while (accept_punct(p, ',')) { /* skip */ } + if (is_punct(&p->cur, ')')) break; + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected attribute name"); + } + aname = p->cur.v.ident; + a = arena_new(p->c->tu, Attr); + if (!a) perr(p, "out of memory in 
parse_attribute_spec_list"); + memset(a, 0, sizeof *a); + a->loc = tok_loc(&p->cur); + a->name = aname; + a->kind = (u16)classify_attr(p, aname, &shape); + advance(p); + diag_name = pool_str(p->pool, aname, &diag_len); + attr_canon_range(diag_name, diag_len, &canon, &canon_len); + (void)canon; (void)canon_len; + parse_attr_args(p, a, shape, diag_name ? diag_name : "<unknown>"); + if (tail) tail->next = a; else head = a; + tail = a; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, ')', "')' after attribute list"); + expect_punct(p, ')', "'))' after attribute list"); + (void)kw_loc; + } + return head; +} + +void parse_and_discard_attributes(Parser* p) { + (void)parse_attribute_spec_list(p); +} + +/* Append `add` to the end of `*head` (linked via Attr.next). */ +void attr_list_append(Attr** head, Attr* add) { + if (!add) return; + if (!*head) { *head = add; return; } + Attr* tail = *head; + while (tail->next) tail = tail->next; + tail->next = add; +} + +/* If `starts_attr`, parse and append to `*sink`. No-op otherwise. */ +void parse_attrs_into(Parser* p, Attr** sink) { + if (starts_attr(p)) attr_list_append(sink, parse_attribute_spec_list(p)); +} + +#define PARSE_ATTR_ALIGNED_DEFAULT 16u + +static void attrs_to_record_opts(const Attr* a, TypeRecordOpts* opts) { + for (; a; a = a->next) { + if (a->kind == ATTR_PACKED) { + opts->packed = 1; + } else if (a->kind == ATTR_ALIGNED) { + u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; + if (v > opts->align_override) opts->align_override = (u16)v; + } + } +} + +static void attrs_to_field(const Attr* a, Field* f) { + for (; a; a = a->next) { + if (a->kind == ATTR_PACKED) { + f->packed = 1; + } else if (a->kind == ATTR_ALIGNED) { + u32 v = (a->nargs == 0) ? 
PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; + if (v > f->align_override) f->align_override = (u16)v; + } + } +} + +u32 attrs_pick_aligned(const Attr* a) { + u32 best = 0; + for (; a; a = a->next) { + if (a->kind == ATTR_ALIGNED) { + u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i; + if (v > best) best = v; + } + } + return best; +} + +/* ============================================================ + * resolve_type_specs + * ============================================================ */ + +const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, SrcLoc loc) { + if (!a->saw_explicit_type) return NULL; + if (a->saw_void) { + if (a->saw_char || a->saw_int || a->saw_short || a->long_count || + a->saw_signed || a->saw_unsigned || a->saw_bool || a->saw_float || + a->saw_double) { + compiler_panic(p->c, loc, "conflicting type specifiers (void mixed)"); + } + return type_void(p->pool); + } + if (a->saw_bool) { + return type_prim(p->pool, TY_BOOL); + } + if (a->saw_char) { + if (a->saw_unsigned) return type_prim(p->pool, TY_UCHAR); + if (a->saw_signed) return type_prim(p->pool, TY_SCHAR); + return type_prim(p->pool, TY_CHAR); + } + if (a->saw_float) return type_prim(p->pool, TY_FLOAT); + if (a->saw_double) { + return type_prim(p->pool, a->long_count ? TY_LDOUBLE : TY_DOUBLE); + } + if (a->saw_short) { + return type_prim(p->pool, a->saw_unsigned ? TY_USHORT : TY_SHORT); + } + if (a->long_count == 2) { + return type_prim(p->pool, a->saw_unsigned ? TY_ULLONG : TY_LLONG); + } + if (a->long_count == 1) { + return type_prim(p->pool, a->saw_unsigned ? 
TY_ULONG : TY_LONG); + } + if (a->saw_unsigned) return type_prim(p->pool, TY_UINT); + if (a->saw_signed || a->saw_int) return type_prim(p->pool, TY_INT); + return type_prim(p->pool, TY_INT); +} + +/* ============================================================ + * parse_decl_specs + * ============================================================ */ + +int parse_decl_specs(Parser* p, DeclSpecs* out) { + TypeSpecAccum acc; + SrcLoc loc; + int seen = 0; + const Type* tagged_ty = NULL; + memset(&acc, 0, sizeof acc); + out->type = NULL; + out->storage = DS_AUTO; + out->flags = DF_NONE; + out->quals = 0; + out->align = 0; + out->vla_byte_slot = FRAME_SLOT_NONE; + out->attrs = NULL; + loc = tok_loc(&p->cur); + for (;;) { + Tok t = p->cur; + if (starts_attr(p)) { + Attr* a = parse_attribute_spec_list(p); + if (a) { + Attr* tail = a; + while (tail->next) tail = tail->next; + tail->next = out->attrs; + out->attrs = a; + } + seen = 1; + continue; + } + if (is_kw(p, &t, KW_STRUCT) || is_kw(p, &t, KW_UNION)) { + TypeKind kind = is_kw(p, &t, KW_STRUCT) ? 
TY_STRUCT : TY_UNION; + Attr* anon_attrs = NULL; + if (tagged_ty || acc.saw_explicit_type) { + perr(p, "conflicting type specifiers (struct/union mixed)"); + } + advance(p); + tagged_ty = parse_struct_or_union(p, kind, &anon_attrs); + attr_list_append(&out->attrs, anon_attrs); + acc.saw_explicit_type = 1; + seen = 1; + continue; + } + if (is_kw(p, &t, KW_ENUM)) { + Attr* anon_attrs = NULL; + if (tagged_ty || acc.saw_explicit_type) { + perr(p, "conflicting type specifiers (enum mixed)"); + } + advance(p); + tagged_ty = parse_enum(p, &anon_attrs); + attr_list_append(&out->attrs, anon_attrs); + acc.saw_explicit_type = 1; + seen = 1; + continue; + } + if (is_kw(p, &t, KW_VOID)) { + acc.saw_void = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_CHAR)) { + acc.saw_char = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_INT)) { + acc.saw_int = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_SHORT)) { + acc.saw_short = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_LONG)) { + acc.long_count++; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_SIGNED)) { + acc.saw_signed = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_UNSIGNED)) { + acc.saw_unsigned = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_BOOL)) { + acc.saw_bool = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_FLOAT)) { + acc.saw_float = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_DOUBLE)) { + acc.saw_double = 1; acc.saw_explicit_type = 1; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_STATIC)) { + out->storage = DS_STATIC; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_EXTERN)) { + out->storage = DS_EXTERN; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_CONST)) { + out->quals |= Q_CONST; advance(p); seen = 
1; + } else if (is_kw(p, &t, KW_VOLATILE)) { + out->quals |= Q_VOLATILE; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_RESTRICT)) { + out->quals |= Q_RESTRICT; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_ATOMIC)) { + Tok n = peek1(p); + if (is_punct(&n, '(')) { + const Type* inner; + if (tagged_ty || acc.saw_explicit_type) { + perr(p, "conflicting type specifiers (_Atomic(T) mixed)"); + } + advance(p); /* `_Atomic` */ + advance(p); /* `(` */ + inner = parse_type_name(p); + expect_punct(p, ')', "')' after _Atomic type"); + tagged_ty = type_qualified(p->pool, inner, Q_ATOMIC); + acc.saw_explicit_type = 1; + seen = 1; + continue; + } + out->quals |= Q_ATOMIC; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_TYPEDEF)) { + out->storage = DS_TYPEDEF; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_ALIGNAS)) { + u32 a = 0; + advance(p); /* `_Alignas` */ + expect_punct(p, '(', "'(' after _Alignas"); + if (starts_type_name(p, &p->cur)) { + const Type* tn = parse_type_name(p); + a = abi_alignof(p->abi, tn); + } else { + i64 v = eval_const_int(p, tok_loc(&p->cur)); + if (v < 0) perr(p, "_Alignas requires a non-negative alignment"); + a = (u32)v; + } + expect_punct(p, ')', "')' after _Alignas argument"); + if (a > out->align) out->align = a; + seen = 1; + } else if (is_kw(p, &t, KW_INLINE)) { + out->flags |= DF_INLINE; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_THREAD_LOCAL)) { + out->flags |= DF_THREAD; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) || + is_kw(p, &t, KW_AUTO)) { + advance(p); seen = 1; + } else if (!acc.saw_explicit_type && !tagged_ty && + t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) { + if (t.v.ident == p->sym_b_va_list) { + tagged_ty = abi_va_list_type(p->abi, p->pool); + acc.saw_explicit_type = 1; + advance(p); + seen = 1; + continue; + } + SymEntry* e = scope_lookup(p, t.v.ident); + if (e && e->kind == SEK_TYPEDEF) { + tagged_ty = e->type; + if (e->vla_byte_slot != 
FRAME_SLOT_NONE) { + out->vla_byte_slot = e->vla_byte_slot; + } + acc.saw_explicit_type = 1; + advance(p); + seen = 1; + continue; + } + break; + } else { + break; + } + } + if (seen) { + if (tagged_ty) { + out->type = tagged_ty; + } else { + out->type = resolve_type_specs(p, &acc, loc); + if (!out->type) { + out->type = ty_int(p); + } + } + } + return seen; +} + +/* ============================================================ + * struct / union / enum + * ============================================================ */ + +int find_field(TargetABI* abi, const Type* rec, Sym name, + const Type** out_type, u32* out_offset, + const Field** out_field) { + if (!rec || (rec->kind != TY_STRUCT && rec->kind != TY_UNION)) return 0; + const ABIRecordLayout* L = abi_record_layout(abi, rec); + if (!L) return 0; + for (u16 i = 0; i < rec->rec.nfields; ++i) { + const Field* f = &rec->rec.fields[i]; + if (f->name == name && name != 0) { + *out_type = f->type; + *out_offset = L->fields[i].offset; + *out_field = f; + return 1; + } + if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || + f->type->kind == TY_UNION)) { + const Type* inner_ty = NULL; + u32 inner_off = 0; + const Field* inner_f = NULL; + if (find_field(abi, f->type, name, &inner_ty, &inner_off, &inner_f)) { + *out_type = inner_ty; + *out_offset = L->fields[i].offset + inner_off; + *out_field = inner_f; + return 1; + } + } + } + return 0; +} + +static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { + while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { + DeclSpecs specs; + if (!parse_decl_specs(p, &specs)) { + perr(p, "expected member declaration"); + } + if (is_punct(&p->cur, ';')) { + if (specs.type && (specs.type->kind == TY_STRUCT || + specs.type->kind == TY_UNION)) { + Field f; + memset(&f, 0, sizeof f); + f.name = 0; + f.type = specs.type; + f.flags = FIELD_ANON; + type_record_field(b, f); + advance(p); + continue; + } + perr(p, "declaration without declarator must be anonymous 
aggregate"); + } + for (;;) { + Sym mname = 0; + SrcLoc mloc = tok_loc(&p->cur); + const Type* mty; + Field f; + memset(&f, 0, sizeof f); + if (is_punct(&p->cur, ':')) { + advance(p); + i64 w = eval_const_int(p, mloc); + f.name = 0; + f.type = specs.type; + f.bitfield_width = (u16)w; + f.flags = FIELD_BITFIELD; + if (w == 0) f.flags |= FIELD_ZERO_WIDTH; + attrs_to_field(specs.attrs, &f); + type_record_field(b, f); + if (!accept_punct(p, ',')) break; + continue; + } + Attr* mattrs = NULL; + mty = parse_declarator_full_ex(p, specs.type, /*allow_abstract=*/0, + &mname, &mloc, &mattrs); + if (accept_punct(p, ':')) { + i64 w = eval_const_int(p, mloc); + f.name = mname; + f.type = mty; + f.bitfield_width = (u16)w; + f.flags = FIELD_BITFIELD; + if (w == 0) f.flags |= FIELD_ZERO_WIDTH; + } else { + f.name = mname; + f.type = mty; + f.flags = FIELD_NONE; + } + attrs_to_field(specs.attrs, &f); + attrs_to_field(mattrs, &f); + { + Attr* trailing = NULL; + parse_attrs_into(p, &trailing); + attrs_to_field(trailing, &f); + } + type_record_field(b, f); + if (!accept_punct(p, ',')) break; + } + expect_punct(p, ';', "';' after struct member declaration"); + } +} + +const Type* parse_struct_or_union(Parser* p, TypeKind kind, + Attr** anon_attrs_out) { + Sym tag_name = 0; + SrcLoc tag_loc; + TagDeclKind tdk = (kind == TY_STRUCT) ? 
TAG_STRUCT : TAG_UNION; + Attr* rec_attrs = NULL; + parse_attrs_into(p, &rec_attrs); + tag_loc = tok_loc(&p->cur); + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + tag_name = p->cur.v.ident; + advance(p); + } + int has_body = is_punct(&p->cur, '{'); + if (!has_body && tag_name == 0) { + perr(p, "expected tag name or '{' after struct/union"); + } + if (!has_body) { + TagEntry* e = tag_lookup(p, tag_name); + if (e) { + if (e->kind != tdk) { + perr(p, "use of tag with wrong kind (struct vs union)"); + } + attr_list_append(&e->attrs, rec_attrs); + return e->type; + } + { + TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); + Type* t = type_record_forward(p->pool, kind, tid, tag_name); + TagEntry* te = tag_define(p, tag_name, tdk, t, /*complete=*/0); + attr_list_append(&te->attrs, rec_attrs); + return t; + } + } + Type* target = NULL; + TagEntry* existing = tag_name ? tag_lookup_local(p, tag_name) : NULL; + TagEntry* te = NULL; + if (existing) { + if (existing->kind != tdk) { + perr(p, "tag redeclared with wrong kind"); + } + if (existing->complete) { + perr(p, "redefinition of tag"); + } + target = existing->type; + te = existing; + } else { + TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); + target = type_record_forward(p->pool, kind, tid, tag_name); + if (tag_name) { + te = tag_define(p, tag_name, tdk, target, /*complete=*/0); + } + } + expect_punct(p, '{', "'{' to start aggregate body"); + TypeRecordBuilder* b = + type_record_begin(p->pool, kind, target->rec.tag_id, tag_name); + parse_member_decls(p, b); + expect_punct(p, '}', "'}' after aggregate body"); + parse_attrs_into(p, &rec_attrs); + if (te) { + attr_list_append(&te->attrs, rec_attrs); + } else if (anon_attrs_out) { + attr_list_append(anon_attrs_out, rec_attrs); + } + { + const Type* fresh = type_record_end(p->pool, b); + type_record_install(target, (Field*)fresh->rec.fields, + fresh->rec.nfields); + } + { + TypeRecordOpts opts; + memset(&opts, 0, sizeof opts); 
+ attrs_to_record_opts(rec_attrs, &opts); + if (opts.packed) target->rec.packed = 1; + if (opts.align_override > target->rec.align_override) + target->rec.align_override = opts.align_override; + } + if (existing) { + existing->complete = 1; + } + return target; +} + +const Type* parse_enum(Parser* p, Attr** anon_attrs_out) { + Sym tag_name = 0; + SrcLoc tag_loc; + Attr* rec_attrs = NULL; + parse_attrs_into(p, &rec_attrs); + tag_loc = tok_loc(&p->cur); + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + tag_name = p->cur.v.ident; + advance(p); + } + int has_body = is_punct(&p->cur, '{'); + if (!has_body && tag_name == 0) { + perr(p, "expected tag name or '{' after enum"); + } + if (!has_body) { + TagEntry* e = tag_lookup(p, tag_name); + if (e && e->kind == TAG_ENUM) { + attr_list_append(&e->attrs, rec_attrs); + return e->type; + } + TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc); + const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p)); + { + TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, + /*complete=*/0); + attr_list_append(&te->attrs, rec_attrs); + } + return et; + } + TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc); + const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p)); + expect_punct(p, '{', "'{'"); + i64 next_val = 0; + for (;;) { + Sym name; + SrcLoc nloc = tok_loc(&p->cur); + SymEntry* e; + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected enumerator name"); + } + name = p->cur.v.ident; + advance(p); + i64 val = next_val; + if (accept_punct(p, '=')) { + val = eval_const_int(p, nloc); + } + e = scope_define(p, name, SEK_ENUM_CST, et); + e->v.enum_value = val; + next_val = val + 1; + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + expect_punct(p, '}', "'}' after enumerator list"); + parse_attrs_into(p, &rec_attrs); + if (tag_name) { + TagEntry* existing = tag_lookup_local(p, tag_name); + if (existing) { 
+ if (existing->kind != TAG_ENUM) { + perr(p, "tag redeclared with wrong kind"); + } + existing->complete = 1; + attr_list_append(&existing->attrs, rec_attrs); + } else { + TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, + /*complete=*/1); + attr_list_append(&te->attrs, rec_attrs); + } + } else if (anon_attrs_out) { + attr_list_append(anon_attrs_out, rec_attrs); + } + return et; +} + +/* ============================================================ + * starts_type_name, parse_pointer_layer, parse_type_name + * ============================================================ */ + +int starts_type_name(const Parser* p, const Tok* t) { + if (t->kind != TOK_IDENT) return 0; + CKw k = ident_kw(p, t->v.ident); + switch (k) { + case KW_VOID: + case KW_CHAR: + case KW_SHORT: + case KW_INT: + case KW_LONG: + case KW_FLOAT: + case KW_DOUBLE: + case KW_SIGNED: + case KW_UNSIGNED: + case KW_BOOL: + case KW_STRUCT: + case KW_UNION: + case KW_ENUM: + case KW_CONST: + case KW_VOLATILE: + case KW_RESTRICT: + case KW_ATOMIC: + case KW_STATIC: + case KW_EXTERN: + case KW_INLINE: + case KW_NORETURN: + case KW_REGISTER: + case KW_AUTO: + case KW_TYPEDEF: + case KW_ALIGNAS: + case KW_THREAD_LOCAL: + return 1; + case KW_NONE: { + if (t->v.ident == p->sym_b_va_list) return 1; + SymEntry* e = scope_lookup((Parser*)p, t->v.ident); + return e && e->kind == SEK_TYPEDEF; + } + default: + return 0; + } +} + +const Type* parse_pointer_layer(Parser* p, const Type* base) { + while (accept_punct(p, '*')) { + u16 q = 0; + base = type_ptr(p->pool, base); + for (;;) { + if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } + if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } + if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } + if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } + if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } + break; + } + if (q) base = type_qualified(p->pool, base, q); + } + return base; +} + +const Type* 
parse_type_name(Parser* p) { + DeclSpecs specs; + Sym dummy_name = 0; + SrcLoc dummy_loc = {0, 0, 0}; + if (!parse_decl_specs(p, &specs)) { + perr(p, "expected type-name"); + } + return parse_declarator_full(p, specs.type, /*allow_abstract=*/1, + &dummy_name, &dummy_loc); +} + +/* ============================================================ + * Declarator suffix helpers + * (DeclSuffix / DSuffKind defined in parse_priv.h) + * ============================================================ */ + +int parse_decl_suffix(Parser* p, DeclSuffix* out) { + if (accept_punct(p, '[')) { + out->kind = DS_ARRAY; + out->count = 0; + out->incomplete = 0; + out->vla = 0; + for (;;) { + if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) || + accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) || + accept_kw(p, KW_ATOMIC)) { + continue; + } + break; + } + if (accept_punct(p, ']')) { + out->incomplete = 1; + return 1; + } + if (p->in_param_decl) { + int depth = 1; + while (depth > 0) { + if (p->cur.kind == TOK_EOF) { + perr(p, "unexpected EOF in parameter array bound"); + } + if (is_punct(&p->cur, '[')) ++depth; + else if (is_punct(&p->cur, ']')) { + --depth; + if (depth == 0) break; + } + advance(p); + } + out->incomplete = 1; + expect_punct(p, ']', "']' after array size"); + return 1; + } + { + Tok t = p->cur; + int is_const_start = (t.kind == TOK_NUM || t.kind == TOK_CHR); + if (!is_const_start && t.kind == TOK_IDENT) { + SymEntry* e = scope_lookup(p, t.v.ident); + if (e && e->kind == SEK_ENUM_CST) is_const_start = 1; + if (!is_const_start) { + CKw k = ident_kw(p, t.v.ident); + if (k == KW_SIZEOF || k == KW_ALIGNOF) is_const_start = 1; + } + } + if (is_const_start) { + SrcLoc cloc = tok_loc(&p->cur); + i64 v = eval_const_int(p, cloc); + if (v < 0) perr(p, "negative array size"); + out->count = (u32)v; + } else { + FrameSlotDesc fsd; + if (p->vla_pending) { + perr(p, "v1 supports only one VLA dimension per declarator"); + } + out->vla = 1; + memset(&fsd, 0, sizeof fsd); + 
fsd.type = ty_size_t(p); + fsd.size = abi_sizeof(p->abi, fsd.type); + fsd.align = abi_alignof(p->abi, fsd.type); + fsd.kind = FS_LOCAL; + out->vla_count_slot = cg_local(p->cg, &fsd); + parse_assign_expr(p); + to_rvalue(p); + cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + p->vla_pending = 1; + p->vla_pending_count_slot = out->vla_count_slot; + } + } + expect_punct(p, ']', "']' after array size"); + return 1; + } + if (accept_punct(p, '(')) { + out->kind = DS_FUNC; + out->params = NULL; + out->nparams = 0; + out->variadic = 0; + parse_param_list(p, &out->params, &out->nparams, &out->variadic); + expect_punct(p, ')', "')' after parameter list"); + return 1; + } + return 0; +} + +const Type* apply_decl_suffix(Parser* p, const Type* base, + const DeclSuffix* s) { + if (s->kind == DS_ARRAY) { + return type_array(p->pool, base, s->count, s->incomplete || s->vla); + } + { + const Type** ptypes = NULL; + if (s->nparams) { + ptypes = (const Type**)arena_array(p->c->tu, const Type*, s->nparams); + for (u16 i = 0; i < s->nparams; ++i) ptypes[i] = s->params[i].type; + } + return type_func(p->pool, base, ptypes, s->nparams, (int)s->variadic); + } +} + +/* ============================================================ + * parse_declarator_full, parse_declarator_full_ex, parse_declarator + * ============================================================ */ + +const Type* parse_declarator_full(Parser* p, const Type* base, + int allow_abstract, Sym* name_out, + SrcLoc* loc_out) { + return parse_declarator_full_ex(p, base, allow_abstract, name_out, loc_out, + NULL); +} + +const Type* parse_declarator_full_ex(Parser* p, const Type* base, + int allow_abstract, Sym* name_out, + SrcLoc* loc_out, + Attr** attrs_out) { + base = parse_pointer_layer(p, base); + + Sym name = 0; + SrcLoc nloc = {0, 0, 0}; + u8 nptrs_inner = 0; + u16 inner_quals[8]; + int has_inner_parens = 0; + DeclSuffix inner_suffs[8]; + int n_inner_suffs = 0; 
+ + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + int is_inner = 0; + if (is_punct(&n, '*')) { + is_inner = 1; + } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) { + SymEntry* e = scope_lookup(p, n.v.ident); + if (!(e && e->kind == SEK_TYPEDEF)) is_inner = 1; + } + if (is_inner) { + has_inner_parens = 1; + advance(p); /* '(' */ + while (accept_punct(p, '*')) { + u16 q = 0; + if (nptrs_inner >= 8) perr(p, "too many pointer levels"); + for (;;) { + if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } + if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } + if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } + if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } + if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } + break; + } + inner_quals[nptrs_inner++] = q; + } + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + name = p->cur.v.ident; + nloc = tok_loc(&p->cur); + advance(p); + } else if (!allow_abstract) { + perr(p, "expected declarator name"); + } + if (starts_attr(p)) parse_and_discard_attributes(p); + while (n_inner_suffs < 8) { + if (!parse_decl_suffix(p, &inner_suffs[n_inner_suffs])) break; + ++n_inner_suffs; + if (starts_attr(p)) parse_and_discard_attributes(p); + } + expect_punct(p, ')', "')' after inner declarator"); + } + } + + if (!has_inner_parens) { + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + name = p->cur.v.ident; + nloc = tok_loc(&p->cur); + advance(p); + } else if (!allow_abstract) { + perr(p, "expected declarator name"); + } + } + + if (starts_attr(p)) { + if (attrs_out) parse_attrs_into(p, attrs_out); + else parse_and_discard_attributes(p); + } + + DeclSuffix suffs[8]; + int nsuffs = 0; + while (nsuffs < 8) { + if (!parse_decl_suffix(p, &suffs[nsuffs])) break; + ++nsuffs; + if (starts_attr(p)) { + if (attrs_out) parse_attrs_into(p, attrs_out); + else parse_and_discard_attributes(p); + } + } + if (nsuffs == 8 && 
(is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) { + perr(p, "too many declarator suffixes (raise the cap if needed)"); + } + for (int i = nsuffs - 1; i >= 0; --i) { + base = apply_decl_suffix(p, base, &suffs[i]); + } + + for (int i = (int)nptrs_inner - 1; i >= 0; --i) { + base = type_ptr(p->pool, base); + if (inner_quals[i]) { + base = type_qualified(p->pool, base, inner_quals[i]); + } + } + + for (int i = n_inner_suffs - 1; i >= 0; --i) { + base = apply_decl_suffix(p, base, &inner_suffs[i]); + } + + if (name_out) *name_out = name; + if (loc_out) *loc_out = nloc; + return base; +} + +const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, + SrcLoc* loc_out) { + return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out); +} + +/* ============================================================ + * complete_incomplete_array + * ============================================================ */ + +const Type* complete_incomplete_array(Parser* p, const Type* ty) { + const Type* elem; + if (!ty || ty->kind != TY_ARRAY || !ty->arr.incomplete) return ty; + elem = ty->arr.elem; + if (is_char_kind(elem) && p->cur.kind == TOK_STR) { + Tok t = p->cur; + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + return type_array(p->pool, elem, (u32)n, /*incomplete=*/0); + } + if (is_punct(&p->cur, '{')) { + u32 cnt; + record_braced_block(p); + cnt = count_recorded_top_level_items(p->replay, p->replay_len); + if (cnt == 1 && p->replay_len >= 3 && p->replay[1].kind == TOK_STR && + is_char_kind(elem)) { + Tok t = p->replay[1]; + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + cnt = (u32)n; + } + replay_rewind(p); + return type_array(p->pool, elem, cnt, /*incomplete=*/0); + } + perr(p, "initializer cannot complete incomplete array type"); +} diff --git a/src/pp/pp.c b/src/pp/pp.c @@ -7,352 +7,12 @@ * The token-source stack 
carries either a Lexer (file or #include'd file) or * a pre-built Tok[] buffer (macro expansion). Each buffer token carries a * hideset (Prosser, the standard's "nested-replacement" rule) recording - * which macro names it must not be re-expanded by during rescan. */ - -#include "pp/pp.h" - -#include <stdlib.h> -#include <string.h> - -#include "core/arena.h" -#include "core/diag.h" -#include "core/heap.h" -#include "core/pool.h" - -/* ============================================================ - * Internal types - * ============================================================ */ - -typedef struct Macro { - Sym name; - SrcLoc def_loc; - u8 is_func; - u8 is_variadic; - u8 pad[2]; - u32 n_params; - Sym* params; /* parameter names */ - Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */ - u32 body_len; -} Macro; - -/* Internal token kinds. Outside the range used by the lexer - * (TOK_KW_LAST = 0x1000). */ -#define TOK_PP_PARAM ((u16)0x1100) -#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */ - -typedef u32 HidesetId; -#define HS_EMPTY 0u - -typedef struct Hideset { - u32 n; - Sym names[1]; /* flexible; allocated with extra trailing slots */ -} Hideset; - -typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind; - -typedef struct TokSrc { - u8 kind; - /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is - * the top source and it's exhausted, instead of popping. The caller - * (e.g. argument pre-expansion) explicitly pops the scope when done. - * This bounds expansion to a single argument's token stream. */ - u8 scope_top; - u8 pad[2]; - /* SRC_LEX */ - Lexer* lex; - /* SRC_BUF */ - Tok* toks; - HidesetId* hs; - u32 i; - u32 n; - /* #line state (SRC_LEX only). line_delta is added to every emitted - * token's loc.line on its way out so __LINE__ and the output cursor - * see user-visible numbering. file_override is the Sym (without - * surrounding quotes) used by __FILE__ when set. 
*/ - i32 line_delta; - Sym file_override; -} TokSrc; - -/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with - * deletion (#undef). See core/hashmap.h. */ -#include "core/hashmap.h" -static inline u32 macro_hash_(Sym s) { return hash_u32((u32)s); } -HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_); - -typedef enum IfState { - IF_INCLUDE = 1, /* group active, emit code */ - IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */ - IF_DONE = 3, /* skip, already had a true branch */ -} IfState; - -typedef struct IfFrame { - u8 state; - u8 has_else; - u8 pad[2]; - SrcLoc loc; -} IfFrame; - -struct Pp { - Compiler* c; - - /* Source stack — top of stack is sources[nsources-1]. */ - TokSrc* sources; - u32 nsources; - u32 sources_cap; - - /* Macro table (open-addressed; key = Sym, value = Macro*). */ - MacroMap mtab; - - /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */ - IfFrame* ifstk; - u32 ifstk_n; - u32 ifstk_cap; - - /* Hideset table. Element 0 reserved as HS_EMPTY. */ - Hideset** hsets; - u32 hsets_n; - u32 hsets_cap; - - /* Include directories (stage 9). */ - struct { - const char* path; - u8 system; - }* inc_dirs; - u32 ninc_dirs; - u32 inc_dirs_cap; - - /* Internal arena: macro bodies, hidesets, expansion buffers, file - * data for #include. Lives until pp_free. */ - Arena arena; - - /* Cached interned identifiers used for directive recognition. 
*/ - Sym sym_define; - Sym sym_undef; - Sym sym_include; - Sym sym_if; - Sym sym_ifdef; - Sym sym_ifndef; - Sym sym_elif; - Sym sym_else; - Sym sym_endif; - Sym sym_line; - Sym sym_pragma; - Sym sym_error; - Sym sym_embed; - Sym sym_defined; - Sym sym_va_args; - Sym sym_line__; /* __LINE__ */ - Sym sym_file__; /* __FILE__ */ - Sym sym_date__; /* __DATE__ */ - Sym sym_time__; /* __TIME__ */ - Sym sym_stdc__; /* __STDC__ */ - Sym sym_stdc_hosted__; - Sym sym_stdc_version__; - Sym sym__pragma; /* _Pragma operator */ - Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */ - - /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for - * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or - * time(NULL) if unset). */ - Sym val_date_str; - Sym val_time_str; -}; - -/* ============================================================ - * Allocation helpers - * ============================================================ */ - -static Heap* pp_heap(Pp* pp) { return (Heap*)pp->c->env->heap; } - -static void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n, - size_t align) { - Heap* h = pp_heap(pp); - void* q = h->realloc(h, p, old_n, new_n, align); - if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory"); - return q; -} - -static void pp_xfree(Pp* pp, void* p, size_t n) { - if (p) pp_heap(pp)->free(pp_heap(pp), p, n); -} - -/* ============================================================ - * Token-vector helpers (used by directive readers, macro expansion, - * pre-expansion of arguments, and the substitute / paste phases). - * ============================================================ */ - -typedef struct TokVec { - Tok* data; - u32 n; - u32 cap; -} TokVec; - -static void tv_grow(Pp* pp, TokVec* v, u32 want) { - u32 nc; - if (v->cap >= want) return; - nc = v->cap ? 
v->cap * 2 : 8; - while (nc < want) nc *= 2; - { - Tok* nb = arena_array(&pp->arena, Tok, nc); - if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n); - v->data = nb; - v->cap = nc; - } -} - -static void tv_push(Pp* pp, TokVec* v, Tok t) { - tv_grow(pp, v, v->n + 1); - v->data[v->n++] = t; -} - -/* Growable char buffer (arena-backed) used by stringize, #error message - * concat, and a few other byte-level helpers. */ -typedef struct CharBuf { - char* data; - u32 len; - u32 cap; -} CharBuf; - -static void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) { - if (b->len + n > b->cap) { - u32 nc = b->cap ? b->cap * 2 : 64; - while (nc < b->len + n) nc *= 2; - { - char* nb = (char*)arena_alloc(&pp->arena, nc, 1); - if (b->len) memcpy(nb, b->data, b->len); - b->data = nb; - b->cap = nc; - } - } - if (n) memcpy(b->data + b->len, s, n); - b->len += n; -} - -static void cb_putc(Pp* pp, CharBuf* b, char c) { cb_append(pp, b, &c, 1); } - -/* ============================================================ - * Hideset table - * ============================================================ */ - -static int sym_in_array(const Sym* a, u32 n, Sym s) { - u32 i; - for (i = 0; i < n; ++i) - if (a[i] == s) return 1; - return 0; -} - -static HidesetId hs_register(Pp* pp, const Sym* names, u32 n) { - Hideset* h; - u32 i; - if (n == 0) return HS_EMPTY; - - /* Linear search for an existing identical hideset. Hidesets are tiny. */ - for (i = 1; i < pp->hsets_n; ++i) { - Hideset* e = pp->hsets[i]; - if (e->n != n) continue; - { - u32 j; - for (j = 0; j < n; ++j) - if (e->names[j] != names[j]) break; - if (j == n) return (HidesetId)i; - } - } - - if (pp->hsets_n == pp->hsets_cap) { - u32 nc = pp->hsets_cap ? pp->hsets_cap * 2 : 8; - pp->hsets = - (Hideset**)pp_xrealloc(pp, pp->hsets, sizeof(Hideset*) * pp->hsets_cap, - sizeof(Hideset*) * nc, _Alignof(Hideset*)); - pp->hsets_cap = nc; - } - h = (Hideset*)arena_alloc(&pp->arena, - sizeof(Hideset) + sizeof(Sym) * (n ? 
n - 1 : 0), - _Alignof(Hideset)); - h->n = n; - for (i = 0; i < n; ++i) h->names[i] = names[i]; - pp->hsets[pp->hsets_n] = h; - return (HidesetId)pp->hsets_n++; -} - -static int hs_contains(Pp* pp, HidesetId id, Sym s) { - Hideset* h; - if (id == HS_EMPTY || s == 0) return 0; - h = pp->hsets[id]; - return sym_in_array(h->names, h->n, s); -} - -static HidesetId hs_add(Pp* pp, HidesetId id, Sym s) { - Sym buf[64]; - Hideset* h; - u32 n; - u32 i; - - if (s == 0) return id; - if (hs_contains(pp, id, s)) return id; - - n = (id == HS_EMPTY) ? 0 : pp->hsets[id]->n; - if (n + 1 > sizeof(buf) / sizeof(buf[0])) { - compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: hideset overflow"); - } - if (id != HS_EMPTY) { - h = pp->hsets[id]; - for (i = 0; i < h->n; ++i) buf[i] = h->names[i]; - } - /* Keep sorted (numerically) for canonical hideset identity. */ - { - u32 pos = n; - while (pos > 0 && buf[pos - 1] > s) { - buf[pos] = buf[pos - 1]; - --pos; - } - buf[pos] = s; - } - return hs_register(pp, buf, n + 1); -} - -/* Used by token-paste in stage 5; declared early so the rest of the file - * doesn't grow forward decls. */ -__attribute__((unused)) static HidesetId hs_intersect(Pp* pp, HidesetId a, - HidesetId b) { - Sym buf[64]; - Hideset *ha, *hb; - u32 i, j, k; - if (a == HS_EMPTY || b == HS_EMPTY) return HS_EMPTY; - if (a == b) return a; - ha = pp->hsets[a]; - hb = pp->hsets[b]; - /* Both sorted; standard merge intersection. */ - i = j = k = 0; - while (i < ha->n && j < hb->n) { - if (ha->names[i] == hb->names[j]) { - buf[k++] = ha->names[i]; - ++i; - ++j; - } else if (ha->names[i] < hb->names[j]) { - ++i; - } else { - ++j; - } - } - return hs_register(pp, buf, k); -} - -/* ============================================================ - * Macro table - * ============================================================ */ - -/* Thin wrappers over the generated MacroMap_* functions; preserved - * because the call sites are tagged "mt_*" throughout this TU. 
*/ -static Macro* mt_get(Pp* pp, Sym name) { - Macro** v = MacroMap_get(&pp->mtab, name); - return v ? *v : NULL; -} - -static void mt_put(Pp* pp, Sym name, Macro* m) { - (void)MacroMap_set(&pp->mtab, name, m); -} + * which macro names it must not be re-expanded by during rescan. + * + * Residual module: source stack, pp_next / pp_next_raw (public streaming), + * pp_new/free, predefined macros, lifecycle, keyword interning. */ -static void mt_del(Pp* pp, Sym name) { MacroMap_del(&pp->mtab, name); } +#include "pp/pp_priv.h" /* ============================================================ * Source stack @@ -362,7 +22,7 @@ static TokSrc* src_top(Pp* pp) { return pp->nsources ? &pp->sources[pp->nsources - 1] : NULL; } -static void src_push(Pp* pp, TokSrc s) { +void src_push(Pp* pp, TokSrc s) { if (pp->nsources == pp->sources_cap) { u32 nc = pp->sources_cap ? pp->sources_cap * 2 : 8; pp->sources = @@ -373,7 +33,7 @@ static void src_push(Pp* pp, TokSrc s) { pp->sources[pp->nsources++] = s; } -static void src_pop(Pp* pp) { +void src_pop(Pp* pp) { TokSrc* t; if (!pp->nsources) return; t = &pp->sources[pp->nsources - 1]; @@ -390,7 +50,7 @@ static void src_pop(Pp* pp) { * (SRC_LEX vs SRC_BUF). Used by pp_next_raw to gate directive recognition * to lex-sourced tokens only — a `#` produced by macro expansion never * starts a directive (§6.10.3.4 ¶3, covered by `63_rescan_not_directive`). 
*/ -static Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) { +Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) { Tok t; TokSrc* s; while ((s = src_top(pp)) != NULL) { @@ -444,7 +104,7 @@ static Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) { * Buffer source push helpers * ============================================================ */ -static void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) { +void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) { TokSrc s; memset(&s, 0, sizeof(s)); s.kind = SRC_BUF; @@ -456,2109 +116,9 @@ static void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) { } /* ============================================================ - * Directive parsing - * ============================================================ */ - -/* Read tokens up through (and including) the next TOK_NEWLINE / TOK_EOF. - * Drops the newline; collected tokens are arena-allocated and returned via - * *out_toks/out_n. */ -static void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n) { - Tok* buf = NULL; - u32 cap = 0, n = 0; - Tok t; - HidesetId hs; - for (;;) { - t = src_next_raw(pp, &hs, NULL); - if (t.kind == TOK_NEWLINE || t.kind == TOK_EOF) break; - if (n == cap) { - u32 nc = cap ? cap * 2 : 8; - Tok* nb = (Tok*)arena_alloc(&pp->arena, sizeof(Tok) * nc, _Alignof(Tok)); - if (cap) memcpy(nb, buf, sizeof(Tok) * cap); - buf = nb; - cap = nc; - } - buf[n++] = t; - } - *out_toks = buf; - *out_n = n; -} - -static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb) { - u32 i; - if (na != nb) return 0; - for (i = 0; i < na; ++i) { - if (a[i].kind != b[i].kind) return 0; - if (a[i].spelling != b[i].spelling) return 0; - /* Whitespace separation must match (§6.10.3 ¶2). The first body - * token's leading-space bit is meaningless (it's whatever was - * between macro name and body); skip i==0 for that bit. 
*/ - if (i > 0) { - if ((a[i].flags & TF_HAS_SPACE) != (b[i].flags & TF_HAS_SPACE)) { - return 0; - } - } - } - return 1; -} - -static int macros_equal(const Macro* a, const Macro* b) { - if (a->is_func != b->is_func) return 0; - if (a->is_variadic != b->is_variadic) return 0; - if (a->n_params != b->n_params) return 0; - { - u32 i; - for (i = 0; i < a->n_params; ++i) { - if (a->params[i] != b->params[i]) return 0; - } - } - return body_tokens_equal(a->body, a->body_len, b->body, b->body_len); -} - -static void do_define(Pp* pp, const Tok* line, u32 n) { - Macro* m; - u32 i = 0; - Sym name; - SrcLoc def_loc; - Macro* existing; - - if (i >= n || line[i].kind != TOK_IDENT) { - compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0}, - "#define: expected macro name"); - } - name = line[i].v.ident; - def_loc = line[i].loc; - ++i; - - m = arena_znew(&pp->arena, Macro); - m->name = name; - m->def_loc = def_loc; - - /* Function-like vs object-like: '(' immediately after the name with no - * intervening whitespace. */ - if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == '(' && - (line[i].flags & TF_HAS_SPACE) == 0) { - Sym* params = NULL; - u32 pcap = 0, pn = 0; - ++i; - m->is_func = 1; - if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ')') { - ++i; - } else { - for (;;) { - if (i >= n) { - compiler_panic(pp->c, def_loc, - "#define: unterminated parameter list"); - } - if (line[i].kind == TOK_PUNCT && line[i].v.punct == P_ELLIPSIS) { - /* Append a synthetic __VA_ARGS__ param so body-rewrite - * matches the standard identifier directly. */ - if (pn == pcap) { - u32 nc = pcap ? pcap * 2 : 4; - Sym* nb = arena_array(&pp->arena, Sym, nc); - if (pcap) memcpy(nb, params, sizeof(Sym) * pcap); - params = nb; - pcap = nc; - } - params[pn++] = pp->sym_va_args; - m->is_variadic = 1; - ++i; - } else if (line[i].kind == TOK_IDENT) { - if (pn == pcap) { - u32 nc = pcap ? 
pcap * 2 : 4; - Sym* nb = arena_array(&pp->arena, Sym, nc); - if (pcap) memcpy(nb, params, sizeof(Sym) * pcap); - params = nb; - pcap = nc; - } - params[pn++] = line[i].v.ident; - ++i; - } else { - compiler_panic(pp->c, line[i].loc, "#define: bad parameter list"); - } - if (i >= n) { - compiler_panic(pp->c, def_loc, - "#define: unterminated parameter list"); - } - if (line[i].kind == TOK_PUNCT && line[i].v.punct == ')') { - ++i; - break; - } - if (m->is_variadic) { - compiler_panic(pp->c, line[i].loc, - "#define: '...' must be last parameter"); - } - if (line[i].kind == TOK_PUNCT && line[i].v.punct == ',') { - ++i; - continue; - } - compiler_panic(pp->c, line[i].loc, "#define: expected ',' or ')'"); - } - } - m->params = params; - m->n_params = pn; - } - - /* Refuse define/undef of a few names the spec reserves: `defined` - * and a small set of mandatory predefined macros. */ - if (name == pp->sym_defined || name == pp->sym_line__ || - name == pp->sym_file__ || name == pp->sym_date__ || - name == pp->sym_time__) { - compiler_panic(pp->c, def_loc, - "#define of a reserved / predefined name is not allowed"); - } - /* Static predefineds are already in the macro table; redefining - * with a different body is caught by the existing macros_equal - * check below, but #define of __STDC__ et al. with the SAME body - * should also be rejected. */ - if (name == pp->sym_stdc__ || name == pp->sym_stdc_hosted__ || - name == pp->sym_stdc_version__) { - /* Allow re-registration of the predefined value at pp_new time - * but reject user-level redefinition. We detect "user-level" - * by checking whether it's already in the table — at pp_new the - * first call goes through cleanly. */ - if (mt_get(pp, name)) { - compiler_panic(pp->c, def_loc, - "#define of a mandatory predefined macro is not allowed"); - } - } - - /* Body: rewrite parameter occurrences to TOK_PP_PARAM. */ - { - u32 body_n = n - i; - u32 j; - m->body = body_n ? 
arena_array(&pp->arena, Tok, body_n) : NULL; - m->body_len = body_n; - for (j = 0; j < body_n; ++j) { - Tok t = line[i + j]; - if (m->is_func && t.kind == TOK_IDENT) { - u32 p; - for (p = 0; p < m->n_params; ++p) { - if (m->params[p] == t.v.ident) { - t.kind = TOK_PP_PARAM; - t.v.punct = p; - break; - } - } - } - /* §6.10.3 ¶5: __VA_ARGS__ outside a variadic macro is - * undefined behavior; we diagnose. */ - if (!m->is_variadic && t.kind == TOK_IDENT && - t.v.ident == pp->sym_va_args) { - compiler_panic(pp->c, t.loc, - "__VA_ARGS__ may only appear in a variadic macro body"); - } - m->body[j] = t; - } - /* Drop the leading-space bit on the first body token: it reflects - * the whitespace between the macro name (or close-paren) and the - * body, which is irrelevant to expansion output. */ - if (m->body_len) m->body[0].flags &= (u16)~TF_HAS_SPACE; - } - - existing = mt_get(pp, name); - if (existing) { - if (!macros_equal(existing, m)) { - compiler_panic(pp->c, def_loc, - "macro redefined with different replacement"); - } - return; - } - mt_put(pp, name, m); -} - -static void do_undef(Pp* pp, const Tok* line, u32 n) { - Sym name; - if (!n || line[0].kind != TOK_IDENT) { - compiler_panic(pp->c, n ? 
line[0].loc : (SrcLoc){0, 0, 0}, - "#undef: expected identifier"); - } - name = line[0].v.ident; - if (name == pp->sym_defined || name == pp->sym_line__ || - name == pp->sym_file__ || name == pp->sym_date__ || - name == pp->sym_time__ || name == pp->sym_stdc__ || - name == pp->sym_stdc_hosted__ || name == pp->sym_stdc_version__) { - compiler_panic(pp->c, line[0].loc, - "#undef of a mandatory predefined name is not allowed"); - } - mt_del(pp, name); -} - -/* ============================================================ - * Conditional inclusion (§6.10.1) + * Public streaming entries * ============================================================ */ -static void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out); -static int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out); - -static void if_push(Pp* pp, IfFrame f) { - if (pp->ifstk_n == pp->ifstk_cap) { - u32 nc = pp->ifstk_cap ? pp->ifstk_cap * 2 : 4; - pp->ifstk = pp_xrealloc(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap, - sizeof(IfFrame) * nc, _Alignof(IfFrame)); - pp->ifstk_cap = nc; - } - pp->ifstk[pp->ifstk_n++] = f; -} - -static IfFrame* if_top(Pp* pp) { - return pp->ifstk_n ? &pp->ifstk[pp->ifstk_n - 1] : NULL; -} - -static void if_pop(Pp* pp) { - if (pp->ifstk_n) --pp->ifstk_n; -} - -/* Parse a C integer constant from a pp-number's spelling. Suffixes (u, l, - * etc.) are ignored. Recognizes decimal, hex (0x...), and octal (0...). 
*/ -static i64 parse_pp_int(const char* s, size_t n) { - int base = 10; - size_t i = 0; - i64 val = 0; - if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - base = 16; - i = 2; - } else if (n >= 1 && s[0] == '0') { - base = 8; - i = 1; - } - for (; i < n; ++i) { - char c = s[i]; - int d; - if (c >= '0' && c <= '9') - d = c - '0'; - else if (base == 16 && c >= 'a' && c <= 'f') - d = c - 'a' + 10; - else if (base == 16 && c >= 'A' && c <= 'F') - d = c - 'A' + 10; - else - break; - if (d >= base) break; - val = val * (i64)base + (i64)d; - } - return val; -} - -/* Pre-pass: replace `defined X` / `defined ( X )` with a 0/1 pp-number, - * preserving the rest of the token sequence. The operand of `defined` is - * NOT macro-expanded. Output is a fresh TokVec. */ -static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) { - u32 i; - for (i = 0; i < nin; ++i) { - if (in[i].kind == TOK_IDENT && in[i].v.ident == pp->sym_defined) { - int has_paren = 0; - Sym ident = 0; - u32 j = i + 1; - if (j < nin && in[j].kind == TOK_PUNCT && in[j].v.punct == '(') { - has_paren = 1; - ++j; - } - if (j >= nin || in[j].kind != TOK_IDENT) { - compiler_panic(pp->c, in[i].loc, - "operand of 'defined' must be an identifier"); - } - ident = in[j].v.ident; - ++j; - if (has_paren) { - if (j >= nin || in[j].kind != TOK_PUNCT || in[j].v.punct != ')') { - compiler_panic(pp->c, in[i].loc, - "expected ')' after 'defined' operand"); - } - ++j; - } - { - Tok t; - memset(&t, 0, sizeof(t)); - t.kind = TOK_NUM; - t.flags = in[i].flags & (TF_AT_BOL | TF_HAS_SPACE); - t.loc = in[i].loc; - t.spelling = - pool_intern_cstr(pp->c->global, mt_get(pp, ident) ? "1" : "0"); - tv_push(pp, out, t); - } - i = j - 1; - } else { - tv_push(pp, out, in[i]); - } - } -} - -/* Macro-expand a sequence of pre-#if tokens to completion. Wraps the - * fixed-buffer arg pre-expansion machinery with TOK_IDENT → 0 - * substitution per §6.10.1 ¶4. 
*/ -static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) { - Tok* slice; - if (nin == 0) return; - slice = arena_array(&pp->arena, Tok, nin); - memcpy(slice, in, sizeof(Tok) * nin); - expand_arg_to_eof(pp, slice, nin, out); - /* Replace remaining identifiers with `0`. */ - { - u32 i; - Sym zero = pool_intern_cstr(pp->c->global, "0"); - for (i = 0; i < out->n; ++i) { - if (out->data[i].kind == TOK_IDENT) { - out->data[i].kind = TOK_NUM; - out->data[i].spelling = zero; - } - } - } -} - -/* Recursive-descent expression evaluator over an expanded token list. */ -typedef struct EE { - Pp* pp; - const Tok* toks; - u32 n; - u32 pos; - SrcLoc loc; -} EE; - -static i64 ee_ternary(EE* e); - -static const Tok* ee_peek(EE* e) { - return e->pos < e->n ? &e->toks[e->pos] : NULL; -} - -static int ee_match_punct(EE* e, u32 p) { - const Tok* t = ee_peek(e); - if (t && t->kind == TOK_PUNCT && t->v.punct == p) { - ++e->pos; - return 1; - } - return 0; -} - -static i64 ee_primary(EE* e) { - const Tok* t = ee_peek(e); - if (!t) compiler_panic(e->pp->c, e->loc, "#if: missing operand"); - if (t->kind == TOK_NUM) { - size_t slen; - const char* s = pool_str(e->pp->c->global, t->spelling, &slen); - ++e->pos; - return parse_pp_int(s, slen); - } - if (t->kind == TOK_CHR) { - /* Treat as the codepoint of the first character (post-decoding - * not implemented; cover the common case of a single ASCII - * char). 
*/ - size_t slen; - const char* s = pool_str(e->pp->c->global, t->spelling, &slen); - ++e->pos; - if (slen >= 3 && s[0] == '\'') return (unsigned char)s[1]; - return 0; - } - if (t->kind == TOK_PUNCT && t->v.punct == '(') { - i64 v; - ++e->pos; - v = ee_ternary(e); - if (!ee_match_punct(e, ')')) { - compiler_panic(e->pp->c, t->loc, "#if: expected ')'"); - } - return v; - } - compiler_panic(e->pp->c, t->loc, "#if: unexpected token in expression"); - return 0; -} - -static i64 ee_unary(EE* e) { - const Tok* t = ee_peek(e); - if (t && t->kind == TOK_PUNCT) { - u32 p = t->v.punct; - if (p == '!' || p == '-' || p == '+' || p == '~') { - i64 v; - ++e->pos; - v = ee_unary(e); - switch (p) { - case '!': - return v ? 0 : 1; - case '-': - return -v; - case '+': - return v; - case '~': - return ~v; - } - } - } - return ee_primary(e); -} - -static i64 ee_mul(EE* e) { - i64 v = ee_unary(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '*') { - ++e->pos; - v = v * ee_unary(e); - } else if (t->v.punct == '/') { - i64 r; - ++e->pos; - r = ee_unary(e); - if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: division by zero"); - v = v / r; - } else if (t->v.punct == '%') { - i64 r; - ++e->pos; - r = ee_unary(e); - if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: modulo by zero"); - v = v % r; - } else - break; - } - return v; -} - -static i64 ee_add(EE* e) { - i64 v = ee_mul(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '+') { - ++e->pos; - v = v + ee_mul(e); - } else if (t->v.punct == '-') { - ++e->pos; - v = v - ee_mul(e); - } else - break; - } - return v; -} - -static i64 ee_shift(EE* e) { - i64 v = ee_add(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == P_SHL) { - ++e->pos; - v = v << ee_add(e); - } else if (t->v.punct == P_SHR) { - ++e->pos; - v = v >> ee_add(e); - } else - break; - } - return v; -} - 
-static i64 ee_rel(EE* e) { - i64 v = ee_shift(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '<') { - ++e->pos; - v = (v < ee_shift(e)); - } else if (t->v.punct == '>') { - ++e->pos; - v = (v > ee_shift(e)); - } else if (t->v.punct == P_LE) { - ++e->pos; - v = (v <= ee_shift(e)); - } else if (t->v.punct == P_GE) { - ++e->pos; - v = (v >= ee_shift(e)); - } else - break; - } - return v; -} - -static i64 ee_eq(EE* e) { - i64 v = ee_rel(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == P_EQ) { - ++e->pos; - v = (v == ee_rel(e)); - } else if (t->v.punct == P_NE) { - ++e->pos; - v = (v != ee_rel(e)); - } else - break; - } - return v; -} - -static i64 ee_band(EE* e) { - i64 v = ee_eq(e); - while (ee_match_punct(e, '&')) v = v & ee_eq(e); - return v; -} - -static i64 ee_bxor(EE* e) { - i64 v = ee_band(e); - while (ee_match_punct(e, '^')) v = v ^ ee_band(e); - return v; -} - -static i64 ee_bor(EE* e) { - i64 v = ee_bxor(e); - while (ee_match_punct(e, '|')) v = v | ee_bxor(e); - return v; -} - -static i64 ee_logand(EE* e) { - i64 v = ee_bor(e); - while (ee_match_punct(e, P_AND)) { - i64 r = ee_bor(e); - v = (v && r); - } - return v; -} - -static i64 ee_logor(EE* e) { - i64 v = ee_logand(e); - while (ee_match_punct(e, P_OR)) { - i64 r = ee_logand(e); - v = (v || r); - } - return v; -} - -static i64 ee_ternary(EE* e) { - i64 c = ee_logor(e); - if (ee_match_punct(e, '?')) { - i64 a = ee_ternary(e); - i64 b; - if (!ee_match_punct(e, ':')) { - compiler_panic(e->pp->c, e->loc, "#if: ':' expected in ternary"); - } - b = ee_ternary(e); - return c ? 
a : b; - } - return c; -} - -static i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - TokVec defs = {0}; - TokVec exp = {0}; - EE e; - i64 v; - - prepass_defined(pp, line, n, &defs); - expand_for_if(pp, defs.data, defs.n, &exp); - - e.pp = pp; - e.toks = exp.data; - e.n = exp.n; - e.pos = 0; - e.loc = loc; - v = ee_ternary(&e); - if (e.pos != e.n) { - compiler_panic(pp->c, e.loc, - "#if: unexpected trailing tokens in expression"); - } - return v; -} - -static void consume_to_newline(Pp* pp) { - Tok t; - do { - t = src_next_raw(pp, NULL, NULL); - } while (t.kind != TOK_NEWLINE && t.kind != TOK_EOF); -} - -/* Drive the source forward consuming tokens until we either: - * - reach a balancing #endif (pops the frame, returns), or - * - reach a #elif / #else that flips the top frame to IF_INCLUDE - * (returns with that frame active). - * Nested #if directives inside the skipped group are tracked via - * `local_depth`. Unrecognised directives in skipped groups are tolerated - * (§6.10 ¶4, covered by `8c_skipped_relaxed_syntax`). */ -static void skip_until_active(Pp* pp) { - int local_depth = 0; - while (pp->ifstk_n > 0) { - IfFrame* top = if_top(pp); - Tok t; - if (top->state == IF_INCLUDE && local_depth == 0) return; - t = src_next_raw(pp, NULL, NULL); - if (t.kind == TOK_EOF) { - compiler_panic(pp->c, top->loc, "unterminated #if / #ifdef"); - } - if (t.kind != TOK_PP_HASH || (t.flags & TF_AT_BOL) == 0) continue; - - /* Read directive name (or null directive). 
*/ - { - Tok nt = src_next_raw(pp, NULL, NULL); - Sym name; - if (nt.kind == TOK_NEWLINE || nt.kind == TOK_EOF) continue; - if (nt.kind != TOK_IDENT) { - consume_to_newline(pp); - continue; - } - name = nt.v.ident; - if (name == pp->sym_if || name == pp->sym_ifdef || - name == pp->sym_ifndef) { - ++local_depth; - consume_to_newline(pp); - continue; - } - if (name == pp->sym_endif) { - consume_to_newline(pp); - if (local_depth > 0) { - --local_depth; - continue; - } - if_pop(pp); - return; - } - if (name == pp->sym_else) { - consume_to_newline(pp); - if (local_depth > 0) continue; - if (top->has_else) { - compiler_panic(pp->c, t.loc, "duplicate #else"); - } - top->has_else = 1; - if (top->state == IF_SEEK_TRUE) { - top->state = IF_INCLUDE; - return; - } - top->state = IF_DONE; - continue; - } - if (name == pp->sym_elif) { - if (local_depth > 0 || top->has_else || top->state == IF_DONE) { - consume_to_newline(pp); - continue; - } - if (top->state == IF_SEEK_TRUE) { - Tok* line; - u32 ln; - i64 v; - read_directive_line(pp, &line, &ln); - v = eval_if_expr(pp, line, ln, t.loc); - if (v != 0) { - top->state = IF_INCLUDE; - return; - } - continue; - } - /* Was IF_INCLUDE; #elif means we're done. (Should already - * have been transitioned to DONE before entering this - * skip — defensive.) */ - top->state = IF_DONE; - consume_to_newline(pp); - continue; - } - /* Other directive — relaxed: skip silently. */ - consume_to_newline(pp); - continue; - } - } -} - -static int is_predefined_macro_name(Pp* pp, Sym name) { - return name == pp->sym_va_args || name == pp->sym_line__ || - name == pp->sym_file__ || name == pp->sym_date__ || - name == pp->sym_time__; - /* __STDC__/__STDC_HOSTED__/__STDC_VERSION__ are registered as real - * macros, so the macro-table lookup catches them. */ -} - -static void do_ifdef(Pp* pp, const Tok* line, u32 n, int negate, SrcLoc loc) { - int defined; - IfFrame f; - if (n < 1 || line[0].kind != TOK_IDENT) { - compiler_panic(pp->c, loc, - negate ? 
"#ifndef: expected identifier" - : "#ifdef: expected identifier"); - } - defined = (mt_get(pp, line[0].v.ident) != NULL) || - is_predefined_macro_name(pp, line[0].v.ident); - if (negate) defined = !defined; - memset(&f, 0, sizeof(f)); - f.state = defined ? IF_INCLUDE : IF_SEEK_TRUE; - f.loc = loc; - if_push(pp, f); - if (!defined) skip_until_active(pp); -} - -static void do_if_directive(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - i64 v = eval_if_expr(pp, line, n, loc); - IfFrame f; - memset(&f, 0, sizeof(f)); - f.state = v ? IF_INCLUDE : IF_SEEK_TRUE; - f.loc = loc; - if_push(pp, f); - if (!v) skip_until_active(pp); -} - -static void do_elif(Pp* pp, SrcLoc loc) { - /* We only reach do_elif from the active branch — meaning the - * preceding group emitted code. So we must skip the rest. */ - IfFrame* top = if_top(pp); - if (!top) compiler_panic(pp->c, loc, "stray #elif"); - if (top->has_else) compiler_panic(pp->c, loc, "#elif after #else"); - top->state = IF_DONE; - skip_until_active(pp); -} - -static void do_else(Pp* pp, SrcLoc loc) { - IfFrame* top = if_top(pp); - if (!top) compiler_panic(pp->c, loc, "stray #else"); - if (top->has_else) compiler_panic(pp->c, loc, "duplicate #else"); - top->has_else = 1; - top->state = IF_DONE; - skip_until_active(pp); -} - -static void do_endif(Pp* pp, SrcLoc loc) { - if (!if_top(pp)) compiler_panic(pp->c, loc, "stray #endif"); - if_pop(pp); -} - -/* ============================================================ - * #include (§6.10.2) - * ============================================================ */ - -/* Read `path` via the host's file_io and copy its bytes into the pp - * arena so they outlive io->release. Returns 1 on success. 
*/ -static int try_open_include(Pp* pp, const char* path, const u8** data_out, - size_t* size_out) { - CfreeFileData fd; - const CfreeFileIO* io; - u8* buf; - - memset(&fd, 0, sizeof(fd)); - io = pp->c->env->file_io; - if (!io || !io->read_all) { - compiler_panic(pp->c, (SrcLoc){0, 0, 0}, - "#include: env.file_io is not configured"); - } - if (!io->read_all(io->user, path, &fd)) return 0; - { - size_t sz = fd.size; - buf = (u8*)arena_alloc(&pp->arena, sz ? sz : 1, 1); - if (sz && fd.data) memcpy(buf, fd.data, sz); - if (io->release) io->release(io->user, &fd); /* zeros fd */ - *data_out = buf; - *size_out = sz; - } - return 1; -} - -/* Return the includer's directory for resolving a quoted include, or "." - * for in-memory/builtin sources (where CWD is the natural fallback, like - * gcc treats stdin). `dir_out` must point to a buffer of size >= cap. */ -static int includer_dir(Pp* pp, SrcLoc loc, char* dir_out, size_t cap) { - const SourceFile* sf = source_file(pp->c->sources, loc.file_id); - const char* p = NULL; - size_t plen = 0; - const char* slash; - size_t dlen; - if (sf && sf->name) p = pool_str(pp->c->global, sf->name, &plen); - if (!p || plen == 0 || p[0] == '<') { - if (cap < 2) return 0; - dir_out[0] = '.'; - dir_out[1] = 0; - return 1; - } - slash = NULL; - { - size_t i; - for (i = plen; i > 0; --i) { - if (p[i - 1] == '/') { - slash = p + i - 1; - break; - } - } - } - if (!slash) { - if (cap < 2) return 0; - dir_out[0] = '.'; - dir_out[1] = 0; - return 1; - } - dlen = (size_t)(slash - p); - if (dlen == 0) dlen = 1; /* path was "/x" — dir is "/" */ - if (dlen + 1 > cap) return 0; - memcpy(dir_out, p, dlen); - dir_out[dlen] = 0; - return 1; -} - -/* Search for a header. Absolute paths are opened verbatim. Quoted form - * ("...") additionally searches the directory of the file containing the - * #include first (per C §6.10.2); bracket form (<...>) skips that step. - * Both forms then walk the configured -I / -isystem dirs in order. 
*/
static int find_and_open_include(Pp* pp, const char* path, int system,
                                 SrcLoc loc, const u8** data, size_t* size,
                                 char* resolved, size_t resolved_cap) {
  char joined[4096];
  char self_dir[4096];
  size_t path_len = strlen(path);
  u32 next_dir = 0;
  int try_self;

  /* Absolute path: one attempt, no search. */
  if (path_len > 0 && path[0] == '/') {
    if (!try_open_include(pp, path, data, size)) return 0;
    if (path_len + 1 > resolved_cap) return 0;
    memcpy(resolved, path, path_len + 1);
    return 1;
  }

  /* Quoted form starts with the includer's own directory (when it can
   * be determined); after that both forms walk pp->inc_dirs in order. */
  try_self = !system && includer_dir(pp, loc, self_dir, sizeof(self_dir));

  for (;;) {
    const char* base;
    size_t base_len;
    size_t total; /* bytes of "<base>/<path>" including the NUL */

    if (try_self) {
      base = self_dir;
      try_self = 0;
    } else {
      if (next_dir >= pp->ninc_dirs) break;
      base = pp->inc_dirs[next_dir].path;
      ++next_dir;
    }

    base_len = strlen(base);
    total = base_len + 1 + path_len + 1;
    if (total > sizeof(joined)) continue; /* candidate too long: skip it */

    memcpy(joined, base, base_len);
    joined[base_len] = '/';
    memcpy(joined + base_len + 1, path, path_len);
    joined[total - 1] = 0;

    if (!try_open_include(pp, joined, data, size)) continue;

    /* Found: report the resolved name, or fail when it does not fit. */
    if (total > resolved_cap) return 0;
    memcpy(resolved, joined, total);
    return 1;
  }
  return 0;
}

/* Parse the directive arguments into (path, system_flag). Handles:
 *   - directly-lexed TOK_HEADER: < ... > or " ... "
 *   - macro-replaced form: line is macro-expanded, then expected to
 *     produce either a TOK_STR ("...") or a < ... > sequence.
*/ -static void parse_include_path(Pp* pp, const Tok* line, u32 n, SrcLoc loc, - char* path_out, size_t cap, int* system_out) { - if (n == 0) compiler_panic(pp->c, loc, "#include: missing path"); - - if (line[0].kind == TOK_HEADER) { - size_t slen = 0; - const char* s = pool_str(pp->c->global, line[0].spelling, &slen); - if (slen < 2) compiler_panic(pp->c, loc, "#include: malformed header name"); - if (s[0] == '<' && s[slen - 1] == '>') - *system_out = 1; - else if (s[0] == '"' && s[slen - 1] == '"') - *system_out = 0; - else - compiler_panic(pp->c, loc, "#include: malformed header name"); - if (slen - 2 + 1 > cap) - compiler_panic(pp->c, loc, "#include: path too long"); - memcpy(path_out, s + 1, slen - 2); - path_out[slen - 2] = 0; - return; - } - - /* Macro-replaced form. */ - { - TokVec exp = {0}; - Tok* slice = arena_array(&pp->arena, Tok, n); - memcpy(slice, line, sizeof(Tok) * n); - expand_arg_to_eof(pp, slice, n, &exp); - - if (exp.n == 0) { - compiler_panic(pp->c, loc, "#include: empty after macro replacement"); - } - if (exp.data[0].kind == TOK_STR) { - size_t slen = 0; - const char* s = pool_str(pp->c->global, exp.data[0].spelling, &slen); - if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') { - compiler_panic(pp->c, loc, "#include: malformed string"); - } - if (slen - 2 + 1 > cap) { - compiler_panic(pp->c, loc, "#include: path too long"); - } - memcpy(path_out, s + 1, slen - 2); - path_out[slen - 2] = 0; - *system_out = 0; - return; - } - if (exp.data[0].kind == TOK_PUNCT && exp.data[0].v.punct == '<') { - size_t pos = 0; - u32 i; - for (i = 1; i < exp.n; ++i) { - size_t slen = 0; - const char* s = NULL; - if (exp.data[i].kind == TOK_PUNCT && exp.data[i].v.punct == '>') { - break; - } - if (exp.data[i].spelling) { - s = pool_str(pp->c->global, exp.data[i].spelling, &slen); - } - if (s && pos + slen + 1 <= cap) { - memcpy(path_out + pos, s, slen); - pos += slen; - } - } - path_out[pos] = 0; - *system_out = 1; - return; - } - compiler_panic(pp->c, loc, - 
"#include: expected \"...\" or <...> after expansion"); - } -} - -static void do_include(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - char path[4096]; - char resolved[4096]; - int system_form = 0; - const u8* data; - size_t size; - Lexer* lex; - u32 includer_id = 0; - u32 included_id; - u32 i; - TokSrc s; - - parse_include_path(pp, line, n, loc, path, sizeof(path), &system_form); - - if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved, - sizeof(resolved))) { - compiler_panic(pp->c, loc, "#include: file not found: %s", path); - } - - /* Walk the source stack to find the current includer's file_id. */ - for (i = pp->nsources; i > 0; --i) { - TokSrc* tp = &pp->sources[i - 1]; - if (tp->kind == SRC_LEX && tp->lex) { - includer_id = lex_file_id(tp->lex); - break; - } - } - - lex = lex_open_mem(pp->c, resolved, (const char*)data, size); - included_id = lex_file_id(lex); - - memset(&s, 0, sizeof(s)); - s.kind = SRC_LEX; - s.lex = lex; - src_push(pp, s); - - source_add_include(pp->c->sources, includer_id, included_id, loc, - system_form); -} - -/* ============================================================ - * #line (§6.10.4) - * ============================================================ */ - -/* Find the topmost SRC_LEX source on the stack — that's the "current - * file" whose line/file should track #line directives. */ -static TokSrc* current_lex_src(Pp* pp) { - u32 i; - for (i = pp->nsources; i > 0; --i) { - TokSrc* s = &pp->sources[i - 1]; - if (s->kind == SRC_LEX) return s; - } - return NULL; -} - -static void do_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - /* Macro-replace arguments first (a2). 
*/ - TokVec exp = {0}; - Tok* slice; - TokSrc* lex_src; - i64 target_line; - Sym target_file = 0; - - if (n == 0) compiler_panic(pp->c, loc, "#line: missing arguments"); - slice = arena_array(&pp->arena, Tok, n); - memcpy(slice, line, sizeof(Tok) * n); - expand_arg_to_eof(pp, slice, n, &exp); - - if (exp.n == 0 || exp.data[0].kind != TOK_NUM) { - compiler_panic(pp->c, loc, "#line: expected line number"); - } - { - size_t sl = 0; - const char* s = pool_str(pp->c->global, exp.data[0].spelling, &sl); - target_line = parse_pp_int(s, sl); - } - if (exp.n >= 2) { - if (exp.data[1].kind != TOK_STR) { - compiler_panic(pp->c, loc, "#line: file argument must be a string"); - } - { - size_t sl = 0; - const char* s = pool_str(pp->c->global, exp.data[1].spelling, &sl); - if (sl >= 2 && s[0] == '"' && s[sl - 1] == '"') { - target_file = pool_intern(pp->c->global, s + 1, sl - 2); - } - } - } - - lex_src = current_lex_src(pp); - if (!lex_src) compiler_panic(pp->c, loc, "#line outside any file"); - { - /* The next token (post-directive-NL) currently has lex.line == - * <lex's line counter>. Set delta so its user-visible line == - * target_line. */ - SrcLoc here = lex_loc(lex_src->lex); - lex_src->line_delta = (i32)target_line - (i32)here.line; - if (target_file) lex_src->file_override = target_file; - } -} - -/* ============================================================ - * #pragma + _Pragma (§6.10.6, §6.10.9) - * ============================================================ */ - -/* Push the unmodified directive line back onto the source stack as a - * buffer, so pp_emit_text writes it as-is. SRC_BUF gates directive - * recognition off, so this won't recurse. 
*/ -static void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - TokVec out = {0}; - HidesetId* hids; - u32 i; - Tok hash, ident, nl; - - memset(&hash, 0, sizeof(hash)); - hash.kind = TOK_PP_HASH; - hash.flags = TF_AT_BOL; - hash.loc = loc; - hash.spelling = pool_intern_cstr(pp->c->global, "#"); - tv_push(pp, &out, hash); - - memset(&ident, 0, sizeof(ident)); - ident.kind = TOK_IDENT; - ident.flags = 0; - ident.loc = loc; - ident.spelling = pp->sym_pragma_kw; - ident.v.ident = pp->sym_pragma_kw; - tv_push(pp, &out, ident); - - for (i = 0; i < n; ++i) { - Tok t = line[i]; - /* Force a leading space between tokens. */ - t.flags |= TF_HAS_SPACE; - if (i == 0) { - /* Space between "pragma" and the first arg. */ - } - tv_push(pp, &out, t); - } - - memset(&nl, 0, sizeof(nl)); - nl.kind = TOK_NEWLINE; - nl.loc = loc; - tv_push(pp, &out, nl); - - hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1); - for (i = 0; i < out.n; ++i) hids[i] = HS_EMPTY; - push_buf(pp, out.data, hids, out.n); -} - -static void do_pragma(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - /* Forward unrecognised pragmas to the output. STDC pragmas pass - * through too; we don't act on them yet. */ - emit_pragma_line(pp, line, n, loc); -} - -/* Destringize a string literal token's content: strip surrounding quotes - * and undo the `\"` and `\\` escapes. Other escape sequences pass - * through verbatim — the result is fed back through the lexer, which - * does its own escape handling for any string literals nested inside. 
*/ -static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap, - size_t* out_len) { - size_t slen = 0; - const char* s = pool_str(pp->c->global, str_tok->spelling, &slen); - size_t i, w = 0; - if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') { - compiler_panic(pp->c, str_tok->loc, - "_Pragma: argument must be a string literal"); - } - for (i = 1; i + 1 < slen; ++i) { - char c = s[i]; - if (c == '\\' && i + 2 < slen && (s[i + 1] == '\\' || s[i + 1] == '"')) { - ++i; - c = s[i]; - } - if (w + 1 >= cap) - compiler_panic(pp->c, str_tok->loc, "_Pragma: payload too long"); - out[w++] = c; - } - out[w] = 0; - *out_len = w; -} - -/* Handle a `_Pragma("...")` invocation. Caller has consumed the - * `_Pragma` identifier. Reads `(` STR `)`, destringizes, re-lexes the - * payload, and emits a #pragma directive line. */ -static int try_expand_pragma_op(Pp* pp, const Tok* invoke) { - Tok lp, str, rp; - char buf[1024]; - size_t buf_n = 0; - Lexer* lex; - TokVec args = {0}; - - /* Peek '(' (skipping NL). Use peek_for_invoke_paren for consistency, - * but we need the saved-back behavior for a non-match. */ - { - int saw_ws; - if (!peek_for_invoke_paren(pp, &saw_ws)) { - return 0; /* not an invocation; emit _Pragma as ident */ - } - (void)saw_ws; - } - /* Read the string literal arg. */ - { - HidesetId hs; - str = src_next_raw(pp, &hs, NULL); - } - if (str.kind != TOK_STR) { - compiler_panic(pp->c, invoke->loc, "_Pragma: expected string literal"); - } - { - HidesetId hs; - rp = src_next_raw(pp, &hs, NULL); - } - if (rp.kind != TOK_PUNCT || rp.v.punct != ')') { - compiler_panic(pp->c, invoke->loc, "_Pragma: expected ')'"); - } - (void)lp; - - destringize(pp, &str, buf, sizeof(buf) - 2, &buf_n); - /* Append a NL so the lexer terminates cleanly. */ - buf[buf_n++] = '\n'; - buf[buf_n] = 0; - - /* Re-lex into args. Bytes need to live until lex_close; copy into - * arena. 
*/ - { - char* arena_buf = (char*)arena_alloc(&pp->arena, buf_n + 1, 1); - memcpy(arena_buf, buf, buf_n + 1); - lex = lex_open_mem(pp->c, "<_Pragma>", arena_buf, buf_n); - } - for (;;) { - Tok t = lex_next(lex); - if (t.kind == TOK_EOF || t.kind == TOK_NEWLINE) break; - tv_push(pp, &args, t); - } - lex_close(lex); - - emit_pragma_line(pp, args.data, args.n, invoke->loc); - return 1; -} - -/* ============================================================ - * #error - * ============================================================ */ - -static void do_error(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - /* Concatenate token spellings into a single message. */ - CharBuf cb = {0}; - u32 i; - for (i = 0; i < n; ++i) { - size_t sl = 0; - const char* s = line[i].spelling - ? pool_str(pp->c->global, line[i].spelling, &sl) - : NULL; - if (i > 0) cb_putc(pp, &cb, ' '); - if (s && sl) cb_append(pp, &cb, s, (u32)sl); - } - cb_putc(pp, &cb, 0); - compiler_panic(pp->c, loc, "#error: %s", cb.data ? cb.data : ""); -} - -/* ============================================================ - * #embed (C23, §6.10.* per N3033) - * ============================================================ */ - -static void do_embed(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { - char path[4096]; - char resolved[4096]; - int system_form = 0; - const u8* data; - size_t size; - u32 j; - /* Optional embed parameters parsed below. */ - i64 limit_n = -1; - Tok* if_empty_toks = NULL; - u32 if_empty_n = 0; - /* Header-name path: first token. 
*/ - u32 arg_start = 0; - - if (n == 0) compiler_panic(pp->c, loc, "#embed: missing path"); - - if (line[0].kind == TOK_HEADER) { - size_t sl = 0; - const char* s = pool_str(pp->c->global, line[0].spelling, &sl); - if (sl < 2) compiler_panic(pp->c, loc, "#embed: malformed header name"); - if (s[0] == '<' && s[sl - 1] == '>') - system_form = 1; - else if (s[0] == '"' && s[sl - 1] == '"') - system_form = 0; - else - compiler_panic(pp->c, loc, "#embed: malformed header name"); - memcpy(path, s + 1, sl - 2); - path[sl - 2] = 0; - arg_start = 1; - } else { - compiler_panic(pp->c, loc, "#embed: header-name argument required"); - } - - /* Parse trailing parameters: limit(N), if_empty(...). */ - j = arg_start; - while (j < n) { - if (line[j].kind == TOK_IDENT) { - size_t sl = 0; - const char* s = pool_str(pp->c->global, line[j].v.ident, &sl); - if (sl == 5 && memcmp(s, "limit", 5) == 0) { - if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT || - line[j + 1].v.punct != '(') { - compiler_panic(pp->c, loc, "#embed: expected '(' after limit"); - } - j += 2; - if (j >= n || line[j].kind != TOK_NUM) { - compiler_panic(pp->c, loc, "#embed: limit() expects an integer"); - } - { - size_t sl2 = 0; - const char* s2 = pool_str(pp->c->global, line[j].spelling, &sl2); - limit_n = parse_pp_int(s2, sl2); - } - ++j; - if (j >= n || line[j].kind != TOK_PUNCT || line[j].v.punct != ')') { - compiler_panic(pp->c, loc, "#embed: expected ')' to close limit"); - } - ++j; - continue; - } - if (sl == 8 && memcmp(s, "if_empty", 8) == 0) { - u32 depth = 0; - u32 start; - if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT || - line[j + 1].v.punct != '(') { - compiler_panic(pp->c, loc, "#embed: expected '(' after if_empty"); - } - j += 2; - start = j; - while (j < n) { - if (line[j].kind == TOK_PUNCT) { - if (line[j].v.punct == '(') - ++depth; - else if (line[j].v.punct == ')') { - if (depth == 0) break; - --depth; - } - } - ++j; - } - if (j >= n) { - compiler_panic(pp->c, loc, "#embed: unterminated 
if_empty"); - } - if_empty_toks = arena_array(&pp->arena, Tok, j - start ? j - start : 1); - if_empty_n = j - start; - memcpy(if_empty_toks, line + start, sizeof(Tok) * if_empty_n); - ++j; /* skip ')' */ - continue; - } - } - compiler_panic(pp->c, loc, "#embed: unexpected token in parameter list"); - } - - if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved, - sizeof(resolved))) { - compiler_panic(pp->c, loc, "#embed: file not found: %s", path); - } - - /* Apply limit(). */ - { - size_t emit_n = size; - if (limit_n >= 0 && (u64)limit_n < emit_n) emit_n = (size_t)limit_n; - if (emit_n == 0) { - /* Empty: emit if_empty payload (or nothing). */ - if (if_empty_toks && if_empty_n) { - HidesetId* hids = arena_array(&pp->arena, HidesetId, if_empty_n); - u32 i; - for (i = 0; i < if_empty_n; ++i) hids[i] = HS_EMPTY; - push_buf(pp, if_empty_toks, hids, if_empty_n); - } - return; - } - /* Build a buffer of pp-numbers separated by ',' punctuators. */ - { - TokVec out = {0}; - HidesetId* hids; - size_t i; - for (i = 0; i < emit_n; ++i) { - char numbuf[8]; - int nl = 0; - u8 v = data[i]; - /* "u8 -> decimal" without sprintf. */ - if (v == 0) { - numbuf[nl++] = '0'; - } else { - char tmp[4]; - int k = 0; - while (v) { - tmp[k++] = (char)('0' + (v % 10)); - v /= 10; - } - while (k > 0) numbuf[nl++] = tmp[--k]; - } - { - Tok t; - memset(&t, 0, sizeof(t)); - t.kind = TOK_NUM; - t.loc = loc; - t.spelling = pool_intern(pp->c->global, numbuf, (size_t)nl); - if (i == 0) t.flags = TF_AT_BOL; - /* Bytes after a comma get a leading space to match - * clang's `, ` separator format. */ - else - t.flags = TF_HAS_SPACE; - tv_push(pp, &out, t); - } - if (i + 1 < emit_n) { - Tok comma; - memset(&comma, 0, sizeof(comma)); - comma.kind = TOK_PUNCT; - comma.v.punct = ','; - comma.loc = loc; - comma.spelling = pool_intern_cstr(pp->c->global, ","); - tv_push(pp, &out, comma); - } - } - hids = arena_array(&pp->arena, HidesetId, out.n ? 
out.n : 1); - { - u32 k; - for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY; - } - push_buf(pp, out.data, hids, out.n); - } - } -} - -/* ============================================================ - * Directive dispatch - * ============================================================ */ - -static void process_directive(Pp* pp, SrcLoc hash_loc) { - Tok* line; - u32 n; - Sym name; - - read_directive_line(pp, &line, &n); - if (n == 0) { - /* Null directive: '#' newline. Nothing to do. */ - return; - } - if (line[0].kind != TOK_IDENT) { - compiler_panic(pp->c, line[0].loc, "expected directive name after '#'"); - } - name = line[0].v.ident; - if (name == pp->sym_define) - do_define(pp, line + 1, n - 1); - else if (name == pp->sym_undef) - do_undef(pp, line + 1, n - 1); - else if (name == pp->sym_if) - do_if_directive(pp, line + 1, n - 1, hash_loc); - else if (name == pp->sym_ifdef) - do_ifdef(pp, line + 1, n - 1, 0, hash_loc); - else if (name == pp->sym_ifndef) - do_ifdef(pp, line + 1, n - 1, 1, hash_loc); - else if (name == pp->sym_elif) - do_elif(pp, hash_loc); - else if (name == pp->sym_else) - do_else(pp, hash_loc); - else if (name == pp->sym_endif) - do_endif(pp, hash_loc); - else if (name == pp->sym_include) - do_include(pp, line + 1, n - 1, hash_loc); - else if (name == pp->sym_line) - do_line(pp, line + 1, n - 1, hash_loc); - else if (name == pp->sym_pragma) - do_pragma(pp, line + 1, n - 1, hash_loc); - else if (name == pp->sym_error) - do_error(pp, line + 1, n - 1, hash_loc); - else if (name == pp->sym_embed) - do_embed(pp, line + 1, n - 1, hash_loc); - else { - compiler_panic(pp->c, line[0].loc, "unsupported directive"); - } -} - -/* ============================================================ - * Macro expansion - * ============================================================ */ - -static Tok pp_next_raw(Pp* pp); -static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke, - TokVec* out); - -/* Build a buffer of the macro's body (with hidesets) 
and push it. The - * first expanded token inherits the invocation token's TF_AT_BOL / - * TF_HAS_SPACE so output formatting matches the invocation site. */ -static void expand_object_macro(Pp* pp, const Macro* m, const Tok* invoke, - HidesetId invoke_hs) { - TokVec body = {0}; - Tok* tmp; - HidesetId hs; - HidesetId* hids; - u32 i; - - if (m->body_len == 0) { - return; /* placemarker: nothing to push */ - } - /* Run the body through the paste phase: object-like macros may use - * `##`. There are no parameters, so phase 1 reduces to a copy. */ - tmp = arena_array(&pp->arena, Tok, m->body_len); - for (i = 0; i < m->body_len; ++i) tmp[i] = m->body[i]; - subst_phase2(pp, tmp, m->body_len, invoke, &body); - - if (body.n == 0) return; - - /* Transfer invocation flags onto the first emitted token. */ - body.data[0].flags = - (u16)((body.data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | - (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE))); - for (i = 0; i < body.n; ++i) body.data[i].loc = invoke->loc; - - hs = hs_add(pp, invoke_hs, m->name); - hids = arena_array(&pp->arena, HidesetId, body.n); - for (i = 0; i < body.n; ++i) hids[i] = hs; - push_buf(pp, body.data, hids, body.n); -} - -/* ============================================================ - * Function-like macro expansion - * ============================================================ */ - -/* Peek for an open paren after the just-consumed identifier (which named - * a function-like macro). Newlines are whitespace inside an invocation. - * Returns 1 with `*ws_has_space_out` indicating whether any whitespace - * (newlines or HAS_SPACE) sat between the ident and the `(`. Returns 0 if - * no `(` follows; pushed-back tokens (NLs + the non-`(` token, if any) - * are restored as a buffer source so subsequent reads still see them. 
*/ -static int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out) { - TokVec saved = {0}; - int saw_ws = 0; - Tok t; - HidesetId hs; - - for (;;) { - t = src_next_raw(pp, &hs, NULL); - if (t.kind == TOK_NEWLINE) { - saw_ws = 1; - tv_push(pp, &saved, t); - continue; - } - if (t.kind == TOK_EOF) { - /* No '(' — push back saved tokens, leave EOF for next read. */ - if (saved.n) push_buf(pp, saved.data, NULL, saved.n); - *ws_has_space_out = saw_ws; - return 0; - } - if (t.flags & TF_HAS_SPACE) saw_ws = 1; - if (t.kind == TOK_PUNCT && t.v.punct == '(') { - /* Consumed. The newlines we walked past are whitespace and - * dropped (per spec); they don't go back on the stack. */ - *ws_has_space_out = saw_ws; - return 1; - } - /* Save this non-`(` token too and push back. */ - tv_push(pp, &saved, t); - push_buf(pp, saved.data, NULL, saved.n); - *ws_has_space_out = saw_ws; - return 0; - } -} - -/* Run macro expansion on a fixed token sequence to completion, yielding the - * fully-expanded token sequence. Used to pre-expand each function-macro - * argument before substitution (§6.10.3.1 ¶1). */ -static void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out) { - TokSrc src; - Tok t; - - memset(&src, 0, sizeof(src)); - src.kind = SRC_BUF; - src.scope_top = 1; - src.toks = in; - src.hs = NULL; - src.n = nin; - src_push(pp, src); - - for (;;) { - t = pp_next_raw(pp); /* drives macro expansion within this scope */ - if (t.kind == TOK_EOF) break; - if (t.kind == TOK_NEWLINE) { - /* Newlines inside an arg act as whitespace; convert to - * "next-token has TF_HAS_SPACE". Drop the NL token itself. */ - continue; - } - tv_push(pp, out, t); - } - /* Pop our scope source. */ - --pp->nsources; -} - -/* Argument list for a function-like invocation. Stored as parallel - * (start, end) ranges into a flat unexpanded token vector and a flat - * expanded token vector. */ -typedef struct ArgList { - /* Unexpanded arg tokens (raw as collected from invocation). 
*/ - Tok* raw; - u32 raw_n; - u32* raw_start; /* size n_args + 1 (sentinel = raw_n) */ - /* Pre-expanded tokens. */ - Tok* exp; - u32 exp_n; - u32* exp_start; /* size n_args + 1 (sentinel = exp_n) */ - u32 n_args; -} ArgList; - -/* Collect arguments. Caller has just consumed the opening `(`. Returns the - * close-paren's token (used as the invocation's last source location). */ -static Tok read_invocation_args(Pp* pp, const Macro* m, SrcLoc invoke_loc, - ArgList* out) { - TokVec raw = {0}; - u32* starts; - u32 starts_cap = 0; - u32 n_args = 0; - u32 cur_start = 0; - int depth = 0; - Tok t; - HidesetId hs; - int first_token_of_arg = 1; - Tok close_tok; - - memset(out, 0, sizeof(*out)); - starts = arena_array(&pp->arena, u32, 8); - starts_cap = 8; - starts[0] = 0; - - for (;;) { - t = src_next_raw(pp, &hs, NULL); - if (t.kind == TOK_EOF) { - compiler_panic(pp->c, invoke_loc, - "unterminated function-like macro invocation"); - } - if (t.kind == TOK_NEWLINE) { - /* Whitespace within an invocation. Mark the next token as - * having space; drop the NL. */ - if (raw.n && depth >= 0) { - /* No-op token list; we'll OR onto the next pushed token. */ - } - /* Use a sentinel: track via a flag on a deferred push. We - * accumulate "has_space" by setting it on the next pushed - * token. */ - /* Simpler: just push a placeholder by OR'ing onto next via - * a flag stored in `first_token_of_arg`-style state. */ - /* Implementation: use the next read token's TF_HAS_SPACE bit, - * which the lexer already sets after a NL. Actually NOT — - * after a NL the lexer sets TF_AT_BOL on the next token, not - * HAS_SPACE necessarily. Force it: */ - /* We'll OR it manually onto the next token. */ - /* Use a small flag stash: */ - /* (handled below by setting a pending flag) */ - /* See: pending_space variable */ - /* — commit: declare a pending_space static earlier. 
*/ - continue; - } - - if (t.kind == TOK_PUNCT) { - u32 p = t.v.punct; - if (p == '(' || p == '[' || p == '{') { - ++depth; - } else if (p == ')' || p == ']' || p == '}') { - if (p == ')' && depth == 0) { - /* End of invocation. Close the current argument. The - * empty-args case (no commas seen, no tokens - * collected) emits a slot only when the macro expects - * at least one argument; arity-0 macros take none. */ - close_tok = t; - { - int empty_call = - (n_args == 0 && raw.n == cur_start && first_token_of_arg); - int want_slot = !empty_call || (m->n_params > 0) || m->is_variadic; - if (want_slot) { - if (n_args + 1 >= starts_cap) { - u32 nc = starts_cap * 2; - u32* nb = arena_array(&pp->arena, u32, nc); - memcpy(nb, starts, sizeof(u32) * starts_cap); - starts = nb; - starts_cap = nc; - } - ++n_args; - starts[n_args] = raw.n; - } - } - goto done; - } - --depth; - } else if (p == ',' && depth == 0) { - /* Variadic: once we've filled all named params, the rest - * (commas included) collect into __VA_ARGS__. */ - if (m->is_variadic && n_args + 1 >= m->n_params) { - /* This comma is part of __VA_ARGS__. Push it. */ - tv_push(pp, &raw, t); - first_token_of_arg = 0; - continue; - } - /* Close current arg, start next. */ - if (n_args + 1 >= starts_cap) { - u32 nc = starts_cap * 2; - u32* nb = arena_array(&pp->arena, u32, nc); - memcpy(nb, starts, sizeof(u32) * starts_cap); - starts = nb; - starts_cap = nc; - } - ++n_args; - starts[n_args] = raw.n; - cur_start = raw.n; - first_token_of_arg = 1; - continue; - } - } - tv_push(pp, &raw, t); - first_token_of_arg = 0; - (void)hs; /* hideset of raw arg tokens carried for blue-paint - * propagation in the arg's pre-expansion */ - } -done: - /* Validate arity. */ - { - u32 expected = m->n_params; - if (m->is_variadic) { - if (n_args < (expected ? expected - 1 : 0)) { - /* Allow exactly expected-1 (empty __VA_ARGS__) by - * synthesizing an empty trailing arg. */ - if (n_args + 1 == (expected ? 
expected - 1 : 0)) { - /* off by one — fall through to error */ - } - compiler_panic(pp->c, invoke_loc, - "too few arguments to variadic macro invocation"); - } - /* Synthesize an empty __VA_ARGS__ if caller passed exactly - * the named-parameter count. */ - if (n_args + 1 == expected) { - if (n_args + 1 >= starts_cap) { - u32 nc = starts_cap * 2; - u32* nb = arena_array(&pp->arena, u32, nc); - memcpy(nb, starts, sizeof(u32) * starts_cap); - starts = nb; - starts_cap = nc; - } - ++n_args; - starts[n_args] = raw.n; - } - } else { - if (n_args != expected) { - /* Spec: arity-0 macro `M()` invoked as `M()` is allowed and - * has 0 args. Above logic produces 0 in that case. */ - compiler_panic(pp->c, invoke_loc, - "wrong number of arguments to function-like macro"); - } - } - } - out->raw = raw.data; - out->raw_n = raw.n; - out->raw_start = starts; - out->n_args = n_args; - return close_tok; -} - -/* Build pre-expanded args. */ -static void preexpand_args(Pp* pp, ArgList* a) { - TokVec exp = {0}; - u32* exp_start; - u32 i; - exp_start = arena_array(&pp->arena, u32, a->n_args + 1); - exp_start[0] = 0; - for (i = 0; i < a->n_args; ++i) { - u32 lo = a->raw_start[i]; - u32 hi = a->raw_start[i + 1]; - if (hi > lo) { - /* Copy the slice into a fresh buffer so expand_arg_to_eof can - * own it without aliasing. */ - Tok* slice = arena_array(&pp->arena, Tok, hi - lo); - memcpy(slice, &a->raw[lo], sizeof(Tok) * (hi - lo)); - expand_arg_to_eof(pp, slice, hi - lo, &exp); - } - exp_start[i + 1] = exp.n; - } - a->exp = exp.data; - a->exp_n = exp.n; - a->exp_start = exp_start; -} - -/* Build a stringized TOK_STR from the unexpanded argument tokens - * `arg[lo..hi)`. The first token's leading-space flag is ignored (leading - * whitespace stripped). Inside string/char-literal spellings, '"' and '\' - * are escaped. 
*/ -static Tok make_stringize(Pp* pp, const Tok* arg, u32 lo, u32 hi, SrcLoc loc) { - CharBuf b = {0}; - u32 i; - Tok t; - Sym sp; - - cb_putc(pp, &b, '"'); - for (i = lo; i < hi; ++i) { - const Tok* at = &arg[i]; - size_t slen = 0; - const char* s = - at->spelling ? pool_str(pp->c->global, at->spelling, &slen) : NULL; - if (i > lo && (at->flags & TF_HAS_SPACE)) cb_putc(pp, &b, ' '); - if (s && slen) { - int esc = (at->kind == TOK_STR || at->kind == TOK_CHR); - size_t k; - for (k = 0; k < slen; ++k) { - char c = s[k]; - if (esc && (c == '\\' || c == '"')) cb_putc(pp, &b, '\\'); - cb_putc(pp, &b, c); - } - } - } - cb_putc(pp, &b, '"'); - - sp = pool_intern(pp->c->global, b.data, b.len); - memset(&t, 0, sizeof(t)); - t.kind = TOK_STR; - t.loc = loc; - t.spelling = sp; - t.v.str = sp; - return t; -} - -/* Concatenate two token spellings and re-lex into a single token. Empty - * (placemarker) sides collapse to the other side per §6.10.3.3 ¶2. */ -static Tok paste_tokens(Pp* pp, Tok lhs, Tok rhs, SrcLoc loc) { - char buf[1024]; - size_t alen = 0, blen = 0; - const char* a; - const char* b; - Lexer* lex; - Tok t1, t2; - - if (lhs.kind == TOK_PP_PLACEMARKER) return rhs; - if (rhs.kind == TOK_PP_PLACEMARKER) return lhs; - - a = lhs.spelling ? pool_str(pp->c->global, lhs.spelling, &alen) : ""; - b = rhs.spelling ? pool_str(pp->c->global, rhs.spelling, &blen) : ""; - if (alen + blen + 2 > sizeof(buf)) { - compiler_panic(pp->c, loc, "token paste: spelling too long"); - } - if (alen) memcpy(buf, a, alen); - if (blen) memcpy(buf + alen, b, blen); - buf[alen + blen] = '\n'; - buf[alen + blen + 1] = 0; - - lex = lex_open_mem(pp->c, "<paste>", buf, alen + blen + 1); - t1 = lex_next(lex); - t2 = lex_next(lex); - if (t1.kind == TOK_EOF) { - /* Both empty (shouldn't reach here since we handled placemarkers). 
*/ - lex_close(lex); - return lhs; - } - if (t2.kind != TOK_NEWLINE && t2.kind != TOK_EOF) { - lex_close(lex); - compiler_panic(pp->c, loc, "token pasting yields multiple tokens, invalid"); - } - lex_close(lex); - - /* Inherit positional flags from LHS (it sat in the same slot). */ - t1.flags = (u16)((t1.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | - (lhs.flags & (TF_AT_BOL | TF_HAS_SPACE))); - t1.loc = loc; - return t1; -} - -/* Phase 1 (param substitution). For each parameter occurrence in the - * body: if adjacent to ## or # (handled separately), substitute the raw - * argument tokens; otherwise substitute the pre-expanded form. Empty raw - * args become a TOK_PP_PLACEMARKER which phase 2 collapses. */ -static void subst_phase1(Pp* pp, const Macro* m, ArgList* a, const Tok* invoke, - TokVec* out) { - u32 j; - for (j = 0; j < m->body_len; ++j) { - const Tok* bt = &m->body[j]; - if (bt->kind == TOK_PP_HASH) { - /* §6.10.3.2: # must be followed by a parameter. */ - if (j + 1 >= m->body_len || m->body[j + 1].kind != TOK_PP_PARAM) { - compiler_panic(pp->c, bt->loc, - "'#' is not followed by a macro parameter"); - } - { - u32 p = m->body[j + 1].v.punct; - u32 lo = a->raw_start[p]; - u32 hi = a->raw_start[p + 1]; - Tok s = make_stringize(pp, a->raw, lo, hi, invoke->loc); - s.flags = (u16)((s.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | - (bt->flags & (TF_AT_BOL | TF_HAS_SPACE))); - tv_push(pp, out, s); - ++j; - continue; - } - } - if (bt->kind == TOK_PP_PARAM) { - u32 p = bt->v.punct; - int adj_paste = - (j > 0 && m->body[j - 1].kind == TOK_PP_PASTE) || - (j + 1 < m->body_len && m->body[j + 1].kind == TOK_PP_PASTE); - - u32 lo, hi; - if (adj_paste) { - lo = a->raw_start[p]; - hi = a->raw_start[p + 1]; - } else { - lo = a->exp_start[p]; - hi = a->exp_start[p + 1]; - } - - if (lo == hi) { - /* Empty argument → placemarker. 
*/ - Tok pm; - memset(&pm, 0, sizeof(pm)); - pm.kind = TOK_PP_PLACEMARKER; - pm.flags = bt->flags & (TF_AT_BOL | TF_HAS_SPACE); - pm.loc = invoke->loc; - tv_push(pp, out, pm); - } else { - u32 k; - int first = 1; - Tok* src = adj_paste ? a->raw : a->exp; - for (k = lo; k < hi; ++k) { - Tok t = src[k]; - if (first) { - t.flags = (u16)((t.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | - (bt->flags & (TF_AT_BOL | TF_HAS_SPACE))); - first = 0; - } - tv_push(pp, out, t); - } - } - continue; - } - tv_push(pp, out, *bt); - } -} - -/* Phase 2 (paste). Walk the post-substitute buffer; for each TOK_PP_PASTE, - * splice the previous output token with the next input token. Then strip - * remaining placemarkers. */ -static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke, - TokVec* out) { - u32 i; - for (i = 0; i < nin; ++i) { - Tok t = in[i]; - if (t.kind == TOK_PP_PASTE) { - Tok lhs, rhs; - if (out->n == 0 || i + 1 >= nin) { - compiler_panic(pp->c, invoke->loc, - "'##' at start or end of replacement list"); - } - lhs = out->data[--out->n]; - rhs = in[++i]; - tv_push(pp, out, paste_tokens(pp, lhs, rhs, invoke->loc)); - continue; - } - tv_push(pp, out, t); - } - /* Strip placemarkers, preserving leading-space flag on the next token. */ - { - u32 r = 0, w = 0; - u16 carry = 0; - for (r = 0; r < out->n; ++r) { - if (out->data[r].kind == TOK_PP_PLACEMARKER) { - carry |= out->data[r].flags & (TF_AT_BOL | TF_HAS_SPACE); - continue; - } - if (carry) { - out->data[r].flags |= carry; - carry = 0; - } - if (w != r) out->data[w] = out->data[r]; - ++w; - } - out->n = w; - } -} - -/* Wrapper: phases 1 and 2 in sequence, plus invocation-loc / flag transfer. */ -static void substitute_body(Pp* pp, const Macro* m, ArgList* a, - const Tok* invoke, HidesetId result_hs, TokVec* out, - TokVec* hs_out) { - TokVec phase1 = {0}; - u32 i; - subst_phase1(pp, m, a, invoke, &phase1); - subst_phase2(pp, phase1.data, phase1.n, invoke, out); - /* Invocation flags onto first emitted token. 
*/ - if (out->n) { - out->data[0].flags = - (u16)((out->data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | - (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE))); - } - /* Locations to invocation site. */ - for (i = 0; i < out->n; ++i) out->data[i].loc = invoke->loc; - /* Build parallel hideset vector. */ - for (i = 0; i < out->n; ++i) { - Tok hsmark; - memset(&hsmark, 0, sizeof(hsmark)); - hsmark.spelling = (Sym)result_hs; - tv_push(pp, hs_out, hsmark); - } -} - -/* Expand a function-like macro invocation: peek for `(`, collect args, - * pre-expand them, substitute the body, push the result. Returns 1 if - * the invocation was performed, 0 if there was no `(` (the caller should - * emit the identifier as-is). */ -static int try_expand_func_macro(Pp* pp, const Macro* m, const Tok* invoke, - HidesetId invoke_hs) { - int saw_ws; - ArgList args; - TokVec body = {0}; - TokVec hsvec = {0}; /* parallel to body, holds HidesetId per slot */ - HidesetId result_hs; - Tok close_tok; - - if (!peek_for_invoke_paren(pp, &saw_ws)) { - return 0; - } - (void)saw_ws; - read_invocation_args(pp, m, invoke->loc, &args); - /* Note: assigned to silence unused-result; we don't use the close tok yet. */ - close_tok.kind = 0; - (void)close_tok; - preexpand_args(pp, &args); - - /* Hideset of result = invocation hideset ∪ {macro_name}. The standard - * intersects with the closing `)`'s hideset for blue-paint purity, but - * for the freshly-collected `)` from the lex source that's the empty - * set, so the union form suffices here. */ - result_hs = hs_add(pp, invoke_hs, m->name); - substitute_body(pp, m, &args, invoke, result_hs, &body, &hsvec); - - { - u32 i; - HidesetId* hids = arena_array(&pp->arena, HidesetId, body.n ? 
body.n : 1); - for (i = 0; i < body.n; ++i) { - hids[i] = (HidesetId)hsvec.data[i].spelling; - } - push_buf(pp, body.data, hids, body.n); - } - return 1; -} - -/* ============================================================ - * Public streaming entries - * ============================================================ */ - -/* pp_next_raw: reads from the top source, applies macro expansion when an - * identifier names a macro that isn't blue-painted, and consumes - * directives in-place. TOK_NEWLINE is preserved for pp_emit_text. */ -static Tok pp_next_raw(Pp* pp) { - Tok t; - HidesetId hs; - u8 src_kind; - for (;;) { - t = src_next_raw(pp, &hs, &src_kind); - if (t.kind == TOK_EOF) return t; - if (t.kind == TOK_PP_HASH && (t.flags & TF_AT_BOL) && src_kind == SRC_LEX) { - process_directive(pp, t.loc); - /* No synthesized newline: the comparator collapses - * whitespace, so blank-line replacement of consumed - * directives isn't observable here. Directives that produce - * content (e.g. #include, #embed, #pragma) push their own - * tokens onto the source stack, which the next loop - * iteration picks up. */ - continue; - } - if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) { - Sym id = t.v.ident; - - /* Dynamic predefined macros: __LINE__ / __FILE__ / - * __DATE__ / __TIME__. Always expand, ignoring the macro - * table. 
*/ - if (id == pp->sym_line__) { - char tmp[16], buf[16]; - int k = 0, j = 0; - u32 ln = t.loc.line; - if (ln == 0) - buf[k++] = '0'; - else { - while (ln) { - tmp[j++] = (char)('0' + ln % 10); - ln /= 10; - } - while (j > 0) buf[k++] = tmp[--j]; - } - t.kind = TOK_NUM; - t.spelling = pool_intern(pp->c->global, buf, (size_t)k); - return t; - } - if (id == pp->sym_file__) { - TokSrc* ls = current_lex_src(pp); - Sym name = 0; - size_t nlen = 0; - const char* nstr = NULL; - char* buf; - if (ls && ls->file_override) { - name = ls->file_override; - } else if (ls) { - const SourceFile* sf = - source_file(pp->c->sources, lex_file_id(ls->lex)); - if (sf) name = sf->name; - } - if (name) nstr = pool_str(pp->c->global, name, &nlen); - buf = (char*)arena_alloc(&pp->arena, nlen + 2, 1); - buf[0] = '"'; - if (nlen) memcpy(buf + 1, nstr, nlen); - buf[nlen + 1] = '"'; - t.kind = TOK_STR; - t.spelling = pool_intern(pp->c->global, buf, nlen + 2); - t.v.str = t.spelling; - return t; - } - if (id == pp->sym_date__) { - t.kind = TOK_STR; - t.spelling = pp->val_date_str; - t.v.str = t.spelling; - return t; - } - if (id == pp->sym_time__) { - t.kind = TOK_STR; - t.spelling = pp->val_time_str; - t.v.str = t.spelling; - return t; - } - if (id == pp->sym__pragma) { - if (try_expand_pragma_op(pp, &t)) continue; - /* No '(' — fall through and emit as plain ident. */ - } - - { - Macro* m = mt_get(pp, id); - if (m && !hs_contains(pp, hs, m->name)) { - if (!m->is_func) { - expand_object_macro(pp, m, &t, hs); - continue; - } - if (try_expand_func_macro(pp, m, &t, hs)) { - continue; - } - /* No '(' followed; emit as plain identifier. */ - } - } - } - return t; - } -} - Tok pp_next(Pp* pp) { /* Public: filter newlines so consumers like the C parser don't need * to handle them. pp_emit_text uses pp_next_raw via its own loop. 
*/ diff --git a/src/pp/pp_directive.c b/src/pp/pp_directive.c @@ -0,0 +1,1252 @@ +/* pp_directive.c — if-stack, PP expression evaluator, #include search/open, + * #line, #pragma, #error, #embed, and directive dispatch. */ + +#include "pp/pp_priv.h" + +/* ============================================================ + * If-stack + * ============================================================ */ + +static void if_push(Pp* pp, IfFrame f) { + if (pp->ifstk_n == pp->ifstk_cap) { + u32 nc = pp->ifstk_cap ? pp->ifstk_cap * 2 : 4; + pp->ifstk = pp_xrealloc(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap, + sizeof(IfFrame) * nc, _Alignof(IfFrame)); + pp->ifstk_cap = nc; + } + pp->ifstk[pp->ifstk_n++] = f; +} + +static IfFrame* if_top(Pp* pp) { + return pp->ifstk_n ? &pp->ifstk[pp->ifstk_n - 1] : NULL; +} + +static void if_pop(Pp* pp) { + if (pp->ifstk_n) --pp->ifstk_n; +} + +/* ============================================================ + * Directive line reader + * ============================================================ */ + +/* Read tokens up through (and including) the next TOK_NEWLINE / TOK_EOF. + * Drops the newline; collected tokens are arena-allocated and returned via + * *out_toks/out_n. */ +void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n) { + Tok* buf = NULL; + u32 cap = 0, n = 0; + Tok t; + HidesetId hs; + for (;;) { + t = src_next_raw(pp, &hs, NULL); + if (t.kind == TOK_NEWLINE || t.kind == TOK_EOF) break; + if (n == cap) { + u32 nc = cap ? cap * 2 : 8; + Tok* nb = (Tok*)arena_alloc(&pp->arena, sizeof(Tok) * nc, _Alignof(Tok)); + if (cap) memcpy(nb, buf, sizeof(Tok) * cap); + buf = nb; + cap = nc; + } + buf[n++] = t; + } + *out_toks = buf; + *out_n = n; +} + +/* ============================================================ + * PP expression evaluator (§6.10.1) + * ============================================================ */ + +/* Parse a C integer constant from a pp-number's spelling. Suffixes (u, l, + * etc.) are ignored. 
Recognizes decimal, hex (0x...), and octal (0...). */ +static i64 parse_pp_int(const char* s, size_t n) { + int base = 10; + size_t i = 0; + i64 val = 0; + if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + base = 16; + i = 2; + } else if (n >= 1 && s[0] == '0') { + base = 8; + i = 1; + } + for (; i < n; ++i) { + char c = s[i]; + int d; + if (c >= '0' && c <= '9') + d = c - '0'; + else if (base == 16 && c >= 'a' && c <= 'f') + d = c - 'a' + 10; + else if (base == 16 && c >= 'A' && c <= 'F') + d = c - 'A' + 10; + else + break; + if (d >= base) break; + val = val * (i64)base + (i64)d; + } + return val; +} + +/* Pre-pass: replace `defined X` / `defined ( X )` with a 0/1 pp-number, + * preserving the rest of the token sequence. The operand of `defined` is + * NOT macro-expanded. Output is a fresh TokVec. */ +static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) { + u32 i; + for (i = 0; i < nin; ++i) { + if (in[i].kind == TOK_IDENT && in[i].v.ident == pp->sym_defined) { + int has_paren = 0; + Sym ident = 0; + u32 j = i + 1; + if (j < nin && in[j].kind == TOK_PUNCT && in[j].v.punct == '(') { + has_paren = 1; + ++j; + } + if (j >= nin || in[j].kind != TOK_IDENT) { + compiler_panic(pp->c, in[i].loc, + "operand of 'defined' must be an identifier"); + } + ident = in[j].v.ident; + ++j; + if (has_paren) { + if (j >= nin || in[j].kind != TOK_PUNCT || in[j].v.punct != ')') { + compiler_panic(pp->c, in[i].loc, + "expected ')' after 'defined' operand"); + } + ++j; + } + { + Tok t; + memset(&t, 0, sizeof(t)); + t.kind = TOK_NUM; + t.flags = in[i].flags & (TF_AT_BOL | TF_HAS_SPACE); + t.loc = in[i].loc; + t.spelling = + pool_intern_cstr(pp->c->global, mt_get(pp, ident) ? "1" : "0"); + tv_push(pp, out, t); + } + i = j - 1; + } else { + tv_push(pp, out, in[i]); + } + } +} + +/* Macro-expand a sequence of pre-#if tokens to completion. Wraps the + * fixed-buffer arg pre-expansion machinery with TOK_IDENT → 0 + * substitution per §6.10.1 ¶4. 
*/ +static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) { + Tok* slice; + if (nin == 0) return; + slice = arena_array(&pp->arena, Tok, nin); + memcpy(slice, in, sizeof(Tok) * nin); + expand_arg_to_eof(pp, slice, nin, out); + /* Replace remaining identifiers with `0`. */ + { + u32 i; + Sym zero = pool_intern_cstr(pp->c->global, "0"); + for (i = 0; i < out->n; ++i) { + if (out->data[i].kind == TOK_IDENT) { + out->data[i].kind = TOK_NUM; + out->data[i].spelling = zero; + } + } + } +} + +/* Recursive-descent expression evaluator over an expanded token list. */ +typedef struct EE { + Pp* pp; + const Tok* toks; + u32 n; + u32 pos; + SrcLoc loc; +} EE; + +static i64 ee_ternary(EE* e); + +static const Tok* ee_peek(EE* e) { + return e->pos < e->n ? &e->toks[e->pos] : NULL; +} + +static int ee_match_punct(EE* e, u32 p) { + const Tok* t = ee_peek(e); + if (t && t->kind == TOK_PUNCT && t->v.punct == p) { + ++e->pos; + return 1; + } + return 0; +} + +static i64 ee_primary(EE* e) { + const Tok* t = ee_peek(e); + if (!t) compiler_panic(e->pp->c, e->loc, "#if: missing operand"); + if (t->kind == TOK_NUM) { + size_t slen; + const char* s = pool_str(e->pp->c->global, t->spelling, &slen); + ++e->pos; + return parse_pp_int(s, slen); + } + if (t->kind == TOK_CHR) { + /* Treat as the codepoint of the first character (post-decoding + * not implemented; cover the common case of a single ASCII + * char). 
*/ + size_t slen; + const char* s = pool_str(e->pp->c->global, t->spelling, &slen); + ++e->pos; + if (slen >= 3 && s[0] == '\'') return (unsigned char)s[1]; + return 0; + } + if (t->kind == TOK_PUNCT && t->v.punct == '(') { + i64 v; + ++e->pos; + v = ee_ternary(e); + if (!ee_match_punct(e, ')')) { + compiler_panic(e->pp->c, t->loc, "#if: expected ')'"); + } + return v; + } + compiler_panic(e->pp->c, t->loc, "#if: unexpected token in expression"); + return 0; +} + +static i64 ee_unary(EE* e) { + const Tok* t = ee_peek(e); + if (t && t->kind == TOK_PUNCT) { + u32 p = t->v.punct; + if (p == '!' || p == '-' || p == '+' || p == '~') { + i64 v; + ++e->pos; + v = ee_unary(e); + switch (p) { + case '!': + return v ? 0 : 1; + case '-': + return -v; + case '+': + return v; + case '~': + return ~v; + } + } + } + return ee_primary(e); +} + +static i64 ee_mul(EE* e) { + i64 v = ee_unary(e); + for (;;) { + const Tok* t = ee_peek(e); + if (!t || t->kind != TOK_PUNCT) break; + if (t->v.punct == '*') { + ++e->pos; + v = v * ee_unary(e); + } else if (t->v.punct == '/') { + i64 r; + ++e->pos; + r = ee_unary(e); + if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: division by zero"); + v = v / r; + } else if (t->v.punct == '%') { + i64 r; + ++e->pos; + r = ee_unary(e); + if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: modulo by zero"); + v = v % r; + } else + break; + } + return v; +} + +static i64 ee_add(EE* e) { + i64 v = ee_mul(e); + for (;;) { + const Tok* t = ee_peek(e); + if (!t || t->kind != TOK_PUNCT) break; + if (t->v.punct == '+') { + ++e->pos; + v = v + ee_mul(e); + } else if (t->v.punct == '-') { + ++e->pos; + v = v - ee_mul(e); + } else + break; + } + return v; +} + +static i64 ee_shift(EE* e) { + i64 v = ee_add(e); + for (;;) { + const Tok* t = ee_peek(e); + if (!t || t->kind != TOK_PUNCT) break; + if (t->v.punct == P_SHL) { + ++e->pos; + v = v << ee_add(e); + } else if (t->v.punct == P_SHR) { + ++e->pos; + v = v >> ee_add(e); + } else + break; + } + return v; +} + 
+static i64 ee_rel(EE* e) { + i64 v = ee_shift(e); + for (;;) { + const Tok* t = ee_peek(e); + if (!t || t->kind != TOK_PUNCT) break; + if (t->v.punct == '<') { + ++e->pos; + v = (v < ee_shift(e)); + } else if (t->v.punct == '>') { + ++e->pos; + v = (v > ee_shift(e)); + } else if (t->v.punct == P_LE) { + ++e->pos; + v = (v <= ee_shift(e)); + } else if (t->v.punct == P_GE) { + ++e->pos; + v = (v >= ee_shift(e)); + } else + break; + } + return v; +} + +static i64 ee_eq(EE* e) { + i64 v = ee_rel(e); + for (;;) { + const Tok* t = ee_peek(e); + if (!t || t->kind != TOK_PUNCT) break; + if (t->v.punct == P_EQ) { + ++e->pos; + v = (v == ee_rel(e)); + } else if (t->v.punct == P_NE) { + ++e->pos; + v = (v != ee_rel(e)); + } else + break; + } + return v; +} + +static i64 ee_band(EE* e) { + i64 v = ee_eq(e); + while (ee_match_punct(e, '&')) v = v & ee_eq(e); + return v; +} + +static i64 ee_bxor(EE* e) { + i64 v = ee_band(e); + while (ee_match_punct(e, '^')) v = v ^ ee_band(e); + return v; +} + +static i64 ee_bor(EE* e) { + i64 v = ee_bxor(e); + while (ee_match_punct(e, '|')) v = v | ee_bxor(e); + return v; +} + +static i64 ee_logand(EE* e) { + i64 v = ee_bor(e); + while (ee_match_punct(e, P_AND)) { + i64 r = ee_bor(e); + v = (v && r); + } + return v; +} + +static i64 ee_logor(EE* e) { + i64 v = ee_logand(e); + while (ee_match_punct(e, P_OR)) { + i64 r = ee_logand(e); + v = (v || r); + } + return v; +} + +static i64 ee_ternary(EE* e) { + i64 c = ee_logor(e); + if (ee_match_punct(e, '?')) { + i64 a = ee_ternary(e); + i64 b; + if (!ee_match_punct(e, ':')) { + compiler_panic(e->pp->c, e->loc, "#if: ':' expected in ternary"); + } + b = ee_ternary(e); + return c ? 
a : b; + } + return c; +} + +i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + TokVec defs = {0}; + TokVec exp = {0}; + EE e; + i64 v; + + prepass_defined(pp, line, n, &defs); + expand_for_if(pp, defs.data, defs.n, &exp); + + e.pp = pp; + e.toks = exp.data; + e.n = exp.n; + e.pos = 0; + e.loc = loc; + v = ee_ternary(&e); + if (e.pos != e.n) { + compiler_panic(pp->c, e.loc, + "#if: unexpected trailing tokens in expression"); + } + return v; +} + +/* ============================================================ + * Conditional inclusion helpers + * ============================================================ */ + +static void consume_to_newline(Pp* pp) { + Tok t; + do { + t = src_next_raw(pp, NULL, NULL); + } while (t.kind != TOK_NEWLINE && t.kind != TOK_EOF); +} + +/* Drive the source forward consuming tokens until we either: + * - reach a balancing #endif (pops the frame, returns), or + * - reach a #elif / #else that flips the top frame to IF_INCLUDE + * (returns with that frame active). + * Nested #if directives inside the skipped group are tracked via + * `local_depth`. Unrecognised directives in skipped groups are tolerated + * (§6.10 ¶4, covered by `8c_skipped_relaxed_syntax`). */ +static void skip_until_active(Pp* pp) { + int local_depth = 0; + while (pp->ifstk_n > 0) { + IfFrame* top = if_top(pp); + Tok t; + if (top->state == IF_INCLUDE && local_depth == 0) return; + t = src_next_raw(pp, NULL, NULL); + if (t.kind == TOK_EOF) { + compiler_panic(pp->c, top->loc, "unterminated #if / #ifdef"); + } + if (t.kind != TOK_PP_HASH || (t.flags & TF_AT_BOL) == 0) continue; + + /* Read directive name (or null directive). 
*/ + { + Tok nt = src_next_raw(pp, NULL, NULL); + Sym name; + if (nt.kind == TOK_NEWLINE || nt.kind == TOK_EOF) continue; + if (nt.kind != TOK_IDENT) { + consume_to_newline(pp); + continue; + } + name = nt.v.ident; + if (name == pp->sym_if || name == pp->sym_ifdef || + name == pp->sym_ifndef) { + ++local_depth; + consume_to_newline(pp); + continue; + } + if (name == pp->sym_endif) { + consume_to_newline(pp); + if (local_depth > 0) { + --local_depth; + continue; + } + if_pop(pp); + return; + } + if (name == pp->sym_else) { + consume_to_newline(pp); + if (local_depth > 0) continue; + if (top->has_else) { + compiler_panic(pp->c, t.loc, "duplicate #else"); + } + top->has_else = 1; + if (top->state == IF_SEEK_TRUE) { + top->state = IF_INCLUDE; + return; + } + top->state = IF_DONE; + continue; + } + if (name == pp->sym_elif) { + if (local_depth > 0 || top->has_else || top->state == IF_DONE) { + consume_to_newline(pp); + continue; + } + if (top->state == IF_SEEK_TRUE) { + Tok* line; + u32 ln; + i64 v; + read_directive_line(pp, &line, &ln); + v = eval_if_expr(pp, line, ln, t.loc); + if (v != 0) { + top->state = IF_INCLUDE; + return; + } + continue; + } + /* Was IF_INCLUDE; #elif means we're done. (Should already + * have been transitioned to DONE before entering this + * skip — defensive.) */ + top->state = IF_DONE; + consume_to_newline(pp); + continue; + } + /* Other directive — relaxed: skip silently. */ + consume_to_newline(pp); + continue; + } + } +} + +/* ============================================================ + * Predefined macro name guard + * ============================================================ */ + +static int is_predefined_macro_name(Pp* pp, Sym name) { + return name == pp->sym_va_args || name == pp->sym_line__ || + name == pp->sym_file__ || name == pp->sym_date__ || + name == pp->sym_time__; + /* __STDC__/__STDC_HOSTED__/__STDC_VERSION__ are registered as real + * macros, so the macro-table lookup catches them. 
*/ +} + +/* ============================================================ + * #ifdef / #if / #elif / #else / #endif + * ============================================================ */ + +static void do_ifdef(Pp* pp, const Tok* line, u32 n, int negate, SrcLoc loc) { + int defined; + IfFrame f; + if (n < 1 || line[0].kind != TOK_IDENT) { + compiler_panic(pp->c, loc, + negate ? "#ifndef: expected identifier" + : "#ifdef: expected identifier"); + } + defined = (mt_get(pp, line[0].v.ident) != NULL) || + is_predefined_macro_name(pp, line[0].v.ident); + if (negate) defined = !defined; + memset(&f, 0, sizeof(f)); + f.state = defined ? IF_INCLUDE : IF_SEEK_TRUE; + f.loc = loc; + if_push(pp, f); + if (!defined) skip_until_active(pp); +} + +static void do_if_directive(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + i64 v = eval_if_expr(pp, line, n, loc); + IfFrame f; + memset(&f, 0, sizeof(f)); + f.state = v ? IF_INCLUDE : IF_SEEK_TRUE; + f.loc = loc; + if_push(pp, f); + if (!v) skip_until_active(pp); +} + +static void do_elif(Pp* pp, SrcLoc loc) { + /* We only reach do_elif from the active branch — meaning the + * preceding group emitted code. So we must skip the rest. 
*/ + IfFrame* top = if_top(pp); + if (!top) compiler_panic(pp->c, loc, "stray #elif"); + if (top->has_else) compiler_panic(pp->c, loc, "#elif after #else"); + top->state = IF_DONE; + skip_until_active(pp); +} + +static void do_else(Pp* pp, SrcLoc loc) { + IfFrame* top = if_top(pp); + if (!top) compiler_panic(pp->c, loc, "stray #else"); + if (top->has_else) compiler_panic(pp->c, loc, "duplicate #else"); + top->has_else = 1; + top->state = IF_DONE; + skip_until_active(pp); +} + +static void do_endif(Pp* pp, SrcLoc loc) { + if (!if_top(pp)) compiler_panic(pp->c, loc, "stray #endif"); + if_pop(pp); +} + +/* ============================================================ + * #include (§6.10.2) + * ============================================================ */ + +/* Read `path` via the host's file_io and copy its bytes into the pp + * arena so they outlive io->release. Returns 1 on success. */ +static int try_open_include(Pp* pp, const char* path, const u8** data_out, + size_t* size_out) { + CfreeFileData fd; + const CfreeFileIO* io; + u8* buf; + + memset(&fd, 0, sizeof(fd)); + io = pp->c->env->file_io; + if (!io || !io->read_all) { + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, + "#include: env.file_io is not configured"); + } + if (!io->read_all(io->user, path, &fd)) return 0; + { + size_t sz = fd.size; + buf = (u8*)arena_alloc(&pp->arena, sz ? sz : 1, 1); + if (sz && fd.data) memcpy(buf, fd.data, sz); + if (io->release) io->release(io->user, &fd); /* zeros fd */ + *data_out = buf; + *size_out = sz; + } + return 1; +} + +/* Return the includer's directory for resolving a quoted include, or "." + * for in-memory/builtin sources (where CWD is the natural fallback, like + * gcc treats stdin). `dir_out` must point to a buffer of size >= cap. 
*/
static int includer_dir(Pp* pp, SrcLoc loc, char* dir_out, size_t cap) {
  const SourceFile* file = source_file(pp->c->sources, loc.file_id);
  const char* name = NULL;
  size_t name_len = 0;
  size_t cut = 0; /* one past the index of the last '/', 0 when none */
  size_t dir_len;

  if (file && file->name) {
    name = pool_str(pp->c->global, file->name, &name_len);
  }

  /* Names that are missing, empty, or start with '<' have no directory of
   * their own; for every other name, scan backwards for the last '/'. */
  if (name && name_len != 0 && name[0] != '<') {
    cut = name_len;
    while (cut > 0 && name[cut - 1] != '/') --cut;
  }

  if (cut == 0) {
    /* No directory component: answer ".". */
    if (cap < 2) return 0;
    dir_out[0] = '.';
    dir_out[1] = 0;
    return 1;
  }

  dir_len = cut - 1;            /* bytes in front of the last '/' */
  if (dir_len == 0) dir_len = 1; /* path was "/x" — dir is "/" */
  if (dir_len + 1 > cap) return 0;
  memcpy(dir_out, name, dir_len);
  dir_out[dir_len] = 0;
  return 1;
}

/* Search for a header. Absolute paths are opened verbatim. Quoted form
 * ("...") additionally searches the directory of the file containing the
 * #include first (per C §6.10.2); bracket form (<...>) skips that step.
 * Both forms then walk the configured -I / -isystem dirs in order.
*/
static int find_and_open_include(Pp* pp, const char* path, int system,
                                 SrcLoc loc, const u8** data, size_t* size,
                                 char* resolved, size_t resolved_cap) {
  char candidate[4096];
  char self_dir[4096];
  const size_t path_len = strlen(path);
  int use_self_dir;
  size_t slot;
  size_t nslots;

  /* Absolute path: no search, the name is the resolved path. */
  if (path_len > 0 && path[0] == '/') {
    if (!try_open_include(pp, path, data, size)) return 0;
    if (path_len + 1 > resolved_cap) return 0;
    memcpy(resolved, path, path_len + 1);
    return 1;
  }

  /* Slot 0 is the includer's own directory (quoted form only); slots
   * 1..ninc_dirs are the configured search dirs, in order. */
  use_self_dir = !system && includer_dir(pp, loc, self_dir, sizeof(self_dir));
  nslots = (size_t)pp->ninc_dirs + 1;

  for (slot = use_self_dir ? 0 : 1; slot < nslots; ++slot) {
    const char* dir = (slot == 0) ? self_dir : pp->inc_dirs[slot - 1].path;
    const size_t dir_len = strlen(dir);
    const size_t total = dir_len + 1 + path_len + 1; /* dir '/' path NUL */

    /* A candidate that does not fit the scratch buffer is skipped. */
    if (total > sizeof(candidate)) continue;
    memcpy(candidate, dir, dir_len);
    candidate[dir_len] = '/';
    memcpy(candidate + dir_len + 1, path, path_len);
    candidate[dir_len + 1 + path_len] = 0;

    if (!try_open_include(pp, candidate, data, size)) continue;
    if (total > resolved_cap) return 0;
    memcpy(resolved, candidate, total);
    return 1;
  }
  return 0;
}

/* Parse the directive arguments into (path, system_flag). Handles:
 *  - directly-lexed TOK_HEADER: < ... > or " ... "
 *  - macro-replaced form: line is macro-expanded, then expected to
 *    produce either a TOK_STR ("...") or a < ... > sequence.
*/ +static void parse_include_path(Pp* pp, const Tok* line, u32 n, SrcLoc loc, + char* path_out, size_t cap, int* system_out) { + if (n == 0) compiler_panic(pp->c, loc, "#include: missing path"); + + if (line[0].kind == TOK_HEADER) { + size_t slen = 0; + const char* s = pool_str(pp->c->global, line[0].spelling, &slen); + if (slen < 2) compiler_panic(pp->c, loc, "#include: malformed header name"); + if (s[0] == '<' && s[slen - 1] == '>') + *system_out = 1; + else if (s[0] == '"' && s[slen - 1] == '"') + *system_out = 0; + else + compiler_panic(pp->c, loc, "#include: malformed header name"); + if (slen - 2 + 1 > cap) + compiler_panic(pp->c, loc, "#include: path too long"); + memcpy(path_out, s + 1, slen - 2); + path_out[slen - 2] = 0; + return; + } + + /* Macro-replaced form. */ + { + TokVec exp = {0}; + Tok* slice = arena_array(&pp->arena, Tok, n); + memcpy(slice, line, sizeof(Tok) * n); + expand_arg_to_eof(pp, slice, n, &exp); + + if (exp.n == 0) { + compiler_panic(pp->c, loc, "#include: empty after macro replacement"); + } + if (exp.data[0].kind == TOK_STR) { + size_t slen = 0; + const char* s = pool_str(pp->c->global, exp.data[0].spelling, &slen); + if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') { + compiler_panic(pp->c, loc, "#include: malformed string"); + } + if (slen - 2 + 1 > cap) { + compiler_panic(pp->c, loc, "#include: path too long"); + } + memcpy(path_out, s + 1, slen - 2); + path_out[slen - 2] = 0; + *system_out = 0; + return; + } + if (exp.data[0].kind == TOK_PUNCT && exp.data[0].v.punct == '<') { + size_t pos = 0; + u32 i; + for (i = 1; i < exp.n; ++i) { + size_t slen = 0; + const char* s = NULL; + if (exp.data[i].kind == TOK_PUNCT && exp.data[i].v.punct == '>') { + break; + } + if (exp.data[i].spelling) { + s = pool_str(pp->c->global, exp.data[i].spelling, &slen); + } + if (s && pos + slen + 1 <= cap) { + memcpy(path_out + pos, s, slen); + pos += slen; + } + } + path_out[pos] = 0; + *system_out = 1; + return; + } + compiler_panic(pp->c, loc, + 
"#include: expected \"...\" or <...> after expansion"); + } +} + +static void do_include(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + char path[4096]; + char resolved[4096]; + int system_form = 0; + const u8* data; + size_t size; + Lexer* lex; + u32 includer_id = 0; + u32 included_id; + u32 i; + TokSrc s; + + parse_include_path(pp, line, n, loc, path, sizeof(path), &system_form); + + if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved, + sizeof(resolved))) { + compiler_panic(pp->c, loc, "#include: file not found: %s", path); + } + + /* Walk the source stack to find the current includer's file_id. */ + for (i = pp->nsources; i > 0; --i) { + TokSrc* tp = &pp->sources[i - 1]; + if (tp->kind == SRC_LEX && tp->lex) { + includer_id = lex_file_id(tp->lex); + break; + } + } + + lex = lex_open_mem(pp->c, resolved, (const char*)data, size); + included_id = lex_file_id(lex); + + memset(&s, 0, sizeof(s)); + s.kind = SRC_LEX; + s.lex = lex; + src_push(pp, s); + + source_add_include(pp->c->sources, includer_id, included_id, loc, + system_form); +} + +/* ============================================================ + * #line (§6.10.4) + * ============================================================ */ + +/* Find the topmost SRC_LEX source on the stack — that's the "current + * file" whose line/file should track #line directives. */ +TokSrc* current_lex_src(Pp* pp) { + u32 i; + for (i = pp->nsources; i > 0; --i) { + TokSrc* s = &pp->sources[i - 1]; + if (s->kind == SRC_LEX) return s; + } + return NULL; +} + +static void do_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + /* Macro-replace arguments first (a2). 
*/ + TokVec exp = {0}; + Tok* slice; + TokSrc* lex_src; + i64 target_line; + Sym target_file = 0; + + if (n == 0) compiler_panic(pp->c, loc, "#line: missing arguments"); + slice = arena_array(&pp->arena, Tok, n); + memcpy(slice, line, sizeof(Tok) * n); + expand_arg_to_eof(pp, slice, n, &exp); + + if (exp.n == 0 || exp.data[0].kind != TOK_NUM) { + compiler_panic(pp->c, loc, "#line: expected line number"); + } + { + size_t sl = 0; + const char* s = pool_str(pp->c->global, exp.data[0].spelling, &sl); + target_line = parse_pp_int(s, sl); + } + if (exp.n >= 2) { + if (exp.data[1].kind != TOK_STR) { + compiler_panic(pp->c, loc, "#line: file argument must be a string"); + } + { + size_t sl = 0; + const char* s = pool_str(pp->c->global, exp.data[1].spelling, &sl); + if (sl >= 2 && s[0] == '"' && s[sl - 1] == '"') { + target_file = pool_intern(pp->c->global, s + 1, sl - 2); + } + } + } + + lex_src = current_lex_src(pp); + if (!lex_src) compiler_panic(pp->c, loc, "#line outside any file"); + { + /* The next token (post-directive-NL) currently has lex.line == + * <lex's line counter>. Set delta so its user-visible line == + * target_line. */ + SrcLoc here = lex_loc(lex_src->lex); + lex_src->line_delta = (i32)target_line - (i32)here.line; + if (target_file) lex_src->file_override = target_file; + } +} + +/* ============================================================ + * #pragma + _Pragma (§6.10.6, §6.10.9) + * ============================================================ */ + +/* Push the unmodified directive line back onto the source stack as a + * buffer, so pp_emit_text writes it as-is. SRC_BUF gates directive + * recognition off, so this won't recurse. 
*/ +void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + TokVec out = {0}; + HidesetId* hids; + u32 i; + Tok hash, ident, nl; + + memset(&hash, 0, sizeof(hash)); + hash.kind = TOK_PP_HASH; + hash.flags = TF_AT_BOL; + hash.loc = loc; + hash.spelling = pool_intern_cstr(pp->c->global, "#"); + tv_push(pp, &out, hash); + + memset(&ident, 0, sizeof(ident)); + ident.kind = TOK_IDENT; + ident.flags = 0; + ident.loc = loc; + ident.spelling = pp->sym_pragma_kw; + ident.v.ident = pp->sym_pragma_kw; + tv_push(pp, &out, ident); + + for (i = 0; i < n; ++i) { + Tok t = line[i]; + /* Force a leading space between tokens. */ + t.flags |= TF_HAS_SPACE; + if (i == 0) { + /* Space between "pragma" and the first arg. */ + } + tv_push(pp, &out, t); + } + + memset(&nl, 0, sizeof(nl)); + nl.kind = TOK_NEWLINE; + nl.loc = loc; + tv_push(pp, &out, nl); + + hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1); + for (i = 0; i < out.n; ++i) hids[i] = HS_EMPTY; + push_buf(pp, out.data, hids, out.n); +} + +static void do_pragma(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + /* Forward unrecognised pragmas to the output. STDC pragmas pass + * through too; we don't act on them yet. */ + emit_pragma_line(pp, line, n, loc); +} + +/* Destringize a string literal token's content: strip surrounding quotes + * and undo the `\"` and `\\` escapes. Other escape sequences pass + * through verbatim — the result is fed back through the lexer, which + * does its own escape handling for any string literals nested inside. 
*/ +static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap, + size_t* out_len) { + size_t slen = 0; + const char* s = pool_str(pp->c->global, str_tok->spelling, &slen); + size_t i, w = 0; + if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') { + compiler_panic(pp->c, str_tok->loc, + "_Pragma: argument must be a string literal"); + } + for (i = 1; i + 1 < slen; ++i) { + char c = s[i]; + if (c == '\\' && i + 2 < slen && (s[i + 1] == '\\' || s[i + 1] == '"')) { + ++i; + c = s[i]; + } + if (w + 1 >= cap) + compiler_panic(pp->c, str_tok->loc, "_Pragma: payload too long"); + out[w++] = c; + } + out[w] = 0; + *out_len = w; +} + +/* Handle a `_Pragma("...")` invocation. Caller has consumed the + * `_Pragma` identifier. Reads `(` STR `)`, destringizes, re-lexes the + * payload, and emits a #pragma directive line. */ +int try_expand_pragma_op(Pp* pp, const Tok* invoke) { + Tok lp, str, rp; + char buf[1024]; + size_t buf_n = 0; + Lexer* lex; + TokVec args = {0}; + + /* Peek '(' (skipping NL). Use peek_for_invoke_paren for consistency, + * but we need the saved-back behavior for a non-match. */ + { + int saw_ws; + if (!peek_for_invoke_paren(pp, &saw_ws)) { + return 0; /* not an invocation; emit _Pragma as ident */ + } + (void)saw_ws; + } + /* Read the string literal arg. */ + { + HidesetId hs; + str = src_next_raw(pp, &hs, NULL); + } + if (str.kind != TOK_STR) { + compiler_panic(pp->c, invoke->loc, "_Pragma: expected string literal"); + } + { + HidesetId hs; + rp = src_next_raw(pp, &hs, NULL); + } + if (rp.kind != TOK_PUNCT || rp.v.punct != ')') { + compiler_panic(pp->c, invoke->loc, "_Pragma: expected ')'"); + } + (void)lp; + + destringize(pp, &str, buf, sizeof(buf) - 2, &buf_n); + /* Append a NL so the lexer terminates cleanly. */ + buf[buf_n++] = '\n'; + buf[buf_n] = 0; + + /* Re-lex into args. Bytes need to live until lex_close; copy into + * arena. 
*/ + { + char* arena_buf = (char*)arena_alloc(&pp->arena, buf_n + 1, 1); + memcpy(arena_buf, buf, buf_n + 1); + lex = lex_open_mem(pp->c, "<_Pragma>", arena_buf, buf_n); + } + for (;;) { + Tok t = lex_next(lex); + if (t.kind == TOK_EOF || t.kind == TOK_NEWLINE) break; + tv_push(pp, &args, t); + } + lex_close(lex); + + emit_pragma_line(pp, args.data, args.n, invoke->loc); + return 1; +} + +/* ============================================================ + * #error + * ============================================================ */ + +static void do_error(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + /* Concatenate token spellings into a single message. */ + CharBuf cb = {0}; + u32 i; + for (i = 0; i < n; ++i) { + size_t sl = 0; + const char* s = line[i].spelling + ? pool_str(pp->c->global, line[i].spelling, &sl) + : NULL; + if (i > 0) cb_putc(pp, &cb, ' '); + if (s && sl) cb_append(pp, &cb, s, (u32)sl); + } + cb_putc(pp, &cb, 0); + compiler_panic(pp->c, loc, "#error: %s", cb.data ? cb.data : ""); +} + +/* ============================================================ + * #embed (C23, §6.10.* per N3033) + * ============================================================ */ + +static void do_embed(Pp* pp, const Tok* line, u32 n, SrcLoc loc) { + char path[4096]; + char resolved[4096]; + int system_form = 0; + const u8* data; + size_t size; + u32 j; + /* Optional embed parameters parsed below. */ + i64 limit_n = -1; + Tok* if_empty_toks = NULL; + u32 if_empty_n = 0; + /* Header-name path: first token. 
*/ + u32 arg_start = 0; + + if (n == 0) compiler_panic(pp->c, loc, "#embed: missing path"); + + if (line[0].kind == TOK_HEADER) { + size_t sl = 0; + const char* s = pool_str(pp->c->global, line[0].spelling, &sl); + if (sl < 2) compiler_panic(pp->c, loc, "#embed: malformed header name"); + if (s[0] == '<' && s[sl - 1] == '>') + system_form = 1; + else if (s[0] == '"' && s[sl - 1] == '"') + system_form = 0; + else + compiler_panic(pp->c, loc, "#embed: malformed header name"); + memcpy(path, s + 1, sl - 2); + path[sl - 2] = 0; + arg_start = 1; + } else { + compiler_panic(pp->c, loc, "#embed: header-name argument required"); + } + + /* Parse trailing parameters: limit(N), if_empty(...). */ + j = arg_start; + while (j < n) { + if (line[j].kind == TOK_IDENT) { + size_t sl = 0; + const char* s = pool_str(pp->c->global, line[j].v.ident, &sl); + if (sl == 5 && memcmp(s, "limit", 5) == 0) { + if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT || + line[j + 1].v.punct != '(') { + compiler_panic(pp->c, loc, "#embed: expected '(' after limit"); + } + j += 2; + if (j >= n || line[j].kind != TOK_NUM) { + compiler_panic(pp->c, loc, "#embed: limit() expects an integer"); + } + { + size_t sl2 = 0; + const char* s2 = pool_str(pp->c->global, line[j].spelling, &sl2); + limit_n = parse_pp_int(s2, sl2); + } + ++j; + if (j >= n || line[j].kind != TOK_PUNCT || line[j].v.punct != ')') { + compiler_panic(pp->c, loc, "#embed: expected ')' to close limit"); + } + ++j; + continue; + } + if (sl == 8 && memcmp(s, "if_empty", 8) == 0) { + u32 depth = 0; + u32 start; + if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT || + line[j + 1].v.punct != '(') { + compiler_panic(pp->c, loc, "#embed: expected '(' after if_empty"); + } + j += 2; + start = j; + while (j < n) { + if (line[j].kind == TOK_PUNCT) { + if (line[j].v.punct == '(') + ++depth; + else if (line[j].v.punct == ')') { + if (depth == 0) break; + --depth; + } + } + ++j; + } + if (j >= n) { + compiler_panic(pp->c, loc, "#embed: unterminated 
if_empty"); + } + if_empty_toks = arena_array(&pp->arena, Tok, j - start ? j - start : 1); + if_empty_n = j - start; + memcpy(if_empty_toks, line + start, sizeof(Tok) * if_empty_n); + ++j; /* skip ')' */ + continue; + } + } + compiler_panic(pp->c, loc, "#embed: unexpected token in parameter list"); + } + + if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved, + sizeof(resolved))) { + compiler_panic(pp->c, loc, "#embed: file not found: %s", path); + } + + /* Apply limit(). */ + { + size_t emit_n = size; + if (limit_n >= 0 && (u64)limit_n < emit_n) emit_n = (size_t)limit_n; + if (emit_n == 0) { + /* Empty: emit if_empty payload (or nothing). */ + if (if_empty_toks && if_empty_n) { + HidesetId* hids = arena_array(&pp->arena, HidesetId, if_empty_n); + u32 i; + for (i = 0; i < if_empty_n; ++i) hids[i] = HS_EMPTY; + push_buf(pp, if_empty_toks, hids, if_empty_n); + } + return; + } + /* Build a buffer of pp-numbers separated by ',' punctuators. */ + { + TokVec out = {0}; + HidesetId* hids; + size_t i; + for (i = 0; i < emit_n; ++i) { + char numbuf[8]; + int nl = 0; + u8 v = data[i]; + /* "u8 -> decimal" without sprintf. */ + if (v == 0) { + numbuf[nl++] = '0'; + } else { + char tmp[4]; + int k = 0; + while (v) { + tmp[k++] = (char)('0' + (v % 10)); + v /= 10; + } + while (k > 0) numbuf[nl++] = tmp[--k]; + } + { + Tok t; + memset(&t, 0, sizeof(t)); + t.kind = TOK_NUM; + t.loc = loc; + t.spelling = pool_intern(pp->c->global, numbuf, (size_t)nl); + if (i == 0) t.flags = TF_AT_BOL; + /* Bytes after a comma get a leading space to match + * clang's `, ` separator format. */ + else + t.flags = TF_HAS_SPACE; + tv_push(pp, &out, t); + } + if (i + 1 < emit_n) { + Tok comma; + memset(&comma, 0, sizeof(comma)); + comma.kind = TOK_PUNCT; + comma.v.punct = ','; + comma.loc = loc; + comma.spelling = pool_intern_cstr(pp->c->global, ","); + tv_push(pp, &out, comma); + } + } + hids = arena_array(&pp->arena, HidesetId, out.n ? 
out.n : 1); + { + u32 k; + for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY; + } + push_buf(pp, out.data, hids, out.n); + } + } +} + +/* ============================================================ + * Directive dispatch + * ============================================================ */ + +void process_directive(Pp* pp, SrcLoc hash_loc) { + Tok* line; + u32 n; + Sym name; + + read_directive_line(pp, &line, &n); + if (n == 0) { + /* Null directive: '#' newline. Nothing to do. */ + return; + } + if (line[0].kind != TOK_IDENT) { + compiler_panic(pp->c, line[0].loc, "expected directive name after '#'"); + } + name = line[0].v.ident; + if (name == pp->sym_define) + do_define(pp, line + 1, n - 1); + else if (name == pp->sym_undef) + do_undef(pp, line + 1, n - 1); + else if (name == pp->sym_if) + do_if_directive(pp, line + 1, n - 1, hash_loc); + else if (name == pp->sym_ifdef) + do_ifdef(pp, line + 1, n - 1, 0, hash_loc); + else if (name == pp->sym_ifndef) + do_ifdef(pp, line + 1, n - 1, 1, hash_loc); + else if (name == pp->sym_elif) + do_elif(pp, hash_loc); + else if (name == pp->sym_else) + do_else(pp, hash_loc); + else if (name == pp->sym_endif) + do_endif(pp, hash_loc); + else if (name == pp->sym_include) + do_include(pp, line + 1, n - 1, hash_loc); + else if (name == pp->sym_line) + do_line(pp, line + 1, n - 1, hash_loc); + else if (name == pp->sym_pragma) + do_pragma(pp, line + 1, n - 1, hash_loc); + else if (name == pp->sym_error) + do_error(pp, line + 1, n - 1, hash_loc); + else if (name == pp->sym_embed) + do_embed(pp, line + 1, n - 1, hash_loc); + else { + compiler_panic(pp->c, line[0].loc, "unsupported directive"); + } +} diff --git a/src/pp/pp_expand.c b/src/pp/pp_expand.c @@ -0,0 +1,1008 @@ +/* pp_expand.c — hideset table, macro hashmap, #define/#undef, substitution, + * paste, stringize, argument prescan, func/object macro expansion. 
*/ + +#include "pp/pp_priv.h" + +static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb); +static int macros_equal(const Macro* a, const Macro* b); + +/* ============================================================ + * Hideset table + * ============================================================ */ + +static int sym_in_array(const Sym* a, u32 n, Sym s) { + u32 i; + for (i = 0; i < n; ++i) + if (a[i] == s) return 1; + return 0; +} + +static HidesetId hs_register(Pp* pp, const Sym* names, u32 n) { + Hideset* h; + u32 i; + if (n == 0) return HS_EMPTY; + + /* Linear search for an existing identical hideset. Hidesets are tiny. */ + for (i = 1; i < pp->hsets_n; ++i) { + Hideset* e = pp->hsets[i]; + if (e->n != n) continue; + { + u32 j; + for (j = 0; j < n; ++j) + if (e->names[j] != names[j]) break; + if (j == n) return (HidesetId)i; + } + } + + if (pp->hsets_n == pp->hsets_cap) { + u32 nc = pp->hsets_cap ? pp->hsets_cap * 2 : 8; + pp->hsets = + (Hideset**)pp_xrealloc(pp, pp->hsets, sizeof(Hideset*) * pp->hsets_cap, + sizeof(Hideset*) * nc, _Alignof(Hideset*)); + pp->hsets_cap = nc; + } + h = (Hideset*)arena_alloc(&pp->arena, + sizeof(Hideset) + sizeof(Sym) * (n ? n - 1 : 0), + _Alignof(Hideset)); + h->n = n; + for (i = 0; i < n; ++i) h->names[i] = names[i]; + pp->hsets[pp->hsets_n] = h; + return (HidesetId)pp->hsets_n++; +} + +int hs_contains(Pp* pp, HidesetId id, Sym s) { + Hideset* h; + if (id == HS_EMPTY || s == 0) return 0; + h = pp->hsets[id]; + return sym_in_array(h->names, h->n, s); +} + +HidesetId hs_add(Pp* pp, HidesetId id, Sym s) { + Sym buf[64]; + Hideset* h; + u32 n; + u32 i; + + if (s == 0) return id; + if (hs_contains(pp, id, s)) return id; + + n = (id == HS_EMPTY) ? 
0 : pp->hsets[id]->n; + if (n + 1 > sizeof(buf) / sizeof(buf[0])) { + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: hideset overflow"); + } + if (id != HS_EMPTY) { + h = pp->hsets[id]; + for (i = 0; i < h->n; ++i) buf[i] = h->names[i]; + } + /* Keep sorted (numerically) for canonical hideset identity. */ + { + u32 pos = n; + while (pos > 0 && buf[pos - 1] > s) { + buf[pos] = buf[pos - 1]; + --pos; + } + buf[pos] = s; + } + return hs_register(pp, buf, n + 1); +} + +/* Used by token-paste in stage 5; declared early so the rest of the file + * doesn't grow forward decls. */ +__attribute__((unused)) static HidesetId hs_intersect(Pp* pp, HidesetId a, + HidesetId b) { + Sym buf[64]; + Hideset *ha, *hb; + u32 i, j, k; + if (a == HS_EMPTY || b == HS_EMPTY) return HS_EMPTY; + if (a == b) return a; + ha = pp->hsets[a]; + hb = pp->hsets[b]; + /* Both sorted; standard merge intersection. */ + i = j = k = 0; + while (i < ha->n && j < hb->n) { + if (ha->names[i] == hb->names[j]) { + buf[k++] = ha->names[i]; + ++i; + ++j; + } else if (ha->names[i] < hb->names[j]) { + ++i; + } else { + ++j; + } + } + return hs_register(pp, buf, k); +} + +/* ============================================================ + * Macro table + * ============================================================ */ + +/* Thin wrappers over the generated MacroMap_* functions; preserved + * because the call sites are tagged "mt_*" throughout this TU. */ +Macro* mt_get(Pp* pp, Sym name) { + Macro** v = MacroMap_get(&pp->mtab, name); + return v ? 
*v : NULL; +} + +void mt_put(Pp* pp, Sym name, Macro* m) { + (void)MacroMap_set(&pp->mtab, name, m); +} + +void mt_del(Pp* pp, Sym name) { MacroMap_del(&pp->mtab, name); } + +/* ============================================================ + * #define / #undef + * ============================================================ */ + +void do_define(Pp* pp, const Tok* line, u32 n) { + Macro* m; + u32 i = 0; + Sym name; + SrcLoc def_loc; + Macro* existing; + + if (i >= n || line[i].kind != TOK_IDENT) { + compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0}, + "#define: expected macro name"); + } + name = line[i].v.ident; + def_loc = line[i].loc; + ++i; + + m = arena_znew(&pp->arena, Macro); + m->name = name; + m->def_loc = def_loc; + + /* Function-like vs object-like: '(' immediately after the name with no + * intervening whitespace. */ + if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == '(' && + (line[i].flags & TF_HAS_SPACE) == 0) { + Sym* params = NULL; + u32 pcap = 0, pn = 0; + ++i; + m->is_func = 1; + if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ')') { + ++i; + } else { + for (;;) { + if (i >= n) { + compiler_panic(pp->c, def_loc, + "#define: unterminated parameter list"); + } + if (line[i].kind == TOK_PUNCT && line[i].v.punct == P_ELLIPSIS) { + /* Append a synthetic __VA_ARGS__ param so body-rewrite + * matches the standard identifier directly. */ + if (pn == pcap) { + u32 nc = pcap ? pcap * 2 : 4; + Sym* nb = arena_array(&pp->arena, Sym, nc); + if (pcap) memcpy(nb, params, sizeof(Sym) * pcap); + params = nb; + pcap = nc; + } + params[pn++] = pp->sym_va_args; + m->is_variadic = 1; + ++i; + } else if (line[i].kind == TOK_IDENT) { + if (pn == pcap) { + u32 nc = pcap ? 
pcap * 2 : 4; + Sym* nb = arena_array(&pp->arena, Sym, nc); + if (pcap) memcpy(nb, params, sizeof(Sym) * pcap); + params = nb; + pcap = nc; + } + params[pn++] = line[i].v.ident; + ++i; + } else { + compiler_panic(pp->c, line[i].loc, "#define: bad parameter list"); + } + if (i >= n) { + compiler_panic(pp->c, def_loc, + "#define: unterminated parameter list"); + } + if (line[i].kind == TOK_PUNCT && line[i].v.punct == ')') { + ++i; + break; + } + if (m->is_variadic) { + compiler_panic(pp->c, line[i].loc, + "#define: '...' must be last parameter"); + } + if (line[i].kind == TOK_PUNCT && line[i].v.punct == ',') { + ++i; + continue; + } + compiler_panic(pp->c, line[i].loc, "#define: expected ',' or ')'"); + } + } + m->params = params; + m->n_params = pn; + } + + /* Refuse define/undef of a few names the spec reserves: `defined` + * and a small set of mandatory predefined macros. */ + if (name == pp->sym_defined || name == pp->sym_line__ || + name == pp->sym_file__ || name == pp->sym_date__ || + name == pp->sym_time__) { + compiler_panic(pp->c, def_loc, + "#define of a reserved / predefined name is not allowed"); + } + /* Static predefineds are already in the macro table; redefining + * with a different body is caught by the existing macros_equal + * check below, but #define of __STDC__ et al. with the SAME body + * should also be rejected. */ + if (name == pp->sym_stdc__ || name == pp->sym_stdc_hosted__ || + name == pp->sym_stdc_version__) { + /* Allow re-registration of the predefined value at pp_new time + * but reject user-level redefinition. We detect "user-level" + * by checking whether it's already in the table — at pp_new the + * first call goes through cleanly. */ + if (mt_get(pp, name)) { + compiler_panic(pp->c, def_loc, + "#define of a mandatory predefined macro is not allowed"); + } + } + + /* Body: rewrite parameter occurrences to TOK_PP_PARAM. */ + { + u32 body_n = n - i; + u32 j; + m->body = body_n ? 
arena_array(&pp->arena, Tok, body_n) : NULL; + m->body_len = body_n; + for (j = 0; j < body_n; ++j) { + Tok t = line[i + j]; + if (m->is_func && t.kind == TOK_IDENT) { + u32 p; + for (p = 0; p < m->n_params; ++p) { + if (m->params[p] == t.v.ident) { + t.kind = TOK_PP_PARAM; + t.v.punct = p; + break; + } + } + } + /* §6.10.3 ¶5: __VA_ARGS__ outside a variadic macro is + * undefined behavior; we diagnose. */ + if (!m->is_variadic && t.kind == TOK_IDENT && + t.v.ident == pp->sym_va_args) { + compiler_panic(pp->c, t.loc, + "__VA_ARGS__ may only appear in a variadic macro body"); + } + m->body[j] = t; + } + /* Drop the leading-space bit on the first body token: it reflects + * the whitespace between the macro name (or close-paren) and the + * body, which is irrelevant to expansion output. */ + if (m->body_len) m->body[0].flags &= (u16)~TF_HAS_SPACE; + } + + existing = mt_get(pp, name); + if (existing) { + if (!macros_equal(existing, m)) { + compiler_panic(pp->c, def_loc, + "macro redefined with different replacement"); + } + return; + } + mt_put(pp, name, m); +} + +void do_undef(Pp* pp, const Tok* line, u32 n) { + Sym name; + if (!n || line[0].kind != TOK_IDENT) { + compiler_panic(pp->c, n ? 
line[0].loc : (SrcLoc){0, 0, 0}, + "#undef: expected identifier"); + } + name = line[0].v.ident; + if (name == pp->sym_defined || name == pp->sym_line__ || + name == pp->sym_file__ || name == pp->sym_date__ || + name == pp->sym_time__ || name == pp->sym_stdc__ || + name == pp->sym_stdc_hosted__ || name == pp->sym_stdc_version__) { + compiler_panic(pp->c, line[0].loc, + "#undef of a mandatory predefined name is not allowed"); + } + mt_del(pp, name); +} + +/* ============================================================ + * Body comparison helpers + * ============================================================ */ + +static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb) { + u32 i; + if (na != nb) return 0; + for (i = 0; i < na; ++i) { + if (a[i].kind != b[i].kind) return 0; + if (a[i].spelling != b[i].spelling) return 0; + /* Whitespace separation must match (§6.10.3 ¶2). The first body + * token's leading-space bit is meaningless (it's whatever was + * between macro name and body); skip i==0 for that bit. */ + if (i > 0) { + if ((a[i].flags & TF_HAS_SPACE) != (b[i].flags & TF_HAS_SPACE)) { + return 0; + } + } + } + return 1; +} + +static int macros_equal(const Macro* a, const Macro* b) { + if (a->is_func != b->is_func) return 0; + if (a->is_variadic != b->is_variadic) return 0; + if (a->n_params != b->n_params) return 0; + { + u32 i; + for (i = 0; i < a->n_params; ++i) { + if (a->params[i] != b->params[i]) return 0; + } + } + return body_tokens_equal(a->body, a->body_len, b->body, b->body_len); +} + +/* ============================================================ + * Object-macro expansion + * ============================================================ */ + +static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke, + TokVec* out); + +/* Build a buffer of the macro's body (with hidesets) and push it. 
The + * first expanded token inherits the invocation token's TF_AT_BOL / + * TF_HAS_SPACE so output formatting matches the invocation site. */ +static void expand_object_macro(Pp* pp, const Macro* m, const Tok* invoke, + HidesetId invoke_hs) { + TokVec body = {0}; + Tok* tmp; + HidesetId hs; + HidesetId* hids; + u32 i; + + if (m->body_len == 0) { + return; /* placemarker: nothing to push */ + } + /* Run the body through the paste phase: object-like macros may use + * `##`. There are no parameters, so phase 1 reduces to a copy. */ + tmp = arena_array(&pp->arena, Tok, m->body_len); + for (i = 0; i < m->body_len; ++i) tmp[i] = m->body[i]; + subst_phase2(pp, tmp, m->body_len, invoke, &body); + + if (body.n == 0) return; + + /* Transfer invocation flags onto the first emitted token. */ + body.data[0].flags = + (u16)((body.data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | + (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE))); + for (i = 0; i < body.n; ++i) body.data[i].loc = invoke->loc; + + hs = hs_add(pp, invoke_hs, m->name); + hids = arena_array(&pp->arena, HidesetId, body.n); + for (i = 0; i < body.n; ++i) hids[i] = hs; + push_buf(pp, body.data, hids, body.n); +} + +/* ============================================================ + * Function-like macro expansion + * ============================================================ */ + +/* Peek for an open paren after the just-consumed identifier (which named + * a function-like macro). Newlines are whitespace inside an invocation. + * Returns 1 with `*ws_has_space_out` indicating whether any whitespace + * (newlines or HAS_SPACE) sat between the ident and the `(`. Returns 0 if + * no `(` follows; pushed-back tokens (NLs + the non-`(` token, if any) + * are restored as a buffer source so subsequent reads still see them. 
*/ +int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out) { + TokVec saved = {0}; + int saw_ws = 0; + Tok t; + HidesetId hs; + + for (;;) { + t = src_next_raw(pp, &hs, NULL); + if (t.kind == TOK_NEWLINE) { + saw_ws = 1; + tv_push(pp, &saved, t); + continue; + } + if (t.kind == TOK_EOF) { + /* No '(' — push back saved tokens, leave EOF for next read. */ + if (saved.n) push_buf(pp, saved.data, NULL, saved.n); + *ws_has_space_out = saw_ws; + return 0; + } + if (t.flags & TF_HAS_SPACE) saw_ws = 1; + if (t.kind == TOK_PUNCT && t.v.punct == '(') { + /* Consumed. The newlines we walked past are whitespace and + * dropped (per spec); they don't go back on the stack. */ + *ws_has_space_out = saw_ws; + return 1; + } + /* Save this non-`(` token too and push back. */ + tv_push(pp, &saved, t); + push_buf(pp, saved.data, NULL, saved.n); + *ws_has_space_out = saw_ws; + return 0; + } +} + +/* Run macro expansion on a fixed token sequence to completion, yielding the + * fully-expanded token sequence. Used to pre-expand each function-macro + * argument before substitution (§6.10.3.1 ¶1). */ +void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out) { + TokSrc src; + Tok t; + + memset(&src, 0, sizeof(src)); + src.kind = SRC_BUF; + src.scope_top = 1; + src.toks = in; + src.hs = NULL; + src.n = nin; + src_push(pp, src); + + for (;;) { + t = pp_next_raw(pp); /* drives macro expansion within this scope */ + if (t.kind == TOK_EOF) break; + if (t.kind == TOK_NEWLINE) { + /* Newlines inside an arg act as whitespace; convert to + * "next-token has TF_HAS_SPACE". Drop the NL token itself. */ + continue; + } + tv_push(pp, out, t); + } + /* Pop our scope source. */ + --pp->nsources; +} + +/* Argument list for a function-like invocation. Stored as parallel + * (start, end) ranges into a flat unexpanded token vector and a flat + * expanded token vector. */ +typedef struct ArgList { + /* Unexpanded arg tokens (raw as collected from invocation). 
*/ + Tok* raw; + u32 raw_n; + u32* raw_start; /* size n_args + 1 (sentinel = raw_n) */ + /* Pre-expanded tokens. */ + Tok* exp; + u32 exp_n; + u32* exp_start; /* size n_args + 1 (sentinel = exp_n) */ + u32 n_args; +} ArgList; + +/* Collect arguments. Caller has just consumed the opening `(`. Returns the + * close-paren's token (used as the invocation's last source location). */ +static Tok read_invocation_args(Pp* pp, const Macro* m, SrcLoc invoke_loc, + ArgList* out) { + TokVec raw = {0}; + u32* starts; + u32 starts_cap = 0; + u32 n_args = 0; + u32 cur_start = 0; + int depth = 0; + Tok t; + HidesetId hs; + int first_token_of_arg = 1; + Tok close_tok; + + memset(out, 0, sizeof(*out)); + starts = arena_array(&pp->arena, u32, 8); + starts_cap = 8; + starts[0] = 0; + + for (;;) { + t = src_next_raw(pp, &hs, NULL); + if (t.kind == TOK_EOF) { + compiler_panic(pp->c, invoke_loc, + "unterminated function-like macro invocation"); + } + if (t.kind == TOK_NEWLINE) { + /* Whitespace within an invocation. Mark the next token as + * having space; drop the NL. */ + if (raw.n && depth >= 0) { + /* No-op token list; we'll OR onto the next pushed token. */ + } + /* Use a sentinel: track via a flag on a deferred push. We + * accumulate "has_space" by setting it on the next pushed + * token. */ + /* Simpler: just push a placeholder by OR'ing onto next via + * a flag stored in `first_token_of_arg`-style state. */ + /* Implementation: use the next read token's TF_HAS_SPACE bit, + * which the lexer already sets after a NL. Actually NOT — + * after a NL the lexer sets TF_AT_BOL on the next token, not + * HAS_SPACE necessarily. Force it: */ + /* We'll OR it manually onto the next token. */ + /* Use a small flag stash: */ + /* (handled below by setting a pending flag) */ + /* See: pending_space variable */ + /* — commit: declare a pending_space static earlier. 
*/ + continue; + } + + if (t.kind == TOK_PUNCT) { + u32 p = t.v.punct; + if (p == '(' || p == '[' || p == '{') { + ++depth; + } else if (p == ')' || p == ']' || p == '}') { + if (p == ')' && depth == 0) { + /* End of invocation. Close the current argument. The + * empty-args case (no commas seen, no tokens + * collected) emits a slot only when the macro expects + * at least one argument; arity-0 macros take none. */ + close_tok = t; + { + int empty_call = + (n_args == 0 && raw.n == cur_start && first_token_of_arg); + int want_slot = !empty_call || (m->n_params > 0) || m->is_variadic; + if (want_slot) { + if (n_args + 1 >= starts_cap) { + u32 nc = starts_cap * 2; + u32* nb = arena_array(&pp->arena, u32, nc); + memcpy(nb, starts, sizeof(u32) * starts_cap); + starts = nb; + starts_cap = nc; + } + ++n_args; + starts[n_args] = raw.n; + } + } + goto done; + } + --depth; + } else if (p == ',' && depth == 0) { + /* Variadic: once we've filled all named params, the rest + * (commas included) collect into __VA_ARGS__. */ + if (m->is_variadic && n_args + 1 >= m->n_params) { + /* This comma is part of __VA_ARGS__. Push it. */ + tv_push(pp, &raw, t); + first_token_of_arg = 0; + continue; + } + /* Close current arg, start next. */ + if (n_args + 1 >= starts_cap) { + u32 nc = starts_cap * 2; + u32* nb = arena_array(&pp->arena, u32, nc); + memcpy(nb, starts, sizeof(u32) * starts_cap); + starts = nb; + starts_cap = nc; + } + ++n_args; + starts[n_args] = raw.n; + cur_start = raw.n; + first_token_of_arg = 1; + continue; + } + } + tv_push(pp, &raw, t); + first_token_of_arg = 0; + (void)hs; /* hideset of raw arg tokens carried for blue-paint + * propagation in the arg's pre-expansion */ + } +done: + /* Validate arity. */ + { + u32 expected = m->n_params; + if (m->is_variadic) { + if (n_args < (expected ? expected - 1 : 0)) { + /* Allow exactly expected-1 (empty __VA_ARGS__) by + * synthesizing an empty trailing arg. */ + if (n_args + 1 == (expected ? 
expected - 1 : 0)) { + /* off by one — fall through to error */ + } + compiler_panic(pp->c, invoke_loc, + "too few arguments to variadic macro invocation"); + } + /* Synthesize an empty __VA_ARGS__ if caller passed exactly + * the named-parameter count. */ + if (n_args + 1 == expected) { + if (n_args + 1 >= starts_cap) { + u32 nc = starts_cap * 2; + u32* nb = arena_array(&pp->arena, u32, nc); + memcpy(nb, starts, sizeof(u32) * starts_cap); + starts = nb; + starts_cap = nc; + } + ++n_args; + starts[n_args] = raw.n; + } + } else { + if (n_args != expected) { + /* Spec: arity-0 macro `M()` invoked as `M()` is allowed and + * has 0 args. Above logic produces 0 in that case. */ + compiler_panic(pp->c, invoke_loc, + "wrong number of arguments to function-like macro"); + } + } + } + out->raw = raw.data; + out->raw_n = raw.n; + out->raw_start = starts; + out->n_args = n_args; + return close_tok; +} + +/* Build pre-expanded args. */ +static void preexpand_args(Pp* pp, ArgList* a) { + TokVec exp = {0}; + u32* exp_start; + u32 i; + exp_start = arena_array(&pp->arena, u32, a->n_args + 1); + exp_start[0] = 0; + for (i = 0; i < a->n_args; ++i) { + u32 lo = a->raw_start[i]; + u32 hi = a->raw_start[i + 1]; + if (hi > lo) { + /* Copy the slice into a fresh buffer so expand_arg_to_eof can + * own it without aliasing. */ + Tok* slice = arena_array(&pp->arena, Tok, hi - lo); + memcpy(slice, &a->raw[lo], sizeof(Tok) * (hi - lo)); + expand_arg_to_eof(pp, slice, hi - lo, &exp); + } + exp_start[i + 1] = exp.n; + } + a->exp = exp.data; + a->exp_n = exp.n; + a->exp_start = exp_start; +} + +/* Build a stringized TOK_STR from the unexpanded argument tokens + * `arg[lo..hi)`. The first token's leading-space flag is ignored (leading + * whitespace stripped). Inside string/char-literal spellings, '"' and '\' + * are escaped. 
*/ +static Tok make_stringize(Pp* pp, const Tok* arg, u32 lo, u32 hi, SrcLoc loc) { + CharBuf b = {0}; + u32 i; + Tok t; + Sym sp; + + cb_putc(pp, &b, '"'); + for (i = lo; i < hi; ++i) { + const Tok* at = &arg[i]; + size_t slen = 0; + const char* s = + at->spelling ? pool_str(pp->c->global, at->spelling, &slen) : NULL; + if (i > lo && (at->flags & TF_HAS_SPACE)) cb_putc(pp, &b, ' '); + if (s && slen) { + int esc = (at->kind == TOK_STR || at->kind == TOK_CHR); + size_t k; + for (k = 0; k < slen; ++k) { + char c = s[k]; + if (esc && (c == '\\' || c == '"')) cb_putc(pp, &b, '\\'); + cb_putc(pp, &b, c); + } + } + } + cb_putc(pp, &b, '"'); + + sp = pool_intern(pp->c->global, b.data, b.len); + memset(&t, 0, sizeof(t)); + t.kind = TOK_STR; + t.loc = loc; + t.spelling = sp; + t.v.str = sp; + return t; +} + +/* Concatenate two token spellings and re-lex into a single token. Empty + * (placemarker) sides collapse to the other side per §6.10.3.3 ¶2. */ +static Tok paste_tokens(Pp* pp, Tok lhs, Tok rhs, SrcLoc loc) { + char buf[1024]; + size_t alen = 0, blen = 0; + const char* a; + const char* b; + Lexer* lex; + Tok t1, t2; + + if (lhs.kind == TOK_PP_PLACEMARKER) return rhs; + if (rhs.kind == TOK_PP_PLACEMARKER) return lhs; + + a = lhs.spelling ? pool_str(pp->c->global, lhs.spelling, &alen) : ""; + b = rhs.spelling ? pool_str(pp->c->global, rhs.spelling, &blen) : ""; + if (alen + blen + 2 > sizeof(buf)) { + compiler_panic(pp->c, loc, "token paste: spelling too long"); + } + if (alen) memcpy(buf, a, alen); + if (blen) memcpy(buf + alen, b, blen); + buf[alen + blen] = '\n'; + buf[alen + blen + 1] = 0; + + lex = lex_open_mem(pp->c, "<paste>", buf, alen + blen + 1); + t1 = lex_next(lex); + t2 = lex_next(lex); + if (t1.kind == TOK_EOF) { + /* Both empty (shouldn't reach here since we handled placemarkers). 
*/ + lex_close(lex); + return lhs; + } + if (t2.kind != TOK_NEWLINE && t2.kind != TOK_EOF) { + lex_close(lex); + compiler_panic(pp->c, loc, "token pasting yields multiple tokens, invalid"); + } + lex_close(lex); + + /* Inherit positional flags from LHS (it sat in the same slot). */ + t1.flags = (u16)((t1.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | + (lhs.flags & (TF_AT_BOL | TF_HAS_SPACE))); + t1.loc = loc; + return t1; +} + +/* Phase 1 (param substitution). For each parameter occurrence in the + * body: if adjacent to ## or # (handled separately), substitute the raw + * argument tokens; otherwise substitute the pre-expanded form. Empty raw + * args become a TOK_PP_PLACEMARKER which phase 2 collapses. */ +static void subst_phase1(Pp* pp, const Macro* m, ArgList* a, const Tok* invoke, + TokVec* out) { + u32 j; + for (j = 0; j < m->body_len; ++j) { + const Tok* bt = &m->body[j]; + if (bt->kind == TOK_PP_HASH) { + /* §6.10.3.2: # must be followed by a parameter. */ + if (j + 1 >= m->body_len || m->body[j + 1].kind != TOK_PP_PARAM) { + compiler_panic(pp->c, bt->loc, + "'#' is not followed by a macro parameter"); + } + { + u32 p = m->body[j + 1].v.punct; + u32 lo = a->raw_start[p]; + u32 hi = a->raw_start[p + 1]; + Tok s = make_stringize(pp, a->raw, lo, hi, invoke->loc); + s.flags = (u16)((s.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | + (bt->flags & (TF_AT_BOL | TF_HAS_SPACE))); + tv_push(pp, out, s); + ++j; + continue; + } + } + if (bt->kind == TOK_PP_PARAM) { + u32 p = bt->v.punct; + int adj_paste = + (j > 0 && m->body[j - 1].kind == TOK_PP_PASTE) || + (j + 1 < m->body_len && m->body[j + 1].kind == TOK_PP_PASTE); + + u32 lo, hi; + if (adj_paste) { + lo = a->raw_start[p]; + hi = a->raw_start[p + 1]; + } else { + lo = a->exp_start[p]; + hi = a->exp_start[p + 1]; + } + + if (lo == hi) { + /* Empty argument → placemarker. 
*/ + Tok pm; + memset(&pm, 0, sizeof(pm)); + pm.kind = TOK_PP_PLACEMARKER; + pm.flags = bt->flags & (TF_AT_BOL | TF_HAS_SPACE); + pm.loc = invoke->loc; + tv_push(pp, out, pm); + } else { + u32 k; + int first = 1; + Tok* src = adj_paste ? a->raw : a->exp; + for (k = lo; k < hi; ++k) { + Tok t = src[k]; + if (first) { + t.flags = (u16)((t.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | + (bt->flags & (TF_AT_BOL | TF_HAS_SPACE))); + first = 0; + } + tv_push(pp, out, t); + } + } + continue; + } + tv_push(pp, out, *bt); + } +} + +/* Phase 2 (paste). Walk the post-substitute buffer; for each TOK_PP_PASTE, + * splice the previous output token with the next input token. Then strip + * remaining placemarkers. */ +static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke, + TokVec* out) { + u32 i; + for (i = 0; i < nin; ++i) { + Tok t = in[i]; + if (t.kind == TOK_PP_PASTE) { + Tok lhs, rhs; + if (out->n == 0 || i + 1 >= nin) { + compiler_panic(pp->c, invoke->loc, + "'##' at start or end of replacement list"); + } + lhs = out->data[--out->n]; + rhs = in[++i]; + tv_push(pp, out, paste_tokens(pp, lhs, rhs, invoke->loc)); + continue; + } + tv_push(pp, out, t); + } + /* Strip placemarkers, preserving leading-space flag on the next token. */ + { + u32 r = 0, w = 0; + u16 carry = 0; + for (r = 0; r < out->n; ++r) { + if (out->data[r].kind == TOK_PP_PLACEMARKER) { + carry |= out->data[r].flags & (TF_AT_BOL | TF_HAS_SPACE); + continue; + } + if (carry) { + out->data[r].flags |= carry; + carry = 0; + } + if (w != r) out->data[w] = out->data[r]; + ++w; + } + out->n = w; + } +} + +/* Wrapper: phases 1 and 2 in sequence, plus invocation-loc / flag transfer. */ +static void substitute_body(Pp* pp, const Macro* m, ArgList* a, + const Tok* invoke, HidesetId result_hs, TokVec* out, + TokVec* hs_out) { + TokVec phase1 = {0}; + u32 i; + subst_phase1(pp, m, a, invoke, &phase1); + subst_phase2(pp, phase1.data, phase1.n, invoke, out); + /* Invocation flags onto first emitted token. 
*/ + if (out->n) { + out->data[0].flags = + (u16)((out->data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) | + (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE))); + } + /* Locations to invocation site. */ + for (i = 0; i < out->n; ++i) out->data[i].loc = invoke->loc; + /* Build parallel hideset vector. */ + for (i = 0; i < out->n; ++i) { + Tok hsmark; + memset(&hsmark, 0, sizeof(hsmark)); + hsmark.spelling = (Sym)result_hs; + tv_push(pp, hs_out, hsmark); + } +} + +/* Expand a function-like macro invocation: peek for `(`, collect args, + * pre-expand them, substitute the body, push the result. Returns 1 if + * the invocation was performed, 0 if there was no `(` (the caller should + * emit the identifier as-is). */ +static int try_expand_func_macro(Pp* pp, const Macro* m, const Tok* invoke, + HidesetId invoke_hs) { + int saw_ws; + ArgList args; + TokVec body = {0}; + TokVec hsvec = {0}; /* parallel to body, holds HidesetId per slot */ + HidesetId result_hs; + Tok close_tok; + + if (!peek_for_invoke_paren(pp, &saw_ws)) { + return 0; + } + (void)saw_ws; + read_invocation_args(pp, m, invoke->loc, &args); + /* Note: assigned to silence unused-result; we don't use the close tok yet. */ + close_tok.kind = 0; + (void)close_tok; + preexpand_args(pp, &args); + + /* Hideset of result = invocation hideset ∪ {macro_name}. The standard + * intersects with the closing `)`'s hideset for blue-paint purity, but + * for the freshly-collected `)` from the lex source that's the empty + * set, so the union form suffices here. */ + result_hs = hs_add(pp, invoke_hs, m->name); + substitute_body(pp, m, &args, invoke, result_hs, &body, &hsvec); + + { + u32 i; + HidesetId* hids = arena_array(&pp->arena, HidesetId, body.n ? 
body.n : 1); + for (i = 0; i < body.n; ++i) { + hids[i] = (HidesetId)hsvec.data[i].spelling; + } + push_buf(pp, body.data, hids, body.n); + } + return 1; +} + +/* ============================================================ + * pp_next_raw — mutual recursion entry (called from expand_arg_to_eof) + * Defined here; also declared in pp_priv.h so pp.c can call it. + * ============================================================ */ + +/* pp_next_raw: reads from the top source, applies macro expansion when an + * identifier names a macro that isn't blue-painted, and consumes + * directives in-place. TOK_NEWLINE is preserved for pp_emit_text. */ +Tok pp_next_raw(Pp* pp) { + Tok t; + HidesetId hs; + u8 src_kind; + for (;;) { + t = src_next_raw(pp, &hs, &src_kind); + if (t.kind == TOK_EOF) return t; + if (t.kind == TOK_PP_HASH && (t.flags & TF_AT_BOL) && src_kind == SRC_LEX) { + process_directive(pp, t.loc); + /* No synthesized newline: the comparator collapses + * whitespace, so blank-line replacement of consumed + * directives isn't observable here. Directives that produce + * content (e.g. #include, #embed, #pragma) push their own + * tokens onto the source stack, which the next loop + * iteration picks up. */ + continue; + } + if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) { + Sym id = t.v.ident; + + /* Dynamic predefined macros: __LINE__ / __FILE__ / + * __DATE__ / __TIME__. Always expand, ignoring the macro + * table. 
*/ + if (id == pp->sym_line__) { + char tmp[16], buf[16]; + int k = 0, j = 0; + u32 ln = t.loc.line; + if (ln == 0) + buf[k++] = '0'; + else { + while (ln) { + tmp[j++] = (char)('0' + ln % 10); + ln /= 10; + } + while (j > 0) buf[k++] = tmp[--j]; + } + t.kind = TOK_NUM; + t.spelling = pool_intern(pp->c->global, buf, (size_t)k); + return t; + } + if (id == pp->sym_file__) { + TokSrc* ls = current_lex_src(pp); + Sym name = 0; + size_t nlen = 0; + const char* nstr = NULL; + char* buf; + if (ls && ls->file_override) { + name = ls->file_override; + } else if (ls) { + const SourceFile* sf = + source_file(pp->c->sources, lex_file_id(ls->lex)); + if (sf) name = sf->name; + } + if (name) nstr = pool_str(pp->c->global, name, &nlen); + buf = (char*)arena_alloc(&pp->arena, nlen + 2, 1); + buf[0] = '"'; + if (nlen) memcpy(buf + 1, nstr, nlen); + buf[nlen + 1] = '"'; + t.kind = TOK_STR; + t.spelling = pool_intern(pp->c->global, buf, nlen + 2); + t.v.str = t.spelling; + return t; + } + if (id == pp->sym_date__) { + t.kind = TOK_STR; + t.spelling = pp->val_date_str; + t.v.str = t.spelling; + return t; + } + if (id == pp->sym_time__) { + t.kind = TOK_STR; + t.spelling = pp->val_time_str; + t.v.str = t.spelling; + return t; + } + if (id == pp->sym__pragma) { + if (try_expand_pragma_op(pp, &t)) continue; + /* No '(' — fall through and emit as plain ident. */ + } + + { + Macro* m = mt_get(pp, id); + if (m && !hs_contains(pp, hs, m->name)) { + if (!m->is_func) { + expand_object_macro(pp, m, &t, hs); + continue; + } + if (try_expand_func_macro(pp, m, &t, hs)) { + continue; + } + /* No '(' followed; emit as plain identifier. */ + } + } + } + return t; + } +} diff --git a/src/pp/pp_priv.h b/src/pp/pp_priv.h @@ -0,0 +1,278 @@ +/* pp_priv.h — shared types, helpers, and cross-module forward declarations + * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c). + * NOT part of the public API; included only within src/pp/. 
*/ + +#ifndef CFREE_PP_PRIV_H +#define CFREE_PP_PRIV_H + +#include "pp/pp.h" + +#include <stdlib.h> +#include <string.h> + +#include "core/arena.h" +#include "core/diag.h" +#include "core/heap.h" +#include "core/pool.h" + +/* ============================================================ + * Internal token kinds + * ============================================================ */ + +/* Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */ +#define TOK_PP_PARAM ((u16)0x1100) +#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */ + +/* ============================================================ + * Types + * ============================================================ */ + +typedef struct Macro { + Sym name; + SrcLoc def_loc; + u8 is_func; + u8 is_variadic; + u8 pad[2]; + u32 n_params; + Sym* params; /* parameter names */ + Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */ + u32 body_len; +} Macro; + +typedef u32 HidesetId; +#define HS_EMPTY 0u + +typedef struct Hideset { + u32 n; + Sym names[1]; /* flexible; allocated with extra trailing slots */ +} Hideset; + +typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind; + +typedef struct TokSrc { + u8 kind; + /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is + * the top source and it's exhausted, instead of popping. The caller + * (e.g. argument pre-expansion) explicitly pops the scope when done. + * This bounds expansion to a single argument's token stream. */ + u8 scope_top; + u8 pad[2]; + /* SRC_LEX */ + Lexer* lex; + /* SRC_BUF */ + Tok* toks; + HidesetId* hs; + u32 i; + u32 n; + /* #line state (SRC_LEX only). line_delta is added to every emitted + * token's loc.line on its way out so __LINE__ and the output cursor + * see user-visible numbering. file_override is the Sym (without + * surrounding quotes) used by __FILE__ when set. 
*/ + i32 line_delta; + Sym file_override; +} TokSrc; + +typedef enum IfState { + IF_INCLUDE = 1, /* group active, emit code */ + IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */ + IF_DONE = 3, /* skip, already had a true branch */ +} IfState; + +typedef struct IfFrame { + u8 state; + u8 has_else; + u8 pad[2]; + SrcLoc loc; +} IfFrame; + +/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with + * deletion (#undef). See core/hashmap.h. */ +#include "core/hashmap.h" +static inline u32 macro_hash_(Sym s) { return hash_u32((u32)s); } +HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_); + +/* ============================================================ + * Pp struct (definition shared across all three TUs) + * ============================================================ */ + +struct Pp { + Compiler* c; + + /* Source stack — top of stack is sources[nsources-1]. */ + TokSrc* sources; + u32 nsources; + u32 sources_cap; + + /* Macro table (open-addressed; key = Sym, value = Macro*). */ + MacroMap mtab; + + /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */ + IfFrame* ifstk; + u32 ifstk_n; + u32 ifstk_cap; + + /* Hideset table. Element 0 reserved as HS_EMPTY. */ + Hideset** hsets; + u32 hsets_n; + u32 hsets_cap; + + /* Include directories (stage 9). */ + struct { + const char* path; + u8 system; + }* inc_dirs; + u32 ninc_dirs; + u32 inc_dirs_cap; + + /* Internal arena: macro bodies, hidesets, expansion buffers, file + * data for #include. Lives until pp_free. */ + Arena arena; + + /* Cached interned identifiers used for directive recognition. 
*/ + Sym sym_define; + Sym sym_undef; + Sym sym_include; + Sym sym_if; + Sym sym_ifdef; + Sym sym_ifndef; + Sym sym_elif; + Sym sym_else; + Sym sym_endif; + Sym sym_line; + Sym sym_pragma; + Sym sym_error; + Sym sym_embed; + Sym sym_defined; + Sym sym_va_args; + Sym sym_line__; /* __LINE__ */ + Sym sym_file__; /* __FILE__ */ + Sym sym_date__; /* __DATE__ */ + Sym sym_time__; /* __TIME__ */ + Sym sym_stdc__; /* __STDC__ */ + Sym sym_stdc_hosted__; + Sym sym_stdc_version__; + Sym sym__pragma; /* _Pragma operator */ + Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */ + + /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for + * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or + * time(NULL) if unset). */ + Sym val_date_str; + Sym val_time_str; +}; + +/* ============================================================ + * Allocation helpers (defined in pp.c, used everywhere) + * ============================================================ */ + +static inline Heap* pp_heap(Pp* pp) { return (Heap*)pp->c->env->heap; } + +static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n, + size_t align) { + Heap* h = pp_heap(pp); + void* q = h->realloc(h, p, old_n, new_n, align); + if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory"); + return q; +} + +static inline void pp_xfree(Pp* pp, void* p, size_t n) { + if (p) pp_heap(pp)->free(pp_heap(pp), p, n); +} + +/* ============================================================ + * Token-vector helpers + * ============================================================ */ + +typedef struct TokVec { + Tok* data; + u32 n; + u32 cap; +} TokVec; + +static inline void tv_grow(Pp* pp, TokVec* v, u32 want) { + u32 nc; + if (v->cap >= want) return; + nc = v->cap ? 
v->cap * 2 : 8; + while (nc < want) nc *= 2; + { + Tok* nb = arena_array(&pp->arena, Tok, nc); + if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n); + v->data = nb; + v->cap = nc; + } +} + +static inline void tv_push(Pp* pp, TokVec* v, Tok t) { + tv_grow(pp, v, v->n + 1); + v->data[v->n++] = t; +} + +/* Growable char buffer (arena-backed). */ +typedef struct CharBuf { + char* data; + u32 len; + u32 cap; +} CharBuf; + +static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) { + if (b->len + n > b->cap) { + u32 nc = b->cap ? b->cap * 2 : 64; + while (nc < b->len + n) nc *= 2; + { + char* nb = (char*)arena_alloc(&pp->arena, nc, 1); + if (b->len) memcpy(nb, b->data, b->len); + b->data = nb; + b->cap = nc; + } + } + if (n) memcpy(b->data + b->len, s, n); + b->len += n; +} + +static inline void cb_putc(Pp* pp, CharBuf* b, char c) { + cb_append(pp, b, &c, 1); +} + +/* ============================================================ + * Cross-module forward declarations + * ============================================================ */ + +/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */ +Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out); +void src_push(Pp* pp, TokSrc s); +void src_pop(Pp* pp); +void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n); + +/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it, + * and pp_next_raw drives directives and expansion. Declared non-static so + * pp_expand.c can call it without a forward decl each time. 
*/ +Tok pp_next_raw(Pp* pp); + +/* --- pp_expand.c → pp.c, pp_directive.c --- */ +HidesetId hs_add(Pp* pp, HidesetId id, Sym s); +int hs_contains(Pp* pp, HidesetId id, Sym s); +Macro* mt_get(Pp* pp, Sym name); +void mt_put(Pp* pp, Sym name, Macro* m); +void mt_del(Pp* pp, Sym name); +void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out); + +/* --- pp_directive.c → pp_expand.c --- */ +i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc); +void process_directive(Pp* pp, SrcLoc hash_loc); + +/* --- pp_directive.c internal helpers called from pp_expand.c --- */ +void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc); +int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out); +int try_expand_pragma_op(Pp* pp, const Tok* invoke); + +/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */ +void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n); + +/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */ +void do_define(Pp* pp, const Tok* line, u32 n); +void do_undef(Pp* pp, const Tok* line, u32 n); + +/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */ +TokSrc* current_lex_src(Pp* pp); + +#endif /* CFREE_PP_PRIV_H */