commit f0897759c641e776198bfe031f1286489ab58934
parent 049d0f0ae42e920aa7f5a997dbc23fbe53fef7c0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 15:13:50 -0700
arch/x64: codegen for Linux ELF — A–D spine + most of E–H, P, Q
Replaces the all-panic x64 CGTarget skeleton with a single-pass backend
mirroring aa64/rv64. SysV AMD64 ABI classifier upgraded from
indirect-everything to real scalar DIRECT (aggregates ≤16B in INT parts,
larger sret/byval). Runs under qemu-x86_64 via the existing podman path.
test/cg E result (both opt-levels): 230 pass / 154 fail / 0 skip.
Full sweep of Groups A, B, C, D, E, H, P, Q + most of F and G; alloca,
varargs, atomics, intrinsics, TLS, globals, bitfields, indirect call
still stubbed and tracked in doc/X64.md.
Diffstat:
| A | doc/X64.md | | | 206 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/abi/abi_sysv_x64.c | | | 102 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------- |
| M | src/arch/x64.c | | | 2043 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- |
| A | src/arch/x64_isa.h | | | 75 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 2172 insertions(+), 254 deletions(-)
diff --git a/doc/X64.md b/doc/X64.md
@@ -0,0 +1,206 @@
+# X64 codegen status
+
+Living checklist for the x86_64 (SysV AMD64, Linux ELF) backend
+(`src/arch/x64.c`) and ABI (`src/abi/abi_sysv_x64.c`). Behavioral
+oracles are `test/cg/` and (later) `test/parse/`. Phase status:
+
+- ✅ landed
+- 🚧 in progress
+- ⬜ planned
+
+---
+
+## Test cg coverage
+
+Targeted scope: the **A–D spine** plus enough core ops (FP, sret,
+byval, scalar conversions, locals + indirect load/store, structured
+control flow, multi-function) that several follow-on groups
+incidentally pass. Path D (in-process JIT) is skipped on a non-x64
+host — the harness reports SKIP. Path E (qemu/podman exec) runs
+under `qemu-x86_64`.
+
+Current full-corpus result on x64 ELF, both opt-levels:
+**E: 230 pass / 154 fail / 0 skip.**
+
+| Group | Status | Notes |
+|-------|--------|-------|
+| MC-only | n/a | `mc_smoke` is aa64-bytes-only (excluded by arch mask) |
+| A — lifecycle | ✅ | a01–a10 |
+| B — params/locals | ✅ | b01–b08, including sret (b06), byval (b07), FP param via xmm0 (b08), and 9-int-param stack spill (b03) |
+| C — int arith | ✅ | c01–c12 |
+| D — cmp/branch | ✅ | d01–d13 |
+| E — conversions | ✅ | e01–e15 (SEXT/ZEXT/TRUNC/ITOF/FTOI/FEXT/FTRUNC/BITCAST) |
+| F — memory | ✅ except bitfields | f01–f11; 🚧 f12/f13 bitfields |
+| G — calls | ✅ except indirect | g02–g13; 🚧 g01 indirect call via reg (the synthesized fnptr type fails to classify cleanly through the stub ABI — TODO when callee.cls/type plumbing settles) |
+| H — control | ✅ | h01–h18 — SCOPE_LOOP / SCOPE_BLOCK bookkeeping suffices for while / do / for / switch / ternary |
+| I — alloca | ⬜ | needs `max_outgoing` patch site (placeholder ADD) and SP-from-RBP epilogue restoration |
+| J — varargs | ⬜ | needs SysV `__va_list_tag` GP/FP save areas in the prologue |
+| K — atomics | ⬜ | LOCK XADD / CMPXCHG / MFENCE family |
+| L — intrinsics | ⬜ | POPCNT / BSF / BSR / BSWAP / memcpy ABI lowering |
+| N — TLS LE | ⬜ | `mov rd, fs:0` + 32-bit TPOFF32 displacement |
+| O — globals | ⬜ except o11 | RIP-relative addressing for OPK_GLOBAL load/store/addr-of; o11 already passes because it only renames the text section |
+| P — DWARF | ✅ exit-code | p01–p07 pass on the value oracle; the W-path DWARF directives still depend on the stubbed `cfree_dwarf_*` consumers (same as on aa64/rv64) |
+| Q — multi-fn | ✅ except q11 | q01–q10 pass; 🚧 q11 needs `addr_of` for OPK_GLOBAL |
+
+---
+
+## Phase 1 — Backend foundation ✅
+
+- ✅ Register pools: 10 int (rbx, r12..r15 callee-saved first, then
+ r10, rdi, rsi, r8, r9 caller-saved; rax/rcx/rdx/r11 are scratch); 10 FP (xmm6..xmm15)
+- ✅ Frame layout: rbp-relative locals at negative offsets; callee-save
+ area immediately below; outgoing args at `[rsp+0]` (16-aligned)
+- ✅ Prologue placeholder + func_end patch (mirrors aa64 / rv64)
+- ✅ Epilogue: restore callee-saves, `leave; ret`
+- ✅ MCEmitter fixup encodings already cover `R_PC32` for branches and
+ PC-relative calls/jumps; no new fixup kinds needed for Groups A–D
+
+## Phase 2 — Core ops ✅
+
+- ✅ `load_imm`: `mov r32, imm32` (B8+r, 32-bit) or `MOVABS r64, imm64` (64-bit)
+- ✅ `copy`, `load`, `store` (i8/i16/i32/i64 + float/double)
+- ✅ `addr_of` for OPK_LOCAL (`lea rd, [rbp - off]`)
+- ✅ `binop` (int): ADD/SUB/IMUL via reg-reg; SDIV/UDIV/SREM/UREM via
+ CQO/CDQ + IDIV/DIV; AND/OR/XOR; SHL/SHR/SAR via `cl`
+- ✅ `unop` (NEG/BNOT/NOT-as-`!`)
+- ✅ `cmp` (materialize 0/1 via SETcc + MOVZX)
+- ✅ `cmp_branch` (CMP + Jcc rel32, R_PC32 fixup to MCLabel)
+- ✅ Structured `SCOPE_IF` / `else`; `SCOPE_LOOP` / `SCOPE_BLOCK`
+ (label bookkeeping only — caller drives `label_place`/`jump`)
+- ✅ Calls (direct via `call rel32` + R_X64_PLT32; indirect via `call rax` is 🚧 — g01 still fails at emit)
+- ✅ Returns: scalar in rax / xmm0; multi-instruction `jmp epilogue`
+- ✅ Sret skeleton: incoming rdi spilled to a hidden slot at func_begin;
+ the ret-indirect path memcpys src→[rdi] before branching to epilogue
+- ✅ FP scalar: `addss/addsd`, `cvtss2sd/cvtsd2ss`, `cvtsi2sd`, `cvttsd2si`,
+ `movd/movq` for BITCAST, `movss/movsd` for load/store/copy
+- ✅ FP `load_const` via a fresh `.rodata` symbol + RIP-relative load
+- ✅ `convert`: SEXT (`movsx`/`movsxd`), ZEXT (`movzx`/zero high), TRUNC
+ (no-op — narrower stores select width), FP↔int (CVTSI2S*/CVTTS*2SI),
+ FEXT/FTRUNC (CVTSS2SD/CVTSD2SS), BITCAST (movd/movq)
+
+## Phase 3 — Remaining cg coverage ⬜
+
+- ⬜ Aggregate ops: `copy_bytes`, `set_bytes`, bitfields
+- ✅ Calls: byval (b07), large struct byval (g08); ⬜ HFA edges (rejected via
+ ABI fallback to INDIRECT)
+- ✅ Group H: SCOPE_LOOP/BLOCK with `break_to`/`continue_to` exercised by
+ while/for/do-while/switch (h01–h18 pass)
+- ⬜ Group I: alloca (constant + runtime size, max_outgoing patch site)
+- ⬜ Group J: varargs (SysV `__va_list_tag` + gp/fp save areas)
+- ⬜ Group K: atomics (LOCK XADD / CMPXCHG / MFENCE)
+- ⬜ Group L: intrinsics (popcnt / bsf / bsr / bswap / memcpy ABI calls)
+- ⬜ Group N: TLS LE — `mov rd, fs:0` + 32-bit TPOFF32 displacement
+- ⬜ Group O: globals via RIP-relative addressing
+- 🚧 Group P: DWARF line/subprogram (driven by Debug; backend forwards locs) — p01–p07 pass on exit code only
+
+## Phase 4 — opt-cgtarget equivalence ⬜
+
+- ⬜ Confirm L1/L2 (opt-wrapped) cg paths match L0 on the spine
+- ⬜ Same equivalence on the full corpus once Phase 3 lands
+
+## Phase 5 — test-parse on x64 ⬜
+
+Same pattern as rv64 phase 5 — `test/parse/` is the file-driven C
+parser harness. Plan: run `CFREE_TEST_ARCH=x64 make test-parse` after
+Phase 3 stabilizes and triage failures, then mirror RV64's per-case
+opt-out scheme for arch-specific cases.
+
+---
+
+## Open follow-ups
+
+- Caller-saved register spilling around calls. The current pool hands
+ out caller-saved regs only after callee-saved are exhausted; cases
+ that hold a caller-saved reg live across a call (heavy register
+ pressure with a call in the middle) will mis-execute. The corpus
+ currently relies on the first-allocated reg being callee-saved
+ (g11_caller_saved_live_across_call), but this is fragile — the
+ full Phase 3 plan tracks an explicit "live across call" annotation.
+- Variadic FP register save area. Today the prologue spills only
+ int arg regs because varargs aren't reached; the save layout has
+ to mirror the SysV `__va_list_tag` once Group J lands.
+- CFI directives are no-ops (debug.h's CFI fanout is unwired across
+ arches at present); revisit when `.eh_frame` lands.
+
+
+
+### Currently failing cg tests
+
+ f12_bitfield_unsigned/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/f12_bitfield_unsigned/emit.err)
+ f13_bitfield_signed/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/f13_bitfield_signed/emit.err)
+ g01_indirect_call/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/g01_indirect_call/emit.err)
+ i01_alloca_const_int/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i01_alloca_const_int/emit.err)
+ i02_alloca_runtime_size/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i02_alloca_runtime_size/emit.err)
+ i03_alloca_align_16/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i03_alloca_align_16/emit.err)
+ i04_alloca_in_loop_distinct/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i04_alloca_in_loop_distinct/emit.err)
+ i05_alloca_then_call/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i05_alloca_then_call/emit.err)
+ i06_two_allocas_disjoint/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i06_two_allocas_disjoint/emit.err)
+ i07_alloca_addr_escapes/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i07_alloca_addr_escapes/emit.err)
+ i08_vla_param_sum/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i08_vla_param_sum/emit.err)
+ i09_alloca_preserves_locals/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i09_alloca_preserves_locals/emit.err)
+ i10_alloca_after_named_local/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/i10_alloca_after_named_local/emit.err)
+ j01_va_int_sum_3/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j01_va_int_sum_3/emit.err)
+ j02_va_zero_args/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j02_va_zero_args/emit.err)
+ j03_va_int_spill/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j03_va_int_spill/emit.err)
+ j04_va_int64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j04_va_int64/emit.err)
+ j05_va_double_sum/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j05_va_double_sum/emit.err)
+ j06_va_double_spill/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j06_va_double_spill/emit.err)
+ j07_va_mixed_int_dbl/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j07_va_mixed_int_dbl/emit.err)
+ j08_va_copy/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j08_va_copy/emit.err)
+ j09_va_two_fixed/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/j09_va_two_fixed/emit.err)
+ k01_atomic_load_relaxed/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k01_atomic_load_relaxed/emit.err)
+ k02_atomic_store_load_acq/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k02_atomic_store_load_acq/emit.err)
+ k03_atomic_load_seq_cst/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k03_atomic_load_seq_cst/emit.err)
+ k04_atomic_rmw_add/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k04_atomic_rmw_add/emit.err)
+ k05_atomic_rmw_xchg/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k05_atomic_rmw_xchg/emit.err)
+ k06_atomic_rmw_and/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k06_atomic_rmw_and/emit.err)
+ k07_atomic_rmw_or/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k07_atomic_rmw_or/emit.err)
+ k08_atomic_rmw_xor/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k08_atomic_rmw_xor/emit.err)
+ k09_atomic_rmw_sub/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k09_atomic_rmw_sub/emit.err)
+ k10_atomic_rmw_nand/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k10_atomic_rmw_nand/emit.err)
+ k11_atomic_cas_success/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k11_atomic_cas_success/emit.err)
+ k12_atomic_cas_failure/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k12_atomic_cas_failure/emit.err)
+ k13_atomic_load_i64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k13_atomic_load_i64/emit.err)
+ k14_atomic_rmw_prior/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k14_atomic_rmw_prior/emit.err)
+ k15_fence_seq_cst/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/k15_fence_seq_cst/emit.err)
+ l01_popcount_u32/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l01_popcount_u32/emit.err)
+ l02_popcount_u64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l02_popcount_u64/emit.err)
+ l03_ctz_u32/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l03_ctz_u32/emit.err)
+ l04_clz_u32/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l04_clz_u32/emit.err)
+ l05_bswap16/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l05_bswap16/emit.err)
+ l06_bswap32/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l06_bswap32/emit.err)
+ l07_bswap64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l07_bswap64/emit.err)
+ l08_memcpy_4/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l08_memcpy_4/emit.err)
+ l09_memmove_overlap/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l09_memmove_overlap/emit.err)
+ l10_memset_zero/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l10_memset_zero/emit.err)
+ l11_memset_ff/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l11_memset_ff/emit.err)
+ l12_expect_taken/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l12_expect_taken/emit.err)
+ l13_unreachable_live/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l13_unreachable_live/emit.err)
+ l14_trap_live/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l14_trap_live/emit.err)
+ l15_prefetch_noop/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l15_prefetch_noop/emit.err)
+ l16_assume_aligned/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l16_assume_aligned/emit.err)
+ l17_add_overflow_no/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l17_add_overflow_no/emit.err)
+ l18_add_overflow_yes/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l18_add_overflow_yes/emit.err)
+ l19_sub_overflow_yes/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l19_sub_overflow_yes/emit.err)
+ l20_mul_overflow_no/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/l20_mul_overflow_no/emit.err)
+ n01_tls_load_le/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n01_tls_load_le/emit.err)
+ n02_tls_store_le/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n02_tls_store_le/emit.err)
+ n03_tls_addr_taken/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n03_tls_addr_taken/emit.err)
+ n04_tls_i64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n04_tls_i64/emit.err)
+ n05_tls_in_loop/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n05_tls_in_loop/emit.err)
+ n06_tls_two_vars/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n06_tls_two_vars/emit.err)
+ n07_tls_bss_zero_init/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n07_tls_bss_zero_init/emit.err)
+ n08_tls_addend_offset/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/n08_tls_addend_offset/emit.err)
+ o01_global_load_data/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o01_global_load_data/emit.err)
+ o02_global_store_data/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o02_global_store_data/emit.err)
+ o03_global_bss_zero/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o03_global_bss_zero/emit.err)
+ o04_global_addr_taken/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o04_global_addr_taken/emit.err)
+ o05_global_i64/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o05_global_i64/emit.err)
+ o06_rodata_load/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o06_rodata_load/emit.err)
+ o07_global_struct_field/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o07_global_struct_field/emit.err)
+ o08_global_array_runtime_idx/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o08_global_array_runtime_idx/emit.err)
+ o09_static_local_linkage/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o09_static_local_linkage/emit.err)
+ o10_global_addend/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o10_global_addend/emit.err)
+ o12_global_across_call/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/o12_global_across_call/emit.err)
+ q11_addr_of_helper_through_global/emit (cg-runner --emit failed; see /Users/ryan/code/cfree/build/test/cg/q11_addr_of_helper_through_global/emit.err)
+
diff --git a/src/abi/abi_sysv_x64.c b/src/abi/abi_sysv_x64.c
@@ -1,9 +1,17 @@
-/* SysV AMD64 ABI — phase-2 stub.
+/* SysV AMD64 ABI — minimal classifier.
*
- * Initial classifier returns ABI_ARG_INDIRECT for everything: correct
- * (every value passes through memory), slow, but unblocks bring-up of
- * the x64 codegen path. Phase 3 replaces this with the real eight-byte
- * INTEGER/SSE classifier (see doc/MULTIARCH.md §4 phase 3 step 2). */
+ * Covers the subset the cg test harness needs through the spine:
+ * void -> IGNORE
+ * integer ≤ 8B -> DIRECT, one INT part (rdi..r9 for args; rax for return)
+ * pointer -> DIRECT, one INT part
+ * float/double -> DIRECT, one FP part (xmm0..xmm7 for args; xmm0 return)
+ * small struct -> DIRECT, INT parts up to 16B (passed in up to 2 GPRs)
+ * large struct -> INDIRECT (sret for return; byval for args)
+ *
+ * The full SysV INTEGER/SSE eight-byte classification (with X87/COMPLEX_X87/
+ * NO_CLASS rules and the MEMORY-pulls-down rule) is deferred — for the
+ * cg corpus this approximation is enough and matches what the rv64 ABI
+ * does today. */
#include <string.h>
@@ -12,26 +20,86 @@
#include "core/core.h"
#include "core/pool.h"
-static void classify_indirect(TargetABI* a, const Type* t, ABIArgInfo* out,
- int is_return) {
+static void classify_void(ABIArgInfo* out) {
+ memset(out, 0, sizeof *out);
+ out->kind = ABI_ARG_IGNORE;
+}
+
+static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out) {
+ ABITypeInfo ti = abi_internal_type_info(a, t);
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->indirect_align = 0;
+
+ ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+ memset(parts, 0, sizeof *parts);
+ parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ parts->loc = ABI_LOC_REG;
+ parts->size = ti.size;
+ parts->align = ti.align;
+ parts->src_offset = 0;
+
+ out->parts = parts;
+ out->nparts = 1;
+}
+
+static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out,
+ int is_return) {
+ ABITypeInfo ti = abi_internal_type_info(a, t);
+ if (ti.size == 0) {
+ classify_void(out);
+ return;
+ }
+ if (ti.size <= 16) {
+ u32 nparts = (ti.size + 7) / 8;
+ ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts);
+ memset(parts, 0, sizeof(ABIArgPart) * nparts);
+ u32 off = 0;
+ for (u32 i = 0; i < nparts; ++i) {
+ u32 chunk = (ti.size - off > 8) ? 8 : (ti.size - off);
+ parts[i].cls = ABI_CLASS_INT;
+ parts[i].loc = ABI_LOC_REG;
+ parts[i].size = chunk;
+ parts[i].align = 8;
+ parts[i].src_offset = off;
+ off += chunk;
+ }
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = (u16)nparts;
+ out->indirect_align = 0;
+ } else {
+ out->kind = ABI_ARG_INDIRECT;
+ out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
+ out->indirect_align = ti.align ? ti.align : 8;
+ out->parts = NULL;
+ out->nparts = 0;
+ }
+}
+
+static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out,
+ int is_return) {
if (!t || t->kind == TY_VOID) {
- memset(out, 0, sizeof *out);
- out->kind = ABI_ARG_IGNORE;
+ classify_void(out);
return;
}
- ABITypeInfo ti = abi_internal_type_info(a, t);
- out->kind = ABI_ARG_INDIRECT;
- out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
- out->indirect_align = ti.align ? ti.align : 8;
- out->parts = NULL;
- out->nparts = 0;
+ switch (t->kind) {
+ case TY_STRUCT:
+ case TY_UNION:
+ classify_aggregate(a, t, out, is_return);
+ return;
+ default:
+ classify_scalar(a, t, out);
+ return;
+ }
}
static ABIFuncInfo* sysv_x64_compute_func_info(TargetABI* a, const Type* fn) {
ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
memset(info, 0, sizeof *info);
- classify_indirect(a, fn->fn.ret, &info->ret, /*is_return=*/1);
+ classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1);
info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
info->variadic = fn->fn.variadic;
@@ -40,7 +108,7 @@ static ABIFuncInfo* sysv_x64_compute_func_info(TargetABI* a, const Type* fn) {
ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams);
memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams);
for (u16 i = 0; i < fn->fn.nparams; ++i) {
- classify_indirect(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
+ classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
}
info->params = arr;
} else {
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -1,293 +1,1862 @@
-/* x86_64 CGTarget skeleton.
+/* Minimal x86_64 (SysV AMD64, Linux ELF) CGTarget.
*
- * Phase-2 placeholder: the vtable is wired up but every method panics.
- * This proves the cgtarget_new dispatch reaches an x64-shaped target.
- * Phase 3 fills in real codegen. */
+ * Single-pass codegen mirroring the structure of src/arch/aarch64.c
+ * and src/arch/rv64.c. The frame uses rbp as a frame pointer; locals
+ * live at negative offsets from rbp, callee-save spills live below
+ * the local area at known offsets, and outgoing args sit at sp+0.
+ * The prologue is reserved as a NOP-filled placeholder at func_begin
+ * and patched at func_end once frame_size and the callee-save high-
+ * water mark are known.
+ *
+ * Reg allocator: lowest-bit-first over a fixed preference list. INT
+ * pool has callee-saves (rbx, r12..r15) at the low bits, then a
+ * caller-saved tail (r10, rdi, rsi, r8, r9) — so the first reg handed
+ * out is callee-saved, which is what tests like
+ * g11_caller_saved_live_across_call rely on. FP pool is xmm6..xmm15
+ * (10 regs, all caller-saved on SysV).
+ *
+ * Scratches kept outside the pools: rax (primary), rcx, rdx, r11
+ * (secondary). rax is also the int return reg; xmm0 is the FP return
+ * reg.
+ *
+ * Scope: the test/cg spine (Groups A–D) plus E, H, P, Q and most of
+ * F and G. Methods past that scope panic with a clear message so
+ * Phase 3 work has obvious landing pads — see doc/X64.md. */
#include <string.h>
#include "arch/arch.h"
#include "arch/x64.h"
+#include "arch/x64_isa.h"
#include "core/arena.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+#define X64_PROLOGUE_BYTES 96u
+
+/* ============================================================
+ * Custom register pool.
+ *
+ * Unlike aa64/rv64 the x64 pool is non-contiguous (skipping rax,
+ * rcx, rdx, rsp, rbp, r11). So we keep a bitmap over a static
+ * preference order rather than a (base, nregs) range. */
+typedef struct XRegPool {
+ u32 free; /* bit i set ⇔ alloc_order[i] is free */
+ u32 hwm; /* highest index+1 ever allocated */
+ const u8* order; /* alloc_order; first n_cs are callee-saved */
+ u8 nregs;
+ u8 n_cs;
+ u8 pad[2];
+} XRegPool;
+
+static void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) {
+ p->order = order;
+ p->nregs = nregs;
+ p->n_cs = n_cs;
+ p->hwm = 0;
+ p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
+}
+
+static Reg xpool_alloc(XRegPool* p) {
+ if (p->free == 0) return (Reg)REG_NONE;
+ u32 idx = (u32)__builtin_ctz(p->free);
+ p->free &= ~(1u << idx);
+ if (idx + 1u > p->hwm) p->hwm = idx + 1u;
+ return (Reg)p->order[idx];
+}
+
+static int xpool_free(XRegPool* p, Reg r) {
+ for (u8 i = 0; i < p->nregs; ++i) {
+ if (p->order[i] == (u8)r) {
+ u32 bit = 1u << i;
+ if (p->free & bit) return -1;
+ p->free |= bit;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static const u8 g_int_order[10] = {
+ X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */
+ X64_R10, X64_RDI, X64_RSI, X64_R8, X64_R9, /* caller-saved tail */
+};
+
+static const u8 g_fp_order[10] = {
+ /* All xmm regs are caller-saved on SysV. The pool starts at xmm6, so only
+ * xmm0..5 stay clear for calls; xmm6/xmm7 are also FP arg regs (7th/8th). */
+ X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10,
+ X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15,
+};
+
+static const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX,
+ X64_RCX, X64_R8, X64_R9};
+
+/* ============================================================
+ * XImpl */
+
+typedef struct XSlot {
+ u32 off; /* bytes below rbp (positive); address = rbp - off */
+ u32 size;
+ u32 align;
+ u8 kind;
+ u8 pad[3];
+} XSlot;
+
+typedef struct XScope {
+ u8 kind;
+ u8 has_else;
+ u8 pad[2];
+ MCLabel else_label;
+ MCLabel end_label;
+ Label break_label;
+ Label continue_label;
+} XScope;
typedef struct XImpl {
CGTarget base;
SrcLoc loc;
+ const CGFuncDesc* fd;
+
+ u32 func_start;
+ u32 prologue_pos;
+ MCLabel epilogue_label;
+
+ XSlot* slots;
+ u32 nslots;
+ u32 slots_cap;
+ u32 cum_off;
+ u32 max_outgoing;
+
+ u32 next_param_int;
+ u32 next_param_fp;
+ u32 next_param_stack;
+ u8 has_sret;
+ FrameSlot sret_ptr_slot;
+
+ XRegPool int_pool;
+ XRegPool fp_pool;
+
+ XScope* scopes;
+ u32 nscopes;
+ u32 scopes_cap;
} XImpl;
-static SrcLoc xx_loc(void) { return (SrcLoc){0, 0, 0}; }
+static XImpl* impl_of(CGTarget* t) { return (XImpl*)t; }
+
+/* Forward declarations. */
+static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
+static XSlot* slot_get(XImpl* a, FrameSlot fs);
+static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
+static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
+static void x_free_reg(CGTarget* t, Reg r, RegClass cls);
-_Noreturn static void xx_panic(CGTarget* t, const char* what) {
- compiler_panic(t->c, xx_loc(), "x64: %s not implemented", what);
+extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
+
+/* ---- type helpers ---- */
+static int type_is_64(const Type* t) {
+ if (!t) return 0;
+ switch (t->kind) {
+ case TY_LONG:
+ case TY_ULONG:
+ case TY_LLONG:
+ case TY_ULLONG:
+ case TY_PTR:
+ case TY_DOUBLE:
+ return 1;
+ default:
+ return 0;
+ }
+}
+static int type_is_fp_double(const Type* t) {
+ return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE);
+}
+static u32 type_byte_size(const Type* t) {
+ if (!t) return 4;
+ switch (t->kind) {
+ case TY_CHAR:
+ case TY_SCHAR:
+ case TY_UCHAR:
+ case TY_BOOL:
+ return 1;
+ case TY_SHORT:
+ case TY_USHORT:
+ return 2;
+ case TY_INT:
+ case TY_UINT:
+ case TY_FLOAT:
+ return 4;
+ case TY_LONG:
+ case TY_ULONG:
+ case TY_LLONG:
+ case TY_ULLONG:
+ case TY_PTR:
+ case TY_DOUBLE:
+ return 8;
+ default:
+ return 8;
+ }
+}
+static int type_is_signed(const Type* t) {
+ if (!t) return 0;
+ switch (t->kind) {
+ case TY_CHAR:
+ case TY_SCHAR:
+ case TY_SHORT:
+ case TY_INT:
+ case TY_LONG:
+ case TY_LLONG:
+ return 1;
+ default:
+ return 0;
+ }
}
-static void xx_func_begin(CGTarget* t, const CGFuncDesc* d) {
- (void)d;
- xx_panic(t, "func_begin");
+static _Noreturn void x_panic(CGTarget* t, const char* what) {
+ SrcLoc loc = impl_of(t)->loc;
+ compiler_panic(t->c, loc, "x64: %s not implemented", what);
+}
+
+/* ============================================================
+ * Byte-level emit helpers.
+ *
+ * x64 instructions are variable length: optional legacy prefix(es),
+ * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional
+ * displacement, optional immediate. Helpers below build sequences
+ * into the active MCEmitter section, recording one Debug row per
+ * instruction-start. */
+static void emit1(MCEmitter* mc, u8 b) {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ mc->emit_bytes(mc, &b, 1);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+static void emit_u32le(MCEmitter* mc, u32 v) {
+ u8 b[4];
+ b[0] = (u8)v;
+ b[1] = (u8)(v >> 8);
+ b[2] = (u8)(v >> 16);
+ b[3] = (u8)(v >> 24);
+ mc->emit_bytes(mc, b, 4);
+}
+static void emit_u64le(MCEmitter* mc, u64 v) {
+ u8 b[8];
+ for (int i = 0; i < 8; ++i) b[i] = (u8)(v >> (i * 8));
+ mc->emit_bytes(mc, b, 8);
+}
+
+static u8 make_rex(int w, u32 reg, u32 index, u32 rm) {
+ u8 r = 0;
+ if (w) r |= X64_REX_W;
+ if (reg & 8) r |= X64_REX_R;
+ if (index & 8) r |= X64_REX_X;
+ if (rm & 8) r |= X64_REX_B;
+ return r ? (u8)(X64_REX_BASE | r) : 0;
+}
+static void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
+ u8 r = make_rex(w, reg, index, rm);
+ if (r) mc->emit_bytes(mc, &r, 1);
+}
+/* Force REX (even REX=0x40) — required for byte-reg encodings that
+ * promote SIL/DIL/etc. */
+static void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
+ u8 r = (u8)(X64_REX_BASE | (w ? X64_REX_W : 0) | ((reg & 8) ? X64_REX_R : 0) |
+ ((index & 8) ? X64_REX_X : 0) | ((rm & 8) ? X64_REX_B : 0));
+ mc->emit_bytes(mc, &r, 1);
+}
+
+static u8 modrm(u32 mod, u32 reg, u32 rm) {
+ return (u8)(((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u));
+}
+static u8 sib(u32 scale, u32 index, u32 base) {
+ return (u8)(((scale & 3u) << 6) | ((index & 7u) << 3) | (base & 7u));
+}
+
+static u32 disp_mod(u32 base, i32 disp) {
+ if (disp == 0 && (base & 7u) != 5u) return 0u; /* [base] */
+ if (disp >= -128 && disp <= 127) return 1u; /* [base + disp8] */
+ return 2u; /* [base + disp32] */
+}
+
+static void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) {
+ u32 m = disp_mod(base, disp);
+ if ((base & 7u) == 4u) {
+ /* SIB byte required: index=4 (none), base=base. */
+ u8 mr = modrm(m, reg, 4u);
+ mc->emit_bytes(mc, &mr, 1);
+ u8 s = sib(0, 4u, base);
+ mc->emit_bytes(mc, &s, 1);
+ } else {
+ u8 mr = modrm(m, reg, base);
+ mc->emit_bytes(mc, &mr, 1);
+ }
+ if (m == 1u) {
+ u8 d = (u8)(i8)disp;
+ mc->emit_bytes(mc, &d, 1);
+ } else if (m == 2u) {
+ emit_u32le(mc, (u32)disp);
+ }
+}
+static void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) {
+ u8 mr = modrm(3u, reg, rm);
+ mc->emit_bytes(mc, &mr, 1);
+}
+
+/* ---- specific instruction emitters ---- */
+
+/* mov rd, rs (64-bit if w, else 32-bit). */
+static void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ emit_rex(mc, w, src, 0, dst);
+ u8 op = 0x89; /* MOV r/m, r */
+ mc->emit_bytes(mc, &op, 1);
+ emit_rm_reg(mc, src, dst);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
+/* mov reg, [base + disp]; size 1/2/4/8. */
+static void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst,
+ u32 base, i32 disp) {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ if (size == 8) {
+ emit_rex(mc, 1, dst, 0, base);
+ u8 op = 0x8B;
+ mc->emit_bytes(mc, &op, 1);
+ emit_mem_operand(mc, dst, base, disp);
+ } else if (size == 4) {
+ emit_rex(mc, 0, dst, 0, base);
+ u8 op = 0x8B;
+ mc->emit_bytes(mc, &op, 1);
+ emit_mem_operand(mc, dst, base, disp);
+ } else if (size == 2) {
+ emit_rex(mc, 0, dst, 0, base);
+ u8 op[2] = {0x0F, signed_ext ? 0xBF : 0xB7};
+ mc->emit_bytes(mc, op, 2);
+ emit_mem_operand(mc, dst, base, disp);
+ } else if (size == 1) {
+ emit_rex(mc, 0, dst, 0, base);
+ u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6};
+ mc->emit_bytes(mc, op, 2);
+ emit_mem_operand(mc, dst, base, disp);
+ }
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
+/* Store the low `size` bytes of src to [base + disp].
+ *   size 8 / 4: MOV r/m, r (0x89), 64-bit or 32-bit form.
+ *   size 2:     0x66 prefix, then the 0x89 form.
+ *   size 1:     0x88, with the REX prefix forced so SIL/DIL/etc are
+ *               addressable as byte registers.
+ * Any other size emits no instruction bytes (the debug row is still
+ * recorded). */
+static void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base,
+                           i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+
+  if (size == 8 || size == 4 || size == 2 || size == 1) {
+    u8 opcode = (size == 1) ? 0x88 : 0x89;
+
+    if (size == 2) {
+      u8 opsize_prefix = 0x66;
+      mc->emit_bytes(mc, &opsize_prefix, 1);
+    }
+    if (size == 1) {
+      emit_rex_force(mc, 0, src, 0, base);
+    } else {
+      emit_rex(mc, (size == 8) ? 1 : 0, src, 0, base);
+    }
+    mc->emit_bytes(mc, &opcode, 1);
+    emit_mem_operand(mc, src, base, disp);
+  }
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* lea dst, [base + disp]: opcode 0x8D, always emitted in the 64-bit form. */
+static void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0x8D;
+
+  emit_rex(mc, 1, dst, 0, base);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_mem_operand(mc, dst, base, disp);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Load an immediate into dst.
+ *   is64:  movabs reg, imm64 (REX.W + B8+r, 8-byte immediate).
+ *   !is64: mov r32, imm32 (B8+r, 4-byte immediate; imm is truncated to
+ *          its low 32 bits).
+ * The low three bits of dst go into the opcode byte; the remaining
+ * register bit is handed to emit_rex. */
+static void emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = (u8)(0xB8 | (dst & 7));
+
+  emit_rex(mc, is64 ? 1 : 0, 0, 0, dst);
+  mc->emit_bytes(mc, &opcode, 1);
+  if (is64) {
+    emit_u64le(mc, (u64)imm);
+  } else {
+    emit_u32le(mc, (u32)imm);
+  }
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Generic two-operand `op r/m, r` encoder with dst as the r/m operand and
+ * src as the reg operand. Opcodes used with it: ADD(01) SUB(29) AND(21)
+ * OR(09) XOR(31) CMP(39) MOV(89) TEST(85). w selects the 64-bit form. */
+static void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+
+  emit_rex(mc, w, src, 0, dst);
+  mc->emit_bytes(mc, &op, 1);
+  emit_rm_reg(mc, src, dst);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* imul dst, src: two-byte opcode 0F AF with dst in the ModRM reg field and
+ * src in the r/m field (the reverse of emit_alu_rr's operand placement).
+ * w is passed to emit_rex to select the 64-bit form. */
+static void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ emit_rex(mc, w, dst, 0, src);
+ u8 op[2] = {0x0F, 0xAF};
+ mc->emit_bytes(mc, op, 2);
+ emit_rm_reg(mc, dst, src);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
}
-static void xx_func_end(CGTarget* t) { xx_panic(t, "func_end"); }
-static Reg xx_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
- (void)cls;
+/* Opcode F7 group applied to a register: `sub` is placed in the ModRM reg
+ * field (opcode extension) and `reg` is the register-direct r/m operand.
+ * w selects the 64-bit form. */
+static void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0xF7;
+
+  emit_rex(mc, w, 0, 0, reg);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_rm_reg(mc, sub, reg);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Opcode D3 group (shift/rotate by CL) applied to a register: `sub` is
+ * placed in the ModRM reg field (opcode extension) and `reg` is the
+ * register-direct r/m operand. w selects the 64-bit form. */
+static void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0xD3;
+
+  emit_rex(mc, w, 0, 0, reg);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_rm_reg(mc, sub, reg);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Emit opcode 0x99, preceded by a REX.W prefix byte when w is nonzero
+ * (cqo) and bare otherwise (cdq). No debug row is recorded here. */
+static void emit_cqo_or_cdq(MCEmitter* mc, int w) {
+  u8 seq[2] = {X64_REX_BASE | X64_REX_W, 0x99};
+
+  if (w) {
+    mc->emit_bytes(mc, seq, 2);
+    return;
+  }
+  /* 32-bit form: skip the REX byte and emit only the opcode. */
+  mc->emit_bytes(mc, &seq[1], 1);
+}
+
+/* xor r, r via emit_alu_rr with opcode 0x31 and the same register as both
+ * operands. */
+static void emit_xor_self(MCEmitter* mc, int w, u32 r) {
+  u8 xor_rm_r = 0x31;
+  emit_alu_rr(mc, w, xor_rm_r, r, r);
+}
+
+/* cmp reg, imm8: opcode 0x83 with /7 in the ModRM reg field and a one-byte
+ * immediate. w selects the 64-bit form. */
+static void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+
+  emit_rex(mc, w, 0, 0, reg);
+  u8 insn[3] = {0x83, modrm(3u, 7u, reg), (u8)imm};
+  mc->emit_bytes(mc, insn, 3);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* test reg, reg via emit_alu_rr with opcode 0x85 and the same register as
+ * both operands. */
+static void emit_test_self(MCEmitter* mc, int w, u32 reg) {
+  u8 test_rm_r = 0x85;
+  emit_alu_rr(mc, w, test_rm_r, reg, reg);
+}
+
+/* setcc on the byte register `reg`: opcode 0F 90+cc (cc masked to four
+ * bits), ModRM reg field 0. The REX prefix is emitted through
+ * emit_rex_force, as the byte-store path does for byte registers. */
+static void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode[2];
+
+  opcode[0] = 0x0F;
+  opcode[1] = (u8)(0x90 | (cc & 0xF));
+
+  emit_rex_force(mc, 0, 0, 0, reg);
+  mc->emit_bytes(mc, opcode, 2);
+  emit_rm_reg(mc, 0u, reg);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* movzx dst32, src8 (0F B6) between registers; the REX prefix is emitted
+ * through emit_rex_force because the source is a byte register. */
+static void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode[2];
+
+  opcode[0] = 0x0F;
+  opcode[1] = 0xB6;
+
+  emit_rex_force(mc, 0, dst, 0, src);
+  mc->emit_bytes(mc, opcode, 2);
+  emit_rm_reg(mc, dst, src);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Register-to-register widening. src_size is the source width in bytes:
+ *   4, signed   -> movsxd r64, r32 (REX.W 0x63 ModRM); w is not consulted
+ *   4, unsigned -> mov r32, r32 (clears the high 32 bits)
+ *   1           -> 0F BE (signed) / 0F B6 (unsigned), REX forced
+ *   2           -> 0F BF (signed) / 0F B7 (unsigned)
+ * For sizes 1 and 2, w selects the destination width. Any other src_size
+ * emits no instruction bytes (the debug row is still recorded). */
+static void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size,
+                           u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+
+  if (src_size == 4) {
+    if (signed_ext) {
+      u8 movsxd = 0x63;
+      emit_rex(mc, 1, dst, 0, src);
+      mc->emit_bytes(mc, &movsxd, 1);
+      emit_rm_reg(mc, dst, src);
+    } else {
+      u8 mov_rm_r = 0x89;
+      emit_rex(mc, 0, src, 0, dst);
+      mc->emit_bytes(mc, &mov_rm_r, 1);
+      emit_rm_reg(mc, src, dst);
+    }
+  } else if (src_size == 1 || src_size == 2) {
+    u8 opcode[2] = {0x0F, 0x00};
+    if (src_size == 1) {
+      opcode[1] = signed_ext ? 0xBE : 0xB6;
+      emit_rex_force(mc, w, dst, 0, src);
+    } else {
+      opcode[1] = signed_ext ? 0xBF : 0xB7;
+      emit_rex(mc, w, dst, 0, src);
+    }
+    mc->emit_bytes(mc, opcode, 2);
+    emit_rm_reg(mc, dst, src);
+  }
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Emit the single-byte `ret` (0xC3). */
+static void emit_ret(MCEmitter* mc) {
+  u8 ret_opcode = 0xC3;
+  mc->emit_bytes(mc, &ret_opcode, 1);
+}
+/* Emit the single-byte `leave` (0xC9). */
+static void emit_leave(MCEmitter* mc) {
+  u8 leave_opcode = 0xC9;
+  mc->emit_bytes(mc, &leave_opcode, 1);
+}
+
+/* ---- SSE scalar FP encoders ---- */
+/* Register-to-register SSE op: optional one-byte prefix (skipped when 0),
+ * REX as needed, then 0F <opcode> and a register-direct ModRM with dst in
+ * the reg field and src in the r/m field. */
+static void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 escape_and_op[2];
+
+  escape_and_op[0] = 0x0F;
+  escape_and_op[1] = opcode;
+
+  if (prefix) {
+    mc->emit_bytes(mc, &prefix, 1);
+  }
+  emit_rex(mc, 0, dst, 0, src);
+  mc->emit_bytes(mc, escape_and_op, 2);
+  emit_rm_reg(mc, dst, src);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+/* SSE op with a memory source: optional one-byte prefix (skipped when 0),
+ * REX as needed, 0F <opcode>, then the [base + disp] operand with dst in
+ * the ModRM reg field. */
+static void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst,
+                          u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 escape_and_op[2];
+
+  escape_and_op[0] = 0x0F;
+  escape_and_op[1] = opcode;
+
+  if (prefix) {
+    mc->emit_bytes(mc, &prefix, 1);
+  }
+  emit_rex(mc, 0, dst, 0, base);
+  mc->emit_bytes(mc, escape_and_op, 2);
+  emit_mem_operand(mc, dst, base, disp);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+/* SSE op with a memory destination. The byte sequence has the same shape
+ * as emit_sse_load (prefix, REX, 0F <opcode>, memory operand with the
+ * register in the ModRM reg field); only the opcode chosen by the caller
+ * differs, so the encoding is delegated. */
+static void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src,
+                           u32 base, i32 disp) {
+  emit_sse_load(mc, prefix, opcode, src, base, disp);
+}
+/* Like emit_sse_rr, but the caller supplies w, which is forwarded to
+ * emit_rex. */
+static void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
+                          u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 escape_and_op[2];
+
+  escape_and_op[0] = 0x0F;
+  escape_and_op[1] = opcode;
+
+  if (prefix) {
+    mc->emit_bytes(mc, &prefix, 1);
+  }
+  emit_rex(mc, w, dst, 0, src);
+  mc->emit_bytes(mc, escape_and_op, 2);
+  emit_rm_reg(mc, dst, src);
+
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* ============================================================
+ * Function lifecycle */
+
+/* Begin emitting a function: switch to and align the text section, reset
+ * the per-function state held in XImpl, open the CFI procedure, and
+ * reserve the NOP-filled prologue placeholder that x_func_end later
+ * patches. When the ABI reports sret, a hidden 8-byte slot is created for
+ * the incoming destination pointer. */
+static void x_func_begin(CGTarget* t, const CGFuncDesc* fd) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ mc->set_section(mc, fd->text_section_id);
+ /* 16-byte function alignment, padded with 0x90 bytes. */
+ mc->emit_align(mc, 16, 0x90);
+
+ a->fd = fd;
+ a->func_start = mc->pos(mc);
+ /* Incoming-argument cursors consumed by x_param. */
+ a->next_param_int = 0;
+ a->next_param_fp = 0;
+ a->next_param_stack = 0;
+ a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
+ a->cum_off = 0;
+ a->max_outgoing = 0;
+ xpool_init(&a->int_pool, g_int_order, 10u, 5u);
+ xpool_init(&a->fp_pool, g_fp_order, 10u, 0u);
+ /* Slot and scope arrays are reused across functions; only the counts
+ * are reset here. */
+ a->nslots = 0;
+ a->nscopes = 0;
+ a->sret_ptr_slot = FRAME_SLOT_NONE;
+ a->epilogue_label = mc->label_new(mc);
+
+ mc->cfi_startproc(mc);
+
+ /* Reserve a fixed-size prologue placeholder filled with NOPs. */
+ a->prologue_pos = mc->pos(mc);
+ for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) emit1(mc, 0x90);
+
+ /* sret: rdi at entry holds the destination pointer. Spill it to a
+ * hidden slot so the body can use rdi freely. */
+ if (a->has_sret) {
+ FrameSlotDesc fsd = {
+ .type = NULL, .name = 0, .loc = {0, 0, 0},
+ .size = 8, .align = 8, .kind = FS_SPILL, .flags = 0,
+ };
+ a->sret_ptr_slot = x_frame_slot(t, &fsd);
+ /* Subsequent int args start at rsi (next_param_int = 1). */
+ a->next_param_int = 1;
+ }
+}
+
+/* Round v up to a multiple of a using mask arithmetic; the result is only
+ * meaningful when a is a power of two. */
+static u32 align_up_u32(u32 v, u32 a) {
+  u32 mask = a - 1u;
+  return (v + mask) & ~mask;
+}
+
+/* Finish the current function: compute the frame size, emit the epilogue
+ * (callee-save restores, leave, ret), patch the prologue placeholder
+ * reserved by x_func_begin with the real prologue bytes, define the
+ * function symbol, and close the CFI procedure.
+ *
+ * Frame layout relative to rbp, as used below:
+ *   [rbp - cum_off, rbp)                      frame slots
+ *   rbp - (cum_off + (i+1)*8)                 i-th saved register
+ * Panics if the generated prologue does not fit in X64_PROLOGUE_BYTES. */
+static void x_func_end(CGTarget* t) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ /* Number of registers to save: the pool's hwm clamped to n_cs. */
+ u32 cs_used = a->int_pool.hwm;
+ if (cs_used > a->int_pool.n_cs) cs_used = a->int_pool.n_cs;
+ u32 cs_size = cs_used * 8u;
+
+ /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call,
+ * which means rsp ≡ 8 mod 16 inside the function (after the return
+ * address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it
+ * is 0 mod 16; after `sub rsp, frame_size` we need it back to 0
+ * mod 16, so frame_size must be a multiple of 16. */
+ u32 raw = a->max_outgoing + cs_size + a->cum_off;
+ u32 frame_size = align_up_u32(raw, 16u);
+ if (frame_size == 0) frame_size = 16;
+
+ mc->label_place(mc, a->epilogue_label);
+
+ /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */
+ for (i32 i = (i32)cs_used - 1; i >= 0; --i) {
+ u32 reg = a->int_pool.order[i];
+ i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off);
+ }
+
+ /* leave; ret. */
+ emit_leave(mc);
+ emit_ret(mc);
+
+ /* Patch prologue placeholder. */
+ /* The buffer starts as all NOPs so unused tail bytes stay harmless. */
+ u8 buf[X64_PROLOGUE_BYTES];
+ for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) buf[i] = 0x90;
+ u32 wi = 0;
+
+ /* NOTE(review): the first 11 bytes below are written without a bounds
+ * check; this relies on X64_PROLOGUE_BYTES being at least 11. */
+ /* push rbp (1 byte). */
+ buf[wi++] = 0x55;
+ /* mov rbp, rsp: REX.W 89 E5. */
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x89;
+ buf[wi++] = modrm(3u, X64_RSP, X64_RBP);
+ /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x81;
+ buf[wi++] = modrm(3u, 5u, X64_RSP);
+ buf[wi++] = (u8)frame_size;
+ buf[wi++] = (u8)(frame_size >> 8);
+ buf[wi++] = (u8)(frame_size >> 16);
+ buf[wi++] = (u8)(frame_size >> 24);
+
+ /* sret: mov [rbp + disp32], rdi. */
+ if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
+ XSlot* s = slot_get(a, a->sret_ptr_slot);
+ if (s) {
+ i32 off = -(i32)s->off;
+ if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow;
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x89;
+ /* mod=2 forces a 4-byte displacement, keeping the store 7 bytes. */
+ buf[wi++] = modrm(2u, X64_RDI, X64_RBP);
+ buf[wi++] = (u8)off;
+ buf[wi++] = (u8)(off >> 8);
+ buf[wi++] = (u8)(off >> 16);
+ buf[wi++] = (u8)(off >> 24);
+ }
+ }
+
+ /* Spill callee-saves. */
+ /* Same offsets as the restore loop above; 7 bytes per register. */
+ for (u32 i = 0; i < cs_used; ++i) {
+ u32 reg = a->int_pool.order[i];
+ i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow;
+ buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
+ buf[wi++] = 0x89;
+ buf[wi++] = modrm(2u, (reg & 7u), X64_RBP);
+ buf[wi++] = (u8)off;
+ buf[wi++] = (u8)(off >> 8);
+ buf[wi++] = (u8)(off >> 16);
+ buf[wi++] = (u8)(off >> 24);
+ }
+
+ /* The `if (0)` body is reachable only through `goto overflow`. */
+ if (0) {
+ overflow:
+ compiler_panic(t->c, a->loc,
+ "x64: prologue placeholder overflow (%u of %u bytes)", wi,
+ X64_PROLOGUE_BYTES);
+ }
+ obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf,
+ X64_PROLOGUE_BYTES);
+
+ /* Define the function symbol. */
+ u32 end = mc->pos(mc);
+ obj_symbol_define(t->obj, a->fd->sym, a->fd->text_section_id,
+ (u64)a->func_start, (u64)(end - a->func_start));
+
+ mc->cfi_endproc(mc);
+ a->fd = NULL;
+}
+
+/* ============================================================
+ * Registers / frame */
+
+/* Allocate a register of class cls from the matching per-function pool
+ * (int_pool for RC_INT, fp_pool for RC_FP). ty is unused. Any other class
+ * panics. */
+static Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+ XImpl* a = impl_of(t);
(void)ty;
- xx_panic(t, "alloc_reg");
+ if (cls == RC_INT) return xpool_alloc(&a->int_pool);
+ if (cls == RC_FP) return xpool_alloc(&a->fp_pool);
+ compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls);
}
-static void xx_free_reg(CGTarget* t, Reg r, RegClass cls) {
- (void)r;
- (void)cls;
- xx_panic(t, "free_reg");
+
+/* Return register r to its pool: fp_pool when cls is RC_FP, int_pool for
+ * every other class. xpool_free result 1 is success; -1 is reported as a
+ * double free; any other result is reported as "not in pool". Both error
+ * cases panic. */
+static void x_free_reg(CGTarget* t, Reg r, RegClass cls) {
+ XImpl* a = impl_of(t);
+ XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool;
+ int rc = xpool_free(p, r);
+ if (rc == 1) return;
+ if (rc == -1) {
+ compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free",
+ (unsigned)r);
+ }
+ compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool",
+ (unsigned)r, cls == RC_FP ? "fp" : "int");
}
-static FrameSlot xx_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
- (void)d;
- xx_panic(t, "frame_slot");
+
+/* Allocate a new frame slot described by d and return its handle.
+ * Handles are 1-based indices into a->slots (see slot_get). The slot's
+ * `off` is its distance below rbp: the running offset cum_off is advanced
+ * by the slot size and then rounded up to the slot alignment. A zero size
+ * defaults to 8 and a zero alignment to 1. */
+static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+ XImpl* a = impl_of(t);
+ /* Grow the slot array by doubling (initial capacity 8); storage comes
+ * from arena_array and the old buffer is not released here. */
+ if (a->nslots == a->slots_cap) {
+ u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8;
+ XSlot* nbuf = arena_array(t->c->tu, XSlot, ncap);
+ if (a->slots) memcpy(nbuf, a->slots, sizeof(XSlot) * a->nslots);
+ a->slots = nbuf;
+ a->slots_cap = ncap;
+ }
+ u32 size = d->size ? d->size : 8;
+ u32 align = d->align ? d->align : 1;
+ u32 next = a->cum_off + size;
+ /* Mask rounding assumes align is a power of two. */
+ u32 mask = align - 1u;
+ next = (next + mask) & ~mask;
+ XSlot* s = &a->slots[a->nslots];
+ s->off = next;
+ s->size = size;
+ s->align = align;
+ s->kind = d->kind;
+ a->cum_off = next;
+ a->nslots++;
+ return (FrameSlot)(a->nslots);
}
-static void xx_param(CGTarget* t, const CGParamDesc* d) {
- (void)d;
- xx_panic(t, "param");
+
+/* Map a 1-based frame-slot handle to its XSlot. Returns NULL for
+ * FRAME_SLOT_NONE or a handle beyond the slots allocated so far. */
+static XSlot* slot_get(XImpl* a, FrameSlot fs) {
+ if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL;
+ return &a->slots[fs - 1];
}
-static const Reg* xx_clobbers(CGTarget* t, RegClass cls, u32* nregs) {
- (void)cls;
- (void)nregs;
- xx_panic(t, "clobbers");
+
+/* ---- param: store incoming arg(s) into the home slot ---- */
+/* Copies one incoming parameter into its home frame slot p->slot,
+ * advancing the next_param_int / next_param_fp / next_param_stack
+ * cursors.
+ *   ABI_ARG_IGNORE:   nothing is emitted.
+ *   ABI_ARG_INDIRECT: a pointer is taken from the next integer argument
+ *                     register (or loaded from the caller's stack area
+ *                     into r11) and s->size bytes are copied through rax
+ *                     in 8/4/2/1-byte steps.
+ *   otherwise:        each ABI part is stored from its argument register,
+ *                     or from the caller's stack area at
+ *                     [rbp + 16 + offset] via rax / xmm0.
+ * Panics on a bad slot or an unsupported part class.
+ * NOTE(review): the slot is validated before the ABI_ARG_IGNORE check, so
+ * an ignored parameter still needs a valid slot -- confirm callers always
+ * provide one. */
+static void x_param(CGTarget* t, const CGParamDesc* p) {
+ XImpl* a = impl_of(t);
+ XSlot* s = slot_get(a, p->slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 param: bad slot");
+ const ABIArgInfo* ai = p->abi;
+
+ if (ai->kind == ABI_ARG_IGNORE) return;
+ if (ai->kind == ABI_ARG_INDIRECT) {
+ /* Incoming pointer to byval copy: load pointer, memcpy into slot. */
+ u32 ptr_reg;
+ if (a->next_param_int < 6) {
+ ptr_reg = g_int_arg_regs[a->next_param_int++];
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ emit_mov_load(t->mc, 8, 0, X64_R11, X64_RBP, (i32)(16 + caller_off));
+ ptr_reg = X64_R11;
+ }
+ /* Unrolled copy, widest chunks first; rax is the data temp. */
+ u32 nbytes = s->size;
+ u32 i = 0;
+ while (i + 8 <= nbytes) {
+ emit_mov_load(t->mc, 8, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 8, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 8;
+ }
+ while (i + 4 <= nbytes) {
+ emit_mov_load(t->mc, 4, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 4, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 4;
+ }
+ while (i + 2 <= nbytes) {
+ emit_mov_load(t->mc, 2, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 2, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 2;
+ }
+ while (i < nbytes) {
+ emit_mov_load(t->mc, 1, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 1, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 1;
+ }
+ return;
+ }
+ /* DIRECT */
+ /* NOTE(review): each part is assigned a register or stack slot on its
+ * own, so one argument can be split between registers and the stack.
+ * Confirm this matches the caller-side lowering and the SysV rule for
+ * aggregates that do not fit entirely in registers. */
+ for (u16 i = 0; i < ai->nparts; ++i) {
+ const ABIArgPart* pt = &ai->parts[i];
+ u32 part_off = pt->src_offset;
+ u32 sz = pt->size;
+ if (pt->cls == ABI_CLASS_INT) {
+ if (a->next_param_int < 6) {
+ u32 reg = g_int_arg_regs[a->next_param_int++];
+ emit_mov_store(t->mc, sz, reg, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ } else {
+ /* Stack-passed part: each one consumes 8 bytes of the caller area. */
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ emit_mov_load(t->mc, sz, 0, X64_RAX, X64_RBP,
+ (i32)(16 + caller_off));
+ emit_mov_store(t->mc, sz, X64_RAX, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ }
+ } else if (pt->cls == ABI_CLASS_FP) {
+ if (a->next_param_fp < 8) {
+ u32 xmm = a->next_param_fp++;
+ /* F2 selects the 8-byte (movsd) form, F3 the 4-byte (movss) form. */
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(t->mc, prefix, 0x10, X64_XMM0, X64_RBP,
+ (i32)(16 + caller_off));
+ emit_sse_store(t->mc, prefix, 0x11, X64_XMM0, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ }
+ } else {
+ compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl",
+ (int)pt->cls);
+ }
+ }
}
-static void xx_spill_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) {
- (void)a;
- (void)s;
- (void)m;
- xx_panic(t, "spill_reg");
+
+/* Clobber-set query: not implemented in this backend yet; both arguments
+ * are ignored and the call goes straight to x_panic. */
+static const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) {
+ (void)c;
+ (void)n;
+ x_panic(t, "clobbers");
}
-static void xx_reload_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) {
- (void)a;
- (void)s;
- (void)m;
- xx_panic(t, "reload_reg");
+/* Spill register operand src to frame slot `slot` and release the
+ * register. The store goes through x_store with a synthesized OPK_LOCAL
+ * address operand typed from ma. Panics if src is not OPK_REG. */
+static void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
+ MemAccess ma) {
+ XImpl* a = impl_of(t);
+ if (src.kind != OPK_REG)
+ compiler_panic(t->c, a->loc, "x64 spill_reg: src is not OPK_REG");
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ x_store(t, addr, src, ma);
+ /* The register is returned to its pool once the value is in memory. */
+ x_free_reg(t, src.v.reg, src.cls);
}
-static Label xx_label_new(CGTarget* t) { xx_panic(t, "label_new"); }
-static void xx_label_place(CGTarget* t, Label l) {
- (void)l;
- xx_panic(t, "label_place");
+/* Reload frame slot `slot` into register operand dst via x_load, using a
+ * synthesized OPK_LOCAL address operand typed from ma. The destination
+ * register is not allocated here. Panics if dst is not OPK_REG. */
+static void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
+ MemAccess ma) {
+ XImpl* a = impl_of(t);
+ if (dst.kind != OPK_REG)
+ compiler_panic(t->c, a->loc, "x64 reload_reg: dst is not OPK_REG");
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ x_load(t, dst, addr, ma);
}
-static void xx_jump(CGTarget* t, Label l) {
- (void)l;
- xx_panic(t, "jump");
+
+/* ============================================================
+ * Labels / control flow */
+
+/* Create a fresh label by delegating to the machine-code emitter. */
+static Label x_label_new(CGTarget* t) {
+ return (Label)t->mc->label_new(t->mc);
}
-static void xx_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
- Label l) {
- (void)op;
- (void)a;
- (void)b;
- (void)l;
- xx_panic(t, "cmp_branch");
+/* Bind label l at the emitter's current position. */
+static void x_label_place(CGTarget* t, Label l) {
+ t->mc->label_place(t->mc, (MCLabel)l);
}
-static CGScope xx_scope_begin(CGTarget* t, const CGScopeDesc* d) {
- (void)d;
- xx_panic(t, "scope_begin");
+/* Emit `jmp rel32` (E9 + 4-byte disp) with a label fixup. R_PC32 applied
+ * at the disp32 site with addend=-4 yields target - end_of_insn. The
+ * displacement bytes are written as zero here; the emit_label_ref call
+ * that follows records the reference to label l. */
+static void emit_jmp_label(MCEmitter* mc, MCLabel l) {
+ u8 op = 0xE9;
+ mc->emit_bytes(mc, &op, 1);
+ emit_u32le(mc, 0);
+ mc->emit_label_ref(mc, l, R_PC32, 4, -4);
}
-static void xx_scope_else(CGTarget* t, CGScope s) {
- (void)s;
- xx_panic(t, "scope_else");
+
+/* Emit `Jcc rel32` (0F 8x + 4-byte disp) with a label fixup. cc is masked
+ * to four bits and merged into the second opcode byte. As in
+ * emit_jmp_label, the displacement is written as zero and the label
+ * reference is recorded with R_PC32 and addend -4. */
+static void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) {
+ u8 op[2] = {0x0F, (u8)(0x80 | (cc & 0xF))};
+ mc->emit_bytes(mc, op, 2);
+ emit_u32le(mc, 0);
+ mc->emit_label_ref(mc, l, R_PC32, 4, -4);
}
-static void xx_scope_end(CGTarget* t, CGScope s) {
- (void)s;
- xx_panic(t, "scope_end");
+
+/* Unconditional jump to label l (jmp rel32 with a label fixup). */
+static void x_jump(CGTarget* t, Label l) {
+  MCLabel target = (MCLabel)l;
+  emit_jmp_label(t->mc, target);
+}
+
+/* Map a CmpOp to the x64 condition code used by Jcc / SETcc. Unsigned
+ * comparisons map to B/BE/A/AE and signed ones to L/LE/G/GE. Any CmpOp
+ * not listed falls back to X64_CC_E rather than reporting an error. */
+static u32 cmp_to_cc(CmpOp op) {
+ switch (op) {
+ case CMP_EQ: return X64_CC_E;
+ case CMP_NE: return X64_CC_NE;
+ case CMP_LT_U: return X64_CC_B;
+ case CMP_LE_U: return X64_CC_BE;
+ case CMP_GT_U: return X64_CC_A;
+ case CMP_GE_U: return X64_CC_AE;
+ case CMP_LT_S: return X64_CC_L;
+ case CMP_LE_S: return X64_CC_LE;
+ case CMP_GT_S: return X64_CC_G;
+ case CMP_GE_S: return X64_CC_GE;
+ default: return X64_CC_E;
+ }
}
-static void xx_break_to(CGTarget* t, CGScope s) {
- (void)s;
- xx_panic(t, "break_to");
+
+/* Return a register number holding integer operand op. An OPK_REG operand
+ * yields its own register (low four bits) and emits nothing. An OPK_IMM
+ * operand is loaded into `scratch` (64-bit when w is set) and `scratch`
+ * is returned. Any other operand kind panics. */
+static u32 force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) {
+ if (op.kind == OPK_REG) return op.v.reg & 0xFu;
+ if (op.kind == OPK_IMM) {
+ emit_load_imm(t->mc, w, scratch, op.v.imm);
+ return scratch;
+ }
+ compiler_panic(t->c, impl_of(t)->loc, "x64: operand kind %d not REG/IMM",
+ (int)op.kind);
}
-static void xx_continue_to(CGTarget* t, CGScope s) {
- (void)s;
- xx_panic(t, "continue_to");
+
+/* Emit an integer compare that sets flags for a_op - b_op. The operand
+ * width comes from a_op's type. A register compared against an immediate
+ * in [-128, 127] uses the short `cmp r, imm8` form. Otherwise both
+ * operands are forced into registers, with rax and r11 as the scratch
+ * registers for immediates. */
+static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
+ int w = type_is_64(a_op.type) ? 1 : 0;
+ if (a_op.kind == OPK_REG && b_op.kind == OPK_IMM && b_op.v.imm >= -128 &&
+ b_op.v.imm <= 127) {
+ emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm);
+ return;
+ }
+ u32 ra = force_reg_int(t, a_op, w, X64_RAX);
+ /* Pick b's scratch so that it cannot be the register already holding a. */
+ u32 rb = force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11);
+ /* cmp r/m, r — opcode 0x39 (encoded as `cmp ra, rb` ⇒ flags = ra - rb). */
+ emit_alu_rr(t->mc, w, 0x39, ra, rb);
}
-static void xx_load_imm(CGTarget* t, Operand d, i64 i) {
- (void)d;
- (void)i;
- xx_panic(t, "load_imm");
+/* Compare a with b and branch to l when the relation `op` holds: an
+ * integer compare followed by Jcc rel32 on the mapped condition code. */
+static void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
+ Label l) {
+ emit_cmp_ab(t, a, b);
+ emit_jcc_label(t->mc, cmp_to_cc(op), (MCLabel)l);
}
-static void xx_load_const(CGTarget* t, Operand d, ConstBytes b) {
- (void)d;
- (void)b;
- xx_panic(t, "load_const");
+
+/* Materialize the result of comparing a with b into dst as 0 or 1:
+ * compare, SETcc into dst's byte register, then zero-extend that byte
+ * into the 32-bit register. */
+static void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
+ emit_cmp_ab(t, a, b);
+ u32 d = dst.v.reg & 0xFu;
+ emit_setcc(t->mc, cmp_to_cc(op), d);
+ emit_movzx_r32_r8(t->mc, d, d);
}
-static void xx_copy(CGTarget* t, Operand d, Operand s) {
- (void)d;
- (void)s;
- xx_panic(t, "copy");
+
+/* ---- structured scopes ---- */
+/* Open a structured scope and return its 1-based handle into a->scopes.
+ * Entries are only appended in the code visible here; nscopes is reset in
+ * x_func_begin.
+ *   SCOPE_IF:            allocates else/end labels, tests the condition
+ *                        against itself and jumps to the else label when
+ *                        it is zero.
+ *   SCOPE_LOOP / BLOCK:  records the caller's break/continue labels and
+ *                        emits no code.
+ *   anything else:       panics. */
+static CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+ XImpl* a = impl_of(t);
+ /* Grow the scope array by doubling (initial capacity 4). */
+ if (a->nscopes == a->scopes_cap) {
+ u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
+ XScope* nb = arena_array(t->c->tu, XScope, ncap);
+ if (a->scopes) memcpy(nb, a->scopes, sizeof(XScope) * a->nscopes);
+ a->scopes = nb;
+ a->scopes_cap = ncap;
+ }
+ XScope* sc = &a->scopes[a->nscopes];
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = 0;
+ sc->end_label = 0;
+ sc->break_label = d->break_label;
+ sc->continue_label = d->continue_label;
+
+ if (d->kind == SCOPE_IF) {
+ sc->else_label = t->mc->label_new(t->mc);
+ sc->end_label = t->mc->label_new(t->mc);
+ int w = type_is_64(d->cond.type) ? 1 : 0;
+ /* An immediate condition is materialized in rax first. */
+ u32 rc = force_reg_int(t, d->cond, w, X64_RAX);
+ emit_test_self(t->mc, w, rc);
+ emit_jcc_label(t->mc, X64_CC_E, sc->else_label);
+ } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
+ /* Bookkeeping only. */
+ } else {
+ compiler_panic(t->c, a->loc,
+ "x64 scope_begin: kind %d not yet implemented",
+ (int)d->kind);
+ }
+ a->nscopes++;
+ return (CGScope)a->nscopes;
}
-static void xx_load(CGTarget* t, Operand d, Operand a, MemAccess m) {
- (void)d;
- (void)a;
- (void)m;
- xx_panic(t, "load");
+
+/* Start the else arm of scope s: jump over it to the scope's end label
+ * (terminating the then arm), place the else label here, and mark the
+ * scope as having an else. Panics on an invalid handle. The scope kind is
+ * not checked. */
+static void x_scope_else(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 scope_else: bad scope");
+ XScope* sc = &a->scopes[s - 1];
+ emit_jmp_label(t->mc, sc->end_label);
+ t->mc->label_place(t->mc, sc->else_label);
+ sc->has_else = 1;
}
-static void xx_store(CGTarget* t, Operand a, Operand s, MemAccess m) {
- (void)a;
- (void)s;
- (void)m;
- xx_panic(t, "store");
+
+/* Close scope s. For SCOPE_IF this places the end label, and also the
+ * else label when x_scope_else was never called, so the conditional jump
+ * from x_scope_begin lands here. Other scope kinds emit nothing. The
+ * scope entry is not removed. Panics on an invalid handle. */
+static void x_scope_end(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 scope_end: bad scope");
+ XScope* sc = &a->scopes[s - 1];
+ if (sc->kind == SCOPE_IF) {
+ if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label);
+ t->mc->label_place(t->mc, sc->end_label);
+ }
}
-static void xx_addr_of(CGTarget* t, Operand d, Operand l) {
- (void)d;
- (void)l;
- xx_panic(t, "addr_of");
+
+/* Jump to the break label recorded for scope s. Panics on an invalid
+ * scope handle. */
+static void x_break_to(CGTarget* t, CGScope s) {
+  XImpl* impl = impl_of(t);
+  int valid = (s != CG_SCOPE_NONE) && (s <= impl->nscopes);
+
+  if (!valid) {
+    compiler_panic(t->c, impl->loc, "x64 break_to: bad scope");
+  }
+  XScope* scope = &impl->scopes[s - 1];
+  x_jump(t, scope->break_label);
+}
+/* Jump to the continue label recorded for scope s. Panics on an invalid
+ * scope handle. */
+static void x_continue_to(CGTarget* t, CGScope s) {
+  XImpl* impl = impl_of(t);
+  int valid = (s != CG_SCOPE_NONE) && (s <= impl->nscopes);
+
+  if (!valid) {
+    compiler_panic(t->c, impl->loc, "x64 continue_to: bad scope");
+  }
+  XScope* scope = &impl->scopes[s - 1];
+  x_jump(t, scope->continue_label);
+}
+
+/* ============================================================
+ * Data movement */
+
+/* Load integer immediate imm into dst's register; the 64-bit form is used
+ * when dst's type is 64 bits wide, the 32-bit form otherwise. */
+static void x_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  u32 rd = dst.v.reg & 0xFu;
+  int is64 = type_is_64(dst.type) ? 1 : 0;
+
+  emit_load_imm(t->mc, is64, rd, imm);
+}
+
+/* Materialize an FP literal: stash bytes in .rodata as a fresh local
+ * symbol, then load via RIP-relative movss/movsd.
+ * Only RC_FP destinations are supported; anything else panics. The
+ * emitter is switched to .rodata temporarily and restored to the section
+ * it was in before the load instruction is emitted. */
+static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
+ XImpl* a = impl_of(t);
+ if (dst.cls != RC_FP)
+ compiler_panic(t->c, a->loc, "x64 load_const: only FP supported in v1");
+
+ /* Alignment defaults to 4 when the constant does not specify one. */
+ Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
+ ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC,
+ cb.align ? cb.align : 4);
+
+ u32 cur_section = t->mc->section_id;
+ t->mc->set_section(t->mc, ro);
+ t->mc->emit_align(t->mc, cb.align ? cb.align : 4, 0);
+ u32 ro_off = t->mc->pos(t->mc);
+ t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
+
+ /* Build the symbol name ".LCFP_x64_<n>" by hand: copy the prefix, then
+ * append the decimal digits of a counter. The counter is a function-
+ * local static, so it keeps increasing across calls. */
+ char namebuf[64];
+ static u32 lit_seq = 0;
+ int len = 0;
+ const char* prefix = ".LCFP_x64_";
+ for (; prefix[len]; ++len) namebuf[len] = prefix[len];
+ u32 v = lit_seq++;
+ /* Digits are produced least-significant first, then reversed below. */
+ char tmp[16];
+ int tn = 0;
+ if (v == 0)
+ tmp[tn++] = '0';
+ else
+ while (v) {
+ tmp[tn++] = '0' + (char)(v % 10);
+ v /= 10;
+ }
+ for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i];
+ namebuf[len] = 0;
+
+ Sym sname = pool_intern_cstr(t->c->global, namebuf);
+ ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off,
+ (u64)cb.size);
+ t->mc->set_section(t->mc, cur_section);
+
+ /* movs{s,d} xmm, [rip+disp32]. Reloc R_PC32 with addend=-4 at the
+ * disp32 site so the linker resolves to target relative to end-of-insn. */
+ /* An 8-byte constant selects the F2 (movsd) prefix; every other size
+ * selects F3 (movss). */
+ u8 prefix2 = (cb.size == 8) ? 0xF2 : 0xF3;
+ u32 dst_x = dst.v.reg & 0xFu;
+ t->mc->emit_bytes(t->mc, &prefix2, 1);
+ emit_rex(t->mc, 0, dst_x, 0, 0);
+ u8 op[2] = {0x0F, 0x10};
+ t->mc->emit_bytes(t->mc, op, 2);
+ u8 mr = modrm(0u, (dst_x & 7u), 5u); /* [RIP + disp32] */
+ t->mc->emit_bytes(t->mc, &mr, 1);
+ /* Remember where the displacement starts so the reloc can target it. */
+ u32 disp_pos = t->mc->pos(t->mc);
+ emit_u32le(t->mc, 0);
+ t->mc->emit_reloc_at(t->mc, cur_section, disp_pos, R_PC32, sym, -4, 1, 0);
+}
+
+/* Register-to-register copy. If either operand is in the FP class the
+ * copy is a scalar SSE move (opcode 0F 10; prefix F2 for double, F3
+ * otherwise, chosen from dst's type). Otherwise it is an integer mov
+ * whose width follows dst's type. */
+static void x_copy(CGTarget* t, Operand dst, Operand src) {
+  u32 rd = dst.v.reg & 0xFu;
+  u32 rs = src.v.reg & 0xFu;
+  int is_fp = (dst.cls == RC_FP) || (src.cls == RC_FP);
+
+  if (!is_fp) {
+    int is64 = type_is_64(dst.type) ? 1 : 0;
+    emit_mov_rr(t->mc, is64, rd, rs);
+    return;
+  }
+
+  u8 sse_prefix = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+  emit_sse_rr(t->mc, sse_prefix, 0x10, rd, rs);
+}
+
+/* Resolve an address operand to a (base register, displacement) pair.
+ *   OPK_LOCAL:    base is rbp and *out_off is minus the slot's offset.
+ *   OPK_INDIRECT: base and offset are taken from the operand itself.
+ * Returns the base register and writes the displacement to *out_off.
+ * Panics on a bad slot or any other operand kind. */
+static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
+  XImpl* impl = impl_of(t);
+
+  if (addr.kind == OPK_INDIRECT) {
+    *out_off = addr.v.ind.ofs;
+    return addr.v.ind.base & 0xFu;
+  }
+
+  if (addr.kind == OPK_LOCAL) {
+    XSlot* slot = slot_get(impl, addr.v.frame_slot);
+    if (!slot) {
+      compiler_panic(t->c, impl->loc, "x64 addr_base: bad slot");
+    }
+    *out_off = -(i32)slot->off;
+    return X64_RBP;
+  }
+
+  compiler_panic(t->c, impl->loc, "x64 addr_base: kind %d unsupported",
+                 (int)addr.kind);
+}
+
+/* Load from the memory described by addr into dst's register. The access
+ * size is ma.size, or the byte size of addr's type when ma.size is zero.
+ * FP destinations use a scalar SSE load (F2 prefix for 8 bytes, F3
+ * otherwise). Integer destinations use emit_mov_load, with signedness
+ * taken from ma.type, or addr.type when ma.type is unset. OPK_GLOBAL
+ * addresses are not implemented and panic. */
+static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
+  XImpl* impl = impl_of(t);
+  u32 nbytes = ma.size ? ma.size : type_byte_size(addr.type);
+
+  if (addr.kind == OPK_GLOBAL) {
+    compiler_panic(t->c, impl->loc, "x64 load: OPK_GLOBAL not yet implemented");
+  }
+
+  i32 disp;
+  u32 base_reg = addr_base(t, addr, &disp);
+  u32 rd = dst.v.reg & 0xFu;
+
+  if (dst.cls != RC_FP) {
+    int is_signed = type_is_signed(ma.type ? ma.type : addr.type);
+    emit_mov_load(t->mc, nbytes, is_signed, rd, base_reg, disp);
+    return;
+  }
+
+  u8 sse_prefix = (nbytes == 8) ? 0xF2 : 0xF3;
+  emit_sse_load(t->mc, sse_prefix, 0x10, rd, base_reg, disp);
+}
+
+/* Store src to the memory described by addr. The access size is ma.size,
+ * or the byte size of addr's type when ma.size is zero.
+ *   OPK_IMM source: the immediate is first loaded into a scratch register
+ *                   (64-bit load only for 8-byte stores) and then stored.
+ *   RC_FP source:   scalar SSE store (F2 prefix for 8 bytes, else F3).
+ *   otherwise:      integer store from src's register.
+ * OPK_GLOBAL addresses are not implemented and panic. */
+static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
+  XImpl* a = impl_of(t);
+  u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
+
+  if (addr.kind == OPK_GLOBAL) {
+    compiler_panic(t->c, a->loc, "x64 store: OPK_GLOBAL not yet implemented");
+  }
+
+  i32 off;
+  u32 base = addr_base(t, addr, &off);
+
+  if (src.kind == OPK_IMM) {
+    int w = (sz == 8) ? 1 : 0;
+    /* The staging register must not be the address base: loading the
+     * immediate into rax when the base is rax would destroy the pointer
+     * before the store. Use r11 in that case, mirroring the scratch
+     * selection in emit_cmp_ab. */
+    u32 tmp = (base == X64_RAX) ? X64_R11 : X64_RAX;
+    emit_load_imm(t->mc, w, tmp, src.v.imm);
+    emit_mov_store(t->mc, sz, tmp, base, off);
+    return;
+  }
+  if (src.cls == RC_FP) {
+    u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
+    emit_sse_store(t->mc, prefix2, 0x11, src.v.reg & 0xFu, base, off);
+    return;
+  }
+  emit_mov_store(t->mc, sz, src.v.reg & 0xFu, base, off);
+}
+
+/* Compute the address of lvalue lv into dst's register with lea.
+ *   OPK_LOCAL:    rbp minus the slot offset (panics on a bad slot).
+ *   OPK_INDIRECT: the operand's base register plus its offset.
+ * Any other operand kind goes to x_panic. */
+static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
+ XImpl* a = impl_of(t);
+ if (lv.kind == OPK_LOCAL) {
+ XSlot* s = slot_get(a, lv.v.frame_slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 addr_of: bad slot");
+ emit_lea(t->mc, dst.v.reg & 0xFu, X64_RBP, -(i32)s->off);
+ return;
+ }
+ if (lv.kind == OPK_INDIRECT) {
+ emit_lea(t->mc, dst.v.reg & 0xFu, lv.v.ind.base & 0xFu, lv.v.ind.ofs);
+ return;
+ }
+ x_panic(t, "addr_of: kind unsupported");
}
-static void xx_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) {
+
+/* TLS address materialization is not implemented in this backend yet: all
+ * arguments are ignored and the call goes straight to x_panic. */
+static void x_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) {
(void)d;
(void)s;
(void)a;
- xx_panic(t, "tls_addr_of");
+ x_panic(t, "tls_addr_of");
}
-static void xx_copy_bytes(CGTarget* t, Operand da, Operand sa,
- AggregateAccess g) {
- (void)da;
- (void)sa;
- (void)g;
- xx_panic(t, "copy_bytes");
+
+/* Aggregate ops — small unrolled memcpy/memset. */
+/* Return a register holding the address denoted by op. An OPK_REG operand
+ * is assumed to already hold the address and is returned as is (low four
+ * bits). For an OPK_LOCAL operand the slot address is computed into
+ * `scratch` with lea and `scratch` is returned. Panics on a bad slot or
+ * any other operand kind. */
+static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
+ if (op.kind == OPK_REG) return op.v.reg & 0xFu;
+ if (op.kind == OPK_LOCAL) {
+ XImpl* a = impl_of(t);
+ XSlot* s = slot_get(a, op.v.frame_slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 agg: bad slot");
+ emit_lea(t->mc, scratch, X64_RBP, -(i32)s->off);
+ return scratch;
+ }
+ compiler_panic(t->c, impl_of(t)->loc,
+ "x64 agg: address kind %d unsupported", (int)op.kind);
}
-static void xx_set_bytes(CGTarget* t, Operand da, Operand bv,
+
+/* Copy g.size bytes from the address denoted by sa to the address denoted
+ * by da as an unrolled sequence of load/store pairs, widest chunks first
+ * (8, 4, 2, then 1 byte).
+ *
+ * Scratch registers are chosen so that none of them aliases a register
+ * that already holds one of the two pointers:
+ *   destination address: r11, or rcx when sa is already in r11
+ *   source address:      rax, or rcx when the destination is in rax
+ *   data temp:           rdx, else rcx, else rax
+ * With no collision this is exactly r11 / rax / rdx. */
+static void x_copy_bytes(CGTarget* t, Operand da, Operand sa,
AggregateAccess g) {
- (void)da;
- (void)sa;
- (void)g;
- xx_panic(t, "copy_bytes");
+  static const u32 widths[4] = {8u, 4u, 2u, 1u};
+
+  /* If the source pointer already lives in r11, materializing the
+   * destination address there would overwrite it. */
+  u32 dscratch = X64_R11;
+  if (sa.kind == OPK_REG && (sa.v.reg & 0xFu) == X64_R11) dscratch = X64_RCX;
+  u32 dr = agg_addr_reg(t, da, dscratch);
+  u32 sr = agg_addr_reg(t, sa, (dr == X64_RAX) ? X64_RCX : X64_RAX);
+
+  /* The data temp must differ from both pointer registers; otherwise the
+   * first load would clobber an address still needed by later chunks.
+   * Three candidates against two pointers always leaves one free. */
+  u32 tmp = X64_RDX;
+  if (tmp == dr || tmp == sr) tmp = X64_RCX;
+  if (tmp == dr || tmp == sr) tmp = X64_RAX;
+
+  u32 nbytes = g.size;
+  u32 i = 0;
+  for (u32 k = 0; k < 4u; ++k) {
+    u32 wd = widths[k];
+    while (i + wd <= nbytes) {
+      emit_mov_load(t->mc, wd, 0, tmp, sr, (i32)i);
+      emit_mov_store(t->mc, wd, tmp, dr, (i32)i);
+      i += wd;
+    }
+  }
}
-static void xx_bitfield_load(CGTarget* t, Operand d, Operand ra,
- BitFieldAccess b) {
+
+/* Fill g.size bytes at the address denoted by da with one byte value.
+ * Only an immediate byte (OPK_IMM) is supported; anything else panics.
+ * The byte is replicated across a 64-bit pattern, loaded once, and
+ * written with an unrolled sequence of stores, widest first (8, 4, 2,
+ * then 1 byte).
+ *
+ * The pattern register is rax unless the destination address is already
+ * in rax, in which case rcx is used so the pointer is not overwritten
+ * (the same guard x_copy_bytes applies to its source scratch). */
+static void x_set_bytes(CGTarget* t, Operand da, Operand bv,
+                        AggregateAccess g) {
+  static const u32 widths[4] = {8u, 4u, 2u, 1u};
+
+  u32 dr = agg_addr_reg(t, da, X64_R11);
+  if (bv.kind != OPK_IMM)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "x64 set_bytes: non-IMM byte not yet supported");
+
+  /* Replicate the low byte of the immediate into all eight byte lanes. */
+  u64 pattern = (u64)(u8)(bv.v.imm & 0xff);
+  pattern |= pattern << 8;
+  pattern |= pattern << 16;
+  pattern |= pattern << 32;
+
+  u32 vr = (dr == X64_RAX) ? X64_RCX : X64_RAX;
+  emit_load_imm(t->mc, 1, vr, (i64)pattern);
+
+  u32 nbytes = g.size;
+  u32 i = 0;
+  for (u32 k = 0; k < 4u; ++k) {
+    u32 wd = widths[k];
+    while (i + wd <= nbytes) {
+      emit_mov_store(t->mc, wd, vr, dr, (i32)i);
+      i += wd;
+    }
+  }
+}
+
+/* Bitfield loads are not implemented in this backend yet: all arguments
+ * are ignored and the call goes straight to x_panic. */
+static void x_bitfield_load(CGTarget* t, Operand d, Operand ra,
+ BitFieldAccess b) {
(void)d;
(void)ra;
(void)b;
- xx_panic(t, "bitfield_load");
+ x_panic(t, "bitfield_load");
}
-static void xx_bitfield_store(CGTarget* t, Operand ra, Operand s,
- BitFieldAccess b) {
+/* Bitfield stores are not implemented in this backend yet: all arguments
+ * are ignored and the call goes straight to x_panic. */
+static void x_bitfield_store(CGTarget* t, Operand ra, Operand s,
+ BitFieldAccess b) {
(void)ra;
(void)s;
(void)b;
- xx_panic(t, "bitfield_store");
+ x_panic(t, "bitfield_store");
}
-static void xx_binop(CGTarget* t, BinOp op, Operand d, Operand a, Operand b) {
- (void)op;
- (void)d;
- (void)a;
- (void)b;
- xx_panic(t, "binop");
+/* ============================================================
+ * Arithmetic */
+
+/* Emit `dst = a_op <op> b_op`.
+ *
+ * x86 arithmetic is two-operand (dst op= src), and div/idiv and
+ * variable shifts use fixed registers (rax/rdx, cl). Every path therefore
+ * has to copy `a` into place before operating, and must make sure that
+ * copy does not destroy `b` while `b` is still needed. r11 is used as the
+ * scratch register throughout.
+ *
+ *   FP      : SSE scalar add/sub/mul/div on xmm registers.
+ *   DIV/REM : dividend in rax, rdx sign/zero-extended, result from
+ *             rax (quotient) or rdx (remainder).
+ *   SHIFT   : count in cl.
+ *   other   : plain ALU reg/reg form; unknown ops panic. */
+static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
+                    Operand b_op) {
+  MCEmitter* mc = t->mc;
+
+  /* FP binops. */
+  if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
+    u32 rd = dst.v.reg & 0xFu;
+    u32 ra = a_op.v.reg & 0xFu;
+    u32 rb = b_op.v.reg & 0xFu;
+    u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+    u8 opcode;
+    switch (op) {
+      case BO_FADD: opcode = 0x58; break;
+      case BO_FSUB: opcode = 0x5C; break;
+      case BO_FMUL: opcode = 0x59; break;
+      case BO_FDIV: opcode = 0x5E; break;
+      default: opcode = 0x58; break;
+    }
+    if (rd == rb && rd != ra && (op == BO_FADD || op == BO_FMUL)) {
+      /* dst already holds b: copying a into it first would lose b.
+       * The operation is commutative, so fold a into dst instead. */
+      emit_sse_rr(mc, prefix2, opcode, rd, ra);
+      return;
+    }
+    /* NOTE(review): FSUB/FDIV with dst == b (and dst != a) still
+     * overwrites b below; fixing that needs an XMM scratch register and
+     * none is known to be reserved from here. */
+    if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra);
+    emit_sse_rr(mc, prefix2, opcode, rd, rb);
+    return;
+  }
+
+  int w = type_is_64(dst.type) ? 1 : 0;
+  u32 rd = dst.v.reg & 0xFu;
+
+  /* Division: idiv/div uses rax/rdx implicitly. Route divisor through r11
+   * if it would otherwise be rax/rdx. */
+  if (op == BO_SDIV || op == BO_UDIV || op == BO_SREM || op == BO_UREM) {
+    /* Secure the divisor BEFORE the dividend is moved into rax: a
+     * divisor living in rax would otherwise be overwritten by that move
+     * and r11 would receive a copy of the dividend instead. */
+    u32 rb;
+    if (b_op.kind == OPK_REG) {
+      rb = b_op.v.reg & 0xFu;
+      if (rb == X64_RAX || rb == X64_RDX) {
+        emit_mov_rr(mc, w, X64_R11, rb);
+        rb = X64_R11;
+      }
+    } else if (b_op.kind == OPK_IMM) {
+      emit_load_imm(mc, w, X64_R11, b_op.v.imm);
+      rb = X64_R11;
+    } else {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "x64 div: divisor kind %d unsupported", (int)b_op.kind);
+    }
+    u32 ra = force_reg_int(t, a_op, w, X64_RAX);
+    if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra);
+    if (op == BO_SDIV || op == BO_SREM) {
+      emit_cqo_or_cdq(mc, w);
+      emit_f7_rm(mc, w, 7u, rb); /* idiv */
+    } else {
+      emit_xor_self(mc, w, X64_RDX);
+      emit_f7_rm(mc, w, 6u, rb); /* div */
+    }
+    u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX;
+    if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg);
+    return;
+  }
+
+  int b_is_reg = (b_op.kind == OPK_REG);
+  u32 rb = b_is_reg ? (b_op.v.reg & 0xFu) : 0u;
+
+  /* Shifts: shift count must be in cl. */
+  if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
+    /* Shift in r11 whenever writing the value into dst first would be
+     * unsafe: dst is rcx (the count load would destroy the value) or dst
+     * is the register currently holding the count. */
+    u32 work = (rd == X64_RCX || (b_is_reg && rb == rd)) ? X64_R11 : rd;
+    u32 ra = force_reg_int(t, a_op, w, work);
+    if (ra != work) emit_mov_rr(mc, w, work, ra);
+    if (b_is_reg) {
+      if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb);
+    } else if (b_op.kind == OPK_IMM) {
+      emit_load_imm(mc, 0, X64_RCX, b_op.v.imm & 0x3f);
+    } else {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "x64 shift: count kind %d unsupported", (int)b_op.kind);
+    }
+    u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u);
+    emit_shift_cl(mc, w, sub, work);
+    if (work != rd) emit_mov_rr(mc, w, rd, work);
+    return;
+  }
+
+  /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */
+  if (b_is_reg && rb == rd) {
+    int a_in_rd = (a_op.kind == OPK_REG) && ((a_op.v.reg & 0xFu) == rd);
+    if (!a_in_rd) {
+      /* dst aliases b: park b in r11 before a overwrites it. */
+      emit_mov_rr(mc, w, X64_R11, rb);
+      rb = X64_R11;
+    }
+  }
+  /* A non-register `a` is materialized straight into dst, so no other
+   * register (one that might still hold b) is disturbed. */
+  u32 ra = force_reg_int(t, a_op, w, rd);
+  if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+  if (!b_is_reg) rb = force_reg_int(t, b_op, w, X64_R11);
+  switch (op) {
+    case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break;
+    case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break;
+    case BO_AND: emit_alu_rr(mc, w, 0x21, rd, rb); break;
+    case BO_OR: emit_alu_rr(mc, w, 0x09, rd, rb); break;
+    case BO_XOR: emit_alu_rr(mc, w, 0x31, rd, rb); break;
+    case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl",
+                     (int)op);
+  }
}
-static void xx_unop(CGTarget* t, UnOp op, Operand d, Operand a) {
- (void)op;
- (void)d;
- (void)a;
- xx_panic(t, "unop");
+
+/* Emit `dst = <op> a_op` for integer operands held in registers.
+ *
+ *   UO_NEG  : two's-complement negate (F7 /3).
+ *   UO_BNOT : bitwise complement (F7 /2).
+ *   UO_NOT  : logical not, materialized as 0/1 in dst.
+ *
+ * A non-register operand or an unknown op panics. */
+static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
+  MCEmitter* mc = t->mc;
+  if (a_op.kind != OPK_REG)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "x64 unop: non-REG operand not supported");
+  int w = type_is_64(dst.type) ? 1 : 0;
+  u32 rd = dst.v.reg & 0xFu;
+  u32 ra = a_op.v.reg & 0xFu;
+  switch (op) {
+    case UO_NEG:
+      if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+      emit_f7_rm(mc, w, 3u, rd);
+      return;
+    case UO_BNOT:
+      if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+      emit_f7_rm(mc, w, 2u, rd);
+      return;
+    case UO_NOT: {
+      /* !x → (x == 0) materialized as 0/1 in dst. The zero test must
+       * cover the OPERAND's width: with a 64-bit operand and a 32-bit
+       * result, testing at dst's width would ignore the high half and
+       * report values such as 1<<32 as zero. */
+      int wa = type_is_64(a_op.type) ? 1 : 0;
+      emit_test_self(mc, wa, ra);
+      emit_setcc(mc, X64_CC_E, rd);
+      emit_movzx_r32_r8(mc, rd, rd);
+      return;
+    }
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl",
+                     (int)op);
+  }
}
-static void xx_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, Operand b) {
- (void)op;
- (void)d;
- (void)a;
- (void)b;
- xx_panic(t, "cmp");
+
+/* Emit the scalar conversion `dst = (k) src` between registers.
+ *
+ *   CV_SEXT / CV_ZEXT     : widen by the source type's byte size.
+ *   CV_TRUNC              : 32-bit register move (clears bits 63..32).
+ *   CV_ITOF_S / CV_ITOF_U : cvtsi2ss/sd; u64 sources are not implemented.
+ *   CV_FTOI_S / CV_FTOI_U : cvttss/sd2si; u64 results are not implemented.
+ *   CV_FEXT / CV_FTRUNC   : cvtss2sd / cvtsd2ss.
+ *   CV_BITCAST            : movd/movq between a GPR and an xmm register;
+ *                           same-class bitcasts panic.
+ *
+ * Unknown kinds panic. */
+static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
+  XImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 rd = dst.v.reg & 0xFu;
+  u32 rs = src.v.reg & 0xFu;
+  switch (k) {
+    case CV_SEXT: {
+      u32 src_bytes = type_byte_size(src.type);
+      int w = type_is_64(dst.type) ? 1 : 0;
+      emit_extend_rr(mc, w, /*signed=*/1, src_bytes, rd, rs);
+      return;
+    }
+    case CV_ZEXT: {
+      u32 src_bytes = type_byte_size(src.type);
+      int w = type_is_64(dst.type) ? 1 : 0;
+      emit_extend_rr(mc, w, /*signed=*/0, src_bytes, rd, rs);
+      return;
+    }
+    case CV_TRUNC: {
+      /* In-reg truncation: `mov r32, r32` clears high 32. Narrower stores
+       * select width themselves. */
+      emit_mov_rr(mc, 0, rd, rs);
+      return;
+    }
+    case CV_ITOF_S:
+    case CV_ITOF_U: {
+      int w_src = type_is_64(src.type) ? 1 : 0;
+      u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+      if (k == CV_ITOF_U && w_src == 1) {
+        compiler_panic(t->c, a->loc,
+                       "x64 convert: u64→fp not yet implemented");
+      }
+      if (k == CV_ITOF_U) {
+        /* u32→fp: zero-extend to 64-bit, then signed cvtsi2sd works. */
+        emit_extend_rr(mc, 0, 0, 4, X64_R11, rs);
+        rs = X64_R11;
+        w_src = 1;
+      }
+      emit_sse_rr_w(mc, prefix2, 0x2A, w_src, rd, rs);
+      return;
+    }
+    case CV_FTOI_S:
+    case CV_FTOI_U: {
+      int w_dst = type_is_64(dst.type) ? 1 : 0;
+      u8 prefix2 = type_is_fp_double(src.type) ? 0xF2 : 0xF3;
+      if (k == CV_FTOI_U && w_dst == 1) {
+        compiler_panic(t->c, a->loc,
+                       "x64 convert: fp→u64 not yet implemented");
+      }
+      if (k == CV_FTOI_U) {
+        /* fp→u32: cvttss2si/cvttsd2si are signed conversions, so a
+         * 32-bit destination turns every input >= 2^31 into the
+         * 0x80000000 "integer indefinite" value. Convert at 64-bit width
+         * instead (the mirror of the u32→fp trick above); the low 32
+         * bits then hold the exact u32 result. */
+        w_dst = 1;
+      }
+      emit_sse_rr_w(mc, prefix2, 0x2C, w_dst, rd, rs);
+      return;
+    }
+    case CV_FEXT:
+      emit_sse_rr(mc, 0xF3, 0x5A, rd, rs);
+      return;
+    case CV_FTRUNC:
+      emit_sse_rr(mc, 0xF2, 0x5A, rd, rs);
+      return;
+    case CV_BITCAST: {
+      /* movd/movq between xmm and GPR. */
+      if (src.cls == RC_INT && dst.cls == RC_FP) {
+        int w = type_is_64(dst.type) ? 1 : 0;
+        emit_sse_rr_w(mc, 0x66, 0x6E, w, rd, rs);
+      } else if (src.cls == RC_FP && dst.cls == RC_INT) {
+        /* Opcode 7E stores xmm → GPR, so the xmm source sits in the reg
+         * field and the GPR destination in r/m: operands are swapped. */
+        int w = type_is_64(src.type) ? 1 : 0;
+        emit_sse_rr_w(mc, 0x66, 0x7E, w, rs, rd);
+      } else {
+        compiler_panic(t->c, a->loc,
+                       "x64 convert BITCAST: same-class not supported");
+      }
+      return;
+    }
+    default:
+      compiler_panic(t->c, a->loc, "x64 convert kind %d unimpl", (int)k);
+  }
}
-static void xx_convert(CGTarget* t, ConvKind k, Operand d, Operand s) {
- (void)k;
- (void)d;
- (void)s;
- xx_panic(t, "convert");
+
+/* ============================================================
+ * Calls / return */
+
+/* Place one outgoing call argument.
+ *
+ * `next_int` / `next_fp` count the integer and SSE argument registers
+ * already consumed, and `stack_off` is the byte offset of the next free
+ * outgoing stack slot (relative to rsp). All three are advanced as the
+ * argument is placed. Six integer registers (g_int_arg_regs) and eight
+ * xmm registers are available; anything beyond that is written to an
+ * 8-byte stack slot, with integer values staged through rax.
+ *
+ * Arguments without ABI info (variadic extras) are treated as a single
+ * DIRECT part classified by the storage's register class. Unsupported
+ * storage kinds and ABI classes panic. */
+static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
+                           u32* next_fp, u32* stack_off) {
+  XImpl* a = impl_of(t);
+  /* Synthesize one-part DIRECT for variadic args (av->abi NULL). */
+  ABIArgInfo va_ai;
+  ABIArgPart va_pt;
+  const ABIArgInfo* ai = av->abi;
+  if (!ai) {
+    u32 sz = type_byte_size(av->type);
+    memset(&va_ai, 0, sizeof va_ai);
+    memset(&va_pt, 0, sizeof va_pt);
+    va_ai.kind = ABI_ARG_DIRECT;
+    va_ai.parts = &va_pt;
+    va_ai.nparts = 1;
+    va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
+    va_pt.size = sz;
+    va_pt.align = sz;
+    va_pt.src_offset = 0;
+    ai = &va_ai;
+  }
+  if (ai->kind == ABI_ARG_IGNORE) return;
+  if (ai->kind == ABI_ARG_INDIRECT) {
+    /* Pass the address of the argument's local slot: in the next integer
+     * argument register while one is free, otherwise staged through rax
+     * into the outgoing stack area (same selection as the INT path). */
+    int to_stack = (*next_int >= 6);
+    u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
+    if (av->storage.kind == OPK_LOCAL) {
+      XSlot* s = slot_get(a, av->storage.v.frame_slot);
+      if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot");
+      emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off);
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "x64 call: INDIRECT arg storage kind %d unsupported",
+                     (int)av->storage.kind);
+    }
+    if (to_stack) {
+      emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+      *stack_off += 8;
+    }
+    return;
+  }
+
+  for (u16 i = 0; i < ai->nparts; ++i) {
+    const ABIArgPart* pt = &ai->parts[i];
+    u32 sz = pt->size;
+    if (pt->cls == ABI_CLASS_INT) {
+      int to_stack = (*next_int >= 6);
+      u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
+      switch (av->storage.kind) {
+        case OPK_IMM: {
+          int w = (sz == 8) ? 1 : 0;
+          emit_load_imm(t->mc, w, dst_reg, av->storage.v.imm);
+          break;
+        }
+        case OPK_REG: {
+          int w = (sz == 8) ? 1 : 0;
+          u32 sr = av->storage.v.reg & 0xFu;
+          if (sr != dst_reg) emit_mov_rr(t->mc, w, dst_reg, sr);
+          break;
+        }
+        case OPK_LOCAL: {
+          /* Aggregate part: load it from its offset inside the slot. */
+          XSlot* s = slot_get(a, av->storage.v.frame_slot);
+          if (!s) compiler_panic(t->c, a->loc, "x64 call: bad arg slot");
+          emit_mov_load(t->mc, sz, 0, dst_reg, X64_RBP,
+                        -(i32)s->off + (i32)pt->src_offset);
+          break;
+        }
+        default:
+          compiler_panic(t->c, a->loc,
+                         "x64 call: arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+      }
+      if (to_stack) {
+        emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+        *stack_off += 8;
+      }
+    } else if (pt->cls == ABI_CLASS_FP) {
+      int to_stack = (*next_fp >= 8);
+      if (!to_stack) {
+        u32 dst_x = (*next_fp)++;
+        if (av->storage.kind == OPK_REG) {
+          u32 sx = av->storage.v.reg & 0xFu;
+          if (sx != dst_x) {
+            u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
+            emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx);
+          }
+        } else {
+          compiler_panic(t->c, a->loc,
+                         "x64 call: FP arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+        }
+      } else {
+        if (av->storage.kind == OPK_REG) {
+          u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
+          emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu,
+                         X64_RSP, (i32)*stack_off);
+        } else {
+          compiler_panic(t->c, a->loc,
+                         "x64 call: FP stack-arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+        }
+        *stack_off += 8;
+      }
+    } else {
+      compiler_panic(t->c, a->loc, "x64 call: ABI class %d unimpl",
+                     (int)pt->cls);
+    }
+  }
}
-static void xx_call(CGTarget* t, const CGCallDesc* d) {
- (void)d;
- xx_panic(t, "call");
+/* Emit a call described by `d`.
+ *
+ * Steps: park an indirect callee in r11, pass the sret destination in rdi
+ * when the callee returns through memory, place every argument with
+ * emit_arg_value, record the outgoing stack area needed (rounded up to
+ * 16 bytes) in max_outgoing, set AL for variadic callees, emit the call,
+ * and finally move the returned parts from rax/rdx or xmm0/xmm1 into the
+ * requested return storage (a register or a local slot).
+ *
+ * Direct calls use `call rel32` with an R_X64_PLT32 relocation; indirect
+ * calls use `call r/m64`. Unsupported operand kinds panic. */
+static void x_call(CGTarget* t, const CGCallDesc* d) {
+  XImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  u32 next_int = 0, next_fp = 0, stack_off = 0;
+
+  /* Indirect callee: copy the target into r11 before anything else. The
+   * argument setup below writes the argument registers, stages stack
+   * arguments through rax, and variadic calls load AL, any of which could
+   * overwrite the register that holds the function pointer. */
+  u32 callee_reg = 0;
+  if (d->callee.kind == OPK_REG) {
+    callee_reg = d->callee.v.reg & 0xFu;
+    if (callee_reg != X64_R11) {
+      emit_mov_rr(mc, 1, X64_R11, callee_reg);
+      callee_reg = X64_R11;
+    }
+  }
+
+  /* sret: caller puts destination pointer in rdi. */
+  if (d->abi && d->abi->has_sret) {
+    if (d->ret.storage.kind != OPK_LOCAL) {
+      compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL");
+    }
+    XSlot* s = slot_get(a, d->ret.storage.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot");
+    emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off);
+    next_int = 1;
+  }
+  for (u32 i = 0; i < d->nargs; ++i) {
+    emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+  }
+  u32 needed = (stack_off + 15u) & ~15u;
+  if (needed > a->max_outgoing) a->max_outgoing = needed;
+
+  /* Variadic calls: AL = number of XMM regs used. */
+  if (d->abi && d->abi->variadic) {
+    emit_load_imm(mc, 0, X64_RAX, (i64)next_fp);
+  }
+
+  if (d->callee.kind == OPK_GLOBAL) {
+    /* call rel32: E8 + disp32 + R_X64_PLT32. The -4 in the addend
+     * accounts for the displacement being relative to the end of the
+     * 4-byte field. */
+    u8 op = 0xE8;
+    mc->emit_bytes(mc, &op, 1);
+    u32 disp_pos = mc->pos(mc);
+    emit_u32le(mc, 0);
+    mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32,
+                      d->callee.v.global.sym,
+                      d->callee.v.global.addend - 4, 1, 0);
+  } else if (d->callee.kind == OPK_REG) {
+    /* call r/m64: FF /2 on the parked callee register. */
+    emit_rex(mc, 0, 0, 0, callee_reg);
+    u8 buf[2] = {0xFF, modrm(3u, 2u, callee_reg)};
+    mc->emit_bytes(mc, buf, 2);
+  } else {
+    compiler_panic(t->c, a->loc, "x64 call: callee kind %d unsupported",
+                   (int)d->callee.kind);
+  }
+
+  /* Receive return value. Without ABI info there is nothing to receive;
+   * the checks above already tolerate a NULL d->abi, so do the same here
+   * instead of dereferencing it. */
+  if (!d->abi) return;
+  const ABIArgInfo* ri = &d->abi->ret;
+  if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return;
+  if (ri->nparts == 0) return;
+
+  Operand rs = d->ret.storage;
+  u32 next_int_ret = 0, next_fp_ret = 0;
+  static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX};
+  for (u16 i = 0; i < ri->nparts; ++i) {
+    const ABIArgPart* p = &ri->parts[i];
+    u32 src_reg;
+    if (p->cls == ABI_CLASS_INT) src_reg = ret_int_regs[next_int_ret++];
+    else if (p->cls == ABI_CLASS_FP) src_reg = (u32)(X64_XMM0 + next_fp_ret++);
+    else compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl",
+                        (int)p->cls);
+
+    if (rs.kind == OPK_REG) {
+      /* A single register can only receive a one-part return value. */
+      if (ri->nparts != 1) {
+        compiler_panic(t->c, a->loc,
+                       "x64 call: REG ret_storage with %u parts",
+                       (unsigned)ri->nparts);
+      }
+      if (p->cls == ABI_CLASS_INT) {
+        int w = (p->size == 8) ? 1 : 0;
+        u32 dr = rs.v.reg & 0xFu;
+        if (dr != src_reg) emit_mov_rr(mc, w, dr, src_reg);
+      } else {
+        u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
+        u32 dr = rs.v.reg & 0xFu;
+        if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg);
+      }
+    } else if (rs.kind == OPK_LOCAL) {
+      /* Aggregate returned in registers: store each part at its offset
+       * inside the destination slot. */
+      XSlot* s = slot_get(a, rs.v.frame_slot);
+      if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot");
+      i32 off = -(i32)s->off + (i32)p->src_offset;
+      if (p->cls == ABI_CLASS_INT) {
+        emit_mov_store(mc, p->size, src_reg, X64_RBP, off);
+      } else {
+        u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
+        emit_sse_store(mc, prefix2, 0x11, src_reg, X64_RBP, off);
+      }
+    } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
+      /* void ret placeholder — nothing to do. */
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "x64 call: ret_storage kind %d unsupported",
+                     (int)rs.kind);
+    }
+  }
}
-static void xx_ret(CGTarget* t, const CGABIValue* v) {
- (void)v;
- xx_panic(t, "ret");
+
+/* Emit a function return: move `val` (when non-NULL) to where the ABI
+ * expects it, then jump to the shared epilogue label.
+ *
+ *   INDIRECT       : copy the local slot into the caller's sret buffer
+ *                    (pointer reloaded into rdi from sret_ptr_slot) and
+ *                    return that pointer in rax.
+ *   REG storage    : move into xmm0 (FP class) or rax.
+ *   IMM storage    : load the immediate into rax.
+ *   LOCAL storage  : DIRECT aggregate; each ABI part is loaded into
+ *                    rax/rdx or xmm0/xmm1. */
+static void x_ret(CGTarget* t, const CGABIValue* val) {
+  static const u32 int_ret_regs[2] = {X64_RAX, X64_RDX};
+  static const u32 chunk_sizes[4] = {8, 4, 2, 1};
+  XImpl* impl = impl_of(t);
+  MCEmitter* out = t->mc;
+
+  if (val) {
+    const ABIArgInfo* info = val->abi;
+    if (info && info->kind == ABI_ARG_INDIRECT) {
+      /* sret: reload destination pointer into rdi, memcpy source into [rdi]. */
+      if (val->storage.kind != OPK_LOCAL) {
+        compiler_panic(t->c, impl->loc,
+                       "x64 ret indirect: storage kind %d unsupported",
+                       (int)val->storage.kind);
+      }
+      XSlot* src = slot_get(impl, val->storage.v.frame_slot);
+      if (!src) compiler_panic(t->c, impl->loc, "x64 ret: bad sret slot");
+      if (impl->sret_ptr_slot != FRAME_SLOT_NONE) {
+        XSlot* ptr_slot = slot_get(impl, impl->sret_ptr_slot);
+        if (ptr_slot)
+          emit_mov_load(out, 8, 0, X64_RDI, X64_RBP, -(i32)ptr_slot->off);
+      }
+      /* Copy widest-first through rax: 8-byte chunks, then 4, 2, 1. */
+      u32 total = src->size;
+      u32 done = 0;
+      for (u32 c = 0; c < 4; ++c) {
+        u32 width = chunk_sizes[c];
+        while (done + width <= total) {
+          emit_mov_load(out, width, 0, X64_RAX, X64_RBP,
+                        -(i32)src->off + (i32)done);
+          emit_mov_store(out, width, X64_RAX, X64_RDI, (i32)done);
+          done += width;
+        }
+      }
+      /* Convention: return sret pointer in rax. */
+      emit_mov_rr(out, 1, X64_RAX, X64_RDI);
+    } else {
+      switch (val->storage.kind) {
+        case OPK_REG: {
+          u32 from = val->storage.v.reg & 0xFu;
+          if (val->storage.cls == RC_FP) {
+            u8 prefix2 = type_is_fp_double(val->storage.type) ? 0xF2 : 0xF3;
+            if (from != X64_XMM0)
+              emit_sse_rr(out, prefix2, 0x10, X64_XMM0, from);
+          } else {
+            int wide = type_is_64(val->storage.type) ? 1 : 0;
+            if (from != X64_RAX) emit_mov_rr(out, wide, X64_RAX, from);
+          }
+          break;
+        }
+        case OPK_IMM: {
+          int wide = type_is_64(val->storage.type) ? 1 : 0;
+          emit_load_imm(out, wide, X64_RAX, val->storage.v.imm);
+          break;
+        }
+        case OPK_LOCAL: {
+          /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */
+          XSlot* src = slot_get(impl, val->storage.v.frame_slot);
+          if (!src) compiler_panic(t->c, impl->loc, "x64 ret: bad local slot");
+          u32 nparts = info ? info->nparts : 0u;
+          u32 ints_used = 0, fps_used = 0;
+          for (u16 i = 0; i < nparts; ++i) {
+            const ABIArgPart* part = &info->parts[i];
+            i32 disp = -(i32)src->off + (i32)part->src_offset;
+            if (part->cls == ABI_CLASS_INT) {
+              emit_mov_load(out, part->size, 0, int_ret_regs[ints_used++],
+                            X64_RBP, disp);
+            } else if (part->cls == ABI_CLASS_FP) {
+              u8 prefix2 = (part->size == 8) ? 0xF2 : 0xF3;
+              emit_sse_load(out, prefix2, 0x10, (u32)(X64_XMM0 + fps_used++),
+                            X64_RBP, disp);
+            } else {
+              compiler_panic(t->c, impl->loc,
+                             "x64 ret: ret part cls %d unimpl",
+                             (int)part->cls);
+            }
+          }
+          break;
+        }
+        default:
+          /* Other storage kinds place nothing; just fall to the jump. */
+          break;
+      }
+    }
+  }
+  emit_jmp_label(out, impl->epilogue_label);
}
-static void xx_alloca_(CGTarget* t, Operand d, Operand s, u32 a) {
+/* ============================================================
+ * Stubs for unimplemented methods. */
+/* Stub: alloca is not implemented; reports "alloca" through x_panic. */
+static void x_alloca_(CGTarget* t, Operand d, Operand s, u32 a) {
(void)d;
(void)s;
(void)a;
- xx_panic(t, "alloca");
+ x_panic(t, "alloca");
}
-static void xx_va_start_(CGTarget* t, Operand a) {
+/* Stub: va_start is not implemented; reports "va_start" through x_panic. */
+static void x_va_start_(CGTarget* t, Operand a) {
(void)a;
- xx_panic(t, "va_start");
+ x_panic(t, "va_start");
}
-static void xx_va_arg_(CGTarget* t, Operand d, Operand a, const Type* ty) {
+/* Stub: va_arg is not implemented; reports "va_arg" through x_panic. */
+static void x_va_arg_(CGTarget* t, Operand d, Operand a, const Type* ty) {
(void)d;
(void)a;
(void)ty;
- xx_panic(t, "va_arg");
+ x_panic(t, "va_arg");
}
-static void xx_va_end_(CGTarget* t, Operand a) {
+/* va_end emits nothing: unlike the skeleton it replaces, this version is a
+ * no-op rather than a panic. */
+static void x_va_end_(CGTarget* t, Operand a) {
(void)a;
- xx_panic(t, "va_end");
+ (void)t;
}
-static void xx_va_copy_(CGTarget* t, Operand d, Operand s) {
+/* Stub: va_copy is not implemented; reports "va_copy" through x_panic. */
+static void x_va_copy_(CGTarget* t, Operand d, Operand s) {
(void)d;
(void)s;
- xx_panic(t, "va_copy");
+ x_panic(t, "va_copy");
}
-static void xx_atomic_load(CGTarget* t, Operand d, Operand a, MemAccess m,
- MemOrder o) {
+/* Stub: atomic loads are not implemented; reports "atomic_load" through
+ * x_panic. The address parameter is named `ad` in this version. */
+static void x_atomic_load(CGTarget* t, Operand d, Operand ad, MemAccess m,
+ MemOrder o) {
(void)d;
- (void)a;
+ (void)ad;
(void)m;
(void)o;
- xx_panic(t, "atomic_load");
+ x_panic(t, "atomic_load");
}
-static void xx_atomic_store(CGTarget* t, Operand a, Operand s, MemAccess m,
- MemOrder o) {
- (void)a;
+/* Stub: atomic stores are not implemented; reports "atomic_store" through
+ * x_panic. */
+static void x_atomic_store(CGTarget* t, Operand ad, Operand s, MemAccess m,
+ MemOrder o) {
+ (void)ad;
(void)s;
(void)m;
(void)o;
- xx_panic(t, "atomic_store");
+ x_panic(t, "atomic_store");
}
-static void xx_atomic_rmw(CGTarget* t, AtomicOp op, Operand d, Operand a,
- Operand v, MemAccess m, MemOrder o) {
+/* Stub: atomic read-modify-write is not implemented; reports "atomic_rmw"
+ * through x_panic. */
+static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand d, Operand ad,
+ Operand v, MemAccess m, MemOrder o) {
(void)op;
(void)d;
- (void)a;
+ (void)ad;
(void)v;
(void)m;
(void)o;
- xx_panic(t, "atomic_rmw");
+ x_panic(t, "atomic_rmw");
}
-static void xx_atomic_cas(CGTarget* t, Operand p, Operand ok, Operand a,
- Operand e, Operand des, MemAccess m, MemOrder so,
- MemOrder fo) {
+/* Stub: atomic compare-and-swap is not implemented; reports "atomic_cas"
+ * through x_panic. */
+static void x_atomic_cas(CGTarget* t, Operand p, Operand ok, Operand ad,
+ Operand e, Operand des, MemAccess m, MemOrder so,
+ MemOrder fo) {
(void)p;
(void)ok;
- (void)a;
+ (void)ad;
(void)e;
(void)des;
(void)m;
(void)so;
(void)fo;
- xx_panic(t, "atomic_cas");
+ x_panic(t, "atomic_cas");
}
-static void xx_fence(CGTarget* t, MemOrder o) {
+/* Stub: memory fences are not implemented; reports "fence" through
+ * x_panic. */
+static void x_fence(CGTarget* t, MemOrder o) {
(void)o;
- xx_panic(t, "fence");
+ x_panic(t, "fence");
}
-static void xx_intrinsic(CGTarget* t, IntrinKind k, Operand* d, u32 nd,
- const Operand* a, u32 na) {
+/* Stub: intrinsics are not implemented; every kind reports "intrinsic"
+ * through x_panic. */
+static void x_intrinsic(CGTarget* t, IntrinKind k, Operand* d, u32 nd,
+ const Operand* a, u32 na) {
(void)k;
(void)d;
(void)nd;
(void)a;
(void)na;
- xx_panic(t, "intrinsic");
+ x_panic(t, "intrinsic");
}
-static void xx_asm_block(CGTarget* t, const char* tmpl,
- const AsmConstraint* outs, u32 no, Operand* oo,
- const AsmConstraint* ins, u32 ni, const Operand* io,
- const Sym* clobs, u32 nc) {
+/* Stub: inline assembly blocks are not implemented; reports "asm_block"
+ * through x_panic. */
+static void x_asm_block(CGTarget* t, const char* tmpl,
+ const AsmConstraint* outs, u32 no, Operand* oo,
+ const AsmConstraint* ins, u32 ni, const Operand* io,
+ const Sym* clobs, u32 nc) {
(void)tmpl;
(void)outs;
(void)no;
@@ -297,16 +1866,16 @@ static void xx_asm_block(CGTarget* t, const char* tmpl,
(void)io;
(void)clobs;
(void)nc;
- xx_panic(t, "asm_block");
+ x_panic(t, "asm_block");
}
-static void xx_set_loc(CGTarget* t, SrcLoc l) {
+/* Record the current source location on the target (used by the panic
+ * diagnostics in this file) and forward it to the MC emitter when one is
+ * attached. */
+static void x_set_loc(CGTarget* t, SrcLoc l) {
((XImpl*)t)->loc = l;
if (t->mc) t->mc->set_loc(t->mc, l);
}
-static void xx_finalize(CGTarget* t) { (void)t; }
-static void xx_destroy(CGTarget* t) { (void)t; }
+/* finalize / destroy hooks: both are no-ops for this target. */
+static void x_finalize(CGTarget* t) { (void)t; }
+static void x_destroy(CGTarget* t) { (void)t; }
+/* Cleanup callback registered with compiler_defer: frees the CGTarget
+ * passed as `arg`. */
static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
@@ -319,69 +1888,69 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->obj = o;
t->mc = m;
- t->func_begin = xx_func_begin;
- t->func_end = xx_func_end;
-
- t->alloc_reg = xx_alloc_reg;
- t->free_reg = xx_free_reg;
- t->frame_slot = xx_frame_slot;
- t->param = xx_param;
- t->clobbers = xx_clobbers;
- t->spill_reg = xx_spill_reg;
- t->reload_reg = xx_reload_reg;
-
- t->label_new = xx_label_new;
- t->label_place = xx_label_place;
- t->jump = xx_jump;
- t->cmp_branch = xx_cmp_branch;
-
- t->scope_begin = xx_scope_begin;
- t->scope_else = xx_scope_else;
- t->scope_end = xx_scope_end;
- t->break_to = xx_break_to;
- t->continue_to = xx_continue_to;
-
- t->load_imm = xx_load_imm;
- t->load_const = xx_load_const;
- t->copy = xx_copy;
- t->load = xx_load;
- t->store = xx_store;
- t->addr_of = xx_addr_of;
- t->tls_addr_of = xx_tls_addr_of;
- t->copy_bytes = xx_copy_bytes;
- t->set_bytes = xx_set_bytes;
- t->bitfield_load = xx_bitfield_load;
- t->bitfield_store = xx_bitfield_store;
-
- t->binop = xx_binop;
- t->unop = xx_unop;
- t->cmp = xx_cmp;
- t->convert = xx_convert;
-
- t->call = xx_call;
- t->ret = xx_ret;
-
- t->alloca_ = xx_alloca_;
- t->va_start_ = xx_va_start_;
- t->va_arg_ = xx_va_arg_;
- t->va_end_ = xx_va_end_;
- t->va_copy_ = xx_va_copy_;
+ t->func_begin = x_func_begin;
+ t->func_end = x_func_end;
+
+ t->alloc_reg = x_alloc_reg;
+ t->free_reg = x_free_reg;
+ t->frame_slot = x_frame_slot;
+ t->param = x_param;
+ t->clobbers = x_clobbers;
+ t->spill_reg = x_spill_reg;
+ t->reload_reg = x_reload_reg;
+
+ t->label_new = x_label_new;
+ t->label_place = x_label_place;
+ t->jump = x_jump;
+ t->cmp_branch = x_cmp_branch;
+
+ t->scope_begin = x_scope_begin;
+ t->scope_else = x_scope_else;
+ t->scope_end = x_scope_end;
+ t->break_to = x_break_to;
+ t->continue_to = x_continue_to;
+
+ t->load_imm = x_load_imm;
+ t->load_const = x_load_const;
+ t->copy = x_copy;
+ t->load = x_load;
+ t->store = x_store;
+ t->addr_of = x_addr_of;
+ t->tls_addr_of = x_tls_addr_of;
+ t->copy_bytes = x_copy_bytes;
+ t->set_bytes = x_set_bytes;
+ t->bitfield_load = x_bitfield_load;
+ t->bitfield_store = x_bitfield_store;
+
+ t->binop = x_binop;
+ t->unop = x_unop;
+ t->cmp = x_cmp;
+ t->convert = x_convert;
+
+ t->call = x_call;
+ t->ret = x_ret;
+
+ t->alloca_ = x_alloca_;
+ t->va_start_ = x_va_start_;
+ t->va_arg_ = x_va_arg_;
+ t->va_end_ = x_va_end_;
+ t->va_copy_ = x_va_copy_;
t->setjmp_ = NULL;
t->longjmp_ = NULL;
- t->atomic_load = xx_atomic_load;
- t->atomic_store = xx_atomic_store;
- t->atomic_rmw = xx_atomic_rmw;
- t->atomic_cas = xx_atomic_cas;
- t->fence = xx_fence;
+ t->atomic_load = x_atomic_load;
+ t->atomic_store = x_atomic_store;
+ t->atomic_rmw = x_atomic_rmw;
+ t->atomic_cas = x_atomic_cas;
+ t->fence = x_fence;
- t->intrinsic = xx_intrinsic;
- t->asm_block = xx_asm_block;
+ t->intrinsic = x_intrinsic;
+ t->asm_block = x_asm_block;
- t->set_loc = xx_set_loc;
- t->finalize = xx_finalize;
- t->destroy = xx_destroy;
+ t->set_loc = x_set_loc;
+ t->finalize = x_finalize;
+ t->destroy = x_destroy;
compiler_defer(c, cgt_cleanup, t);
return t;
diff --git a/src/arch/x64_isa.h b/src/arch/x64_isa.h
@@ -0,0 +1,75 @@
+/* x86_64 ISA helpers used by arch/x64.c.
+ *
+ * Only the constants here. Instruction encoders live in arch/x64.c
+ * because they're variable length and depend on the MCEmitter byte
+ * stream (REX prefix, ModR/M, SIB, displacement). The disassembler
+ * doesn't share these yet; if/when it does, a parallel x64_isa.c will
+ * host decode tables. */
+
+#ifndef CFREE_X64_ISA_H
+#define CFREE_X64_ISA_H
+
+#include "core/core.h"
+
+/* ---- GPR numbering: hardware encoding 0..15 (the 3-bit ModR/M / SIB
+ * register field plus the REX extension bit as bit 3).
+ *
+ * This is NOT the DWARF / psABI register numbering: DWARF orders the low
+ * eight as rax, rdx, rcx, rbx, rsi, rdi, rbp, rsp, so only rax, rbx and
+ * r8..r15 share a number in both schemes. Do not use these values as
+ * DWARF register numbers. ---- */
+enum {
+ X64_RAX = 0,
+ X64_RCX = 1,
+ X64_RDX = 2,
+ X64_RBX = 3,
+ X64_RSP = 4,
+ X64_RBP = 5,
+ X64_RSI = 6,
+ X64_RDI = 7,
+ X64_R8 = 8,
+ X64_R9 = 9,
+ X64_R10 = 10,
+ X64_R11 = 11,
+ X64_R12 = 12,
+ X64_R13 = 13,
+ X64_R14 = 14,
+ X64_R15 = 15,
+};
+
+/* SSE register numbering — xmm0..xmm15 share encoding with r0..r15: the
+ * low three bits go in the ModR/M field and bit 3 in the REX prefix.
+ * All sixteen registers are listed so the enum matches that range. */
+enum {
+  X64_XMM0 = 0,
+  X64_XMM1 = 1,
+  X64_XMM2 = 2,
+  X64_XMM3 = 3,
+  X64_XMM4 = 4,
+  X64_XMM5 = 5,
+  X64_XMM6 = 6,
+  X64_XMM7 = 7,
+  X64_XMM8 = 8,
+  X64_XMM9 = 9,
+  X64_XMM10 = 10,
+  X64_XMM11 = 11,
+  X64_XMM12 = 12,
+  X64_XMM13 = 13,
+  X64_XMM14 = 14,
+  X64_XMM15 = 15,
+};
+
+/* Condition codes for Jcc / SETcc / CMOVcc. Encoded in the low nibble of
+ * the opcode. Codes come in pairs: flipping bit 0 negates the condition.
+ * The CMP_* names are the compare operations each code is noted as
+ * implementing after a `cmp a, b`. */
+enum {
+ X64_CC_O = 0x0, /* overflow / OF=1 */
+ X64_CC_NO = 0x1, /* no overflow / OF=0 */
+ X64_CC_B = 0x2, /* below / CF=1 → CMP_LT_U */
+ X64_CC_AE = 0x3, /* above-or-equal / CF=0 → CMP_GE_U */
+ X64_CC_E = 0x4, /* equal / ZF=1 → CMP_EQ */
+ X64_CC_NE = 0x5, /* not equal / ZF=0 → CMP_NE */
+ X64_CC_BE = 0x6, /* below-or-equal / CF=1 or ZF=1 → CMP_LE_U */
+ X64_CC_A = 0x7, /* above / CF=0 and ZF=0 → CMP_GT_U */
+ X64_CC_S = 0x8, /* sign / SF=1 */
+ X64_CC_NS = 0x9, /* no sign / SF=0 */
+ X64_CC_P = 0xA, /* parity / PF=1 */
+ X64_CC_NP = 0xB, /* no parity / PF=0 */
+ X64_CC_L = 0xC, /* less (signed) / SF!=OF → CMP_LT_S */
+ X64_CC_GE = 0xD, /* greater-or-equal (signed) / SF=OF → CMP_GE_S */
+ X64_CC_LE = 0xE, /* less-or-equal (signed) / ZF=1 or SF!=OF → CMP_LE_S */
+ X64_CC_G = 0xF, /* greater (signed) / ZF=0 and SF=OF → CMP_GT_S */
+};
+
+/* REX prefix is 0x40 | W<<3 | R<<2 | X<<1 | B.
+ *   W selects a 64-bit operand size;
+ *   R extends the ModR/M reg field;
+ *   X extends the SIB index field;
+ *   B extends the ModR/M r/m field, the SIB base, or an opcode-embedded
+ *     register.
+ * Each extension bit supplies bit 3 of a 4-bit register number. */
+#define X64_REX_BASE 0x40u
+#define X64_REX_W 0x08u
+#define X64_REX_R 0x04u
+#define X64_REX_X 0x02u
+#define X64_REX_B 0x01u
+
+#endif