kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b871959ddf3e4188f507041b2f9f7181d2662750
parent 42ce19068af007468976b1b1cfcbfbde2594abc5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 15:15:37 -0700

test/cg: register Groups J, K, L (varargs, atomics, intrinsics)

Sets the test contract for the next round of CGTarget surface area:
varargs (`va_start_`/`va_arg_`/`va_end_`/`va_copy_`), atomics (load,
store, every `AtomicOp` rmw, cas success/failure, fence), and
intrinsics (popcount/ctz/clz/bswap, mem*, hint kinds, checked arith).
Cases drive the live `TargetABI` and CGTarget interfaces; each fails at
emit-time with the backend's existing "not implemented" diagnostic
until the aa64 lowerings land.

Diffstat:
Mtest/cg/CORPUS.md | 86++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mtest/cg/harness/cases.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/cg/harness/cases_j.c | 584+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/cg/harness/cases_k.c | 209+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/cg/harness/cases_l.c | 416+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/cg/run.sh | 3+++
6 files changed, 1392 insertions(+), 3 deletions(-)

diff --git a/test/cg/CORPUS.md b/test/cg/CORPUS.md @@ -239,13 +239,93 @@ can deref). | `i09_alloca_preserves_locals` | · | named `int` locals before+after alloca; both readable post-alloca | 42 | | `i10_alloca_after_named_local`| · | alloca after a fixed local — frame layout must keep both addressable | 42 | +## Group J — varargs + +Drives `va_start_`, `va_arg_`, `va_end_`, `va_copy_` on `CGTarget` and +the ABI's vararg classification (`abi_va_list_type` + the +`vararg_*_offset` fields on `ABIFuncInfo`). Each case pairs a variadic +helper (`int f(int n, ...)`) with a `test_main` caller; the helper +allocates an `ap` of `abi_va_list_type` size in a local slot and passes +its address to `va_start_`/`va_arg_`. AArch64 PCS routes int and FP var +args through separate save areas, so spill cases exist for each. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `j01_va_int_sum_3` | · | `int sum(int n, ...)`; `sum(3, 1, 2, 3)` (basic va_start/va_arg/va_end) | 6 | +| `j02_va_zero_args` | · | `sum(0)` — va_start/va_end with zero va_arg calls | 0 | +| `j03_va_int_spill` | · | `sum(10, 1..10)` — 10 var ints (>7 in GPR save area; rest spill) | 55 | +| `j04_va_int64` | · | `sum_ll(2, 21LL, 21LL)` — i64 var args; low 32 of sum | 42 | +| `j05_va_double_sum` | · | `int sumd(int n, ...){ftoi_s of fp accumulator}`; `sumd(3, 1.5, 2.0, 3.5)` | 7 | +| `j06_va_double_spill` | · | `sumd(9, 0.5×9)` — exhaust FP save area; last spills | 4 | +| `j07_va_mixed_int_dbl` | · | `int f(int n, int, double, int, double)`; sum truncated to int | 42 | +| `j08_va_copy` | · | `va_copy(b, a)`; consume first arg from each — equal halves of `42` | 42 | +| `j09_va_two_fixed` | · | `int f(int a, int b, ...) { return a+b+va_arg(); }` — second fixed slot | 42 | + +## Group K — atomics + +Exercises `atomic_load`, `atomic_store`, `atomic_rmw` (every `AtomicOp` +kind), `atomic_cas` (success and failure paths), and `fence`. 
Each case +stores into an `FSF_ADDR_TAKEN` i32/i64 local, performs one atomic op +via the helper's address operand, then reads back via plain load to +verify the post-state. `MemOrder` is varied across cases so the backend's +lowering is exercised under several orderings. A successful CAS +returns the prior value; failure leaves memory unchanged. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `k01_atomic_load_relaxed` | · | `int x=42; r=atomic_load(&x, RELAXED); return r;` | 42 | +| `k02_atomic_store_load_acq` | · | `atomic_store(&x, 42, RELEASE); r=atomic_load(&x, ACQUIRE);` | 42 | +| `k03_atomic_load_seq_cst` | · | `atomic_load(&x, SEQ_CST)` — full barrier ordering | 42 | +| `k04_atomic_rmw_add` | · | `x=40; prior=rmw(ADD,&x,2,SEQ_CST); return atomic_load(&x);` post-state | 42 | +| `k05_atomic_rmw_xchg` | · | `x=99; rmw(XCHG,&x,42); return load(&x);` | 42 | +| `k06_atomic_rmw_and` | · | `x=0xFF; rmw(AND,&x,0x2A); return load(&x);` | 42 | +| `k07_atomic_rmw_or` | · | `x=0x20; rmw(OR,&x,0x0A); return load(&x);` | 42 | +| `k08_atomic_rmw_xor` | · | `x=0xFF; rmw(XOR,&x,0xD5); return load(&x);` (= 0x2A) | 42 | +| `k09_atomic_rmw_sub` | · | `x=44; rmw(SUB,&x,2); return load(&x);` | 42 | +| `k10_atomic_rmw_nand` | · | `x=0xFF; rmw(NAND,&x,0xD5);` post-state low 8 = `~(0xFF&0xD5)&0xFF = 0x2A` | 42 | +| `k11_atomic_cas_success` | · | `x=10; cas(&x,exp=10,des=42)→ok=1;` post-load | 42 | +| `k12_atomic_cas_failure` | · | `x=10; cas(&x,exp=99,des=42)→ok=0;` post-load (unchanged) | 10 | +| `k13_atomic_load_i64` | · | i64 atomic load of `0x1_0000_002A`; low 8 | 42 | +| `k14_atomic_rmw_prior` | · | return `prior` from `rmw(ADD,&x=40,2)` (not post-state) → 40 | 40 | +| `k15_fence_seq_cst` | · | `fence(SEQ_CST)` between two plain stores+loads; no observable race | 42 | + +## Group L — intrinsics + +Drives `CGTarget.intrinsic` across every `IntrinKind` group. Bit ops +return their result in a single REG dst. 
`MEMCPY`/`MEMMOVE`/`MEMSET` +take three address/byte/n args and write through memory. Hint kinds +(`PREFETCH`, `EXPECT`, `UNREACHABLE`, `TRAP`, `ASSUME_ALIGNED`) are +emitted on a path the test then steps over; the oracle is the post-hint +control flow. Checked-arith intrinsics return `(result, overflow_flag)` +in two REG dsts; cases observe each independently. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `l01_popcount_u32` | · | `popcount(0x000000FF) → 8` | 8 | +| `l02_popcount_u64` | · | `popcount((u64)-1) → 64` | 64 | +| `l03_ctz_u32` | · | `ctz(0x80) → 7` | 7 | +| `l04_clz_u32` | · | `clz(0x000000FF) → 24` (32-bit) | 24 | +| `l05_bswap16` | · | `bswap16(0x1234) → 0x3412`; low 8 | 18 | +| `l06_bswap32` | · | `bswap32(0x11223344) → 0x44332211`; low 8 | 17 | +| `l07_bswap64` | · | `bswap64(0x1122334455667788) → 0x8877665544332211`; low 8 | 17 | +| `l08_memcpy_4` | · | i32 src=42; `memcpy(&dst,&src,4)`; return dst | 42 | +| `l09_memmove_overlap` | · | `int a[5]={1,2,3,4,5}; memmove(a+1,a,16); return a[4];` (overlap-safe) | 4 | +| `l10_memset_zero` | · | `int b[4]; memset(b,0,16); return b[2];` | 0 | +| `l11_memset_ff` | · | `int b; memset(&b,0xFF,4); return b;` low 8 | 255 | +| `l12_expect_taken` | · | `if (__builtin_expect(x==1,1)) return 42;` with `x=1` | 42 | +| `l13_unreachable_live` | · | `if (x) return 42; else __builtin_unreachable();` with `x=1` | 42 | +| `l14_trap_live` | · | `if (x) return 42; else __builtin_trap();` with `x=1` — trap path unreached | 42 | +| `l15_prefetch_noop` | · | `__builtin_prefetch(p); *p = 42; return *p;` — hint must not corrupt p | 42 | +| `l16_assume_aligned` | · | `p = assume_aligned(p,8); *p=42; return *p;` — hint must round-trip p | 42 | +| `l17_add_overflow_no` | · | `add_overflow(20,22,&r) → ovf=0`; return `r` | 42 | +| `l18_add_overflow_yes` | · | `add_overflow(INT_MAX,1,&r) → ovf=1`; return `ovf` | 1 | +| `l19_sub_overflow_yes` | · | `sub_overflow(INT_MIN,1,&r) → ovf=1`; return `ovf` | 1 | +| 
`l20_mul_overflow_no` | · | `mul_overflow(6,7,&r) → ovf=0`; return `r` | 42 | + ## Deferred groups | Group | Theme | |---|---| -| J | varargs | -| K | atomics | -| L | intrinsics | | M | inline asm | | N | TLS | | O | sections + globals | diff --git a/test/cg/harness/cases.c b/test/cg/harness/cases.c @@ -139,6 +139,53 @@ void build_i08_vla_param_sum(CgTestCtx*); void build_i09_alloca_preserves_locals(CgTestCtx*); void build_i10_alloca_after_named_local(CgTestCtx*); +void build_j01_va_int_sum_3(CgTestCtx*); +void build_j02_va_zero_args(CgTestCtx*); +void build_j03_va_int_spill(CgTestCtx*); +void build_j04_va_int64(CgTestCtx*); +void build_j05_va_double_sum(CgTestCtx*); +void build_j06_va_double_spill(CgTestCtx*); +void build_j07_va_mixed_int_dbl(CgTestCtx*); +void build_j08_va_copy(CgTestCtx*); +void build_j09_va_two_fixed(CgTestCtx*); + +void build_k01_atomic_load_relaxed(CgTestCtx*); +void build_k02_atomic_store_load_acq(CgTestCtx*); +void build_k03_atomic_load_seq_cst(CgTestCtx*); +void build_k04_atomic_rmw_add(CgTestCtx*); +void build_k05_atomic_rmw_xchg(CgTestCtx*); +void build_k06_atomic_rmw_and(CgTestCtx*); +void build_k07_atomic_rmw_or(CgTestCtx*); +void build_k08_atomic_rmw_xor(CgTestCtx*); +void build_k09_atomic_rmw_sub(CgTestCtx*); +void build_k10_atomic_rmw_nand(CgTestCtx*); +void build_k11_atomic_cas_success(CgTestCtx*); +void build_k12_atomic_cas_failure(CgTestCtx*); +void build_k13_atomic_load_i64(CgTestCtx*); +void build_k14_atomic_rmw_prior(CgTestCtx*); +void build_k15_fence_seq_cst(CgTestCtx*); + +void build_l01_popcount_u32(CgTestCtx*); +void build_l02_popcount_u64(CgTestCtx*); +void build_l03_ctz_u32(CgTestCtx*); +void build_l04_clz_u32(CgTestCtx*); +void build_l05_bswap16(CgTestCtx*); +void build_l06_bswap32(CgTestCtx*); +void build_l07_bswap64(CgTestCtx*); +void build_l08_memcpy_4(CgTestCtx*); +void build_l09_memmove_overlap(CgTestCtx*); +void build_l10_memset_zero(CgTestCtx*); +void build_l11_memset_ff(CgTestCtx*); +void 
build_l12_expect_taken(CgTestCtx*); +void build_l13_unreachable_live(CgTestCtx*); +void build_l14_trap_live(CgTestCtx*); +void build_l15_prefetch_noop(CgTestCtx*); +void build_l16_assume_aligned(CgTestCtx*); +void build_l17_add_overflow_no(CgTestCtx*); +void build_l18_add_overflow_yes(CgTestCtx*); +void build_l19_sub_overflow_yes(CgTestCtx*); +void build_l20_mul_overflow_no(CgTestCtx*); + /* ---- registry ---- */ const CgCase cg_cases[] = { @@ -274,6 +321,56 @@ const CgCase cg_cases[] = { { "i08_vla_param_sum", build_i08_vla_param_sum, 45, CG_CASE_DEFAULT }, { "i09_alloca_preserves_locals", build_i09_alloca_preserves_locals, 42, CG_CASE_DEFAULT }, { "i10_alloca_after_named_local", build_i10_alloca_after_named_local, 42, CG_CASE_DEFAULT }, + + /* Group J — varargs */ + { "j01_va_int_sum_3", build_j01_va_int_sum_3, 6, CG_CASE_DEFAULT }, + { "j02_va_zero_args", build_j02_va_zero_args, 0, CG_CASE_DEFAULT }, + { "j03_va_int_spill", build_j03_va_int_spill, 55, CG_CASE_DEFAULT }, + { "j04_va_int64", build_j04_va_int64, 42, CG_CASE_DEFAULT }, + { "j05_va_double_sum", build_j05_va_double_sum, 7, CG_CASE_DEFAULT }, + { "j06_va_double_spill", build_j06_va_double_spill, 4, CG_CASE_DEFAULT }, + { "j07_va_mixed_int_dbl", build_j07_va_mixed_int_dbl, 42, CG_CASE_DEFAULT }, + { "j08_va_copy", build_j08_va_copy, 42, CG_CASE_DEFAULT }, + { "j09_va_two_fixed", build_j09_va_two_fixed, 42, CG_CASE_DEFAULT }, + + /* Group K — atomics */ + { "k01_atomic_load_relaxed", build_k01_atomic_load_relaxed, 42, CG_CASE_DEFAULT }, + { "k02_atomic_store_load_acq", build_k02_atomic_store_load_acq, 42, CG_CASE_DEFAULT }, + { "k03_atomic_load_seq_cst", build_k03_atomic_load_seq_cst, 42, CG_CASE_DEFAULT }, + { "k04_atomic_rmw_add", build_k04_atomic_rmw_add, 42, CG_CASE_DEFAULT }, + { "k05_atomic_rmw_xchg", build_k05_atomic_rmw_xchg, 42, CG_CASE_DEFAULT }, + { "k06_atomic_rmw_and", build_k06_atomic_rmw_and, 42, CG_CASE_DEFAULT }, + { "k07_atomic_rmw_or", build_k07_atomic_rmw_or, 42, CG_CASE_DEFAULT }, + 
{ "k08_atomic_rmw_xor", build_k08_atomic_rmw_xor, 42, CG_CASE_DEFAULT }, + { "k09_atomic_rmw_sub", build_k09_atomic_rmw_sub, 42, CG_CASE_DEFAULT }, + { "k10_atomic_rmw_nand", build_k10_atomic_rmw_nand, 42, CG_CASE_DEFAULT }, + { "k11_atomic_cas_success", build_k11_atomic_cas_success, 42, CG_CASE_DEFAULT }, + { "k12_atomic_cas_failure", build_k12_atomic_cas_failure, 10, CG_CASE_DEFAULT }, + { "k13_atomic_load_i64", build_k13_atomic_load_i64, 42, CG_CASE_DEFAULT }, + { "k14_atomic_rmw_prior", build_k14_atomic_rmw_prior, 40, CG_CASE_DEFAULT }, + { "k15_fence_seq_cst", build_k15_fence_seq_cst, 42, CG_CASE_DEFAULT }, + + /* Group L — intrinsics */ + { "l01_popcount_u32", build_l01_popcount_u32, 8, CG_CASE_DEFAULT }, + { "l02_popcount_u64", build_l02_popcount_u64, 64, CG_CASE_DEFAULT }, + { "l03_ctz_u32", build_l03_ctz_u32, 7, CG_CASE_DEFAULT }, + { "l04_clz_u32", build_l04_clz_u32, 24, CG_CASE_DEFAULT }, + { "l05_bswap16", build_l05_bswap16, 18, CG_CASE_DEFAULT }, + { "l06_bswap32", build_l06_bswap32, 17, CG_CASE_DEFAULT }, + { "l07_bswap64", build_l07_bswap64, 17, CG_CASE_DEFAULT }, + { "l08_memcpy_4", build_l08_memcpy_4, 42, CG_CASE_DEFAULT }, + { "l09_memmove_overlap", build_l09_memmove_overlap, 4, CG_CASE_DEFAULT }, + { "l10_memset_zero", build_l10_memset_zero, 0, CG_CASE_DEFAULT }, + { "l11_memset_ff", build_l11_memset_ff, 255, CG_CASE_DEFAULT }, + { "l12_expect_taken", build_l12_expect_taken, 42, CG_CASE_DEFAULT }, + { "l13_unreachable_live", build_l13_unreachable_live, 42, CG_CASE_DEFAULT }, + { "l14_trap_live", build_l14_trap_live, 42, CG_CASE_DEFAULT }, + { "l15_prefetch_noop", build_l15_prefetch_noop, 42, CG_CASE_DEFAULT }, + { "l16_assume_aligned", build_l16_assume_aligned, 42, CG_CASE_DEFAULT }, + { "l17_add_overflow_no", build_l17_add_overflow_no, 42, CG_CASE_DEFAULT }, + { "l18_add_overflow_yes", build_l18_add_overflow_yes, 1, CG_CASE_DEFAULT }, + { "l19_sub_overflow_yes", build_l19_sub_overflow_yes, 1, CG_CASE_DEFAULT }, + { "l20_mul_overflow_no", 
build_l20_mul_overflow_no, 42, CG_CASE_DEFAULT }, }; const unsigned cg_cases_count = sizeof(cg_cases) / sizeof(cg_cases[0]); diff --git a/test/cg/harness/cases_j.c b/test/cg/harness/cases_j.c @@ -0,0 +1,584 @@ +/* Group J — varargs. + * See CORPUS.md for the case list and expected values. */ + +#include "cg_test.h" + +#include "core/pool.h" +#include "core/arena.h" + +#include <string.h> + +/* ============================================================ + * Group J: varargs + * + * Drives va_start_/va_arg_/va_end_/va_copy_ on CGTarget plus the ABI's + * variadic classification (abi_func_info on a type_func with variadic=1 + * carries vararg_gp_offset/vararg_fp_offset/vararg_overflow_offset). The + * standard cgtest_begin_func/cgtest_call helpers hardcode variadic=0 so + * this file mirrors the variadic-aware paths locally. + * + * Each test_main calls a variadic helper. The helper allocates an `ap` + * local of abi_va_list_type size (FSF_ADDR_TAKEN), invokes va_start_ + * with &ap, runs n va_arg_'s, calls va_end_, and returns the + * accumulator. + * ============================================================ */ + +/* ---- variadic-aware helpers ---- */ + +/* Mirrors cgtest_begin_func_at but builds fn_type with variadic=1. + * The caller passes the count of fixed (named) params; var args are + * appended at the call site. 
*/ +static CgTestFn* j_begin_va_func(CgTestCtx* ctx, const char* name, + const Type* ret_ty, + const Type* const* fixed_param_types, + u32 nfixed) +{ + CgTestFn* tf = arena_new(ctx->c->tu, CgTestFn); + memset(tf, 0, sizeof *tf); + tf->ctx = ctx; + tf->ret_ty = ret_ty; + + const Type** ptypes = NULL; + if (nfixed) { + ptypes = arena_array(ctx->c->tu, const Type*, nfixed); + for (u32 i = 0; i < nfixed; ++i) ptypes[i] = fixed_param_types[i]; + } + tf->fn_type = type_func(ctx->pool, ret_ty, ptypes, (u16)nfixed, 1); + tf->abi_info = abi_func_info(ctx->c->abi, tf->fn_type); + tf->sym = cgtest_decl_func(ctx, name); + + CGParamDesc* pds = NULL; + if (nfixed) { + tf->params = arena_array(ctx->c->tu, CgTestParam, nfixed); + memset(tf->params, 0, sizeof(CgTestParam) * nfixed); + pds = arena_array(ctx->c->tu, CGParamDesc, nfixed); + memset(pds, 0, sizeof(CGParamDesc) * nfixed); + for (u32 i = 0; i < nfixed; ++i) { + tf->params[i].type = ptypes[i]; + tf->params[i].abi = &tf->abi_info->params[i]; + pds[i].index = i; + pds[i].type = ptypes[i]; + pds[i].slot = FRAME_SLOT_NONE; + pds[i].abi = &tf->abi_info->params[i]; + pds[i].incoming = tf->abi_info->params[i].parts; + pds[i].nincoming= tf->abi_info->params[i].nparts; + } + } + tf->nparams = nfixed; + + tf->fd.sym = tf->sym; + tf->fd.text_section_id = ctx->text_sec; + tf->fd.group_id = OBJ_GROUP_NONE; + tf->fd.fn_type = tf->fn_type; + tf->fd.abi = tf->abi_info; + tf->fd.params = pds; + tf->fd.nparams = nfixed; + + ctx->target->func_begin(ctx->target, &tf->fd); + + for (u32 i = 0; i < nfixed; ++i) { + FrameSlotDesc fsd = { + .type = ptypes[i], + .size = abi_sizeof (ctx->c->abi, ptypes[i]), + .align = abi_alignof(ctx->c->abi, ptypes[i]), + .kind = FS_PARAM, + .flags = FSF_NONE, + }; + FrameSlot s = ctx->target->frame_slot(ctx->target, &fsd); + tf->params[i].slot = s; + pds[i].slot = s; + ctx->target->param(ctx->target, &pds[i]); + } + return tf; +} + +/* Direct call to a variadic callee. 
fn_type built with variadic=1; the + * abi info reports per-arg classification including the ABI's + * vararg-vs-fixed split. */ +static void j_call_va(CgTestFn* caller, ObjSymId callee_sym, + const Type* ret_ty, + const Type* const* arg_types, + const CgTestArg* args, + u32 nargs, u32 nfixed, + Operand ret_storage) +{ + CgTestCtx* ctx = caller->ctx; + const Type** ptypes = NULL; + if (nargs) { + ptypes = arena_array(ctx->c->tu, const Type*, nargs); + for (u32 i = 0; i < nargs; ++i) ptypes[i] = arg_types[i]; + } + /* type_func with variadic=1; nparams is the fixed count. nfixed must + * match the helper's named-param count even though we pass nargs + * Type pointers — abi_func_info reads its variadic flag from the + * Type and handles per-arg classification via ABIFuncInfo.params[]. */ + const Type* fn_ty = type_func(ctx->pool, ret_ty, ptypes, (u16)nfixed, 1); + const ABIFuncInfo* info = abi_func_info(ctx->c->abi, fn_ty); + + CGABIValue* avs = NULL; + if (nargs) { + avs = arena_array(ctx->c->tu, CGABIValue, nargs); + memset(avs, 0, sizeof(CGABIValue) * nargs); + for (u32 i = 0; i < nargs; ++i) { + CGABIValue* av = &avs[i]; + av->type = arg_types[i]; + av->abi = (i < info->nparams) ? &info->params[i] : NULL; + switch (args[i].kind) { + case CGT_ARG_IMM: + av->storage = IMM_op(args[i].v.imm, arg_types[i]); break; + case CGT_ARG_REG: + av->storage = REG_op(args[i].v.reg, arg_types[i]); break; + default: + av->storage = LOCAL_op(args[i].v.slot, arg_types[i]); break; + } + } + } + + CGCallDesc desc; memset(&desc, 0, sizeof desc); + desc.fn_type = fn_ty; + desc.abi = info; + desc.callee = GLOBAL_op(callee_sym, 0); + desc.args = avs; + desc.nargs = nargs; + desc.ret.type = ret_ty; + desc.ret.abi = &info->ret; + desc.ret.storage = ret_storage; + ctx->target->call(ctx->target, &desc); +} + +/* ---- shared helpers ---- */ + +/* Allocate an ap local of abi_va_list_type and addr_of into a register. 
*/ +typedef struct VaApRegs { FrameSlot slot; Reg ap_addr; const Type* ap_ty; } VaApRegs; + +static VaApRegs j_alloc_ap(CgTestFn* tf) +{ + CgTestCtx* ctx = tf->ctx; + const Type* ap_ty = abi_va_list_type(ctx->c->abi, ctx->pool); + const Type* ap_pty = T_ptr(ctx, ap_ty); + FrameSlot ap_slot = cgtest_local(tf, ap_ty, FSF_ADDR_TAKEN); + Reg ap_addr = ctx->target->alloc_reg(ctx->target, RC_INT, ap_pty); + ctx->target->addr_of(ctx->target, REG_op(ap_addr, ap_pty), + LOCAL_op(ap_slot, ap_ty)); + return (VaApRegs){ ap_slot, ap_addr, ap_ty }; +} + +/* Build helper: int sum(int n, ...) { va_start(ap); int s=0; for(i=0;i<n;i++) + * s += va_arg(ap, T); va_end(ap); return s; } — T is the va_arg type. */ +static ObjSymId j_build_int_sum_helper(CgTestCtx* ctx, const char* name, + const Type* va_ty, const Type* acc_ty) +{ + const Type* I32 = T_i32(ctx); + const Type* params[] = { I32 }; + CgTestFn* tf = j_begin_va_func(ctx, name, acc_ty, params, 1); + CGTarget* T = ctx->target; + + Reg n = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(n, I32), cgtest_param_slot(tf, 0), I32); + + VaApRegs ap = j_alloc_ap(tf); + T->va_start_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + + /* Accumulator starts at 0. 
*/ + FrameSlot ss = cgtest_local(tf, acc_ty, FSF_NONE); + cgtest_store_local(tf, ss, IMM_op(0, acc_ty), acc_ty); + FrameSlot is = cgtest_local(tf, I32, FSF_NONE); + cgtest_store_local(tf, is, IMM_op(0, I32), I32); + + Label top = T->label_new(T); + Label end = T->label_new(T); + T->label_place(T, top); + Reg ir = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(ir, I32), is, I32); + T->cmp_branch(T, CMP_GE_S, REG_op(ir, I32), REG_op(n, I32), end); + + Reg v = T->alloc_reg(T, RC_INT, va_ty); + T->va_arg_(T, REG_op(v, va_ty), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), va_ty); + + Reg sr = T->alloc_reg(T, RC_INT, acc_ty); + cgtest_load_local(tf, REG_op(sr, acc_ty), ss, acc_ty); + T->binop(T, BO_IADD, REG_op(sr, acc_ty), REG_op(sr, acc_ty), REG_op(v, va_ty)); + cgtest_store_local(tf, ss, REG_op(sr, acc_ty), acc_ty); + + T->binop(T, BO_IADD, REG_op(ir, I32), REG_op(ir, I32), IMM_op(1, I32)); + cgtest_store_local(tf, is, REG_op(ir, I32), I32); + T->jump(T, top); + T->label_place(T, end); + + T->va_end_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + Reg out = T->alloc_reg(T, RC_INT, acc_ty); + cgtest_load_local(tf, REG_op(out, acc_ty), ss, acc_ty); + cgtest_ret_reg(tf, out, acc_ty); + cgtest_end(tf); + return tf->sym; +} + +/* Build helper: int sumd(int n, ...) — fp accumulator, ftoi_s before return. */ +static ObjSymId j_build_double_sum_helper(CgTestCtx* ctx, const char* name) +{ + const Type* I32 = T_i32(ctx); + const Type* F64 = T_f64(ctx); + const Type* params[] = { I32 }; + CgTestFn* tf = j_begin_va_func(ctx, name, I32, params, 1); + CGTarget* T = ctx->target; + + Reg n = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(n, I32), cgtest_param_slot(tf, 0), I32); + + VaApRegs ap = j_alloc_ap(tf); + T->va_start_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + + FrameSlot ss = cgtest_local(tf, F64, FSF_NONE); + Reg zero = T->alloc_reg(T, RC_FP, F64); + /* Materialize 0.0 by converting integer 0 (simpler than a u64 bitcast). 
*/ + Reg iz = T->alloc_reg(T, RC_INT, I32); + T->load_imm(T, REG_op(iz, I32), 0); + T->convert(T, CV_ITOF_S, REG_op(zero, F64), REG_op(iz, I32)); + cgtest_store_local(tf, ss, REG_op(zero, F64), F64); + + FrameSlot is = cgtest_local(tf, I32, FSF_NONE); + cgtest_store_local(tf, is, IMM_op(0, I32), I32); + + Label top = T->label_new(T); + Label end = T->label_new(T); + T->label_place(T, top); + Reg ir = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(ir, I32), is, I32); + T->cmp_branch(T, CMP_GE_S, REG_op(ir, I32), REG_op(n, I32), end); + + Reg v = T->alloc_reg(T, RC_FP, F64); + T->va_arg_(T, REG_op(v, F64), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), F64); + + Reg sr = T->alloc_reg(T, RC_FP, F64); + cgtest_load_local(tf, REG_op(sr, F64), ss, F64); + T->binop(T, BO_FADD, REG_op(sr, F64), REG_op(sr, F64), REG_op(v, F64)); + cgtest_store_local(tf, ss, REG_op(sr, F64), F64); + + T->binop(T, BO_IADD, REG_op(ir, I32), REG_op(ir, I32), IMM_op(1, I32)); + cgtest_store_local(tf, is, REG_op(ir, I32), I32); + T->jump(T, top); + T->label_place(T, end); + + T->va_end_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + Reg final = T->alloc_reg(T, RC_FP, F64); + cgtest_load_local(tf, REG_op(final, F64), ss, F64); + Reg ir32 = T->alloc_reg(T, RC_INT, I32); + T->convert(T, CV_FTOI_S, REG_op(ir32, I32), REG_op(final, F64)); + cgtest_ret_reg(tf, ir32, I32); + cgtest_end(tf); + return tf->sym; +} + +/* ---- cases ---- */ + +/* j01_va_int_sum_3 — sum(3, 1, 2, 3) → 6. 
*/ +void build_j01_va_int_sum_3(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + ObjSymId sum = j_build_int_sum_helper(ctx, "j01_sum", I32, I32); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + const Type* atypes[] = { I32, I32, I32, I32 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 3 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 1 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 2 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 3 }, + }; + j_call_va(tf, sum, I32, atypes, args, 4, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* j02_va_zero_args — sum(0); va_start/va_end with no va_arg → 0. */ +void build_j02_va_zero_args(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + ObjSymId sum = j_build_int_sum_helper(ctx, "j02_sum", I32, I32); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + const Type* atypes[] = { I32 }; + CgTestArg args[] = { { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 0 } }; + j_call_va(tf, sum, I32, atypes, args, 1, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* j03_va_int_spill — sum(10, 1..10) → 55. Exhausts AArch64 GPR save area. 
*/ +void build_j03_va_int_spill(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + ObjSymId sum = j_build_int_sum_helper(ctx, "j03_sum", I32, I32); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + const Type* atypes[11] = { I32, I32, I32, I32, I32, I32, I32, I32, I32, I32, I32 }; + CgTestArg args[11]; + args[0] = (CgTestArg){ .kind = CGT_ARG_IMM, .type = I32, .v.imm = 10 }; + for (int i = 0; i < 10; ++i) { + args[i+1] = (CgTestArg){ .kind = CGT_ARG_IMM, .type = I32, .v.imm = i+1 }; + } + j_call_va(tf, sum, I32, atypes, args, 11, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* j04_va_int64 — sum_ll(2, 21LL, 21LL); low 32 of result → 42. */ +void build_j04_va_int64(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + ObjSymId sum = j_build_int_sum_helper(ctx, "j04_sum_ll", I64, I64); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + Reg r64 = T->alloc_reg(T, RC_INT, I64); + const Type* atypes[] = { I32, I64, I64 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 2 }, + { .kind = CGT_ARG_IMM, .type = I64, .v.imm = 21 }, + { .kind = CGT_ARG_IMM, .type = I64, .v.imm = 21 }, + }; + j_call_va(tf, sum, I64, atypes, args, 3, 1, REG_op(r64, I64)); + /* Truncate to i32. */ + Reg r32 = T->alloc_reg(T, RC_INT, I32); + T->convert(T, CV_TRUNC, REG_op(r32, I32), REG_op(r64, I64)); + cgtest_ret_reg(tf, r32, I32); + cgtest_end(tf); +} + +/* ---- helpers for fp + double-arg passing ---- */ + +/* Emit a load_const for a double-precision FP constant from raw little-endian + * bytes; returns the FP reg. 
*/ +static Reg j_load_f64(CgTestCtx* ctx, const u8* bytes_le8) +{ + const Type* F64 = T_f64(ctx); + Reg r = ctx->target->alloc_reg(ctx->target, RC_FP, F64); + ConstBytes cb = { .type = F64, .bytes = bytes_le8, .size = 8, .align = 8 }; + ctx->target->load_const(ctx->target, REG_op(r, F64), cb); + return r; +} + +/* j05_va_double_sum — sumd(3, 1.5, 2.0, 3.5) → 7. */ +void build_j05_va_double_sum(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* F64 = T_f64(ctx); + ObjSymId sumd = j_build_double_sum_helper(ctx, "j05_sumd"); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + /* 1.5, 2.0, 3.5 as little-endian double bytes. */ + static const u8 D15[8] = {0,0,0,0,0,0,0xF8,0x3F}; + static const u8 D20[8] = {0,0,0,0,0,0,0x00,0x40}; + static const u8 D35[8] = {0,0,0,0,0,0,0x0C,0x40}; + Reg r1 = j_load_f64(ctx, D15); + Reg r2 = j_load_f64(ctx, D20); + Reg r3 = j_load_f64(ctx, D35); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + + const Type* atypes[] = { I32, F64, F64, F64 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 3 }, + { .kind = CGT_ARG_REG, .type = F64, .v.reg = r1 }, + { .kind = CGT_ARG_REG, .type = F64, .v.reg = r2 }, + { .kind = CGT_ARG_REG, .type = F64, .v.reg = r3 }, + }; + j_call_va(tf, sumd, I32, atypes, args, 4, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* j06_va_double_spill — sumd(9, 0.5×9) → 4 (after ftoi_s of 4.5). 
*/ +void build_j06_va_double_spill(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* F64 = T_f64(ctx); + ObjSymId sumd = j_build_double_sum_helper(ctx, "j06_sumd"); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + static const u8 D05[8] = {0,0,0,0,0,0,0xE0,0x3F}; /* 0.5 */ + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + + const Type* atypes[10] = { I32, F64, F64, F64, F64, F64, F64, F64, F64, F64 }; + CgTestArg args[10]; + args[0] = (CgTestArg){ .kind = CGT_ARG_IMM, .type = I32, .v.imm = 9 }; + for (int i = 0; i < 9; ++i) { + Reg r = j_load_f64(ctx, D05); + args[i+1] = (CgTestArg){ .kind = CGT_ARG_REG, .type = F64, .v.reg = r }; + } + j_call_va(tf, sumd, I32, atypes, args, 10, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* helper for j07: int f(int n, int a, double b, int c, double d) — fixed n, + * then 4 var args of mixed kind. Body sums int+(int)b+int+(int)d. */ +static ObjSymId j_build_j07_helper(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* F64 = T_f64(ctx); + const Type* params[] = { I32 }; + CgTestFn* tf = j_begin_va_func(ctx, "j07_f", I32, params, 1); + CGTarget* T = ctx->target; + + VaApRegs ap = j_alloc_ap(tf); + T->va_start_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + + Reg a = T->alloc_reg(T, RC_INT, I32); + Reg c = T->alloc_reg(T, RC_INT, I32); + Reg b = T->alloc_reg(T, RC_FP, F64); + Reg d = T->alloc_reg(T, RC_FP, F64); + T->va_arg_(T, REG_op(a, I32), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), I32); + T->va_arg_(T, REG_op(b, F64), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), F64); + T->va_arg_(T, REG_op(c, I32), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), I32); + T->va_arg_(T, REG_op(d, F64), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), F64); + + Reg ib = T->alloc_reg(T, RC_INT, I32); + Reg id = T->alloc_reg(T, RC_INT, I32); + T->convert(T, CV_FTOI_S, REG_op(ib, I32), REG_op(b, F64)); + T->convert(T, CV_FTOI_S, REG_op(id, I32), REG_op(d, F64)); + Reg s = T->alloc_reg(T, 
RC_INT, I32); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(a, I32), REG_op(ib, I32)); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(s, I32), REG_op(c, I32)); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(s, I32), REG_op(id, I32)); + + T->va_end_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + cgtest_ret_reg(tf, s, I32); + cgtest_end(tf); + return tf->sym; +} + +/* j07_va_mixed_int_dbl — f(_, 10, 16.0, 8, 8.0); the fixed n is unused. + * Sum of the var args: 10 + (int)16.0 + 8 + (int)8.0 = 42. + */ +void build_j07_va_mixed_int_dbl(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* F64 = T_f64(ctx); + ObjSymId f = j_build_j07_helper(ctx); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + static const u8 D16[8] = {0,0,0,0,0,0,0x30,0x40}; /* 16.0 */ + static const u8 D08[8] = {0,0,0,0,0,0,0x20,0x40}; /* 8.0 */ + Reg b = j_load_f64(ctx, D16); + Reg d = j_load_f64(ctx, D08); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + + const Type* atypes[] = { I32, I32, F64, I32, F64 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 0 /* unused n */ }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 10 }, + { .kind = CGT_ARG_REG, .type = F64, .v.reg = b }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 8 }, + { .kind = CGT_ARG_REG, .type = F64, .v.reg = d }, + }; + j_call_va(tf, f, I32, atypes, args, 5, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* helper for j08: int f(int n, ...) { va_list a, b; va_start(a); va_copy(b,a); + * int x = va_arg(a, int); int y = va_arg(b, int); return x + y; } + * Both ap and bp see the same first var arg, so x == y. */ +static ObjSymId j_build_j08_helper(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* params[] = { I32 }; + CgTestFn* tf = j_begin_va_func(ctx, "j08_f", I32, params, 1); + CGTarget* T = ctx->target; + + /* Two va_list locals + their addresses. 
*/ + const Type* ap_ty = abi_va_list_type(ctx->c->abi, ctx->pool); + const Type* ap_pty = T_ptr(ctx, ap_ty); + FrameSlot ap = cgtest_local(tf, ap_ty, FSF_ADDR_TAKEN); + FrameSlot bp = cgtest_local(tf, ap_ty, FSF_ADDR_TAKEN); + Reg a_addr = T->alloc_reg(T, RC_INT, ap_pty); + Reg b_addr = T->alloc_reg(T, RC_INT, ap_pty); + T->addr_of(T, REG_op(a_addr, ap_pty), LOCAL_op(ap, ap_ty)); + T->addr_of(T, REG_op(b_addr, ap_pty), LOCAL_op(bp, ap_ty)); + + T->va_start_(T, REG_op(a_addr, ap_pty)); + T->va_copy_ (T, REG_op(b_addr, ap_pty), REG_op(a_addr, ap_pty)); + + Reg x = T->alloc_reg(T, RC_INT, I32); + Reg y = T->alloc_reg(T, RC_INT, I32); + T->va_arg_(T, REG_op(x, I32), REG_op(a_addr, ap_pty), I32); + T->va_arg_(T, REG_op(y, I32), REG_op(b_addr, ap_pty), I32); + + Reg s = T->alloc_reg(T, RC_INT, I32); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(x, I32), REG_op(y, I32)); + + T->va_end_(T, REG_op(a_addr, ap_pty)); + T->va_end_(T, REG_op(b_addr, ap_pty)); + cgtest_ret_reg(tf, s, I32); + cgtest_end(tf); + return tf->sym; +} + +/* j08_va_copy — f(_, 21) → 21+21 = 42 (both va_lists see arg 0). */ +void build_j08_va_copy(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + ObjSymId f = j_build_j08_helper(ctx); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + const Type* atypes[] = { I32, I32 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 0 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 21 }, + }; + j_call_va(tf, f, I32, atypes, args, 2, 1, REG_op(dst, I32)); + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} + +/* helper for j09: int f(int a, int b, ...) 
{ va_list ap; va_start(ap, b); + * int c = va_arg(ap, int); va_end(ap); return a + b + c; } */ +static ObjSymId j_build_j09_helper(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* params[] = { I32, I32 }; + CgTestFn* tf = j_begin_va_func(ctx, "j09_f", I32, params, 2); + CGTarget* T = ctx->target; + + Reg a = T->alloc_reg(T, RC_INT, I32); + Reg b = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(a, I32), cgtest_param_slot(tf, 0), I32); + cgtest_load_local(tf, REG_op(b, I32), cgtest_param_slot(tf, 1), I32); /* a, b = fixed parameters 0 and 1 */ + + VaApRegs ap = j_alloc_ap(tf); + T->va_start_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); + Reg c = T->alloc_reg(T, RC_INT, I32); + T->va_arg_(T, REG_op(c, I32), REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty)), I32); + T->va_end_(T, REG_op(ap.ap_addr, T_ptr(ctx, ap.ap_ty))); /* ap is closed before the sum; c already holds the var arg */ + + Reg s = T->alloc_reg(T, RC_INT, I32); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(a, I32), REG_op(b, I32)); + T->binop(T, BO_IADD, REG_op(s, I32), REG_op(s, I32), REG_op(c, I32)); /* s = a + b + c */ + cgtest_ret_reg(tf, s, I32); + cgtest_end(tf); + return tf->sym; +} + +/* j09_va_two_fixed — calls f(10, 15, 17): two fixed ints plus one var int; + * expected result 10 + 15 + 17 = 42. */ +void build_j09_va_two_fixed(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + ObjSymId f = j_build_j09_helper(ctx); + + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg dst = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + const Type* atypes[] = { I32, I32, I32 }; + CgTestArg args[] = { + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 10 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 15 }, + { .kind = CGT_ARG_IMM, .type = I32, .v.imm = 17 }, + }; + j_call_va(tf, f, I32, atypes, args, 3, 2, REG_op(dst, I32)); /* NOTE(review): 3, 2 reads as (total args, fixed args) — confirm against j_call_va */ + cgtest_ret_reg(tf, dst, I32); + cgtest_end(tf); +} diff --git a/test/cg/harness/cases_k.c b/test/cg/harness/cases_k.c @@ -0,0 +1,209 @@ +/* Group K — atomics. + * See CORPUS.md for the case list and expected values. 
*/ + +#include "cg_test.h" + +/* ============================================================ + * Group K: atomics + * + * Drives atomic_load / atomic_store / atomic_rmw / atomic_cas / fence + * on CGTarget across every AtomicOp and several MemOrders. Every case + * uses an FSF_ADDR_TAKEN i32 (or i64 for k13) local as the atomic + * object: a plain store sets the prior state and the atomic op is then + * dispatched against the address. The oracle is either a plain reload + * of the object (post-state cases) or the value the atomic op itself + * produced (load and rmw-prior cases). The MF_ATOMIC flag rides along + * the MemAccess so the backend can route to ldar/stlr-class encodings. + * ============================================================ */ + +/* Helper: build the standard prelude — a single addr-taken i32 local x + * pre-initialized to `init`, plus its address in a register. KCtx carries + * the function under construction (tf), the slot (x) and the register + * holding its address (p_addr). */ +typedef struct KCtx { CgTestFn* tf; FrameSlot x; Reg p_addr; } KCtx; + +static KCtx k_open_i32(CgTestCtx* ctx, i64 init) +{ + const Type* I32 = T_i32(ctx); + const Type* PI32 = T_ptr(ctx, I32); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + FrameSlot x = cgtest_local(tf, I32, FSF_ADDR_TAKEN); + cgtest_store_local(tf, x, IMM_op(init, I32), I32); /* plain (non-atomic) store of the initial value */ + Reg p = T->alloc_reg(T, RC_INT, PI32); + T->addr_of(T, REG_op(p, PI32), LOCAL_op(x, I32)); + return (KCtx){ tf, x, p }; +} + +/* MemAccess shared by the i32 cases: type i32, size 4, align 4, + * MF_ATOMIC set, alias kind ALIAS_LOCAL; all other fields zero. */ +static MemAccess k_ma32(CgTestCtx* ctx) +{ + MemAccess ma = { 0 }; + ma.type = T_i32(ctx); + ma.size = 4; + ma.align = 4; + ma.flags = MF_ATOMIC; + ma.alias.kind = ALIAS_LOCAL; + return ma; +} + +/* Finish a post-state case: plain-load x into a fresh register, return it, + * and close the function with cgtest_end. */ +static void k_close_load_x(KCtx* k) +{ + CgTestCtx* ctx = k->tf->ctx; + const Type* I32 = T_i32(ctx); + Reg r = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + cgtest_load_local(k->tf, REG_op(r, I32), k->x, I32); + cgtest_ret_reg(k->tf, r, I32); + cgtest_end(k->tf); +} + +/* k01_atomic_load_relaxed — return atomic_load(&x=42, RELAXED). 
*/ +void build_k01_atomic_load_relaxed(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 42); + const Type* I32 = T_i32(ctx); + Reg r = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + ctx->target->atomic_load(ctx->target, REG_op(r, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + k_ma32(ctx), MO_RELAXED); + cgtest_ret_reg(k.tf, r, I32); /* the oracle is the atomically loaded value itself */ + cgtest_end(k.tf); +} + +/* k02_atomic_store_load_acq — x starts at 0; atomic_store(&x, 42, RELEASE) + * then atomic_load(&x, ACQUIRE); the loaded value is returned (42 if the + * store took effect). */ +void build_k02_atomic_store_load_acq(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 0); + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + T->atomic_store(T, REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(42, I32), k_ma32(ctx), MO_RELEASE); + Reg r = T->alloc_reg(T, RC_INT, I32); + T->atomic_load(T, REG_op(r, I32), REG_op(k.p_addr, T_ptr(ctx, I32)), + k_ma32(ctx), MO_ACQUIRE); + cgtest_ret_reg(k.tf, r, I32); + cgtest_end(k.tf); +} + +/* k03_atomic_load_seq_cst — return atomic_load(&x=42, SEQ_CST); same shape + * as k01 with the strongest memory order. */ +void build_k03_atomic_load_seq_cst(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 42); + const Type* I32 = T_i32(ctx); + Reg r = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + ctx->target->atomic_load(ctx->target, REG_op(r, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + k_ma32(ctx), MO_SEQ_CST); + cgtest_ret_reg(k.tf, r, I32); + cgtest_end(k.tf); +} + +/* Shared body for the rmw post-state cases (k04..k10). 
*/ +static void k_rmw_post(CgTestCtx* ctx, AtomicOp op, i64 init, i64 val) +{ + KCtx k = k_open_i32(ctx, init); + const Type* I32 = T_i32(ctx); + Reg prior = ctx->target->alloc_reg(ctx->target, RC_INT, I32); /* rmw destination; not inspected by these cases */ + ctx->target->atomic_rmw(ctx->target, op, REG_op(prior, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(val, I32), k_ma32(ctx), MO_SEQ_CST); + k_close_load_x(&k); /* oracle: plain reload of x after the rmw */ +} + +void build_k04_atomic_rmw_add (CgTestCtx* c) { k_rmw_post(c, AO_ADD, 40, 2); } /* intended post-state: 40 + 2 = 42 */ +void build_k05_atomic_rmw_xchg(CgTestCtx* c) { k_rmw_post(c, AO_XCHG, 99, 42); } /* intended post-state: 99 replaced by 42 */ +void build_k06_atomic_rmw_and (CgTestCtx* c) { k_rmw_post(c, AO_AND, 0xFF, 0x2A); } /* intended post-state: 0xFF & 0x2A = 0x2A = 42 */ +void build_k07_atomic_rmw_or (CgTestCtx* c) { k_rmw_post(c, AO_OR, 0x20, 0x0A); } /* intended post-state: 0x20 | 0x0A = 0x2A = 42 */ +void build_k08_atomic_rmw_xor (CgTestCtx* c) { k_rmw_post(c, AO_XOR, 0xFF, 0xD5); } /* intended post-state: 0xFF ^ 0xD5 = 0x2A = 42 */ +void build_k09_atomic_rmw_sub (CgTestCtx* c) { k_rmw_post(c, AO_SUB, 44, 2); } /* intended post-state: 44 - 2 = 42 */ + +/* k10_atomic_rmw_nand — post-state low 8: ~(0xFF & 0xD5) & 0xFF = 0x2A = 42. */ +void build_k10_atomic_rmw_nand(CgTestCtx* c) { k_rmw_post(c, AO_NAND, 0xFF, 0xD5); } + +/* k11_atomic_cas_success — x=10; cas(&x, exp=10, des=42) → ok=1; load → 42. + * Only the reloaded x is returned; prior and ok are produced but not checked. */ +void build_k11_atomic_cas_success(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 10); + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + Reg prior = T->alloc_reg(T, RC_INT, I32); + Reg ok = T->alloc_reg(T, RC_INT, I32); + T->atomic_cas(T, REG_op(prior, I32), REG_op(ok, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(10, I32), IMM_op(42, I32), + k_ma32(ctx), MO_SEQ_CST, MO_RELAXED); /* NOTE(review): the two orders read as (success, failure) — confirm against atomic_cas */ + k_close_load_x(&k); +} + +/* k12_atomic_cas_failure — x=10; cas(&x, exp=99, des=42) → ok=0; x unchanged. 
*/ +void build_k12_atomic_cas_failure(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 10); + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + Reg prior = T->alloc_reg(T, RC_INT, I32); + Reg ok = T->alloc_reg(T, RC_INT, I32); + T->atomic_cas(T, REG_op(prior, I32), REG_op(ok, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(99, I32), IMM_op(42, I32), + k_ma32(ctx), MO_SEQ_CST, MO_RELAXED); /* prior and ok are produced but not checked */ + k_close_load_x(&k); /* returns the reloaded x, which should still be the initial 10 */ +} + +/* k13_atomic_load_i64 — i64 atomic load of 0x1_0000_002A; return low 32 = 42. + * Builds its own prelude because k_open_i32 and k_ma32 are fixed at i32. */ +void build_k13_atomic_load_i64(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* PI64 = T_ptr(ctx, I64); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + FrameSlot x = cgtest_local(tf, I64, FSF_ADDR_TAKEN); + /* Materialize via load_imm into a 64-bit reg, then store. */ + Reg init = T->alloc_reg(T, RC_INT, I64); + T->load_imm(T, REG_op(init, I64), 0x10000002Aull); /* bit 32 is set, so the value does not fit in 32 bits */ + cgtest_store_local(tf, x, REG_op(init, I64), I64); + + Reg p = T->alloc_reg(T, RC_INT, PI64); + T->addr_of(T, REG_op(p, PI64), LOCAL_op(x, I64)); + + MemAccess ma = { .type = I64, .size = 8, .align = 8, + .flags = MF_ATOMIC, .alias.kind = ALIAS_LOCAL }; /* 8-byte counterpart of k_ma32 */ + Reg r64 = T->alloc_reg(T, RC_INT, I64); + T->atomic_load(T, REG_op(r64, I64), REG_op(p, PI64), ma, MO_SEQ_CST); + + Reg r32 = T->alloc_reg(T, RC_INT, I32); + T->convert(T, CV_TRUNC, REG_op(r32, I32), REG_op(r64, I64)); /* keep the low 32 bits (0x2A) for the i32 return */ + cgtest_ret_reg(tf, r32, I32); + cgtest_end(tf); +} + +/* k14_atomic_rmw_prior — return the prior value rmw produced (40), not the + * post-state. 
*/ +void build_k14_atomic_rmw_prior(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 40); + const Type* I32 = T_i32(ctx); + Reg prior = ctx->target->alloc_reg(ctx->target, RC_INT, I32); + ctx->target->atomic_rmw(ctx->target, AO_ADD, REG_op(prior, I32), + REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(2, I32), k_ma32(ctx), MO_SEQ_CST); + cgtest_ret_reg(k.tf, prior, I32); /* returns the rmw destination register; x is not reloaded */ + cgtest_end(k.tf); +} + +/* k15_fence_seq_cst — x starts at 0; relaxed atomic store of 42, then a + * SEQ_CST fence, then a relaxed atomic load whose result is returned. */ +void build_k15_fence_seq_cst(CgTestCtx* ctx) +{ + KCtx k = k_open_i32(ctx, 0); + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + T->atomic_store(T, REG_op(k.p_addr, T_ptr(ctx, I32)), + IMM_op(42, I32), k_ma32(ctx), MO_RELAXED); + T->fence(T, MO_SEQ_CST); + Reg r = T->alloc_reg(T, RC_INT, I32); + T->atomic_load(T, REG_op(r, I32), REG_op(k.p_addr, T_ptr(ctx, I32)), + k_ma32(ctx), MO_RELAXED); + cgtest_ret_reg(k.tf, r, I32); + cgtest_end(k.tf); +} diff --git a/test/cg/harness/cases_l.c b/test/cg/harness/cases_l.c @@ -0,0 +1,416 @@ +/* Group L — intrinsics. + * See CORPUS.md for the case list and expected values. */ + +#include "cg_test.h" + +/* ============================================================ + * Group L: compiler intrinsics + * + * Drives CGTarget.intrinsic across every IntrinKind. Operand shapes + * follow arch.h's documentation: + * POPCOUNT/CTZ/CLZ/BSWAP* : dsts[0] REG, args[0] REG + * MEMCPY/MEMMOVE : args = (dst_addr, src_addr, n_bytes) + * MEMSET : args = (dst_addr, byte_value, n_bytes) + * PREFETCH : args = (addr) + * ASSUME_ALIGNED : dsts[0] REG, args = (ptr, align) + * EXPECT : dsts[0] REG, args = (val, expected) + * UNREACHABLE / TRAP : no dsts, no args + * *_OVERFLOW : dsts[0] result, dsts[1] i1 ovf; args = (a, b) + * ============================================================ */ + +/* helper: emit a single-result bit-op intrinsic on `in` (returns dst reg). 
*/ +static Reg l_bitop(CgTestCtx* ctx, IntrinKind kind, + const Type* arg_ty, i64 imm) +{ + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + Reg src = T->alloc_reg(T, RC_INT, arg_ty); + T->load_imm(T, REG_op(src, arg_ty), imm); /* the operand is the constant imm, held in a register typed arg_ty */ + Reg dst = T->alloc_reg(T, RC_INT, I32); /* the result register is always i32, whatever arg_ty is */ + Operand dsts[1] = { REG_op(dst, I32) }; + Operand args[1] = { REG_op(src, arg_ty) }; + T->intrinsic(T, kind, dsts, 1, args, 1); + return dst; +} + +/* l01_popcount_u32 — popcount(0xFF) → 8. */ +void build_l01_popcount_u32(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U32 = T_u32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_POPCOUNT, U32, 0xFF); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l02_popcount_u64 — popcount((u64)-1) → 64. */ +void build_l02_popcount_u64(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U64 = T_u64(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_POPCOUNT, U64, -1); /* -1 as i64 has all 64 bits set */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l03_ctz_u32 — ctz(0x80) → 7. */ +void build_l03_ctz_u32(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U32 = T_u32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_CTZ, U32, 0x80); /* lowest set bit of 0x80 is bit 7 */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l04_clz_u32 — clz(0xFF) over 32 bits → 24. */ +void build_l04_clz_u32(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U32 = T_u32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_CLZ, U32, 0xFF); /* highest set bit of 0xFF is bit 7, leaving 32 - 8 = 24 leading zeros */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l05_bswap16 — bswap16(0x1234) → 0x3412 (low 8 = 0x12 = 18). 
*/ +void build_l05_bswap16(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U16 = T_u16(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_BSWAP16, U16, 0x1234); /* source is typed u16; l_bitop hands the result back in an i32 register */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l06_bswap32 — bswap32(0x11223344) → 0x44332211 (low 8 = 0x11 = 17). */ +void build_l06_bswap32(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U32 = T_u32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_bitop(ctx, INTRIN_BSWAP32, U32, 0x11223344); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l07_bswap64 — bswap64(0x1122334455667788) → 0x8877665544332211; low 8 = 17. + * Written out by hand instead of via l_bitop because the intrinsic's + * destination here is a u64 register, truncated to i32 afterwards. */ +void build_l07_bswap64(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* U64 = T_u64(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg src = T->alloc_reg(T, RC_INT, U64); + T->load_imm(T, REG_op(src, U64), 0x1122334455667788ll); + Reg dst64 = T->alloc_reg(T, RC_INT, U64); + Operand dsts[1] = { REG_op(dst64, U64) }; + Operand args[1] = { REG_op(src, U64) }; + T->intrinsic(T, INTRIN_BSWAP64, dsts, 1, args, 1); + + Reg r32 = T->alloc_reg(T, RC_INT, I32); + T->convert(T, CV_TRUNC, REG_op(r32, I32), REG_op(dst64, U64)); /* keep the low 32 bits of the swapped value */ + cgtest_ret_reg(tf, r32, I32); + cgtest_end(tf); +} + +/* l08_memcpy_4 — int src=42; memcpy(&dst,&src,4); return dst. 
*/ +void build_l08_memcpy_4(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* PI32 = T_ptr(ctx, I32); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + FrameSlot src = cgtest_local(tf, I32, FSF_ADDR_TAKEN); + FrameSlot dst = cgtest_local(tf, I32, FSF_ADDR_TAKEN); + cgtest_store_local(tf, src, IMM_op(42, I32), I32); /* only src is initialized; dst is written by the memcpy alone */ + + Reg ps = T->alloc_reg(T, RC_INT, PI32); + Reg pd = T->alloc_reg(T, RC_INT, PI32); + T->addr_of(T, REG_op(ps, PI32), LOCAL_op(src, I32)); + T->addr_of(T, REG_op(pd, PI32), LOCAL_op(dst, I32)); + + Operand args[3] = { + REG_op(pd, PI32), + REG_op(ps, PI32), + IMM_op(4, I64), + }; /* (dst_addr, src_addr, n_bytes) */ + T->intrinsic(T, INTRIN_MEMCPY, NULL, 0, args, 3); + + Reg r = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(r, I32), dst, I32); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l09_memmove_overlap — int a[5]={1..5}; memmove(a+1,a,16); return a[4]→4. + * Source bytes 0..15 and destination bytes 4..19 overlap by 12 bytes. */ +void build_l09_memmove_overlap(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* U8 = T_u8(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + /* alloca 5*4 = 20 bytes, aligned to 4. */ + Reg buf = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32)); + T->alloca_(T, REG_op(buf, T_ptr(ctx, I32)), IMM_op(20, I64), 4); + + MemAccess ma = { .type = I32, .size = 4, .align = 4, + .alias.kind = ALIAS_LOCAL }; + for (int i = 0; i < 5; ++i) { + T->store(T, IND_op(buf, (i32)(i*4), I32), IMM_op(i+1, I32), ma); + } /* emits five stores: a[i] = i + 1 */ + + /* dst = a + 4 (one i32 forward); use byte arithmetic for the addr. 
*/ + Reg dst = T->alloc_reg(T, RC_INT, T_ptr(ctx, U8)); + T->binop(T, BO_IADD, REG_op(dst, T_ptr(ctx, U8)), + REG_op(buf, T_ptr(ctx, I32)), IMM_op(4, I64)); /* dst = buf + 4 bytes */ + + Operand args[3] = { + REG_op(dst, T_ptr(ctx, U8)), + REG_op(buf, T_ptr(ctx, I32)), + IMM_op(16, I64), + }; /* (dst_addr, src_addr, n_bytes) */ + T->intrinsic(T, INTRIN_MEMMOVE, NULL, 0, args, 3); + + /* return a[4] (byte offset 16 from buf — old a[3]=4 was copied here). */ + Reg r = T->alloc_reg(T, RC_INT, I32); + T->load(T, REG_op(r, I32), IND_op(buf, 16, I32), ma); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l10_memset_zero — int b[4]; memset(b,0,16); return b[2] → 0. */ +void build_l10_memset_zero(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* U8 = T_u8(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg buf = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32)); + T->alloca_(T, REG_op(buf, T_ptr(ctx, I32)), IMM_op(16, I64), 4); /* 4 * 4 = 16 bytes, aligned to 4 */ + + /* Pre-poison so the memset is observable. */ + MemAccess ma = { .type = I32, .size = 4, .align = 4, + .alias.kind = ALIAS_LOCAL }; + for (int i = 0; i < 4; ++i) + T->store(T, IND_op(buf, (i32)(i*4), I32), IMM_op(0xDEAD, I32), ma); + + Operand args[3] = { + REG_op(buf, T_ptr(ctx, I32)), + IMM_op(0, U8), + IMM_op(16, I64), + }; /* (dst_addr, byte_value, n_bytes) */ + T->intrinsic(T, INTRIN_MEMSET, NULL, 0, args, 3); + + Reg r = T->alloc_reg(T, RC_INT, I32); + T->load(T, REG_op(r, I32), IND_op(buf, 8, I32), ma); /* byte offset 8 is b[2] */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l11_memset_ff — int b; memset(&b,0xFF,4); load → 0xFFFFFFFF; low 8 = 255. 
*/ +void build_l11_memset_ff(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* U8 = T_u8(ctx); + const Type* PI32 = T_ptr(ctx, I32); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + FrameSlot b = cgtest_local(tf, I32, FSF_ADDR_TAKEN); /* b is never stored to directly; the memset is its only writer */ + Reg p = T->alloc_reg(T, RC_INT, PI32); + T->addr_of(T, REG_op(p, PI32), LOCAL_op(b, I32)); + + Operand args[3] = { + REG_op(p, PI32), + IMM_op(0xFF, U8), + IMM_op(4, I64), + }; /* (dst_addr, byte_value, n_bytes) */ + T->intrinsic(T, INTRIN_MEMSET, NULL, 0, args, 3); + + Reg r = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(r, I32), b, I32); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l12_expect_taken — int x = expect(1, 1); if (x) return 42; else return 99. + * The condition is the constant 1 loaded into a register, not a compare. */ +void build_l12_expect_taken(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg cond = T->alloc_reg(T, RC_INT, I32); + T->load_imm(T, REG_op(cond, I32), 1); + + Reg out = T->alloc_reg(T, RC_INT, I32); + Operand dsts[1] = { REG_op(out, I32) }; + Operand args[2] = { REG_op(cond, I32), IMM_op(1, I32) }; /* (val, expected) */ + T->intrinsic(T, INTRIN_EXPECT, dsts, 1, args, 2); + + Label miss = T->label_new(T); + T->cmp_branch(T, CMP_EQ, REG_op(out, I32), IMM_op(0, I32), miss); /* out == 0 goes to miss; otherwise fall through */ + cgtest_ret_imm(tf, 42, I32); + T->label_place(T, miss); + cgtest_ret_imm(tf, 99, I32); + cgtest_end(tf); +} + +/* l13_unreachable_live — if(x) return 42; else __builtin_unreachable(). 
*/ +void build_l13_unreachable_live(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg x = T->alloc_reg(T, RC_INT, I32); + T->load_imm(T, REG_op(x, I32), 1); + + Label dead = T->label_new(T); + T->cmp_branch(T, CMP_EQ, REG_op(x, I32), IMM_op(0, I32), dead); /* x is the constant 1, so x == 0 never holds at run time */ + cgtest_ret_imm(tf, 42, I32); + + T->label_place(T, dead); + T->intrinsic(T, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); /* no dsts, no args; nothing is emitted after it before cgtest_end */ + cgtest_end(tf); +} + +/* l14_trap_live — if(x) return 42; else __builtin_trap(). Same shape as + * l13 with INTRIN_TRAP on the branch that x == 1 never takes. */ +void build_l14_trap_live(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg x = T->alloc_reg(T, RC_INT, I32); + T->load_imm(T, REG_op(x, I32), 1); + + Label trap_lbl = T->label_new(T); + T->cmp_branch(T, CMP_EQ, REG_op(x, I32), IMM_op(0, I32), trap_lbl); + cgtest_ret_imm(tf, 42, I32); + + T->label_place(T, trap_lbl); + T->intrinsic(T, INTRIN_TRAP, NULL, 0, NULL, 0); + cgtest_end(tf); +} + +/* l15_prefetch_noop — prefetch(&x); x = 42; return x. The store and the + * load go through the frame slot; p is used only as the prefetch operand. */ +void build_l15_prefetch_noop(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* PI32 = T_ptr(ctx, I32); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + FrameSlot x = cgtest_local(tf, I32, FSF_ADDR_TAKEN); + Reg p = T->alloc_reg(T, RC_INT, PI32); + T->addr_of(T, REG_op(p, PI32), LOCAL_op(x, I32)); + + Operand pf_args[1] = { REG_op(p, PI32) }; + T->intrinsic(T, INTRIN_PREFETCH, NULL, 0, pf_args, 1); /* issued before x has been written */ + + cgtest_store_local(tf, x, IMM_op(42, I32), I32); + Reg r = T->alloc_reg(T, RC_INT, I32); + cgtest_load_local(tf, REG_op(r, I32), x, I32); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l16_assume_aligned — p = assume_aligned(p, 8); *p = 42; return *p. 
*/ +void build_l16_assume_aligned(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + const Type* I64 = T_i64(ctx); + const Type* PI32 = T_ptr(ctx, I32); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + CGTarget* T = ctx->target; + + Reg p = T->alloc_reg(T, RC_INT, PI32); + T->alloca_(T, REG_op(p, PI32), IMM_op(8, I64), 8); /* 8 bytes requested with alignment 8, matching the assumption below */ + + Reg p2 = T->alloc_reg(T, RC_INT, PI32); + Operand dsts[1] = { REG_op(p2, PI32) }; + Operand args[2] = { REG_op(p, PI32), IMM_op(8, I32) }; /* (ptr, align) */ + T->intrinsic(T, INTRIN_ASSUME_ALIGNED, dsts, 1, args, 2); + + MemAccess ma = { .type = I32, .size = 4, .align = 8, + .alias.kind = ALIAS_LOCAL }; /* 4-byte access that advertises the assumed 8-byte alignment */ + T->store(T, IND_op(p2, 0, I32), IMM_op(42, I32), ma); + Reg r = T->alloc_reg(T, RC_INT, I32); + T->load(T, REG_op(r, I32), IND_op(p2, 0, I32), ma); /* both accesses go through p2, the intrinsic's result */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* Helper: emit a 2-operand checked-arith intrinsic on the i32 constants a + * and b; return the register holding either the result (which == 0) or the + * overflow flag (which != 0). */ +static Reg l_chkarith(CgTestCtx* ctx, IntrinKind kind, i64 a, i64 b, + int which /*0=value,1=ovf*/) +{ + const Type* I32 = T_i32(ctx); + CGTarget* T = ctx->target; + Reg ra = T->alloc_reg(T, RC_INT, I32); + Reg rb = T->alloc_reg(T, RC_INT, I32); + T->load_imm(T, REG_op(ra, I32), a); + T->load_imm(T, REG_op(rb, I32), b); + Reg val = T->alloc_reg(T, RC_INT, I32); + Reg ovf = T->alloc_reg(T, RC_INT, I32); /* NOTE(review): the Group L header describes dsts[1] as i1, but an i32 register is passed — confirm */ + Operand dsts[2] = { REG_op(val, I32), REG_op(ovf, I32) }; + Operand args[2] = { REG_op(ra, I32), REG_op(rb, I32) }; + T->intrinsic(T, kind, dsts, 2, args, 2); + return which ? ovf : val; +} + +/* l17_add_overflow_no — add_overflow(20,22) → val=42, ovf=0; return val. */ +void build_l17_add_overflow_no(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_chkarith(ctx, INTRIN_ADD_OVERFLOW, 20, 22, 0); + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l18_add_overflow_yes — add_overflow(INT_MAX,1) → ovf=1; return ovf. 
*/ +void build_l18_add_overflow_yes(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_chkarith(ctx, INTRIN_ADD_OVERFLOW, 0x7FFFFFFF, 1, 1); /* last argument 1 selects the overflow flag */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l19_sub_overflow_yes — sub_overflow(INT_MIN,1) → ovf=1; return ovf. */ +void build_l19_sub_overflow_yes(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_chkarith(ctx, INTRIN_SUB_OVERFLOW, (i64)(i32)0x80000000, 1, 1); /* the cast chain is meant to pass INT_MIN sign-extended to i64 */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} + +/* l20_mul_overflow_no — mul_overflow(6,7) → val=42, ovf=0; return val. */ +void build_l20_mul_overflow_no(CgTestCtx* ctx) +{ + const Type* I32 = T_i32(ctx); + CgTestFn* tf = cgtest_begin_main(ctx, I32); + Reg r = l_chkarith(ctx, INTRIN_MUL_OVERFLOW, 6, 7, 0); /* last argument 0 selects the arithmetic result */ + cgtest_ret_reg(tf, r, I32); + cgtest_end(tf); +} diff --git a/test/cg/run.sh b/test/cg/run.sh @@ -142,6 +142,9 @@ if $CC $CFREE_CFLAGS \ "$TEST_DIR/harness/cases_g.c" \ "$TEST_DIR/harness/cases_h.c" \ "$TEST_DIR/harness/cases_i.c" \ + "$TEST_DIR/harness/cases_j.c" \ + "$TEST_DIR/harness/cases_k.c" \ + "$TEST_DIR/harness/cases_l.c" \ "$LIB_AR" -o "$CG_RUNNER" 2>"$BUILD_DIR/cg-runner.err"; then printf ' %s cg-runner\n' "$(color_grn built)" else