kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

interp_smoke_test.c (21557B)


      1 /* Unit smoke test for the threaded-bytecode interpreter.
      2  *
      3  * Mirrors test/opt/cg_ir_lower_test.c: a self-contained heap/diag harness that
      4  * builds tiny CG IR by hand, runs it through opt_run_o1_interp + interp_lower,
      5  * executes it on an InterpStack, and asserts the returned value. This exercises
      6  * the loader + engine directly (the broad differential coverage against the JIT
      7  * lives in test/toy/run.sh's I-path). */
      8 
      9 #include <kit/core.h>
     10 #include <kit/interp.h>
     11 #include <stdarg.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 
     16 #include "cg/ir.h"
     17 #include "interp/interp.h"
     18 #include "lib/kit_unit.h"
     19 #include "opt/opt.h"
     20 
     21 #undef Operand
     22 #undef CGFuncDesc
     23 #undef CGParamDesc
     24 #undef CGCallDesc
     25 #undef CGLocalStorage
     26 
     27 /* Shared test context replaces the per-file heap/diag/counter globals;
     28  * EXPECT aliases CU_EXPECT so the call sites are unchanged. kit_unit_init
     29  * runs once in main (ctx.now is then set to -1 to match the original). */
     30 static KitUnit g_u;
     31 #define EXPECT(cond, ...) CU_EXPECT(&g_u, cond, __VA_ARGS__)
     32 
     33 typedef struct TestCtx {
     34   Compiler* c;
     35   KitCgTypeId i32;
     36   KitCgTypeId i64;
     37   KitCgTypeId f64;
     38 } TestCtx;
     39 
     40 static void tc_init(TestCtx* tc) {
     41   KitTargetSpec target;
     42   KitCgBuiltinTypes b;
     43   memset(tc, 0, sizeof *tc);
     44   target = kit_unit_target(KIT_ARCH_ARM_64, KIT_OS_MACOS, KIT_OBJ_MACHO);
     45   if (kit_unit_compiler_new(&g_u, target, (KitCompiler**)&tc->c) != KIT_OK ||
     46       !tc->c) {
     47     fprintf(stderr, "fatal: compiler allocation failed\n");
     48     abort();
     49   }
     50   b = kit_cg_builtin_types(tc->c);
     51   tc->i32 = b.id[KIT_CG_BUILTIN_I32];
     52   tc->i64 = b.id[KIT_CG_BUILTIN_I64];
     53   tc->f64 = b.id[KIT_CG_BUILTIN_F64];
     54 }
     55 
     56 static void tc_fini(TestCtx* tc) {
     57   kit_compiler_free(tc->c);
     58   tc->c = NULL;
     59 }
     60 
     61 static Operand local_op(CGLocal local, KitCgTypeId type) {
     62   Operand o;
     63   memset(&o, 0, sizeof o);
     64   o.kind = OPK_LOCAL;
     65   o.type = type;
     66   o.v.local = local;
     67   return o;
     68 }
     69 static Operand imm_op(i64 value, KitCgTypeId type) {
     70   Operand o;
     71   memset(&o, 0, sizeof o);
     72   o.kind = OPK_IMM;
     73   o.type = type;
     74   o.v.imm = value;
     75   return o;
     76 }
     77 static CGLocal add_local(CgIrFunc* f, KitCgTypeId type) {
     78   CGLocalDesc d;
     79   memset(&d, 0, sizeof d);
     80   d.type = type;
     81   d.size = 8;
     82   d.align = 8;
     83   return cg_ir_func_add_local(f, &d, 0, 0);
     84 }
     85 static CgIrInst* emit_ops(CgIrFunc* f, CgIrOp op, const Operand* ops, u32 n) {
     86   CgIrInst* in = cg_ir_emit(f, op, (SrcLoc){0, 0, 0});
     87   in->opnds = cg_ir_dup_operands(f->arena, ops, n);
     88   in->nopnds = n;
     89   return in;
     90 }
     91 
     92 /* Run a hand-built leaf CgIrFunc through the interp and return its scalar. */
     93 static KitInterpStatus run_leaf(TestCtx* tc, CgIrFunc* cg, int64_t* out) {
     94   KitInterpProgram* prog = kit_interp_program_new(tc->c);
     95   Func* f = opt_run_o1_interp(tc->c, cg);
     96   InterpFunc* fn =
     97       interp_lower((InterpProgram*)prog, f, OBJ_SYM_NONE, SLICE_NULL, NULL);
     98   KitInterpStatus s = kit_interp_call(prog, (KitInterpFunc*)fn, 0, NULL, out);
     99   kit_interp_program_free(prog);
    100   return s;
    101 }
    102 
    103 static CgIrFunc* new_func(TestCtx* tc, KitCgTypeId ret_type) {
    104   CGFuncDesc fd;
    105   memset(&fd, 0, sizeof fd);
    106   fd.fn_type = ret_type;
    107   fd.result_type = ret_type;
    108   return cg_ir_func_new(tc->c, &fd);
    109 }
    110 
    111 static void ret_local(CgIrFunc* cg, CGLocal v) {
    112   CgIrRetAux* aux = arena_znew(cg->arena, CgIrRetAux);
    113   CgIrInst* ret;
    114   aux->value = v;
    115   aux->present = 1;
    116   ret = cg_ir_emit(cg, CG_IR_RET, (SrcLoc){0, 0, 0});
    117   ret->extra.aux = aux;
    118 }
    119 
    120 /* fn() : i64 { a = 2; a = a + 3; return a; }  => 5 */
    121 static void interp_runs_arithmetic(void) {
    122   TestCtx tc;
    123   CgIrFunc* cg;
    124   CGLocal a;
    125   int64_t ret = -1;
    126   KitInterpStatus s;
    127   tc_init(&tc);
    128   cg = new_func(&tc, tc.i64);
    129   a = add_local(cg, tc.i64);
    130   {
    131     Operand o[] = {local_op(a, tc.i64)};
    132     CgIrInst* li = emit_ops(cg, CG_IR_LOAD_IMM, o, 1);
    133     li->extra.imm = 2;
    134   }
    135   {
    136     Operand o[] = {local_op(a, tc.i64), local_op(a, tc.i64), imm_op(3, tc.i64)};
    137     CgIrInst* bi = emit_ops(cg, CG_IR_BINOP, o, 3);
    138     bi->extra.imm = BO_IADD;
    139   }
    140   ret_local(cg, a);
    141   s = run_leaf(&tc, cg, &ret);
    142   EXPECT(s == KIT_INTERP_DONE, "arithmetic: status %d", (int)s);
    143   EXPECT(ret == 5, "arithmetic: expected 5, got %lld", (long long)ret);
    144   tc_fini(&tc);
    145 }
    146 
    147 /* fn() : i64 { a = 7; if (a == 7) a = 11; return a; }  => 11
    148  * Exercises CMP_BRANCH + a join block + fallthrough succ edges. */
    149 static void interp_runs_branch(void) {
    150   TestCtx tc;
    151   CgIrFunc* cg;
    152   CGLocal a;
    153   Label done;
    154   int64_t ret = -1;
    155   KitInterpStatus s;
    156   tc_init(&tc);
    157   cg = new_func(&tc, tc.i64);
    158   a = add_local(cg, tc.i64);
    159   done = cg_ir_func_add_label(cg);
    160   {
    161     Operand o[] = {local_op(a, tc.i64)};
    162     CgIrInst* li = emit_ops(cg, CG_IR_LOAD_IMM, o, 1);
    163     li->extra.imm = 7;
    164   }
    165   {
    166     /* branch to `done` when a != 7 (i.e. skip the assignment) */
    167     Operand o[] = {local_op(a, tc.i64), imm_op(7, tc.i64)};
    168     CgIrInst* br = emit_ops(cg, CG_IR_CMP_BRANCH, o, 2);
    169     CgIrCmpBranchAux* aux = arena_znew(cg->arena, CgIrCmpBranchAux);
    170     aux->op = CMP_NE;
    171     aux->target = done;
    172     br->extra.aux = aux;
    173   }
    174   {
    175     Operand o[] = {local_op(a, tc.i64)};
    176     CgIrInst* li = emit_ops(cg, CG_IR_LOAD_IMM, o, 1);
    177     li->extra.imm = 11;
    178   }
    179   {
    180     CgIrInst* label = cg_ir_emit(cg, CG_IR_LABEL, (SrcLoc){0, 0, 0});
    181     label->extra.imm = (i64)done;
    182     cg_ir_func_note_label_place(cg, done, (SrcLoc){0, 0, 0});
    183   }
    184   ret_local(cg, a);
    185   s = run_leaf(&tc, cg, &ret);
    186   EXPECT(s == KIT_INTERP_DONE, "branch: status %d", (int)s);
    187   EXPECT(ret == 11, "branch: expected 11, got %lld", (long long)ret);
    188   tc_fini(&tc);
    189 }
    190 
    191 /* ============================================================================
    192  * Spec conformance: the interpreter is the reference implementation of the IR.
    193  *
    194  * Each case builds a PARAMETERIZED CgIrFunc and runs it through
    195  * opt_run_o1_interp + the engine with RUNTIME argument values, so the optimizer
    196  * cannot constant-fold the operation away — the engine's own handler computes
    197  * the result. We then assert the exact value the spec mandates for that edge
    198  * (doc/IR.md "Well-definedness: edge-case semantics", portable mode). These
    199  * lock the engine to the spec; a divergence turns a case red.
    200  * ========================================================================== */
    201 
    202 static u32 ty_size(TestCtx* tc, KitCgTypeId t) {
    203   return (u32)kit_cg_type_size((KitCompiler*)tc->c, t);
    204 }
    205 static u32 ty_align(TestCtx* tc, KitCgTypeId t) {
    206   return (u32)kit_cg_type_align((KitCompiler*)tc->c, t);
    207 }
    208 
    209 /* New function with `np` scalar params; fills out_params[] with the param
    210  * locals (readable directly as source operands). The interpreter assigns each
    211  * param's storage home from the optimizer's local map (not from fn_type's ABI),
    212  * so the leaf func type used here mirrors new_func and needs no real func type.
    213  */
    214 static CgIrFunc* new_func_p(TestCtx* tc, KitCgTypeId ret,
    215                             const KitCgTypeId* ptypes, u32 np,
    216                             CGLocal* out_params) {
    217   CGFuncDesc fd;
    218   CGParamDesc* pds;
    219   CgIrFunc* f;
    220   u32 i;
    221   memset(&fd, 0, sizeof fd);
    222   pds = np ? arena_array(tc->c->tu, CGParamDesc, np) : NULL;
    223   for (i = 0; i < np; ++i) {
    224     memset(&pds[i], 0, sizeof pds[i]);
    225     pds[i].index = i;
    226     pds[i].type = ptypes[i];
    227     pds[i].size = ty_size(tc, ptypes[i]);
    228     pds[i].align = ty_align(tc, ptypes[i]);
    229   }
    230   fd.fn_type = ret;
    231   fd.result_type = ret;
    232   fd.params = pds;
    233   fd.nparams = np;
    234   f = cg_ir_func_new(tc->c, &fd);
    235   for (i = 0; i < np; ++i) {
    236     CGLocalDesc ld;
    237     CGLocal loc;
    238     memset(&ld, 0, sizeof ld);
    239     ld.type = ptypes[i];
    240     ld.size = ty_size(tc, ptypes[i]);
    241     ld.align = ty_align(tc, ptypes[i]);
    242     loc = cg_ir_func_add_local(f, &ld, 1, i);
    243     cg_ir_func_add_param(f, loc, &pds[i]);
    244     out_params[i] = loc;
    245   }
    246   return f;
    247 }
    248 
    249 static CGLocal add_local_ty(CgIrFunc* f, TestCtx* tc, KitCgTypeId t) {
    250   CGLocalDesc d;
    251   memset(&d, 0, sizeof d);
    252   d.type = t;
    253   d.size = ty_size(tc, t);
    254   d.align = ty_align(tc, t);
    255   return cg_ir_func_add_local(f, &d, 0, 0);
    256 }
    257 
    258 static KitInterpStatus run_args(TestCtx* tc, CgIrFunc* cg, const u64* args,
    259                                 u32 nargs, int64_t* out) {
    260   KitInterpProgram* prog = kit_interp_program_new(tc->c);
    261   Func* f = opt_run_o1_interp(tc->c, cg);
    262   InterpFunc* fn =
    263       interp_lower((InterpProgram*)prog, f, OBJ_SYM_NONE, SLICE_NULL, NULL);
    264   KitInterpStatus s =
    265       kit_interp_call_args(prog, (KitInterpFunc*)fn, args, nargs, out);
    266   kit_interp_program_free(prog);
    267   return s;
    268 }
    269 
    270 static void emit_binop(CgIrFunc* f, BinOp op, CGLocal d, KitCgTypeId ty,
    271                        Operand a, Operand b) {
    272   Operand o[3];
    273   CgIrInst* in;
    274   o[0] = local_op(d, ty);
    275   o[1] = a;
    276   o[2] = b;
    277   in = emit_ops(f, CG_IR_BINOP, o, 3);
    278   in->extra.imm = (i64)op;
    279 }
    280 static void emit_unop(CgIrFunc* f, UnOp op, CGLocal d, KitCgTypeId ty,
    281                       Operand a) {
    282   Operand o[2];
    283   CgIrInst* in;
    284   o[0] = local_op(d, ty);
    285   o[1] = a;
    286   in = emit_ops(f, CG_IR_UNOP, o, 2);
    287   in->extra.imm = (i64)op;
    288 }
    289 static void emit_cmp(CgIrFunc* f, CmpOp op, CGLocal d, KitCgTypeId dty,
    290                      Operand a, Operand b) {
    291   Operand o[3];
    292   CgIrInst* in;
    293   o[0] = local_op(d, dty);
    294   o[1] = a;
    295   o[2] = b;
    296   in = emit_ops(f, CG_IR_CMP, o, 3);
    297   in->extra.imm = (i64)op;
    298 }
    299 static void emit_convert(CgIrFunc* f, ConvKind k, CGLocal d, KitCgTypeId dty,
    300                          Operand src) {
    301   Operand o[2];
    302   CgIrInst* in;
    303   o[0] = local_op(d, dty);
    304   o[1] = src;
    305   in = emit_ops(f, CG_IR_CONVERT, o, 2);
    306   in->extra.imm = (i64)k;
    307 }
    308 static void emit_intrin1(CgIrFunc* f, IntrinKind k, CGLocal d, KitCgTypeId dty,
    309                          Operand arg) {
    310   CgIrInst* in = cg_ir_emit(f, CG_IR_INTRINSIC, (SrcLoc){0, 0, 0});
    311   CgIrIntrinsicAux* aux = arena_znew(f->arena, CgIrIntrinsicAux);
    312   Operand dsts[1];
    313   Operand args[1];
    314   dsts[0] = local_op(d, dty);
    315   args[0] = arg;
    316   aux->kind = k;
    317   aux->dsts = cg_ir_dup_operands(f->arena, dsts, 1);
    318   aux->args = cg_ir_dup_operands(f->arena, args, 1);
    319   aux->ndst = 1;
    320   aux->narg = 1;
    321   in->extra.aux = aux;
    322 }
    323 
    324 /* Run a unary i32->i32 op f(x)=OP(x); return the low 32 bits of the result. */
    325 static u32 run_un_i32(TestCtx* tc, BinOp bo, int use_unop, UnOp uo, u32 x) {
    326   CGLocal p[1];
    327   CGLocal r;
    328   CgIrFunc* f;
    329   u64 args[1];
    330   int64_t out = 0;
    331   KitCgTypeId i32 = tc->i32;
    332   f = new_func_p(tc, i32, &i32, 1, p);
    333   r = add_local_ty(f, tc, i32);
    334   if (use_unop)
    335     emit_unop(f, uo, r, i32, local_op(p[0], i32));
    336   else
    337     emit_binop(f, bo, r, i32, local_op(p[0], i32), local_op(p[0], i32));
    338   ret_local(f, r);
    339   args[0] = x;
    340   (void)run_args(tc, f, args, 1, &out);
    341   return (u32)(u64)out;
    342 }
    343 
    344 /* Run a binary i32 op f(x,y)=x OP y; report status + low-32 result. */
    345 static KitInterpStatus run_bin_i32(TestCtx* tc, BinOp bo, u32 x, u32 y,
    346                                    u32* res) {
    347   CGLocal p[2];
    348   CGLocal r;
    349   CgIrFunc* f;
    350   u64 args[2];
    351   int64_t out = 0;
    352   KitInterpStatus s;
    353   KitCgTypeId i32 = tc->i32;
    354   KitCgTypeId pt[2];
    355   pt[0] = i32;
    356   pt[1] = i32;
    357   f = new_func_p(tc, i32, pt, 2, p);
    358   r = add_local_ty(f, tc, i32);
    359   emit_binop(f, bo, r, i32, local_op(p[0], i32), local_op(p[1], i32));
    360   ret_local(f, r);
    361   args[0] = x;
    362   args[1] = y;
    363   s = run_args(tc, f, args, 2, &out);
    364   *res = (u32)(u64)out;
    365   return s;
    366 }
    367 
    368 /* integer wrapping + shift masking (spec: portable). */
    369 static void spec_int_wrap_shift(void) {
    370   TestCtx tc;
    371   u32 res = 0;
    372   tc_init(&tc);
    373   /* imul wraps mod 2^32: 0x10000 * 0x10000 = 2^32 -> 0 */
    374   EXPECT(
    375       run_bin_i32(&tc, BO_IMUL, 0x10000u, 0x10000u, &res) == KIT_INTERP_DONE &&
    376           res == 0u,
    377       "imul wrap: got 0x%08x", res);
    378   /* iadd wraps: 0xffffffff + 1 = 0 */
    379   EXPECT(run_bin_i32(&tc, BO_IADD, 0xffffffffu, 1u, &res) == KIT_INTERP_DONE &&
    380              res == 0u,
    381          "iadd wrap: got 0x%08x", res);
    382   /* shl count reduced mod 32: 1 << 33 == 1 << 1 == 2 */
    383   EXPECT(
    384       run_bin_i32(&tc, BO_SHL, 1u, 33u, &res) == KIT_INTERP_DONE && res == 2u,
    385       "shl mask: got 0x%08x", res);
    386   /* shr_u count mod 32: 0x80000000 >> 33 == >> 1 == 0x40000000 */
    387   EXPECT(
    388       run_bin_i32(&tc, BO_SHR_U, 0x80000000u, 33u, &res) == KIT_INTERP_DONE &&
    389           res == 0x40000000u,
    390       "shr_u mask: got 0x%08x", res);
    391   /* shr_s arithmetic (sign-replicating): -256 >> 4 == -16 */
    392   EXPECT(run_bin_i32(&tc, BO_SHR_S, (u32)(-256), 4u, &res) == KIT_INTERP_DONE &&
    393              res == (u32)(-16),
    394          "shr_s arith: got 0x%08x", res);
    395   /* neg INT_MIN wraps to INT_MIN (two's complement, no trap) */
    396   EXPECT(run_un_i32(&tc, BO_IADD, 1, UO_NEG, 0x80000000u) == 0x80000000u,
    397          "neg INT_MIN wrap");
    398   tc_fini(&tc);
    399 }
    400 
    401 /* division / remainder edges (spec: portable -> div-by-zero traps,
    402  * INT_MIN/-1 wraps). */
    403 static void spec_div_edges(void) {
    404   TestCtx tc;
    405   u32 res = 0;
    406   tc_init(&tc);
    407   /* sdiv by zero traps */
    408   EXPECT(run_bin_i32(&tc, BO_SDIV, 10u, 0u, &res) == KIT_INTERP_TRAP,
    409          "sdiv/0 should trap");
    410   /* udiv by zero traps */
    411   EXPECT(run_bin_i32(&tc, BO_UDIV, 10u, 0u, &res) == KIT_INTERP_TRAP,
    412          "udiv/0 should trap");
    413   /* srem by zero traps */
    414   EXPECT(run_bin_i32(&tc, BO_SREM, 10u, 0u, &res) == KIT_INTERP_TRAP,
    415          "srem/0 should trap");
    416   /* INT_MIN / -1 wraps to INT_MIN, no trap */
    417   EXPECT(run_bin_i32(&tc, BO_SDIV, 0x80000000u, 0xffffffffu, &res) ==
    418                  KIT_INTERP_DONE &&
    419              res == 0x80000000u,
    420          "INT_MIN/-1 wrap: got 0x%08x", res);
    421   /* INT_MIN %% -1 == 0, no trap */
    422   EXPECT(run_bin_i32(&tc, BO_SREM, 0x80000000u, 0xffffffffu, &res) ==
    423                  KIT_INTERP_DONE &&
    424              res == 0u,
    425          "INT_MIN%%-1: got 0x%08x", res);
    426   /* ordinary signed divide truncates toward zero: -7 / 2 == -3 */
    427   EXPECT(run_bin_i32(&tc, BO_SDIV, (u32)(-7), 2u, &res) == KIT_INTERP_DONE &&
    428              res == (u32)(-3),
    429          "sdiv trunc: got 0x%08x", res);
    430   tc_fini(&tc);
    431 }
    432 
    433 /* clz/ctz at zero are defined to equal the bit width (stronger than C). */
    434 static void spec_clz_ctz_zero(void) {
    435   TestCtx tc;
    436   CGLocal p[1];
    437   CGLocal r;
    438   CgIrFunc* f;
    439   u64 args[1];
    440   int64_t out;
    441   KitCgTypeId i32;
    442   tc_init(&tc);
    443   i32 = tc.i32;
    444   /* clz(0) == 32 */
    445   f = new_func_p(&tc, i32, &i32, 1, p);
    446   r = add_local_ty(f, &tc, i32);
    447   emit_intrin1(f, INTRIN_CLZ, r, i32, local_op(p[0], i32));
    448   ret_local(f, r);
    449   args[0] = 0;
    450   out = -1;
    451   EXPECT(run_args(&tc, f, args, 1, &out) == KIT_INTERP_DONE && (u32)out == 32u,
    452          "clz(0)==32: got %lld", (long long)out);
    453   /* ctz(0) == 32 */
    454   f = new_func_p(&tc, i32, &i32, 1, p);
    455   r = add_local_ty(f, &tc, i32);
    456   emit_intrin1(f, INTRIN_CTZ, r, i32, local_op(p[0], i32));
    457   ret_local(f, r);
    458   args[0] = 0;
    459   out = -1;
    460   EXPECT(run_args(&tc, f, args, 1, &out) == KIT_INTERP_DONE && (u32)out == 32u,
    461          "ctz(0)==32: got %lld", (long long)out);
    462   tc_fini(&tc);
    463 }
    464 
    465 static u64 dbits(double d) {
    466   u64 u;
    467   memcpy(&u, &d, 8);
    468   return u;
    469 }
    470 static double bitsd(u64 u) {
    471   double d;
    472   memcpy(&d, &u, 8);
    473   return d;
    474 }
    475 
    476 /* float->int conversion saturates; NaN -> 0 (spec: portable ftoi). */
    477 static u32 run_ftoi(TestCtx* tc, ConvKind k, double in, KitInterpStatus* sp) {
    478   CGLocal p[1];
    479   CGLocal r;
    480   CgIrFunc* f;
    481   u64 args[1];
    482   int64_t out = 0;
    483   KitCgTypeId f64 = tc->f64;
    484   KitCgTypeId i32 = tc->i32;
    485   f = new_func_p(tc, i32, &f64, 1, p);
    486   r = add_local_ty(f, tc, i32);
    487   emit_convert(f, k, r, i32, local_op(p[0], f64));
    488   ret_local(f, r);
    489   args[0] = dbits(in);
    490   *sp = run_args(tc, f, args, 1, &out);
    491   return (u32)(u64)out;
    492 }
    493 
    494 static void spec_ftoi_sat(void) {
    495   TestCtx tc;
    496   KitInterpStatus s;
    497   double nan = bitsd(0x7ff8000000000000ull);
    498   tc_init(&tc);
    499   EXPECT(
    500       run_ftoi(&tc, CV_FTOI_S, 1e30, &s) == 0x7fffffffu && s == KIT_INTERP_DONE,
    501       "ftoi_s overflow -> INT_MAX");
    502   EXPECT(run_ftoi(&tc, CV_FTOI_S, -1e30, &s) == 0x80000000u &&
    503              s == KIT_INTERP_DONE,
    504          "ftoi_s underflow -> INT_MIN");
    505   EXPECT(run_ftoi(&tc, CV_FTOI_S, nan, &s) == 0u && s == KIT_INTERP_DONE,
    506          "ftoi_s NaN -> 0");
    507   EXPECT(
    508       run_ftoi(&tc, CV_FTOI_S, -7.9, &s) == (u32)(-7) && s == KIT_INTERP_DONE,
    509       "ftoi_s trunc toward zero");
    510   EXPECT(run_ftoi(&tc, CV_FTOI_U, -1.0, &s) == 0u && s == KIT_INTERP_DONE,
    511          "ftoi_u negative -> 0");
    512   EXPECT(
    513       run_ftoi(&tc, CV_FTOI_U, 1e30, &s) == 0xffffffffu && s == KIT_INTERP_DONE,
    514       "ftoi_u overflow -> UINT_MAX");
    515   tc_fini(&tc);
    516 }
    517 
    518 /* FP compares: relationals + eq are ordered (NaN -> false); ne is unordered
    519  * (NaN -> true). */
    520 static int run_fcmp(TestCtx* tc, CmpOp op, double a, double b) {
    521   CGLocal p[2];
    522   CGLocal r;
    523   CgIrFunc* f;
    524   u64 args[2];
    525   int64_t out = 0;
    526   KitCgTypeId f64 = tc->f64;
    527   KitCgTypeId i32 = tc->i32;
    528   KitCgTypeId pt[2];
    529   pt[0] = f64;
    530   pt[1] = f64;
    531   f = new_func_p(tc, i32, pt, 2, p);
    532   r = add_local_ty(f, tc, i32);
    533   emit_cmp(f, op, r, i32, local_op(p[0], f64), local_op(p[1], f64));
    534   ret_local(f, r);
    535   args[0] = dbits(a);
    536   args[1] = dbits(b);
    537   (void)run_args(tc, f, args, 2, &out);
    538   return (int)(u32)out;
    539 }
    540 
    541 static void spec_fp_cmp_nan(void) {
    542   TestCtx tc;
    543   double nan = bitsd(0x7ff8000000000000ull);
    544   tc_init(&tc);
    545   /* Ordered relationals + OEQ are false on NaN; the unordered duals are true.
    546    * Each predicate is checked against NaN-lhs, NaN-rhs, both-NaN, ordered, and
    547    * a -0.0/0.0 boundary so the backend's ordered/unordered split is exercised
    548    * end to end. */
    549   EXPECT(run_fcmp(&tc, CMP_OLT_F, nan, 1.0) == 0, "olt NaN-lhs -> false");
    550   EXPECT(run_fcmp(&tc, CMP_OGE_F, 1.0, nan) == 0, "oge NaN-rhs -> false");
    551   EXPECT(run_fcmp(&tc, CMP_OEQ_F, nan, nan) == 0, "oeq both-NaN -> false");
    552   EXPECT(run_fcmp(&tc, CMP_UNE_F, nan, nan) == 1, "une both-NaN -> true");
    553   EXPECT(run_fcmp(&tc, CMP_OEQ_F, -0.0, 0.0) == 1, "oeq -0.0 == 0.0 -> true");
    554   EXPECT(run_fcmp(&tc, CMP_OLT_F, 1.0, 2.0) == 1, "olt ordinary -> true");
    555 
    556   /* Ordered predicates: false on any NaN, normal otherwise. */
    557   EXPECT(run_fcmp(&tc, CMP_OEQ_F, 1.0, 1.0) == 1, "oeq 1==1 -> true");
    558   EXPECT(run_fcmp(&tc, CMP_ONE_F, 1.0, 2.0) == 1, "one 1!=2 -> true");
    559   EXPECT(run_fcmp(&tc, CMP_ONE_F, 1.0, 1.0) == 0, "one 1!=1 -> false");
    560   EXPECT(run_fcmp(&tc, CMP_ONE_F, nan, 1.0) == 0, "one NaN -> false");
    561   EXPECT(run_fcmp(&tc, CMP_OLE_F, 1.0, 1.0) == 1, "ole 1<=1 -> true");
    562   EXPECT(run_fcmp(&tc, CMP_OLE_F, 2.0, 1.0) == 0, "ole 2<=1 -> false");
    563   EXPECT(run_fcmp(&tc, CMP_OLE_F, nan, 1.0) == 0, "ole NaN -> false");
    564   EXPECT(run_fcmp(&tc, CMP_OGT_F, 2.0, 1.0) == 1, "ogt 2>1 -> true");
    565   EXPECT(run_fcmp(&tc, CMP_OGT_F, 1.0, nan) == 0, "ogt NaN-rhs -> false");
    566   EXPECT(run_fcmp(&tc, CMP_OGE_F, 1.0, 1.0) == 1, "oge 1>=1 -> true");
    567 
    568   /* Unordered predicates: true on any NaN, ordered result otherwise. */
    569   EXPECT(run_fcmp(&tc, CMP_UEQ_F, nan, 1.0) == 1, "ueq NaN -> true");
    570   EXPECT(run_fcmp(&tc, CMP_UEQ_F, 1.0, 2.0) == 0, "ueq 1==2 ordered -> false");
    571   EXPECT(run_fcmp(&tc, CMP_UEQ_F, 1.0, 1.0) == 1, "ueq 1==1 ordered -> true");
    572   EXPECT(run_fcmp(&tc, CMP_UNE_F, 1.0, 1.0) == 0, "une 1!=1 ordered -> false");
    573   EXPECT(run_fcmp(&tc, CMP_ULT_F, nan, 1.0) == 1, "ult NaN-lhs -> true");
    574   EXPECT(run_fcmp(&tc, CMP_ULT_F, 1.0, 2.0) == 1, "ult 1<2 -> true");
    575   EXPECT(run_fcmp(&tc, CMP_ULT_F, 2.0, 1.0) == 0, "ult 2<1 -> false");
    576   EXPECT(run_fcmp(&tc, CMP_ULE_F, 1.0, nan) == 1, "ule NaN-rhs -> true");
    577   EXPECT(run_fcmp(&tc, CMP_ULE_F, 1.0, 1.0) == 1, "ule 1<=1 -> true");
    578   EXPECT(run_fcmp(&tc, CMP_ULE_F, 2.0, 1.0) == 0, "ule 2<=1 -> false");
    579   EXPECT(run_fcmp(&tc, CMP_UGT_F, nan, nan) == 1, "ugt both-NaN -> true");
    580   EXPECT(run_fcmp(&tc, CMP_UGT_F, 2.0, 1.0) == 1, "ugt 2>1 -> true");
    581   EXPECT(run_fcmp(&tc, CMP_UGT_F, 1.0, 2.0) == 0, "ugt 1>2 -> false");
    582   EXPECT(run_fcmp(&tc, CMP_UGE_F, nan, 1.0) == 1, "uge NaN-lhs -> true");
    583   EXPECT(run_fcmp(&tc, CMP_UGE_F, 1.0, 1.0) == 1, "uge 1>=1 -> true");
    584   EXPECT(run_fcmp(&tc, CMP_UGE_F, 1.0, 2.0) == 0, "uge 1>=2 -> false");
    585   tc_fini(&tc);
    586 }
    587 
    588 /* fneg flips the sign bit (not 0 - x); fdiv follows IEEE. */
    589 static void spec_fneg_fdiv(void) {
    590   TestCtx tc;
    591   CGLocal p[2];
    592   CGLocal r;
    593   CgIrFunc* f;
    594   u64 args[2];
    595   int64_t out;
    596   KitCgTypeId f64;
    597   KitCgTypeId pt[2];
    598   tc_init(&tc);
    599   f64 = tc.f64;
    600   /* fneg(+0.0) -> -0.0 (sign bit set), proving it is not 0 - x */
    601   f = new_func_p(&tc, f64, &f64, 1, p);
    602   r = add_local_ty(f, &tc, f64);
    603   emit_unop(f, UO_FNEG, r, f64, local_op(p[0], f64));
    604   ret_local(f, r);
    605   args[0] = dbits(0.0);
    606   out = 0;
    607   EXPECT(run_args(&tc, f, args, 1, &out) == KIT_INTERP_DONE &&
    608              (u64)out == 0x8000000000000000ull,
    609          "fneg(+0.0) -> -0.0: got 0x%016llx", (unsigned long long)(u64)out);
    610   /* fdiv 1.0/0.0 -> +inf */
    611   pt[0] = f64;
    612   pt[1] = f64;
    613   f = new_func_p(&tc, f64, pt, 2, p);
    614   r = add_local_ty(f, &tc, f64);
    615   emit_binop(f, BO_FDIV, r, f64, local_op(p[0], f64), local_op(p[1], f64));
    616   ret_local(f, r);
    617   args[0] = dbits(1.0);
    618   args[1] = dbits(0.0);
    619   out = 0;
    620   EXPECT(run_args(&tc, f, args, 2, &out) == KIT_INTERP_DONE &&
    621              (u64)out == 0x7ff0000000000000ull,
    622          "fdiv 1/0 -> +inf: got 0x%016llx", (unsigned long long)(u64)out);
    623   /* fdiv 0.0/0.0 -> NaN */
    624   args[0] = dbits(0.0);
    625   args[1] = dbits(0.0);
    626   out = 0;
    627   (void)run_args(&tc, f, args, 2, &out);
    628   EXPECT(bitsd((u64)out) != bitsd((u64)out), "fdiv 0/0 -> NaN");
    629   tc_fini(&tc);
    630 }
    631 
    632 int main(void) {
    633   kit_unit_init(&g_u);
    634   g_u.ctx.now = -1;
    635   interp_runs_arithmetic();
    636   interp_runs_branch();
    637   spec_int_wrap_shift();
    638   spec_div_edges();
    639   spec_clz_ctz_zero();
    640   spec_ftoi_sat();
    641   spec_fp_cmp_nan();
    642   spec_fneg_fdiv();
    643   if (g_u.fails) {
    644     fprintf(stderr, "interp-smoke: %d/%d failed\n", g_u.fails, g_u.checks);
    645     return 1;
    646   }
    647   printf("interp-smoke: %d checks, 0 failures\n", g_u.checks);
    648   return 0;
    649 }