kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 28f393cc0706394c3300923b6e1bb3bf7192d131
parent 64e882134660367b2546da279ad4d2adbab6158f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 18 May 2026 16:46:57 -0700

Fix typed overflow builtins

Diffstat:
Mlang/c/parse/cg_public_compat.h | 9++++++---
Msrc/api/cg.c | 9++++++---
Msrc/arch/aa64/ops.c | 70+++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Msrc/arch/arch.h | 9++++++---
Msrc/arch/rv64/emit.c | 63+++++++++++++++++++++++++++++++++------------------------------
Msrc/arch/rv64/ops.c | 103++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Msrc/arch/x64/ops.c | 50++++++++++++++++++++++++++++++++++++++------------
Mtest/parse/CORPUS.md | 3++-
Atest/parse/cases/6_5_58_large_integer_immediates.c | 17+++++++++++++++++
Atest/parse/cases/6_5_58_large_integer_immediates.expected | 1+
Mtest/parse/cases/builtin_26_sadd_overflow.c | 72+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
11 files changed, 318 insertions(+), 88 deletions(-)

diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h @@ -94,9 +94,12 @@ typedef enum IntrinKind { INTRIN_TRAP, INTRIN_SETJMP, INTRIN_LONGJMP, - INTRIN_ADD_OVERFLOW, - INTRIN_SUB_OVERFLOW, - INTRIN_MUL_OVERFLOW, + INTRIN_SADD_OVERFLOW, + INTRIN_UADD_OVERFLOW, + INTRIN_SSUB_OVERFLOW, + INTRIN_USUB_OVERFLOW, + INTRIN_SMUL_OVERFLOW, + INTRIN_UMUL_OVERFLOW, } IntrinKind; typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; diff --git a/src/api/cg.c b/src/api/cg.c @@ -4071,14 +4071,17 @@ static IntrinKind api_map_intrinsic(CfreeCg *g, CfreeCgIntrinsic intrin, case CFREE_CG_INTRIN_LONGJMP: return INTRIN_LONGJMP; case CFREE_CG_INTRIN_SADD_OVERFLOW: + return INTRIN_SADD_OVERFLOW; case CFREE_CG_INTRIN_UADD_OVERFLOW: - return INTRIN_ADD_OVERFLOW; + return INTRIN_UADD_OVERFLOW; case CFREE_CG_INTRIN_SSUB_OVERFLOW: + return INTRIN_SSUB_OVERFLOW; case CFREE_CG_INTRIN_USUB_OVERFLOW: - return INTRIN_SUB_OVERFLOW; + return INTRIN_USUB_OVERFLOW; case CFREE_CG_INTRIN_SMUL_OVERFLOW: + return INTRIN_SMUL_OVERFLOW; case CFREE_CG_INTRIN_UMUL_OVERFLOW: - return INTRIN_MUL_OVERFLOW; + return INTRIN_UMUL_OVERFLOW; case CFREE_CG_INTRIN_PREFETCH: return INTRIN_PREFETCH; case CFREE_CG_INTRIN_EXPECT: diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -1918,6 +1918,21 @@ static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) { static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) { return aa64_smaddl(Rd, Rn, Rm, AA64_ZR); } +static inline u32 aa64_umaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) { + return aa64_dp3_pack((AA64DP3){ + .sf = 1, .op31 = 5, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd}); +} +static inline u32 aa64_umull(u32 Rd, u32 Rn, u32 Rm) { + return aa64_umaddl(Rd, Rn, Rm, AA64_ZR); +} +static inline u32 aa64_smulh(u32 Rd, u32 Rn, u32 Rm) { + return 0x9B407C00u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} +static inline u32 aa64_umulh(u32 Rd, u32 Rn, u32 Rm) { + return 0x9BC07C00u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) { return 0xEB200000u | ((Rm & 0x1f) << 16) | (6u << 13) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); @@ -2112,8 +2127,10 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, case INTRIN_TRAP: aa64_emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u)); return; - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { + case INTRIN_SADD_OVERFLOW: + case INTRIN_UADD_OVERFLOW: + case INTRIN_SSUB_OVERFLOW: + case INTRIN_USUB_OVERFLOW: { Operand a_op = args[0], b_op = args[1]; Operand dval = dsts[0], dovf = dsts[1]; u32 sf = type_is_64(dval.type) ? 1u : 0u; @@ -2121,30 +2138,57 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, u32 rb = aa64_force_reg_int(t, b_op, sf, (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0); - u32 word = (kind == INTRIN_ADD_OVERFLOW) + u32 word = (kind == INTRIN_SADD_OVERFLOW || + kind == INTRIN_UADD_OVERFLOW) ? aa64_adds_reg(sf, reg_num(dval), ra, rb) : aa64_subs_reg(sf, reg_num(dval), ra, rb); + u32 cond = (kind == INTRIN_UADD_OVERFLOW) ? 0x2u /*CS*/ + : (kind == INTRIN_USUB_OVERFLOW) + ? 0x3u /*CC*/ + : 0x6u /*VS*/; aa64_emit32(mc, word); - aa64_emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/)); + aa64_emit32(mc, aa64_cset(0, reg_num(dovf), cond)); return; } - case INTRIN_MUL_OVERFLOW: { + case INTRIN_SMUL_OVERFLOW: { Operand a_op = args[0], b_op = args[1]; Operand dval = dsts[0], dovf = dsts[1]; u32 sf = type_is_64(dval.type) ? 1u : 0u; + u32 ra = aa64_force_reg_int(t, a_op, sf, AA_TMP0); + u32 rb = + aa64_force_reg_int(t, b_op, sf, + (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0); if (sf) { - compiler_panic( - t->c, a->loc, - "aarch64 intrinsic: mul_overflow on i64 not yet supported"); + aa64_emit32(mc, aa64_mul(1, reg_num(dval), ra, rb)); + aa64_emit32(mc, aa64_smulh(reg_num(dovf), ra, rb)); + aa64_emit32(mc, aa64_sbfm(1, AA_TMP2, reg_num(dval), 63, 63)); + aa64_emit32(mc, aa64_subs_reg(1, 31u, reg_num(dovf), AA_TMP2)); + } else { + aa64_emit32(mc, aa64_smull(AA_TMP2, ra, rb)); + aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, AA_TMP2, AA_TMP2)); + aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2)); } - u32 ra = aa64_force_reg_int(t, a_op, 0, AA_TMP0); + aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/)); + return; + } + case INTRIN_UMUL_OVERFLOW: { + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + u32 sf = type_is_64(dval.type) ? 1u : 0u; + u32 ra = aa64_force_reg_int(t, a_op, sf, AA_TMP0); u32 rb = - aa64_force_reg_int(t, b_op, 0, + aa64_force_reg_int(t, b_op, sf, (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0); - aa64_emit32(mc, aa64_smull(AA_TMP2, ra, rb)); - aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, AA_TMP2, AA_TMP2)); + if (sf) { + aa64_emit32(mc, aa64_mul(1, reg_num(dval), ra, rb)); + aa64_emit32(mc, aa64_umulh(reg_num(dovf), ra, rb)); + } else { + aa64_emit32(mc, aa64_umull(AA_TMP2, ra, rb)); + aa64_emit32(mc, aa64_ubfm(1, reg_num(dovf), AA_TMP2, 32, 63)); + aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2)); + } + aa64_emit32(mc, aa64_subs_imm(1, 31u, reg_num(dovf), 0)); aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/)); - aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2)); return; } default: diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -146,9 +146,12 @@ typedef enum IntrinKind { INTRIN_LONGJMP, /* checked arith — multi-result (value, overflow_flag) */ - INTRIN_ADD_OVERFLOW, - INTRIN_SUB_OVERFLOW, - INTRIN_MUL_OVERFLOW, + INTRIN_SADD_OVERFLOW, + INTRIN_UADD_OVERFLOW, + INTRIN_SSUB_OVERFLOW, + INTRIN_USUB_OVERFLOW, + INTRIN_SMUL_OVERFLOW, + INTRIN_UMUL_OVERFLOW, } IntrinKind; typedef enum OpKind { diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c @@ -65,13 +65,43 @@ void emit_li_32(MCEmitter *mc, u32 rd, i32 imm) { return; } /* hi20 + lo12, with 0x800 bias to compensate ADDIW's sign-ext. */ - i32 hi = (i32)((u32)(imm + 0x800) >> 12); - i32 lo = (i32)((i32)imm - (i32)(hi << 12)); + i64 hi64 = ((i64)imm + 0x800) >> 12; + i64 lo64 = (i64)imm - (hi64 << 12); + i32 hi = (i32)hi64; + i32 lo = (i32)lo64; rv64_emit32(mc, rv_lui(rd, (u32)hi & 0xfffffu)); if (lo) rv64_emit32(mc, rv_addiw(rd, rd, lo)); } +static i32 sign_extend_12(u32 v) { + v &= 0xfffu; + return (v & 0x800u) ? (i32)v - 4096 : (i32)v; +} + +static int fits_signed32_bits(u64 v) { + return v <= 0x7fffffffull || v >= 0xffffffff80000000ull; +} + +static i32 i32_from_bits(u32 v) { + if (v <= 0x7fffffffu) return (i32)v; + if (v == 0x80000000u) return -2147483647 - 1; + return -(i32)(~v + 1u); +} + +static void emit_li_64(MCEmitter *mc, u32 rd, u64 imm) { + if (fits_signed32_bits(imm)) { + emit_li_32(mc, rd, i32_from_bits((u32)imm)); + return; + } + i32 lo = sign_extend_12((u32)imm); + u64 hi = (imm - (u64)(i64)lo) >> 12; + emit_li_64(mc, rd, hi); + rv64_emit32(mc, rv_slli(rd, rd, 12)); + if (lo) + rv64_emit32(mc, rv_addi(rd, rd, lo)); +} + void rv64_emit_load_imm(MCEmitter *mc, u32 sf, u32 rd, i64 imm) { if (!sf) { /* 32-bit destination: low 32 bits, sign-extended. */ @@ -82,34 +112,7 @@ void rv64_emit_load_imm(MCEmitter *mc, u32 sf, u32 rd, i64 imm) { emit_li_32(mc, rd, (i32)imm); return; } - /* General 64-bit load: split into high and low 32 bits, place high - * into rd << 32, then OR in low via a temp register (t0=x5). The cg - * corpus has no IMM operands that collide with t0, so this is safe. */ - i64 lo32 = (i64)(i32)(imm & 0xffffffffu); /* sign-ext low half */ - i64 hi64 = (imm - lo32) >> 32; /* what remains in hi */ - if (hi64 < (i64)(i32)0x80000000 || hi64 > (i64)(i32)0x7fffffff) { - /* Out of i32 range — fallback: use a smaller chunked approach. - * For the cg corpus this isn't hit; emit a conservative sequence: - * li rd, hi32; slli 32; li t0, lo32; or rd, rd, t0. */ - i32 hi32 = (i32)(imm >> 32); - i32 lo32_i = (i32)imm; - emit_li_32(mc, rd, hi32); - rv64_emit32(mc, rv_slli(rd, rd, 32)); - emit_li_32(mc, RV_T0, lo32_i); - /* zero-extend t0 to clear sign-extension before OR */ - rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32)); - rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32)); - rv64_emit32(mc, rv_or(rd, rd, RV_T0)); - return; - } - emit_li_32(mc, rd, (i32)hi64); - rv64_emit32(mc, rv_slli(rd, rd, 32)); - if (lo32 != 0) { - emit_li_32(mc, RV_T0, (i32)lo32); - rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32)); - rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32)); - rv64_emit32(mc, rv_or(rd, rd, RV_T0)); - } + emit_li_64(mc, rd, (u64)imm); } /* sp += imm. imm can be any signed value the caller passes — we pick diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -1961,8 +1961,8 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, rv64_emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u)); return; } - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { + case INTRIN_SADD_OVERFLOW: + case INTRIN_SSUB_OVERFLOW: { /* dsts: [val, ovf]. Signed overflow check. * For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1) * For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */ @@ -1974,19 +1974,19 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, u32 rd = reg_num(dval); u32 rovf = reg_num(dovf); /* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */ - if (kind == INTRIN_ADD_OVERFLOW) { + if (kind == INTRIN_SADD_OVERFLOW) { rv64_emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb)); } else { rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb)); } /* t3 = a XOR t2 */ rv64_emit32(mc, rv_xor(RV_T3, ra, RV_T2)); - if (kind == INTRIN_ADD_OVERFLOW) { - /* t4 = b XOR t2 */ + if (kind == INTRIN_SADD_OVERFLOW) { + /* ovf = b XOR t2 */ rv64_emit32(mc, rv_xor(rovf, rb, RV_T2)); rv64_emit32(mc, rv_and(rovf, rovf, RV_T3)); } else { - /* t4 = a XOR b */ + /* ovf = a XOR b */ rv64_emit32(mc, rv_xor(rovf, ra, rb)); rv64_emit32(mc, rv_and(rovf, rovf, RV_T3)); } @@ -1998,31 +1998,90 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, rv64_emit32(mc, rv_addi(rd, RV_T2, 0)); return; } - case INTRIN_MUL_OVERFLOW: { - /* SMULL: full 64-bit signed product of two i32s, then compare - * with sign-extend of low 32. For i64 inputs we panic for now. */ + case INTRIN_UADD_OVERFLOW: + case INTRIN_USUB_OVERFLOW: { Operand a_op = args[0], b_op = args[1]; Operand dval = dsts[0], dovf = dsts[1]; int is64 = type_is_64(dval.type); + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + u32 rd = reg_num(dval); + u32 rovf = reg_num(dovf); + if (!is64) { + rv64_emit32(mc, rv_slli(RV_T2, ra, 32)); + rv64_emit32(mc, rv_srli(RV_T2, RV_T2, 32)); + rv64_emit32(mc, rv_slli(RV_T3, rb, 32)); + rv64_emit32(mc, rv_srli(RV_T3, RV_T3, 32)); + ra = RV_T2; + rb = RV_T3; + } + if (kind == INTRIN_UADD_OVERFLOW) { + if (is64) { + rv64_emit32(mc, rv_add(RV_T2, ra, rb)); + rv64_emit32(mc, rv_sltu(rovf, RV_T2, ra)); + } else { + rv64_emit32(mc, rv_add(RV_T2, ra, rb)); + rv64_emit32(mc, rv_srli(rovf, RV_T2, 32)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + rv64_emit32(mc, rv_addiw(RV_T2, RV_T2, 0)); + } + } else { + rv64_emit32(mc, rv_sltu(rovf, ra, rb)); + rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb)); + } + rv64_emit32(mc, rv_addi(rd, RV_T2, 0)); + return; + } + case INTRIN_SMUL_OVERFLOW: { + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int is64 = type_is_64(dval.type); + u32 ra = rv64_force_reg_int(t, a_op, RV_T0); + u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); + u32 rd = reg_num(dval); + u32 rovf = reg_num(dovf); if (is64) { - compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI"); + rv64_emit32(mc, rv_mul(RV_T2, ra, rb)); + rv64_emit32(mc, rv_mulh(RV_T3, ra, rb)); + rv64_emit32(mc, rv_srai(rovf, RV_T2, 63)); + rv64_emit32(mc, rv_xor(rovf, RV_T3, rovf)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + rv64_emit32(mc, rv_addi(rd, RV_T2, 0)); + } else { + /* Full 64-bit signed product of two i32s, then compare with + * sign-extension of the low 32 bits. */ + rv64_emit32(mc, rv_addiw(RV_T2, ra, 0)); + rv64_emit32(mc, rv_addiw(RV_T3, rb, 0)); + rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3)); + rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0)); + rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + rv64_emit32(mc, rv_addiw(rd, RV_T2, 0)); } + return; + } + case INTRIN_UMUL_OVERFLOW: { + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int is64 = type_is_64(dval.type); u32 ra = rv64_force_reg_int(t, a_op, RV_T0); u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); u32 rd = reg_num(dval); u32 rovf = reg_num(dovf); - /* Sign-extend inputs from 32 to 64. */ - rv64_emit32(mc, rv_addiw(RV_T2, ra, 0)); - rv64_emit32(mc, rv_addiw(RV_T3, rb, 0)); - /* Full 64-bit product */ - rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3)); - /* sign-ext of low 32 of product */ - rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0)); - /* ovf = (T2 != T3) */ - rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3)); - rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); - /* dval = low 32, sign-extended */ - rv64_emit32(mc, rv_addiw(rd, RV_T2, 0)); + if (is64) { + rv64_emit32(mc, rv_mulhu(rovf, ra, rb)); + rv64_emit32(mc, rv_mul(rd, ra, rb)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + } else { + rv64_emit32(mc, rv_slli(RV_T2, ra, 32)); + rv64_emit32(mc, rv_srli(RV_T2, RV_T2, 32)); + rv64_emit32(mc, rv_slli(RV_T3, rb, 32)); + rv64_emit32(mc, rv_srli(RV_T3, RV_T3, 32)); + rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3)); + rv64_emit32(mc, rv_srli(rovf, RV_T2, 32)); + rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf)); + rv64_emit32(mc, rv_addiw(rd, RV_T2, 0)); + } return; } case INTRIN_MEMCPY: diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -2071,9 +2071,11 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, case INTRIN_TRAP: emit_ud2(mc); return; - case INTRIN_ADD_OVERFLOW: - case INTRIN_SUB_OVERFLOW: { - /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO captures. */ + case INTRIN_SADD_OVERFLOW: + case INTRIN_UADD_OVERFLOW: + case INTRIN_SSUB_OVERFLOW: + case INTRIN_USUB_OVERFLOW: { + /* dsts: [val, ovf]. Signed uses OF; unsigned uses CF. */ Operand a_op = args[0], b_op = args[1]; Operand dval = dsts[0], dovf = dsts[1]; int w = type_is_64(dval.type) ? 1 : 0; @@ -2081,24 +2083,27 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); if (rd != ra) emit_mov_rr(mc, w, rd, ra); u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); - u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29; + u8 op = (kind == INTRIN_SADD_OVERFLOW || + kind == INTRIN_UADD_OVERFLOW) + ? 0x01 + : 0x29; + u32 cc = (kind == INTRIN_UADD_OVERFLOW || + kind == INTRIN_USUB_OVERFLOW) + ? X64_CC_B + : X64_CC_O; emit_alu_rr(mc, w, op, rd, rb); u32 dovf_r = dovf.v.reg & 0xFu; - emit_setcc(mc, X64_CC_O, dovf_r); + emit_setcc(mc, cc, dovf_r); emit_movzx_r32_r8(mc, dovf_r, dovf_r); return; } - case INTRIN_MUL_OVERFLOW: { + case INTRIN_SMUL_OVERFLOW: { /* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed - * two-operand form: low 32 bits of product go to dst, OF set if - * the result didn't fit. i64 not yet supported. */ + * two-operand form: low bits of product go to dst, OF set if + * the result didn't fit. */ Operand a_op = args[0], b_op = args[1]; Operand dval = dsts[0], dovf = dsts[1]; int w = type_is_64(dval.type) ? 1 : 0; - if (w) { - compiler_panic(t->c, a->loc, - "x64 intrinsic: mul_overflow on i64 not yet supported"); - } u32 rd = dval.v.reg & 0xFu; u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); if (rd != ra) emit_mov_rr(mc, w, rd, ra); @@ -2109,6 +2114,27 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, emit_movzx_r32_r8(mc, dovf_r, dovf_r); return; } + case INTRIN_UMUL_OVERFLOW: { + /* MUL writes the double-width product to RDX:RAX and sets CF/OF + * when the high half is non-zero. */ + Operand a_op = args[0], b_op = args[1]; + Operand dval = dsts[0], dovf = dsts[1]; + int w = type_is_64(dval.type) ? 1 : 0; + u32 rd = dval.v.reg & 0xFu; + u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); + if (rb == X64_RAX || rb == X64_RDX) { + emit_mov_rr(mc, w, X64_R11, rb); + rb = X64_R11; + } + u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); + if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra); + emit_f7_rm(mc, w, 4u, rb); + if (rd != X64_RAX) emit_mov_rr(mc, w, rd, X64_RAX); + u32 dovf_r = dovf.v.reg & 0xFu; + emit_setcc(mc, X64_CC_O, dovf_r); + emit_movzx_r32_r8(mc, dovf_r, dovf_r); + return; + } default: compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported", (int)kind); diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -184,6 +184,7 @@ here for completeness once they're real cases. | `6_5_55_generic_default_branch` | ★ | `_Generic((double)x, int: 0, default: 42)` — falls through to `default` | 42 | | `6_5_56_compound_literal_struct` | ★ | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 | | `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 | +| `6_5_58_large_integer_immediates` | ★ | 64-bit boundary integer literals including `INT64_MAX`, top-bit unsigned, and all-ones materialize correctly | 42 | ## §6.5.2.2 Aggregate function arguments @@ -501,7 +502,7 @@ ordinary calls. | `builtin_23_atomic_long_literal_convert` | ★ | store/RMW integer literals converted to unsigned long atomic object type | 42 | | `builtin_24_atomic_lock_free` | ★ | target-aware lock-free folding through `if`, `&&`, and `||`; dead 16-byte atomic arms suppress codegen | 42 | | `builtin_25_atomic_fetch_nand` | ★ | `__atomic_fetch_nand` lowers to atomic NAND RMW | 42 | -| `builtin_26_sadd_overflow` | RED | `__builtin_sadd_overflow` stores result and returns overflow flag; currently undeclared | 42 | +| `builtin_26_sadd_overflow` | ★ | signed/unsigned typed overflow builtins store result and return overflow flag | 42 | | `builtin_99_syscall0` | (deferred) | `__cfree_syscall0` requires linking against the syscall stub; covered in `test/libc` | — | ## Variadic coverage diff --git a/test/parse/cases/6_5_58_large_integer_immediates.c b/test/parse/cases/6_5_58_large_integer_immediates.c @@ -0,0 +1,17 @@ +static unsigned long long add_ull(unsigned long long a, + unsigned long long b) { + return a + b; +} + +int test_main(void) { + unsigned long long high = 9223372036854775808ULL; + unsigned long long all = 18446744073709551615ULL; + if (high + high != 0ULL) return 1; + if (add_ull(9223372036854775808ULL, 9223372036854775807ULL) != all) + return 2; + if ((long long)9223372036854775808ULL != -9223372036854775807LL - 1LL) + return 3; + if (9223372036854775807LL + (-9223372036854775807LL) != 0LL) + return 4; + return 42; +} diff --git a/test/parse/cases/6_5_58_large_integer_immediates.expected b/test/parse/cases/6_5_58_large_integer_immediates.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/builtin_26_sadd_overflow.c b/test/parse/cases/builtin_26_sadd_overflow.c @@ -1,7 +1,77 @@ int test_main(void) { int r = 0; + unsigned ur = 0; + long lr = 0; + unsigned long ulr = 0; + long long llr = 0; + unsigned long long ullr = 0; + long l_hi = 4611686018427387904L; + unsigned long ul_hi = 9223372036854775808UL; + long long ll_hi = 4611686018427387904LL; + unsigned long long ull_hi = 9223372036854775808ULL; + long l_max = 9223372036854775807L; + long l_min = (long)9223372036854775808UL; + unsigned long ul_max = 18446744073709551615UL; + long long ll_max = 9223372036854775807LL; + long long ll_min = (long long)9223372036854775808ULL; + unsigned long long ull_max = 18446744073709551615ULL; + if ((unsigned long)l_max != ul_hi - 1UL) return 78; + if ((unsigned long)l_min != ul_hi) return 79; + if ((unsigned long long)ll_max != ull_hi - 1ULL) return 80; + if ((unsigned long long)ll_min != ull_hi) return 81; _Bool ov = __builtin_sadd_overflow(40, 2, &r); if (ov || r != 42) return 1; ov = __builtin_sadd_overflow(2147483647, 1, &r); - return ov ? 42 : 2; + if (!ov || r != -2147483647 - 1) return 2; + ov = __builtin_ssub_overflow(-2147483647 - 1, 1, &r); + if (!ov || r != 2147483647) return 3; + ov = __builtin_smul_overflow(1073741824, 2, &r); + if (!ov || r != -2147483647 - 1) return 4; + + ov = __builtin_uadd_overflow(40u, 2u, &ur); + if (ov || ur != 42u) return 5; + ov = __builtin_uadd_overflow(4294967295u, 1u, &ur); + if (!ov || ur != 0u) return 6; + ov = __builtin_usub_overflow(0u, 1u, &ur); + if (!ov || ur != 4294967295u) return 7; + ov = __builtin_umul_overflow(2147483648u, 2u, &ur); + if (!ov || ur != 0u) return 8; + + ov = __builtin_saddl_overflow(40L, 2L, &lr); + if (ov || lr != 42L) return 15; + ov = __builtin_saddl_overflow(l_max, 1L, &lr); + if (!ov) return 16; + if (lr != l_min) return 160; + ov = __builtin_ssubl_overflow(l_min, 1L, &lr); + if (!ov || lr != l_max) return 17; + ov = __builtin_smull_overflow(l_hi, 2L, &lr); + if (!ov || lr != l_min) return 18; + + ov = __builtin_uaddl_overflow(40UL, 2UL, &ulr); + if (ov || ulr != 42UL) return 19; + ov = __builtin_uaddl_overflow(ul_max, 1UL, &ulr); + if (!ov || ulr != 0UL) return 20; + ov = __builtin_usubl_overflow(0UL, 1UL, &ulr); + if (!ov || ulr != ul_max) return 21; + ov = __builtin_umull_overflow(ul_hi, 2UL, &ulr); + if (!ov || ulr != 0UL) return 22; + + ov = __builtin_saddll_overflow(ll_max, 1LL, &llr); + if (!ov) return 9; + if (llr != ll_min) return 90; + ov = __builtin_ssubll_overflow(ll_min, 1LL, &llr); + if (!ov || llr != ll_max) return 10; + ov = __builtin_smulll_overflow(ll_hi, 2LL, &llr); + if (!ov) return 11; + if (llr != ll_min) return 110; + ov = __builtin_uaddll_overflow(ull_max, 1ULL, &ullr); + if (!ov || ullr != 0ULL) return 12; + ov = __builtin_usubll_overflow(0ULL, 1ULL, &ullr); + if (!ov || ullr != ull_max) return 13; + ov = __builtin_umulll_overflow(ull_hi, 2ULL, &ullr); + if (!ov || ullr != 0ULL) return 14; + ov = __builtin_uaddll_overflow(9223372036854775808ULL, + 9223372036854775808ULL, &ullr); + if (!ov || ullr != 0ULL) return 23; + return 42; }