commit 28f393cc0706394c3300923b6e1bb3bf7192d131
parent 64e882134660367b2546da279ad4d2adbab6158f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 18 May 2026 16:46:57 -0700
Fix typed overflow builtins
Diffstat:
11 files changed, 318 insertions(+), 88 deletions(-)
diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h
@@ -94,9 +94,12 @@ typedef enum IntrinKind {
INTRIN_TRAP,
INTRIN_SETJMP,
INTRIN_LONGJMP,
- INTRIN_ADD_OVERFLOW,
- INTRIN_SUB_OVERFLOW,
- INTRIN_MUL_OVERFLOW,
+ INTRIN_SADD_OVERFLOW,
+ INTRIN_UADD_OVERFLOW,
+ INTRIN_SSUB_OVERFLOW,
+ INTRIN_USUB_OVERFLOW,
+ INTRIN_SMUL_OVERFLOW,
+ INTRIN_UMUL_OVERFLOW,
} IntrinKind;
typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir;
diff --git a/src/api/cg.c b/src/api/cg.c
@@ -4071,14 +4071,17 @@ static IntrinKind api_map_intrinsic(CfreeCg *g, CfreeCgIntrinsic intrin,
case CFREE_CG_INTRIN_LONGJMP:
return INTRIN_LONGJMP;
case CFREE_CG_INTRIN_SADD_OVERFLOW:
+ return INTRIN_SADD_OVERFLOW;
case CFREE_CG_INTRIN_UADD_OVERFLOW:
- return INTRIN_ADD_OVERFLOW;
+ return INTRIN_UADD_OVERFLOW;
case CFREE_CG_INTRIN_SSUB_OVERFLOW:
+ return INTRIN_SSUB_OVERFLOW;
case CFREE_CG_INTRIN_USUB_OVERFLOW:
- return INTRIN_SUB_OVERFLOW;
+ return INTRIN_USUB_OVERFLOW;
case CFREE_CG_INTRIN_SMUL_OVERFLOW:
+ return INTRIN_SMUL_OVERFLOW;
case CFREE_CG_INTRIN_UMUL_OVERFLOW:
- return INTRIN_MUL_OVERFLOW;
+ return INTRIN_UMUL_OVERFLOW;
case CFREE_CG_INTRIN_PREFETCH:
return INTRIN_PREFETCH;
case CFREE_CG_INTRIN_EXPECT:
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -1918,6 +1918,21 @@ static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) {
static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) {
return aa64_smaddl(Rd, Rn, Rm, AA64_ZR);
}
+static inline u32 aa64_umaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) {
+ return aa64_dp3_pack((AA64DP3){
+ .sf = 1, .op31 = 5, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd});
+}
+static inline u32 aa64_umull(u32 Rd, u32 Rn, u32 Rm) {
+ return aa64_umaddl(Rd, Rn, Rm, AA64_ZR);
+}
+static inline u32 aa64_smulh(u32 Rd, u32 Rn, u32 Rm) {
+ return 0x9B407C00u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) |
+ (Rd & 0x1f);
+}
+static inline u32 aa64_umulh(u32 Rd, u32 Rn, u32 Rm) {
+ return 0x9BC07C00u | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) |
+ (Rd & 0x1f);
+}
static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) {
return 0xEB200000u | ((Rm & 0x1f) << 16) | (6u << 13) | ((Rn & 0x1f) << 5) |
(Rd & 0x1f);
@@ -2112,8 +2127,10 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
case INTRIN_TRAP:
aa64_emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u));
return;
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
+ case INTRIN_SADD_OVERFLOW:
+ case INTRIN_UADD_OVERFLOW:
+ case INTRIN_SSUB_OVERFLOW:
+ case INTRIN_USUB_OVERFLOW: {
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
u32 sf = type_is_64(dval.type) ? 1u : 0u;
@@ -2121,30 +2138,57 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
u32 rb =
aa64_force_reg_int(t, b_op, sf,
(ra == AA_TMP0) ? AA_TMP1 : AA_TMP0);
- u32 word = (kind == INTRIN_ADD_OVERFLOW)
+ u32 word = (kind == INTRIN_SADD_OVERFLOW ||
+ kind == INTRIN_UADD_OVERFLOW)
? aa64_adds_reg(sf, reg_num(dval), ra, rb)
: aa64_subs_reg(sf, reg_num(dval), ra, rb);
+ u32 cond = (kind == INTRIN_UADD_OVERFLOW) ? 0x2u /*CS*/
+ : (kind == INTRIN_USUB_OVERFLOW)
+ ? 0x3u /*CC*/
+ : 0x6u /*VS*/;
aa64_emit32(mc, word);
- aa64_emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/));
+ aa64_emit32(mc, aa64_cset(0, reg_num(dovf), cond));
return;
}
- case INTRIN_MUL_OVERFLOW: {
+ case INTRIN_SMUL_OVERFLOW: {
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
u32 sf = type_is_64(dval.type) ? 1u : 0u;
+ u32 ra = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
+ u32 rb =
+ aa64_force_reg_int(t, b_op, sf,
+ (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0);
if (sf) {
- compiler_panic(
- t->c, a->loc,
- "aarch64 intrinsic: mul_overflow on i64 not yet supported");
+ aa64_emit32(mc, aa64_mul(1, reg_num(dval), ra, rb));
+ aa64_emit32(mc, aa64_smulh(reg_num(dovf), ra, rb));
+ aa64_emit32(mc, aa64_sbfm(1, AA_TMP2, reg_num(dval), 63, 63));
+ aa64_emit32(mc, aa64_subs_reg(1, 31u, reg_num(dovf), AA_TMP2));
+ } else {
+ aa64_emit32(mc, aa64_smull(AA_TMP2, ra, rb));
+ aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, AA_TMP2, AA_TMP2));
+ aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2));
}
- u32 ra = aa64_force_reg_int(t, a_op, 0, AA_TMP0);
+ aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/));
+ return;
+ }
+ case INTRIN_UMUL_OVERFLOW: {
+ Operand a_op = args[0], b_op = args[1];
+ Operand dval = dsts[0], dovf = dsts[1];
+ u32 sf = type_is_64(dval.type) ? 1u : 0u;
+ u32 ra = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
u32 rb =
- aa64_force_reg_int(t, b_op, 0,
+ aa64_force_reg_int(t, b_op, sf,
(ra == AA_TMP0) ? AA_TMP1 : AA_TMP0);
- aa64_emit32(mc, aa64_smull(AA_TMP2, ra, rb));
- aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, AA_TMP2, AA_TMP2));
+ if (sf) {
+ aa64_emit32(mc, aa64_mul(1, reg_num(dval), ra, rb));
+ aa64_emit32(mc, aa64_umulh(reg_num(dovf), ra, rb));
+ } else {
+ aa64_emit32(mc, aa64_umull(AA_TMP2, ra, rb));
+ aa64_emit32(mc, aa64_ubfm(1, reg_num(dovf), AA_TMP2, 32, 63));
+ aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2));
+ }
+ aa64_emit32(mc, aa64_subs_imm(1, 31u, reg_num(dovf), 0));
aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/));
- aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2));
return;
}
default:
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -146,9 +146,12 @@ typedef enum IntrinKind {
INTRIN_LONGJMP,
/* checked arith — multi-result (value, overflow_flag) */
- INTRIN_ADD_OVERFLOW,
- INTRIN_SUB_OVERFLOW,
- INTRIN_MUL_OVERFLOW,
+ INTRIN_SADD_OVERFLOW,
+ INTRIN_UADD_OVERFLOW,
+ INTRIN_SSUB_OVERFLOW,
+ INTRIN_USUB_OVERFLOW,
+ INTRIN_SMUL_OVERFLOW,
+ INTRIN_UMUL_OVERFLOW,
} IntrinKind;
typedef enum OpKind {
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -65,13 +65,43 @@ void emit_li_32(MCEmitter *mc, u32 rd, i32 imm) {
return;
}
/* hi20 + lo12, with 0x800 bias to compensate ADDIW's sign-ext. */
- i32 hi = (i32)((u32)(imm + 0x800) >> 12);
- i32 lo = (i32)((i32)imm - (i32)(hi << 12));
+ i64 hi64 = ((i64)imm + 0x800) >> 12;
+ i64 lo64 = (i64)imm - (hi64 << 12);
+ i32 hi = (i32)hi64;
+ i32 lo = (i32)lo64;
rv64_emit32(mc, rv_lui(rd, (u32)hi & 0xfffffu));
if (lo)
rv64_emit32(mc, rv_addiw(rd, rd, lo));
}
+static i32 sign_extend_12(u32 v) {
+ v &= 0xfffu;
+ return (v & 0x800u) ? (i32)v - 4096 : (i32)v;
+}
+
+static int fits_signed32_bits(u64 v) {
+ return v <= 0x7fffffffull || v >= 0xffffffff80000000ull;
+}
+
+static i32 i32_from_bits(u32 v) {
+ if (v <= 0x7fffffffu) return (i32)v;
+ if (v == 0x80000000u) return -2147483647 - 1;
+ return -(i32)(~v + 1u);
+}
+
+static void emit_li_64(MCEmitter *mc, u32 rd, u64 imm) {
+ if (fits_signed32_bits(imm)) {
+ emit_li_32(mc, rd, i32_from_bits((u32)imm));
+ return;
+ }
+ i32 lo = sign_extend_12((u32)imm);
+ u64 hi = (imm - (u64)(i64)lo) >> 12;
+ emit_li_64(mc, rd, hi);
+ rv64_emit32(mc, rv_slli(rd, rd, 12));
+ if (lo)
+ rv64_emit32(mc, rv_addi(rd, rd, lo));
+}
+
void rv64_emit_load_imm(MCEmitter *mc, u32 sf, u32 rd, i64 imm) {
if (!sf) {
/* 32-bit destination: low 32 bits, sign-extended. */
@@ -82,34 +112,7 @@ void rv64_emit_load_imm(MCEmitter *mc, u32 sf, u32 rd, i64 imm) {
emit_li_32(mc, rd, (i32)imm);
return;
}
- /* General 64-bit load: split into high and low 32 bits, place high
- * into rd << 32, then OR in low via a temp register (t0=x5). The cg
- * corpus has no IMM operands that collide with t0, so this is safe. */
- i64 lo32 = (i64)(i32)(imm & 0xffffffffu); /* sign-ext low half */
- i64 hi64 = (imm - lo32) >> 32; /* what remains in hi */
- if (hi64 < (i64)(i32)0x80000000 || hi64 > (i64)(i32)0x7fffffff) {
- /* Out of i32 range — fallback: use a smaller chunked approach.
- * For the cg corpus this isn't hit; emit a conservative sequence:
- * li rd, hi32; slli 32; li t0, lo32; or rd, rd, t0. */
- i32 hi32 = (i32)(imm >> 32);
- i32 lo32_i = (i32)imm;
- emit_li_32(mc, rd, hi32);
- rv64_emit32(mc, rv_slli(rd, rd, 32));
- emit_li_32(mc, RV_T0, lo32_i);
- /* zero-extend t0 to clear sign-extension before OR */
- rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32));
- rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32));
- rv64_emit32(mc, rv_or(rd, rd, RV_T0));
- return;
- }
- emit_li_32(mc, rd, (i32)hi64);
- rv64_emit32(mc, rv_slli(rd, rd, 32));
- if (lo32 != 0) {
- emit_li_32(mc, RV_T0, (i32)lo32);
- rv64_emit32(mc, rv_slli(RV_T0, RV_T0, 32));
- rv64_emit32(mc, rv_srli(RV_T0, RV_T0, 32));
- rv64_emit32(mc, rv_or(rd, rd, RV_T0));
- }
+ emit_li_64(mc, rd, (u64)imm);
}
/* sp += imm. imm can be any signed value the caller passes — we pick
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -1961,8 +1961,8 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
rv64_emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u));
return;
}
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
+ case INTRIN_SADD_OVERFLOW:
+ case INTRIN_SSUB_OVERFLOW: {
/* dsts: [val, ovf]. Signed overflow check.
* For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1)
* For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */
@@ -1974,19 +1974,19 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
u32 rd = reg_num(dval);
u32 rovf = reg_num(dovf);
/* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */
- if (kind == INTRIN_ADD_OVERFLOW) {
+ if (kind == INTRIN_SADD_OVERFLOW) {
rv64_emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb));
} else {
rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb));
}
/* t3 = a XOR t2 */
rv64_emit32(mc, rv_xor(RV_T3, ra, RV_T2));
- if (kind == INTRIN_ADD_OVERFLOW) {
- /* t4 = b XOR t2 */
+ if (kind == INTRIN_SADD_OVERFLOW) {
+ /* ovf = b XOR t2 */
rv64_emit32(mc, rv_xor(rovf, rb, RV_T2));
rv64_emit32(mc, rv_and(rovf, rovf, RV_T3));
} else {
- /* t4 = a XOR b */
+ /* ovf = a XOR b */
rv64_emit32(mc, rv_xor(rovf, ra, rb));
rv64_emit32(mc, rv_and(rovf, rovf, RV_T3));
}
@@ -1998,31 +1998,90 @@ static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
rv64_emit32(mc, rv_addi(rd, RV_T2, 0));
return;
}
- case INTRIN_MUL_OVERFLOW: {
- /* SMULL: full 64-bit signed product of two i32s, then compare
- * with sign-extend of low 32. For i64 inputs we panic for now. */
+ case INTRIN_UADD_OVERFLOW:
+ case INTRIN_USUB_OVERFLOW: {
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
int is64 = type_is_64(dval.type);
+ u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+ u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+ u32 rd = reg_num(dval);
+ u32 rovf = reg_num(dovf);
+ if (!is64) {
+ rv64_emit32(mc, rv_slli(RV_T2, ra, 32));
+ rv64_emit32(mc, rv_srli(RV_T2, RV_T2, 32));
+ rv64_emit32(mc, rv_slli(RV_T3, rb, 32));
+ rv64_emit32(mc, rv_srli(RV_T3, RV_T3, 32));
+ ra = RV_T2;
+ rb = RV_T3;
+ }
+ if (kind == INTRIN_UADD_OVERFLOW) {
+ if (is64) {
+ rv64_emit32(mc, rv_add(RV_T2, ra, rb));
+ rv64_emit32(mc, rv_sltu(rovf, RV_T2, ra));
+ } else {
+ rv64_emit32(mc, rv_add(RV_T2, ra, rb));
+ rv64_emit32(mc, rv_srli(rovf, RV_T2, 32));
+ rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+ rv64_emit32(mc, rv_addiw(RV_T2, RV_T2, 0));
+ }
+ } else {
+ rv64_emit32(mc, rv_sltu(rovf, ra, rb));
+ rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb));
+ }
+ rv64_emit32(mc, rv_addi(rd, RV_T2, 0));
+ return;
+ }
+ case INTRIN_SMUL_OVERFLOW: {
+ Operand a_op = args[0], b_op = args[1];
+ Operand dval = dsts[0], dovf = dsts[1];
+ int is64 = type_is_64(dval.type);
+ u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+ u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+ u32 rd = reg_num(dval);
+ u32 rovf = reg_num(dovf);
if (is64) {
- compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI");
+ rv64_emit32(mc, rv_mul(RV_T2, ra, rb));
+ rv64_emit32(mc, rv_mulh(RV_T3, ra, rb));
+ rv64_emit32(mc, rv_srai(rovf, RV_T2, 63));
+ rv64_emit32(mc, rv_xor(rovf, RV_T3, rovf));
+ rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+ rv64_emit32(mc, rv_addi(rd, RV_T2, 0));
+ } else {
+ /* Full 64-bit signed product of two i32s, then compare with
+ * sign-extension of the low 32 bits. */
+ rv64_emit32(mc, rv_addiw(RV_T2, ra, 0));
+ rv64_emit32(mc, rv_addiw(RV_T3, rb, 0));
+ rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3));
+ rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0));
+ rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3));
+ rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+ rv64_emit32(mc, rv_addiw(rd, RV_T2, 0));
}
+ return;
+ }
+ case INTRIN_UMUL_OVERFLOW: {
+ Operand a_op = args[0], b_op = args[1];
+ Operand dval = dsts[0], dovf = dsts[1];
+ int is64 = type_is_64(dval.type);
u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
u32 rd = reg_num(dval);
u32 rovf = reg_num(dovf);
- /* Sign-extend inputs from 32 to 64. */
- rv64_emit32(mc, rv_addiw(RV_T2, ra, 0));
- rv64_emit32(mc, rv_addiw(RV_T3, rb, 0));
- /* Full 64-bit product */
- rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3));
- /* sign-ext of low 32 of product */
- rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0));
- /* ovf = (T2 != T3) */
- rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3));
- rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
- /* dval = low 32, sign-extended */
- rv64_emit32(mc, rv_addiw(rd, RV_T2, 0));
+ if (is64) {
+ rv64_emit32(mc, rv_mulhu(rovf, ra, rb));
+ rv64_emit32(mc, rv_mul(rd, ra, rb));
+ rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+ } else {
+ rv64_emit32(mc, rv_slli(RV_T2, ra, 32));
+ rv64_emit32(mc, rv_srli(RV_T2, RV_T2, 32));
+ rv64_emit32(mc, rv_slli(RV_T3, rb, 32));
+ rv64_emit32(mc, rv_srli(RV_T3, RV_T3, 32));
+ rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3));
+ rv64_emit32(mc, rv_srli(rovf, RV_T2, 32));
+ rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+ rv64_emit32(mc, rv_addiw(rd, RV_T2, 0));
+ }
return;
}
case INTRIN_MEMCPY:
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -2071,9 +2071,11 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
case INTRIN_TRAP:
emit_ud2(mc);
return;
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
- /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO captures. */
+ case INTRIN_SADD_OVERFLOW:
+ case INTRIN_UADD_OVERFLOW:
+ case INTRIN_SSUB_OVERFLOW:
+ case INTRIN_USUB_OVERFLOW: {
+ /* dsts: [val, ovf]. Signed uses OF; unsigned uses CF. */
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
int w = type_is_64(dval.type) ? 1 : 0;
@@ -2081,24 +2083,27 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
if (rd != ra) emit_mov_rr(mc, w, rd, ra);
u32 rb = x64_force_reg_int(t, b_op, w, X64_R11);
- u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29;
+ u8 op = (kind == INTRIN_SADD_OVERFLOW ||
+ kind == INTRIN_UADD_OVERFLOW)
+ ? 0x01
+ : 0x29;
+ u32 cc = (kind == INTRIN_UADD_OVERFLOW ||
+ kind == INTRIN_USUB_OVERFLOW)
+ ? X64_CC_B
+ : X64_CC_O;
emit_alu_rr(mc, w, op, rd, rb);
u32 dovf_r = dovf.v.reg & 0xFu;
- emit_setcc(mc, X64_CC_O, dovf_r);
+ emit_setcc(mc, cc, dovf_r);
emit_movzx_r32_r8(mc, dovf_r, dovf_r);
return;
}
- case INTRIN_MUL_OVERFLOW: {
+ case INTRIN_SMUL_OVERFLOW: {
/* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed
- * two-operand form: low 32 bits of product go to dst, OF set if
- * the result didn't fit. i64 not yet supported. */
+ * two-operand form: low bits of product go to dst, OF set if
+ * the result didn't fit. */
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
int w = type_is_64(dval.type) ? 1 : 0;
- if (w) {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: mul_overflow on i64 not yet supported");
- }
u32 rd = dval.v.reg & 0xFu;
u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
if (rd != ra) emit_mov_rr(mc, w, rd, ra);
@@ -2109,6 +2114,27 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
emit_movzx_r32_r8(mc, dovf_r, dovf_r);
return;
}
+ case INTRIN_UMUL_OVERFLOW: {
+ /* MUL writes the double-width product to RDX:RAX and sets CF/OF
+ * when the high half is non-zero. */
+ Operand a_op = args[0], b_op = args[1];
+ Operand dval = dsts[0], dovf = dsts[1];
+ int w = type_is_64(dval.type) ? 1 : 0;
+ u32 rd = dval.v.reg & 0xFu;
+ u32 rb = x64_force_reg_int(t, b_op, w, X64_R11);
+ if (rb == X64_RAX || rb == X64_RDX) {
+ emit_mov_rr(mc, w, X64_R11, rb);
+ rb = X64_R11;
+ }
+ u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+ if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra);
+ emit_f7_rm(mc, w, 4u, rb);
+ if (rd != X64_RAX) emit_mov_rr(mc, w, rd, X64_RAX);
+ u32 dovf_r = dovf.v.reg & 0xFu;
+ emit_setcc(mc, X64_CC_O, dovf_r);
+ emit_movzx_r32_r8(mc, dovf_r, dovf_r);
+ return;
+ }
default:
compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported",
(int)kind);
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -184,6 +184,7 @@ here for completeness once they're real cases.
| `6_5_55_generic_default_branch` | ★ | `_Generic((double)x, int: 0, default: 42)` — falls through to `default` | 42 |
| `6_5_56_compound_literal_struct` | ★ | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 |
| `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 |
+| `6_5_58_large_integer_immediates` | ★ | 64-bit boundary integer literals including `INT64_MAX`, top-bit unsigned, and all-ones materialize correctly | 42 |
## §6.5.2.2 Aggregate function arguments
@@ -501,7 +502,7 @@ ordinary calls.
| `builtin_23_atomic_long_literal_convert` | ★ | store/RMW integer literals converted to unsigned long atomic object type | 42 |
| `builtin_24_atomic_lock_free` | ★ | target-aware lock-free folding through `if`, `&&`, and `||`; dead 16-byte atomic arms suppress codegen | 42 |
| `builtin_25_atomic_fetch_nand` | ★ | `__atomic_fetch_nand` lowers to atomic NAND RMW | 42 |
-| `builtin_26_sadd_overflow` | RED | `__builtin_sadd_overflow` stores result and returns overflow flag; currently undeclared | 42 |
+| `builtin_26_sadd_overflow` | ★ | signed/unsigned typed overflow builtins store result and return overflow flag | 42 |
| `builtin_99_syscall0` | (deferred) | `__cfree_syscall0` requires linking against the syscall stub; covered in `test/libc` | — |
## Variadic coverage
diff --git a/test/parse/cases/6_5_58_large_integer_immediates.c b/test/parse/cases/6_5_58_large_integer_immediates.c
@@ -0,0 +1,17 @@
+static unsigned long long add_ull(unsigned long long a,
+ unsigned long long b) {
+ return a + b;
+}
+
+int test_main(void) {
+ unsigned long long high = 9223372036854775808ULL;
+ unsigned long long all = 18446744073709551615ULL;
+ if (high + high != 0ULL) return 1;
+ if (add_ull(9223372036854775808ULL, 9223372036854775807ULL) != all)
+ return 2;
+ if ((long long)9223372036854775808ULL != -9223372036854775807LL - 1LL)
+ return 3;
+ if (9223372036854775807LL + (-9223372036854775807LL) != 0LL)
+ return 4;
+ return 42;
+}
diff --git a/test/parse/cases/6_5_58_large_integer_immediates.expected b/test/parse/cases/6_5_58_large_integer_immediates.expected
@@ -0,0 +1 @@
+42
diff --git a/test/parse/cases/builtin_26_sadd_overflow.c b/test/parse/cases/builtin_26_sadd_overflow.c
@@ -1,7 +1,77 @@
int test_main(void) {
int r = 0;
+ unsigned ur = 0;
+ long lr = 0;
+ unsigned long ulr = 0;
+ long long llr = 0;
+ unsigned long long ullr = 0;
+ long l_hi = 4611686018427387904L;
+ unsigned long ul_hi = 9223372036854775808UL;
+ long long ll_hi = 4611686018427387904LL;
+ unsigned long long ull_hi = 9223372036854775808ULL;
+ long l_max = 9223372036854775807L;
+ long l_min = (long)9223372036854775808UL;
+ unsigned long ul_max = 18446744073709551615UL;
+ long long ll_max = 9223372036854775807LL;
+ long long ll_min = (long long)9223372036854775808ULL;
+ unsigned long long ull_max = 18446744073709551615ULL;
+ if ((unsigned long)l_max != ul_hi - 1UL) return 78;
+ if ((unsigned long)l_min != ul_hi) return 79;
+ if ((unsigned long long)ll_max != ull_hi - 1ULL) return 80;
+ if ((unsigned long long)ll_min != ull_hi) return 81;
_Bool ov = __builtin_sadd_overflow(40, 2, &r);
if (ov || r != 42) return 1;
ov = __builtin_sadd_overflow(2147483647, 1, &r);
- return ov ? 42 : 2;
+ if (!ov || r != -2147483647 - 1) return 2;
+ ov = __builtin_ssub_overflow(-2147483647 - 1, 1, &r);
+ if (!ov || r != 2147483647) return 3;
+ ov = __builtin_smul_overflow(1073741824, 2, &r);
+ if (!ov || r != -2147483647 - 1) return 4;
+
+ ov = __builtin_uadd_overflow(40u, 2u, &ur);
+ if (ov || ur != 42u) return 5;
+ ov = __builtin_uadd_overflow(4294967295u, 1u, &ur);
+ if (!ov || ur != 0u) return 6;
+ ov = __builtin_usub_overflow(0u, 1u, &ur);
+ if (!ov || ur != 4294967295u) return 7;
+ ov = __builtin_umul_overflow(2147483648u, 2u, &ur);
+ if (!ov || ur != 0u) return 8;
+
+ ov = __builtin_saddl_overflow(40L, 2L, &lr);
+ if (ov || lr != 42L) return 15;
+ ov = __builtin_saddl_overflow(l_max, 1L, &lr);
+ if (!ov) return 16;
+ if (lr != l_min) return 160;
+ ov = __builtin_ssubl_overflow(l_min, 1L, &lr);
+ if (!ov || lr != l_max) return 17;
+ ov = __builtin_smull_overflow(l_hi, 2L, &lr);
+ if (!ov || lr != l_min) return 18;
+
+ ov = __builtin_uaddl_overflow(40UL, 2UL, &ulr);
+ if (ov || ulr != 42UL) return 19;
+ ov = __builtin_uaddl_overflow(ul_max, 1UL, &ulr);
+ if (!ov || ulr != 0UL) return 20;
+ ov = __builtin_usubl_overflow(0UL, 1UL, &ulr);
+ if (!ov || ulr != ul_max) return 21;
+ ov = __builtin_umull_overflow(ul_hi, 2UL, &ulr);
+ if (!ov || ulr != 0UL) return 22;
+
+ ov = __builtin_saddll_overflow(ll_max, 1LL, &llr);
+ if (!ov) return 9;
+ if (llr != ll_min) return 90;
+ ov = __builtin_ssubll_overflow(ll_min, 1LL, &llr);
+ if (!ov || llr != ll_max) return 10;
+ ov = __builtin_smulll_overflow(ll_hi, 2LL, &llr);
+ if (!ov) return 11;
+ if (llr != ll_min) return 110;
+ ov = __builtin_uaddll_overflow(ull_max, 1ULL, &ullr);
+ if (!ov || ullr != 0ULL) return 12;
+ ov = __builtin_usubll_overflow(0ULL, 1ULL, &ullr);
+ if (!ov || ullr != ull_max) return 13;
+ ov = __builtin_umulll_overflow(ull_hi, 2ULL, &ullr);
+ if (!ov || ullr != 0ULL) return 14;
+ ov = __builtin_uaddll_overflow(9223372036854775808ULL,
+ 9223372036854775808ULL, &ullr);
+ if (!ov || ullr != 0ULL) return 23;
+ return 42;
}