commit 4e8b7ce43241e6eab39044bd54f6eaaa72b02b8e
parent 12b4f3c5b47994a7fc4a4b85d587295914c148d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 19 May 2026 17:36:28 -0700
Fix FP negation and address codegen
Diffstat:
17 files changed, 156 insertions(+), 26 deletions(-)
diff --git a/rt/lib/fp_tf/fp_tf.c b/rt/lib/fp_tf/fp_tf.c
@@ -75,6 +75,8 @@ COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
return __addtf3(a, fromRep(toRep(b) ^ signBit));
}
+COMPILER_RT_ABI fp_t __negtf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
// ---- multf3.c ----
#define QUAD_PRECISION
#include "fp_lib.h"
diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h
@@ -205,6 +205,10 @@ static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) {
return 0x1E201800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) |
((Rn & 0x1f) << 5) | (Rd & 0x1f);
}
+static inline u32 aa64_fneg(u32 type, u32 Rd, u32 Rn) {
+ return 0x1E214000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) |
+ (Rd & 0x1f);
+}
static inline u32 aa64_fcmp(u32 type, u32 Rn, u32 Rm) {
return 0x1E202000u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) |
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -287,22 +287,13 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
if (lv.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, lv.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot");
- aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0));
+ aa64_emit_addr_adjust(t->mc, reg_num(dst), 29, -(i32)s->off);
return;
}
if (lv.kind == OPK_INDIRECT) {
i32 ofs = lv.v.ind.ofs;
u32 base = lv.v.ind.base & 0x1f;
- if (ofs == 0) {
- aa64_emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base));
- } else if (ofs > 0 && ofs <= 0xfff) {
- aa64_emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0));
- } else if (ofs < 0 && -ofs <= 0xfff) {
- aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0));
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 addr_of: indirect offset %d unsupported", ofs);
- }
+ aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs);
return;
}
if (lv.kind == OPK_GLOBAL) {
@@ -700,11 +691,21 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
- u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
u32 word;
+ if (op == UO_FNEG) {
+ if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "aarch64 unop: FP neg requires FP REG operand");
+ }
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_fneg(type, rd, reg_num(a_op)));
+ return;
+ }
+
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
+ u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
switch (op) {
case UO_NEG:
word = aa64_neg(sf, rd, rn);
@@ -915,6 +916,9 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
} else if (av->storage.kind == OPK_INDIRECT) {
aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f,
av->storage.v.ind.ofs);
+ } else if (av->storage.kind == OPK_GLOBAL) {
+ emit_global_addr(t, dst_reg, av->storage.v.global.sym,
+ av->storage.v.global.addend);
} else {
compiler_panic(t->c, a->loc,
"aarch64 call: INDIRECT arg storage kind %d unsupported",
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -56,6 +56,7 @@ typedef enum BinOp {
typedef enum UnOp {
UO_NEG,
+ UO_FNEG,
UO_NOT, /* logical: 0/1 */
UO_BNOT, /* bitwise ~ */
} UnOp;
@@ -731,8 +732,8 @@ struct CGTarget {
* (`a`, `b`); `dst` is always OPK_REG. The backend chooses between an
* imm-form encoding and materializing the literal into a scratch
* register based on whether the value fits the instruction's imm
- * field. FP ops require REG sources — FP literals reach the value
- * stack through load_const into OPK_REG. cg and opt's machinize/emit
+ * field. FP binops and UO_FNEG require REG sources — FP literals reach the
+ * value stack through load_const into OPK_REG. cg and opt's machinize/emit
* both rely on this contract to pass small constants through without
* burning a value-stack register on materialization. */
void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/,
diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h
@@ -228,6 +228,9 @@ static inline u32 rv_fdiv(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
static inline u32 rv_fsgnj(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x0, rd, RV_OP_FP);
}
+static inline u32 rv_fsgnjn(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
+ return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x1, rd, RV_OP_FP);
+}
/* FCVT — integer/FP conversions. funct7 = 0x18..0x1d depending on direction;
* rs2 encodes the partner type:
* 0x60(W <- S) 0x61(W <- D)
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -628,8 +628,18 @@ static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
+ if (op == UO_FNEG) {
+ if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "rv64 unop: FP neg requires FP REG operand");
+ }
+ u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S;
+ rv64_emit32(mc, rv_fsgnjn(fmt, rd, reg_num(a_op), reg_num(a_op)));
+ return;
+ }
+
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
/* IMM operand is legal per the CGTarget contract (arch.h); materialize
* into t0 when not already a register. cg folds literal unops upstream
* via cg_fold_unop. */
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -704,8 +704,43 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
- int w = type_is_64(dst.type) ? 1 : 0;
u32 rd = dst.v.reg & 0xFu;
+ if (op == UO_FNEG) {
+ u8 mask_bytes[8];
+ ConstBytes cb;
+ Operand mask;
+ u32 ra;
+ if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "x64 unop: FP neg requires FP REG operand");
+ }
+ ra = a_op.v.reg & 0xFu;
+ if (rd != ra) emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0xF2 : 0xF3,
+ 0x10, rd, ra);
+ memset(mask_bytes, 0, sizeof mask_bytes);
+ if (type_is_fp_double(dst.type)) {
+ mask_bytes[7] = 0x80u;
+ cb.size = 8;
+ cb.align = 8;
+ } else {
+ mask_bytes[3] = 0x80u;
+ cb.size = 4;
+ cb.align = 4;
+ }
+ cb.type = dst.type;
+ cb.bytes = mask_bytes;
+ memset(&mask, 0, sizeof mask);
+ mask.kind = OPK_REG;
+ mask.cls = RC_FP;
+ mask.type = dst.type;
+ mask.v.reg = X64_XMM15;
+ x_load_const(t, mask, cb);
+ emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0x66 : 0, 0x57, rd,
+ X64_XMM15);
+ return;
+ }
+
+ int w = type_is_64(dst.type) ? 1 : 0;
/* IMM operand is legal per the CGTarget contract (arch.h); materialize
* into a scratch register when not already a register. cg folds
* literal unops upstream (cg_fold_unop), so this path is reached only
diff --git a/src/cg/arith.c b/src/cg/arith.c
@@ -120,6 +120,20 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) {
}
}
+ if (iop == UO_FNEG) {
+ if (api_type_class(ty) != RC_FP) {
+ compiler_panic(g->c, g->cur_loc,
+ "CfreeCg: FP negation requires floating operand");
+ }
+ ra = api_force_reg(g, &a, ty);
+ rr = api_alloc_reg_or_spill(g, RC_FP, ty);
+ dst = api_op_reg(rr, ty);
+ T->unop(T, iop, dst, ra);
+ api_release(g, &a);
+ api_push(g, api_make_sv(dst, ty));
+ return;
+ }
+
if (!flags && api_sv_op_is(&a, OPK_IMM) &&
api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) {
api_release(g, &a);
@@ -535,19 +549,16 @@ void cfree_cg_fp_binop(CfreeCg* g, CfreeCgFpBinOp op, uint32_t flags) {
void cfree_cg_fp_unop(CfreeCg* g, CfreeCgFpUnOp op, uint32_t flags) {
(void)flags;
- (void)op;
+ if (!g) return;
+ if (op != CFREE_CG_FP_NEG) {
+ compiler_panic(g->c, g->cur_loc, "CfreeCg: FP unary op unsupported");
+ }
if (api_f128_stack_top(g, 0)) {
CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128);
- CfreeCgTypeId ps[2];
- ApiSValue args[2];
- args[1] = api_pop(g);
- args[0] = api_make_f128_const(g, 0.0, f128);
- ps[0] = f128;
- ps[1] = f128;
- api_runtime_call_values(g, "__subtf3", f128, ps, 2, args);
+ api_f128_call_unary(g, "__negtf2", f128, f128);
return;
}
- api_cg_unop(g, UO_NEG, 0);
+ api_cg_unop(g, UO_FNEG, 0);
}
void cfree_cg_fp_cmp(CfreeCg* g, CfreeCgFpCmpOp op) {
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -193,6 +193,7 @@ here for completeness once they're real cases.
| `6_5_56_compound_literal_struct` | ★ | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 |
| `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 |
| `6_5_58_large_integer_immediates` | ★ | 64-bit boundary integer literals including `INT64_MAX`, top-bit unsigned, and all-ones materialize correctly | 42 |
+| `6_5_71_fp_unary_neg_zero` | ★ | `-0.0f`, `-0.0`, and unary `-` applied to `float` and `double` zero variables all preserve the negative sign bit (checked via the sign of `1/x`) | 0 |
| `6_5_65_file_scope_compound_literal` | RED | `static int *p = (int[]){42}; return p[0];` — file-scope compound literal has static storage duration | 42 |
| `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 |
| `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 |
diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.c b/test/parse/cases/6_5_03_large_indirect_addr_of.c
@@ -0,0 +1,13 @@
+struct Big {
+ unsigned char pad[18080];
+ int value;
+};
+
+int test_main(void) {
+ struct Big s;
+ struct Big *p = &s;
+ int *q;
+ p->value = 37;
+ q = &p->value;
+ return *q;
+}
diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.expected b/test/parse/cases/6_5_03_large_indirect_addr_of.expected
@@ -0,0 +1 @@
+37
diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c
@@ -0,0 +1,10 @@
+struct record {
+ double lat;
+ double lon;
+};
+
+int test_main(void) {
+ struct record r = {37.0, -122.0};
+ double x = 122.0;
+ return (r.lon == -122.0 && -x == -122.0) ? 0 : 1;
+}
diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected
@@ -0,0 +1 @@
+0
diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.c b/test/parse/cases/6_5_71_fp_unary_neg_zero.c
@@ -0,0 +1,12 @@
+int test_main(void) {
+ float f = -0.0f;
+ double d = -0.0;
+ float zf = 0.0f;
+ double zd = 0.0;
+
+ if (!((1.0f / f) < 0.0f)) return 1;
+ if (!((1.0 / d) < 0.0)) return 2;
+ if (!((1.0f / -zf) < 0.0f)) return 3;
+ if (!((1.0 / -zd) < 0.0)) return 4;
+ return 0;
+}
diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.expected b/test/parse/cases/6_5_71_fp_unary_neg_zero.expected
@@ -0,0 +1 @@
+0
diff --git a/test/parse/cases/call_large_const_global_struct_byval.c b/test/parse/cases/call_large_const_global_struct_byval.c
@@ -0,0 +1,20 @@
+struct S {
+ void *next;
+ void *prev;
+ void *child;
+ int type;
+ void *valuestring;
+ int valueint;
+ double valuedouble;
+ void *string;
+};
+
+static int read_struct(struct S v) {
+ return (v.type == 0 && v.valuestring == 0 && v.valuedouble == 0.0) ? 0 : 1;
+}
+
+static const struct S invalid = {0, 0, 0, 0, 0, 0, 0.0, 0};
+
+int test_main(void) {
+ return read_struct(invalid);
+}
diff --git a/test/parse/cases/call_large_const_global_struct_byval.expected b/test/parse/cases/call_large_const_global_struct_byval.expected
@@ -0,0 +1 @@
+0