Fix FP negation and address codegen - kit

commit 4e8b7ce43241e6eab39044bd54f6eaaa72b02b8e
parent 12b4f3c5b47994a7fc4a4b85d587295914c148d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 17:36:28 -0700

Fix FP negation and address codegen

Diffstat:
M rt/lib/fp_tf/fp_tf.c  | 2 ++
M src/arch/aa64/internal.h  | 4 ++++
M src/arch/aa64/ops.c  | 30 +++++++++++++++++-------------
M src/arch/arch.h  | 5 +++--
M src/arch/rv64/isa.h  | 3 +++
M src/arch/rv64/ops.c  | 12 +++++++++++-
M src/arch/x64/ops.c  | 37 ++++++++++++++++++++++++++++++++++++-
M src/cg/arith.c  | 29 ++++++++++++++++++++---------
M test/parse/CORPUS.md  | 1 +
A test/parse/cases/6_5_03_large_indirect_addr_of.c  | 13 +++++++++++++
A test/parse/cases/6_5_03_large_indirect_addr_of.expected  | 1 +
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.c  | 10 ++++++++++
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected  | 1 +
A test/parse/cases/6_5_71_fp_unary_neg_zero.c  | 12 ++++++++++++
A test/parse/cases/6_5_71_fp_unary_neg_zero.expected  | 1 +
A test/parse/cases/call_large_const_global_struct_byval.c  | 20 ++++++++++++++++++++
A test/parse/cases/call_large_const_global_struct_byval.expected  | 1 +

17 files changed, 156 insertions(+), 26 deletions(-)
diff --git a/rt/lib/fp_tf/fp_tf.c b/rt/lib/fp_tf/fp_tf.c
@@ -75,6 +75,8 @@ COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
   return __addtf3(a, fromRep(toRep(b) ^ signBit));
 }
 
+COMPILER_RT_ABI fp_t __negtf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
 // ---- multf3.c ----
 #define QUAD_PRECISION
 #include "fp_lib.h"
diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h
@@ -205,6 +205,10 @@ static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) {
   return 0x1E201800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) |
          ((Rn & 0x1f) << 5) | (Rd & 0x1f);
 }
+static inline u32 aa64_fneg(u32 type, u32 Rd, u32 Rn) {
+  return 0x1E214000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) |
+         (Rd & 0x1f);
+}
 
 static inline u32 aa64_fcmp(u32 type, u32 Rn, u32 Rm) {
   return 0x1E202000u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) |
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -287,22 +287,13 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
   if (lv.kind == OPK_LOCAL) {
     AASlot* s = aa64_slot_get(a, lv.v.frame_slot);
     if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot");
-    aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0));
+    aa64_emit_addr_adjust(t->mc, reg_num(dst), 29, -(i32)s->off);
     return;
   }
   if (lv.kind == OPK_INDIRECT) {
     i32 ofs = lv.v.ind.ofs;
     u32 base = lv.v.ind.base & 0x1f;
-    if (ofs == 0) {
-      aa64_emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base));
-    } else if (ofs > 0 && ofs <= 0xfff) {
-      aa64_emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0));
-    } else if (ofs < 0 && -ofs <= 0xfff) {
-      aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0));
-    } else {
-      compiler_panic(t->c, a->loc,
-                     "aarch64 addr_of: indirect offset %d unsupported", ofs);
-    }
+    aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs);
     return;
   }
   if (lv.kind == OPK_GLOBAL) {
@@ -700,11 +691,21 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
 
 static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
   MCEmitter* mc = t->mc;
-  u32 sf = type_is_64(dst.type) ? 1u : 0u;
   u32 rd = reg_num(dst);
-  u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
   u32 word;
 
+  if (op == UO_FNEG) {
+    if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "aarch64 unop: FP neg requires FP REG operand");
+    }
+    u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+    aa64_emit32(mc, aa64_fneg(type, rd, reg_num(a_op)));
+    return;
+  }
+
+  u32 sf = type_is_64(dst.type) ? 1u : 0u;
+  u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
   switch (op) {
     case UO_NEG:
       word = aa64_neg(sf, rd, rn);
@@ -915,6 +916,9 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
     } else if (av->storage.kind == OPK_INDIRECT) {
       aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f,
                        av->storage.v.ind.ofs);
+    } else if (av->storage.kind == OPK_GLOBAL) {
+      emit_global_addr(t, dst_reg, av->storage.v.global.sym,
+                       av->storage.v.global.addend);
     } else {
       compiler_panic(t->c, a->loc,
                      "aarch64 call: INDIRECT arg storage kind %d unsupported",
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -56,6 +56,7 @@ typedef enum BinOp {
 
 typedef enum UnOp {
   UO_NEG,
+  UO_FNEG,
   UO_NOT,  /* logical: 0/1 */
   UO_BNOT, /* bitwise ~  */
 } UnOp;
@@ -731,8 +732,8 @@ struct CGTarget {
    * (`a`, `b`); `dst` is always OPK_REG. The backend chooses between an
    * imm-form encoding and materializing the literal into a scratch
    * register based on whether the value fits the instruction's imm
-   * field. FP ops require REG sources — FP literals reach the value
-   * stack through load_const into OPK_REG. cg and opt's machinize/emit
+   * field. FP binops and UO_FNEG require REG sources — FP literals reach the
+   * value stack through load_const into OPK_REG. cg and opt's machinize/emit
    * both rely on this contract to pass small constants through without
    * burning a value-stack register on materialization. */
   void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/,
diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h
@@ -228,6 +228,9 @@ static inline u32 rv_fdiv(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
 static inline u32 rv_fsgnj(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
   return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x0, rd, RV_OP_FP);
 }
+static inline u32 rv_fsgnjn(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
+  return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x1, rd, RV_OP_FP);
+}
 /* FCVT — integer/FP conversions. funct7 = 0x18..0x1d depending on direction;
  * rs2 encodes the partner type:
  *   0x60(W <- S)  0x61(W <- D)
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -628,8 +628,18 @@ static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
 
 static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
   MCEmitter* mc = t->mc;
-  u32 sf = type_is_64(dst.type) ? 1u : 0u;
   u32 rd = reg_num(dst);
+  if (op == UO_FNEG) {
+    if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "rv64 unop: FP neg requires FP REG operand");
+    }
+    u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S;
+    rv64_emit32(mc, rv_fsgnjn(fmt, rd, reg_num(a_op), reg_num(a_op)));
+    return;
+  }
+
+  u32 sf = type_is_64(dst.type) ? 1u : 0u;
   /* IMM operand is legal per the CGTarget contract (arch.h); materialize
    * into t0 when not already a register. cg folds literal unops upstream
    * via cg_fold_unop. */
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -704,8 +704,43 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
 
 static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
   MCEmitter* mc = t->mc;
-  int w = type_is_64(dst.type) ? 1 : 0;
   u32 rd = dst.v.reg & 0xFu;
+  if (op == UO_FNEG) {
+    u8 mask_bytes[8];
+    ConstBytes cb;
+    Operand mask;
+    u32 ra;
+    if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "x64 unop: FP neg requires FP REG operand");
+    }
+    ra = a_op.v.reg & 0xFu;
+    if (rd != ra) emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0xF2 : 0xF3,
+                              0x10, rd, ra);
+    memset(mask_bytes, 0, sizeof mask_bytes);
+    if (type_is_fp_double(dst.type)) {
+      mask_bytes[7] = 0x80u;
+      cb.size = 8;
+      cb.align = 8;
+    } else {
+      mask_bytes[3] = 0x80u;
+      cb.size = 4;
+      cb.align = 4;
+    }
+    cb.type = dst.type;
+    cb.bytes = mask_bytes;
+    memset(&mask, 0, sizeof mask);
+    mask.kind = OPK_REG;
+    mask.cls = RC_FP;
+    mask.type = dst.type;
+    mask.v.reg = X64_XMM15;
+    x_load_const(t, mask, cb);
+    emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0x66 : 0, 0x57, rd,
+                X64_XMM15);
+    return;
+  }
+
+  int w = type_is_64(dst.type) ? 1 : 0;
   /* IMM operand is legal per the CGTarget contract (arch.h); materialize
    * into a scratch register when not already a register. cg folds
    * literal unops upstream (cg_fold_unop), so this path is reached only
diff --git a/src/cg/arith.c b/src/cg/arith.c
@@ -120,6 +120,20 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) {
     }
   }
 
+  if (iop == UO_FNEG) {
+    if (api_type_class(ty) != RC_FP) {
+      compiler_panic(g->c, g->cur_loc,
+                     "CfreeCg: FP negation requires floating operand");
+    }
+    ra = api_force_reg(g, &a, ty);
+    rr = api_alloc_reg_or_spill(g, RC_FP, ty);
+    dst = api_op_reg(rr, ty);
+    T->unop(T, iop, dst, ra);
+    api_release(g, &a);
+    api_push(g, api_make_sv(dst, ty));
+    return;
+  }
+
   if (!flags && api_sv_op_is(&a, OPK_IMM) &&
       api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) {
     api_release(g, &a);
@@ -535,19 +549,16 @@ void cfree_cg_fp_binop(CfreeCg* g, CfreeCgFpBinOp op, uint32_t flags) {
 
 void cfree_cg_fp_unop(CfreeCg* g, CfreeCgFpUnOp op, uint32_t flags) {
   (void)flags;
-  (void)op;
+  if (!g) return;
+  if (op != CFREE_CG_FP_NEG) {
+    compiler_panic(g->c, g->cur_loc, "CfreeCg: FP unary op unsupported");
+  }
   if (api_f128_stack_top(g, 0)) {
     CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128);
-    CfreeCgTypeId ps[2];
-    ApiSValue args[2];
-    args[1] = api_pop(g);
-    args[0] = api_make_f128_const(g, 0.0, f128);
-    ps[0] = f128;
-    ps[1] = f128;
-    api_runtime_call_values(g, "__subtf3", f128, ps, 2, args);
+    api_f128_call_unary(g, "__negtf2", f128, f128);
     return;
   }
-  api_cg_unop(g, UO_NEG, 0);
+  api_cg_unop(g, UO_FNEG, 0);
 }
 
 void cfree_cg_fp_cmp(CfreeCg* g, CfreeCgFpCmpOp op) {
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -193,6 +193,7 @@ here for completeness once they're real cases.
 | `6_5_56_compound_literal_struct` | ★ | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 |
 | `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 |
 | `6_5_58_large_integer_immediates` | ★ | 64-bit boundary integer literals including `INT64_MAX`, top-bit unsigned, and all-ones materialize correctly | 42 |
+| `6_5_71_fp_unary_neg_zero` | ★ | `-0.0f`, `-0.0`, and unary `-` on float zero preserve the negative sign bit | 0 |
 | `6_5_65_file_scope_compound_literal` | RED | `static int *p = (int[]){42}; return p[0];` — file-scope compound literal has static storage duration | 42 |
 | `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 |
 | `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 |
diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.c b/test/parse/cases/6_5_03_large_indirect_addr_of.c
@@ -0,0 +1,13 @@
+struct Big {
+  unsigned char pad[18080];
+  int value;
+};
+
+int test_main(void) {
+  struct Big s;
+  struct Big *p = &s;
+  int *q;
+  p->value = 37;
+  q = &p->value;
+  return *q;
+}
diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.expected b/test/parse/cases/6_5_03_large_indirect_addr_of.expected
@@ -0,0 +1 @@
+37
diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c
@@ -0,0 +1,10 @@
+struct record {
+  double lat;
+  double lon;
+};
+
+int test_main(void) {
+  struct record r = {37.0, -122.0};
+  double x = 122.0;
+  return (r.lon == -122.0 && -x == -122.0) ? 0 : 1;
+}
diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected
@@ -0,0 +1 @@
+0
diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.c b/test/parse/cases/6_5_71_fp_unary_neg_zero.c
@@ -0,0 +1,12 @@
+int test_main(void) {
+  float f = -0.0f;
+  double d = -0.0;
+  float zf = 0.0f;
+  double zd = 0.0;
+
+  if (!((1.0f / f) < 0.0f)) return 1;
+  if (!((1.0 / d) < 0.0)) return 2;
+  if (!((1.0f / -zf) < 0.0f)) return 3;
+  if (!((1.0 / -zd) < 0.0)) return 4;
+  return 0;
+}
diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.expected b/test/parse/cases/6_5_71_fp_unary_neg_zero.expected
@@ -0,0 +1 @@
+0
diff --git a/test/parse/cases/call_large_const_global_struct_byval.c b/test/parse/cases/call_large_const_global_struct_byval.c
@@ -0,0 +1,20 @@
+struct S {
+  void *next;
+  void *prev;
+  void *child;
+  int type;
+  void *valuestring;
+  int valueint;
+  double valuedouble;
+  void *string;
+};
+
+static int read_struct(struct S v) {
+  return (v.type == 0 && v.valuestring == 0 && v.valuedouble == 0.0) ? 0 : 1;
+}
+
+static const struct S invalid = {0, 0, 0, 0, 0, 0, 0.0, 0};
+
+int test_main(void) {
+  return read_struct(invalid);
+}
diff --git a/test/parse/cases/call_large_const_global_struct_byval.expected b/test/parse/cases/call_large_const_global_struct_byval.expected
@@ -0,0 +1 @@
+0

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log | Files | Refs | README

M rt/lib/fp_tf/fp_tf.c  | 2 ++
M src/arch/aa64/internal.h  | 4 ++++
M src/arch/aa64/ops.c  | 30 +++++++++++++++++-------------
M src/arch/arch.h  | 5 +++--
M src/arch/rv64/isa.h  | 3 +++
M src/arch/rv64/ops.c  | 12 +++++++++++-
M src/arch/x64/ops.c  | 37 ++++++++++++++++++++++++++++++++++++-
M src/cg/arith.c  | 29 ++++++++++++++++++++---------
M test/parse/CORPUS.md  | 1 +
A test/parse/cases/6_5_03_large_indirect_addr_of.c  | 13 +++++++++++++
A test/parse/cases/6_5_03_large_indirect_addr_of.expected  | 1 +
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.c  | 10 ++++++++++
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected  | 1 +
A test/parse/cases/6_5_71_fp_unary_neg_zero.c  | 12 ++++++++++++
A test/parse/cases/6_5_71_fp_unary_neg_zero.expected  | 1 +
A test/parse/cases/call_large_const_global_struct_byval.c  | 20 ++++++++++++++++++++
A test/parse/cases/call_large_const_global_struct_byval.expected  | 1 +