kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4e8b7ce43241e6eab39044bd54f6eaaa72b02b8e
parent 12b4f3c5b47994a7fc4a4b85d587295914c148d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 17:36:28 -0700

Fix FP negation and address codegen

Diffstat:
M rt/lib/fp_tf/fp_tf.c | 2 ++
M src/arch/aa64/internal.h | 4 ++++
M src/arch/aa64/ops.c | 30 +++++++++++++++++-------------
M src/arch/arch.h | 5 +++--
M src/arch/rv64/isa.h | 3 +++
M src/arch/rv64/ops.c | 12 +++++++++++-
M src/arch/x64/ops.c | 37 ++++++++++++++++++++++++++++++++++++-
M src/cg/arith.c | 29 ++++++++++++++++++++---------
M test/parse/CORPUS.md | 1 +
A test/parse/cases/6_5_03_large_indirect_addr_of.c | 13 +++++++++++++
A test/parse/cases/6_5_03_large_indirect_addr_of.expected | 1 +
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.c | 10 ++++++++++
A test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected | 1 +
A test/parse/cases/6_5_71_fp_unary_neg_zero.c | 12 ++++++++++++
A test/parse/cases/6_5_71_fp_unary_neg_zero.expected | 1 +
A test/parse/cases/call_large_const_global_struct_byval.c | 20 ++++++++++++++++++++
A test/parse/cases/call_large_const_global_struct_byval.expected | 1 +
17 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/rt/lib/fp_tf/fp_tf.c b/rt/lib/fp_tf/fp_tf.c @@ -75,6 +75,8 @@ COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) { return __addtf3(a, fromRep(toRep(b) ^ signBit)); } +COMPILER_RT_ABI fp_t __negtf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + // ---- multf3.c ---- #define QUAD_PRECISION #include "fp_lib.h" diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h @@ -205,6 +205,10 @@ static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) { return 0x1E201800u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); } +static inline u32 aa64_fneg(u32 type, u32 Rd, u32 Rn) { + return 0x1E214000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} static inline u32 aa64_fcmp(u32 type, u32 Rn, u32 Rm) { return 0x1E202000u | ((type & 3) << 22) | ((Rm & 0x1f) << 16) | diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -287,22 +287,13 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) { if (lv.kind == OPK_LOCAL) { AASlot* s = aa64_slot_get(a, lv.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot"); - aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0)); + aa64_emit_addr_adjust(t->mc, reg_num(dst), 29, -(i32)s->off); return; } if (lv.kind == OPK_INDIRECT) { i32 ofs = lv.v.ind.ofs; u32 base = lv.v.ind.base & 0x1f; - if (ofs == 0) { - aa64_emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base)); - } else if (ofs > 0 && ofs <= 0xfff) { - aa64_emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0)); - } else if (ofs < 0 && -ofs <= 0xfff) { - aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0)); - } else { - compiler_panic(t->c, a->loc, - "aarch64 addr_of: indirect offset %d unsupported", ofs); - } + aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs); return; } if (lv.kind == OPK_GLOBAL) { @@ -700,11 +691,21 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, static void aa_unop(CGTarget* t, UnOp op, Operand dst, 
Operand a_op) { MCEmitter* mc = t->mc; - u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); - u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0); u32 word; + if (op == UO_FNEG) { + if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 unop: FP neg requires FP REG operand"); + } + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + aa64_emit32(mc, aa64_fneg(type, rd, reg_num(a_op))); + return; + } + + u32 sf = type_is_64(dst.type) ? 1u : 0u; + u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0); switch (op) { case UO_NEG: word = aa64_neg(sf, rd, rn); @@ -915,6 +916,9 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, } else if (av->storage.kind == OPK_INDIRECT) { aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f, av->storage.v.ind.ofs); + } else if (av->storage.kind == OPK_GLOBAL) { + emit_global_addr(t, dst_reg, av->storage.v.global.sym, + av->storage.v.global.addend); } else { compiler_panic(t->c, a->loc, "aarch64 call: INDIRECT arg storage kind %d unsupported", diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -56,6 +56,7 @@ typedef enum BinOp { typedef enum UnOp { UO_NEG, + UO_FNEG, UO_NOT, /* logical: 0/1 */ UO_BNOT, /* bitwise ~ */ } UnOp; @@ -731,8 +732,8 @@ struct CGTarget { * (`a`, `b`); `dst` is always OPK_REG. The backend chooses between an * imm-form encoding and materializing the literal into a scratch * register based on whether the value fits the instruction's imm - * field. FP ops require REG sources — FP literals reach the value - * stack through load_const into OPK_REG. cg and opt's machinize/emit + * field. FP binops and UO_FNEG require REG sources — FP literals reach the + * value stack through load_const into OPK_REG. cg and opt's machinize/emit * both rely on this contract to pass small constants through without * burning a value-stack register on materialization. 
*/ void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h @@ -228,6 +228,9 @@ static inline u32 rv_fdiv(u32 fmt, u32 rd, u32 rs1, u32 rs2) { static inline u32 rv_fsgnj(u32 fmt, u32 rd, u32 rs1, u32 rs2) { return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x0, rd, RV_OP_FP); } +static inline u32 rv_fsgnjn(u32 fmt, u32 rd, u32 rs1, u32 rs2) { + return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x1, rd, RV_OP_FP); +} /* FCVT — integer/FP conversions. funct7 = 0x18..0x1d depending on direction; * rs2 encodes the partner type: * 0x60(W <- S) 0x61(W <- D) diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -628,8 +628,18 @@ static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { MCEmitter* mc = t->mc; - u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); + if (op == UO_FNEG) { + if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) { + compiler_panic(t->c, impl_of(t)->loc, + "rv64 unop: FP neg requires FP REG operand"); + } + u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S; + rv64_emit32(mc, rv_fsgnjn(fmt, rd, reg_num(a_op), reg_num(a_op))); + return; + } + + u32 sf = type_is_64(dst.type) ? 1u : 0u; /* IMM operand is legal per the CGTarget contract (arch.h); materialize * into t0 when not already a register. cg folds literal unops upstream * via cg_fold_unop. */ diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -704,8 +704,43 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { MCEmitter* mc = t->mc; - int w = type_is_64(dst.type) ? 
1 : 0; u32 rd = dst.v.reg & 0xFu; + if (op == UO_FNEG) { + u8 mask_bytes[8]; + ConstBytes cb; + Operand mask; + u32 ra; + if (dst.cls != RC_FP || a_op.kind != OPK_REG || a_op.cls != RC_FP) { + compiler_panic(t->c, impl_of(t)->loc, + "x64 unop: FP neg requires FP REG operand"); + } + ra = a_op.v.reg & 0xFu; + if (rd != ra) emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0xF2 : 0xF3, + 0x10, rd, ra); + memset(mask_bytes, 0, sizeof mask_bytes); + if (type_is_fp_double(dst.type)) { + mask_bytes[7] = 0x80u; + cb.size = 8; + cb.align = 8; + } else { + mask_bytes[3] = 0x80u; + cb.size = 4; + cb.align = 4; + } + cb.type = dst.type; + cb.bytes = mask_bytes; + memset(&mask, 0, sizeof mask); + mask.kind = OPK_REG; + mask.cls = RC_FP; + mask.type = dst.type; + mask.v.reg = X64_XMM15; + x_load_const(t, mask, cb); + emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0x66 : 0, 0x57, rd, + X64_XMM15); + return; + } + + int w = type_is_64(dst.type) ? 1 : 0; /* IMM operand is legal per the CGTarget contract (arch.h); materialize * into a scratch register when not already a register. 
cg folds * literal unops upstream (cg_fold_unop), so this path is reached only diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -120,6 +120,20 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { } } + if (iop == UO_FNEG) { + if (api_type_class(ty) != RC_FP) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: FP negation requires floating operand"); + } + ra = api_force_reg(g, &a, ty); + rr = api_alloc_reg_or_spill(g, RC_FP, ty); + dst = api_op_reg(rr, ty); + T->unop(T, iop, dst, ra); + api_release(g, &a); + api_push(g, api_make_sv(dst, ty)); + return; + } + if (!flags && api_sv_op_is(&a, OPK_IMM) && api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) { api_release(g, &a); @@ -535,19 +549,16 @@ void cfree_cg_fp_binop(CfreeCg* g, CfreeCgFpBinOp op, uint32_t flags) { void cfree_cg_fp_unop(CfreeCg* g, CfreeCgFpUnOp op, uint32_t flags) { (void)flags; - (void)op; + if (!g) return; + if (op != CFREE_CG_FP_NEG) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: FP unary op unsupported"); + } if (api_f128_stack_top(g, 0)) { CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - args[1] = api_pop(g); - args[0] = api_make_f128_const(g, 0.0, f128); - ps[0] = f128; - ps[1] = f128; - api_runtime_call_values(g, "__subtf3", f128, ps, 2, args); + api_f128_call_unary(g, "__negtf2", f128, f128); return; } - api_cg_unop(g, UO_NEG, 0); + api_cg_unop(g, UO_FNEG, 0); } void cfree_cg_fp_cmp(CfreeCg* g, CfreeCgFpCmpOp op) { diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -193,6 +193,7 @@ here for completeness once they're real cases. 
| `6_5_56_compound_literal_struct` | ★ | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 | | `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 | | `6_5_58_large_integer_immediates` | ★ | 64-bit boundary integer literals including `INT64_MAX`, top-bit unsigned, and all-ones materialize correctly | 42 | +| `6_5_71_fp_unary_neg_zero` | ★ | `-0.0f`, `-0.0`, and unary `-` on float zero preserve the negative sign bit | 0 | | `6_5_65_file_scope_compound_literal` | RED | `static int *p = (int[]){42}; return p[0];` — file-scope compound literal has static storage duration | 42 | | `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 | | `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 | diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.c b/test/parse/cases/6_5_03_large_indirect_addr_of.c @@ -0,0 +1,13 @@ +struct Big { + unsigned char pad[18080]; + int value; +}; + +int test_main(void) { + struct Big s; + struct Big *p = &s; + int *q; + p->value = 37; + q = &p->value; + return *q; +} diff --git a/test/parse/cases/6_5_03_large_indirect_addr_of.expected b/test/parse/cases/6_5_03_large_indirect_addr_of.expected @@ -0,0 +1 @@ +37 diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.c @@ -0,0 +1,10 @@ +struct record { + double lat; + double lon; +}; + +int test_main(void) { + struct record r = {37.0, -122.0}; + double x = 122.0; + return (r.lon == -122.0 && -x == -122.0) ? 
0 : 1; +} diff --git a/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected b/test/parse/cases/6_5_70_fp_unary_neg_struct_field.expected @@ -0,0 +1 @@ +0 diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.c b/test/parse/cases/6_5_71_fp_unary_neg_zero.c @@ -0,0 +1,12 @@ +int test_main(void) { + float f = -0.0f; + double d = -0.0; + float zf = 0.0f; + double zd = 0.0; + + if (!((1.0f / f) < 0.0f)) return 1; + if (!((1.0 / d) < 0.0)) return 2; + if (!((1.0f / -zf) < 0.0f)) return 3; + if (!((1.0 / -zd) < 0.0)) return 4; + return 0; +} diff --git a/test/parse/cases/6_5_71_fp_unary_neg_zero.expected b/test/parse/cases/6_5_71_fp_unary_neg_zero.expected @@ -0,0 +1 @@ +0 diff --git a/test/parse/cases/call_large_const_global_struct_byval.c b/test/parse/cases/call_large_const_global_struct_byval.c @@ -0,0 +1,20 @@ +struct S { + void *next; + void *prev; + void *child; + int type; + void *valuestring; + int valueint; + double valuedouble; + void *string; +}; + +static int read_struct(struct S v) { + return (v.type == 0 && v.valuestring == 0 && v.valuedouble == 0.0) ? 0 : 1; +} + +static const struct S invalid = {0, 0, 0, 0, 0, 0, 0.0, 0}; + +int test_main(void) { + return read_struct(invalid); +} diff --git a/test/parse/cases/call_large_const_global_struct_byval.expected b/test/parse/cases/call_large_const_global_struct_byval.expected @@ -0,0 +1 @@ +0