kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 1b065cf837d4d5833314562416492fa11d90b786
parent b4d06b0e6b23ba1c8e9beb2984cd60d3a2b9b0ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 12:24:31 -0700

aa64: pass/return long double (fp128) in full 128-bit q register

aa_part_scalar_type() classified any FP ABI part larger than 4 bytes as
F64, so a 16-byte fp128 (long double on aarch64-linux) was moved through
a 64-bit d register on the return path: the backend emitted `ldur d0`
instead of `ldur q0`, truncating the result to its low 64 bits. For most
values that low half is zero, so long double arithmetic returned 0.

Map size>8 FP parts to F128 so the 128-bit q-register move path is taken.
This affects both user code and the self-compiled runtime (fp_tf/fp_ti),
whose __addtf3/__extenddftf2/etc. carried the same truncation.

Also make the rt object/archive rules depend on $(BIN) as a regular (not
order-only) prerequisite: cfree compiles and archives its own runtime, so
a codegen change in the compiler must rebuild the rt. The archive lists
$(RT_OBJS) explicitly rather than $^ so the now-regular cfree binary
prereq is not itself archived.

Diffstat:
M rt/Makefile            | 14 +++++++++-----
M src/arch/aa64/native.c |  8 +++++---
2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/rt/Makefile b/rt/Makefile
@@ -242,20 +242,24 @@ RT_OBJS_$(1) := $$(patsubst rt/lib/%,$$(RT_BUILD_DIR)/$(1)/%.o,$$(RT_SRCS_$(1)))

 rt-$(1): $$(RT_BUILD_DIR)/$(1)/libcfree_rt.a

-$$(RT_BUILD_DIR)/$(1)/libcfree_rt.a: $$(RT_OBJS_$(1)) | $$(BIN)
+# Regular (not order-only) dep on $(BIN): cfree compiles and archives its own
+# runtime, so a codegen or `ar` change in the compiler must rebuild the rt.
+# The archive lists $(RT_OBJS) explicitly rather than $^ so the cfree binary
+# (now a regular prereq) is not itself archived.
+$$(RT_BUILD_DIR)/$(1)/libcfree_rt.a: $$(RT_OBJS_$(1)) $$(BIN)
 	@mkdir -p $$(dir $$@)
 	@rm -f $$@
-	$$(RT_AR) rcs $$@ $$^
+	$$(RT_AR) rcs $$@ $$(RT_OBJS_$(1))

-$$(RT_BUILD_DIR)/$(1)/%.s.o: rt/lib/%.s | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.s.o: rt/lib/%.s $$(BIN)
 	@mkdir -p $$(dir $$@)
 	$$(RT_AS) $$(RT_ASFLAGS_$(1)) $$(RT_AS_COMPILE_FLAGS) $$< -o $$@

-$$(RT_BUILD_DIR)/$(1)/%.S.o: rt/lib/%.S | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.S.o: rt/lib/%.S $$(BIN)
 	@mkdir -p $$(dir $$@)
 	$$(RT_AS) $$(RT_ASFLAGS_$(1)) $$(RT_AS_COMPILE_FLAGS) $$< -o $$@

-$$(RT_BUILD_DIR)/$(1)/%.o: rt/lib/% | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.o: rt/lib/% $$(BIN)
 	@mkdir -p $$(dir $$@)
 	$$(RT_CC) $$(RT_CFLAGS_$(1)) -c $$< -o $$@

diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -1800,9 +1800,11 @@ static u32 aa_part_stack_align(const ABIArgPart* part) {
  * args/results are split into parts; each part must move at its own width, not
  * the (possibly >8-byte) aggregate width. */
 static CfreeCgTypeId aa_part_scalar_type(const ABIArgPart* part) {
-  if (part->cls == ABI_CLASS_FP)
-    return part->size <= 4u ? builtin_id(CFREE_CG_BUILTIN_F32)
-                            : builtin_id(CFREE_CG_BUILTIN_F64);
+  if (part->cls == ABI_CLASS_FP) {
+    if (part->size <= 4u) return builtin_id(CFREE_CG_BUILTIN_F32);
+    if (part->size <= 8u) return builtin_id(CFREE_CG_BUILTIN_F64);
+    return builtin_id(CFREE_CG_BUILTIN_F128);
+  }
   switch (part->size) {
     case 1u:
       return builtin_id(CFREE_CG_BUILTIN_I8);