commit 1b065cf837d4d5833314562416492fa11d90b786
parent b4d06b0e6b23ba1c8e9beb2984cd60d3a2b9b0ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 27 May 2026 12:24:31 -0700
aa64: pass/return long double (fp128) in full 128-bit q register
aa_part_scalar_type() classified any FP ABI part larger than 4 bytes as
F64, so a 16-byte fp128 (long double on aarch64-linux) was moved through
a 64-bit d register on the return path: the backend emitted `ldur d0`
instead of `ldur q0`, truncating the result to its low 64 bits. For most
values that low half is zero, so long double arithmetic returned 0.
Map FP parts larger than 8 bytes to F128 so the 128-bit q-register move
path is taken. This affects both user code and the self-compiled runtime
(fp_tf/fp_ti), whose __addtf3/__extenddftf2/etc. carried the same truncation.
Also make the rt object/archive rules depend on $(BIN) as a regular (not
order-only) prerequisite: cfree compiles and archives its own runtime, so
a codegen or `ar` change in the compiler must rebuild the rt. The archive
recipe lists the per-target $(RT_OBJS_$(1)) explicitly rather than $^ so
the now-regular cfree binary prereq is not itself archived.
Diffstat:
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/rt/Makefile b/rt/Makefile
@@ -242,20 +242,24 @@ RT_OBJS_$(1) := $$(patsubst rt/lib/%,$$(RT_BUILD_DIR)/$(1)/%.o,$$(RT_SRCS_$(1)))
rt-$(1): $$(RT_BUILD_DIR)/$(1)/libcfree_rt.a
-$$(RT_BUILD_DIR)/$(1)/libcfree_rt.a: $$(RT_OBJS_$(1)) | $$(BIN)
+# Regular (not order-only) dep on $(BIN): cfree compiles and archives its own
+# runtime, so a codegen or `ar` change in the compiler must rebuild the rt.
+# The archive lists $(RT_OBJS) explicitly rather than $^ so the cfree binary
+# (now a regular prereq) is not itself archived.
+$$(RT_BUILD_DIR)/$(1)/libcfree_rt.a: $$(RT_OBJS_$(1)) $$(BIN)
@mkdir -p $$(dir $$@)
@rm -f $$@
- $$(RT_AR) rcs $$@ $$^
+ $$(RT_AR) rcs $$@ $$(RT_OBJS_$(1))
-$$(RT_BUILD_DIR)/$(1)/%.s.o: rt/lib/%.s | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.s.o: rt/lib/%.s $$(BIN)
@mkdir -p $$(dir $$@)
$$(RT_AS) $$(RT_ASFLAGS_$(1)) $$(RT_AS_COMPILE_FLAGS) $$< -o $$@
-$$(RT_BUILD_DIR)/$(1)/%.S.o: rt/lib/%.S | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.S.o: rt/lib/%.S $$(BIN)
@mkdir -p $$(dir $$@)
$$(RT_AS) $$(RT_ASFLAGS_$(1)) $$(RT_AS_COMPILE_FLAGS) $$< -o $$@
-$$(RT_BUILD_DIR)/$(1)/%.o: rt/lib/% | $$(BIN)
+$$(RT_BUILD_DIR)/$(1)/%.o: rt/lib/% $$(BIN)
@mkdir -p $$(dir $$@)
$$(RT_CC) $$(RT_CFLAGS_$(1)) -c $$< -o $$@
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -1800,9 +1800,11 @@ static u32 aa_part_stack_align(const ABIArgPart* part) {
* args/results are split into parts; each part must move at its own width, not
* the (possibly >8-byte) aggregate width. */
static CfreeCgTypeId aa_part_scalar_type(const ABIArgPart* part) {
- if (part->cls == ABI_CLASS_FP)
- return part->size <= 4u ? builtin_id(CFREE_CG_BUILTIN_F32)
- : builtin_id(CFREE_CG_BUILTIN_F64);
+ if (part->cls == ABI_CLASS_FP) {
+ if (part->size <= 4u) return builtin_id(CFREE_CG_BUILTIN_F32);
+ if (part->size <= 8u) return builtin_id(CFREE_CG_BUILTIN_F64);
+ return builtin_id(CFREE_CG_BUILTIN_F128);
+ }
switch (part->size) {
case 1u:
return builtin_id(CFREE_CG_BUILTIN_I8);