commit bf99dd7c5a89cf727659b2500b110dec54164794
parent f60a16d14658662018ec245649772ee7990d67ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 26 May 2026 17:53:47 -0700
aa64: convert native.c from CGTarget wrapper to NativeTarget implementation
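Port the AA64 backend from the old opaque CGTarget wrapper to a direct
NativeTarget implementation, aligning it with the new opt pipeline intake.
Stack-passed argument parts are now sized and aligned per part (each part
takes its size rounded up to 8 bytes, at an alignment between 8 and 16)
instead of always using a fixed 8-byte slot; this applies to call stack-size
computation, outgoing argument stores, and incoming parameter binding.
Many of the other hunks only re-wrap lines.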
Diffstat:
1 file changed, 288 insertions(+), 235 deletions(-)
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -27,14 +27,13 @@
* register conflicts, memory barriers, outputs for aggregates/FP values, and
* file-scope asm integration. */
-#include "arch/aa64/aa64.h"
-
#include <string.h>
+#include "abi/abi.h"
+#include "arch/aa64/aa64.h"
#include "arch/aa64/asm.h"
#include "arch/aa64/isa.h"
#include "arch/aa64/regs.h"
-#include "abi/abi.h"
#include "asm/asm.h"
#include "asm/asm_lex.h"
#include "cg/native_direct_target.h"
@@ -137,8 +136,9 @@ static u32 align_up_u32(u32 v, u32 align) {
static u32 type_size32(NativeTarget* t, CfreeCgTypeId type) {
u64 n = type ? cg_type_size(t->c, type) : 8u;
if (n == 0) n = 8u;
- if (n > 16u) compiler_panic(t->c, (SrcLoc){0, 0, 0},
- "aarch64 native target: scalar too large");
+ if (n > 16u)
+ compiler_panic(t->c, (SrcLoc){0, 0, 0},
+ "aarch64 native target: scalar too large");
return (u32)n;
}
@@ -306,8 +306,8 @@ static __attribute__((unused)) u32 aa_mrs_tpidr_el0(u32 rt) {
}
static u32 aa_fp_bin(u32 op, u32 is_double, u32 rd, u32 rn, u32 rm) {
- return (is_double ? 0x1e600000u : 0x1e200000u) | op |
- ((rm & 0x1fu) << 16) | ((rn & 0x1fu) << 5) | (rd & 0x1fu);
+ return (is_double ? 0x1e600000u : 0x1e200000u) | op | ((rm & 0x1fu) << 16) |
+ ((rn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_fcmp(u32 is_double, u32 rn, u32 rm) {
@@ -327,26 +327,22 @@ static u32 aa_fmov_fp(u32 is_double, u32 rd, u32 rn) {
static u32 aa_scvtf(u32 is_double_dst, u32 is64_src, u32 fd, u32 rn) {
return (is64_src ? 0x9e220000u : 0x1e220000u) |
- (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) |
- (fd & 0x1fu);
+ (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) | (fd & 0x1fu);
}
static u32 aa_ucvtf(u32 is_double_dst, u32 is64_src, u32 fd, u32 rn) {
return (is64_src ? 0x9e230000u : 0x1e230000u) |
- (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) |
- (fd & 0x1fu);
+ (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) | (fd & 0x1fu);
}
static u32 aa_fcvtzs(u32 is64_dst, u32 is_double_src, u32 rd, u32 fn) {
return (is64_dst ? 0x9e380000u : 0x1e380000u) |
- (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) |
- (rd & 0x1fu);
+ (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_fcvtzu(u32 is64_dst, u32 is_double_src, u32 rd, u32 fn) {
return (is64_dst ? 0x9e390000u : 0x1e390000u) |
- (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) |
- (rd & 0x1fu);
+ (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_fcvt_d_s(u32 rd, u32 rn) {
@@ -368,18 +364,15 @@ static u32 aa_fmov_fp_to_gpr(u32 is64, u32 rd, u32 fn) {
}
static u32 aa_clz(u32 sf, u32 rd, u32 rn) {
- return (sf ? 0xdac01000u : 0x5ac01000u) | ((rn & 0x1fu) << 5) |
- (rd & 0x1fu);
+ return (sf ? 0xdac01000u : 0x5ac01000u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_rbit(u32 sf, u32 rd, u32 rn) {
- return (sf ? 0xdac00000u : 0x5ac00000u) | ((rn & 0x1fu) << 5) |
- (rd & 0x1fu);
+ return (sf ? 0xdac00000u : 0x5ac00000u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_rev(u32 sf, u32 rd, u32 rn) {
- return (sf ? 0xdac00c00u : 0x5ac00800u) | ((rn & 0x1fu) << 5) |
- (rd & 0x1fu);
+ return (sf ? 0xdac00c00u : 0x5ac00800u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu);
}
static u32 aa_sbfm(u32 sf, u32 rd, u32 rn, u32 immr, u32 imms) {
@@ -440,14 +433,19 @@ static u32 aa_umulh(u32 rd, u32 rn, u32 rm) {
}
static u32 aa_subs_reg(u32 sf, u32 rd, u32 rn, u32 rm) {
- return aa64_addsubsr_pack((AA64AddSubSR){
- .sf = sf, .op = 1, .S = 1, .Rm = rm, .Rn = rn, .Rd = rd});
+ return aa64_addsubsr_pack(
+ (AA64AddSubSR){.sf = sf, .op = 1, .S = 1, .Rm = rm, .Rn = rn, .Rd = rd});
}
static u32 aa_add_lsl(u32 rd, u32 rn, u32 rm, u32 shift) {
- return aa64_addsubsr_pack((AA64AddSubSR){
- .sf = 1, .op = 0, .S = 0, .shift = 0, .Rm = rm, .imm6 = shift,
- .Rn = rn, .Rd = rd});
+ return aa64_addsubsr_pack((AA64AddSubSR){.sf = 1,
+ .op = 0,
+ .S = 0,
+ .shift = 0,
+ .Rm = rm,
+ .imm6 = shift,
+ .Rn = rn,
+ .Rd = rd});
}
static u32 aa_cset(u32 sf, u32 rd, u32 cond) {
@@ -556,14 +554,13 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg,
i32 off;
MCEmitter* mc = a->base.mc;
rt = loc_reg(reg);
- sz = size_idx(mem.size ? mem.size
- : type_size32(&a->base, reg.type ? reg.type
- : mem.type));
- if (loc_is_fp(reg) && (mem.size ? mem.size
- : type_size32(&a->base, reg.type
- ? reg.type
- : mem.type)) ==
- 16u) {
+ sz = size_idx(mem.size
+ ? mem.size
+ : type_size32(&a->base, reg.type ? reg.type : mem.type));
+ if (loc_is_fp(reg) &&
+ (mem.size
+ ? mem.size
+ : type_size32(&a->base, reg.type ? reg.type : mem.type)) == 16u) {
aa_emit_mem_q(a, load, reg, addr);
return;
}
@@ -630,11 +627,10 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg,
}
aa_emit_add_imm(a, AA_TMP1, base, off);
aa_emit32(mc, load ? aa_ldur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0)
- : aa_stur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0));
+ : aa_stur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0));
}
-static NativeAllocClass aa_class_for_type(NativeTarget* t,
- CfreeCgTypeId type) {
+static NativeAllocClass aa_class_for_type(NativeTarget* t, CfreeCgTypeId type) {
if (type && cg_type_is_float(t->c, type) && cg_type_size(t->c, type) <= 8u)
return NATIVE_REG_FP;
return NATIVE_REG_INT;
@@ -671,8 +667,8 @@ static void aa_materialize_frame_index(AANativeTarget* a, NativeAddr* addr,
memset(&load, 0, sizeof load);
load.base_kind = NATIVE_ADDR_BASE_FRAME;
load.base.frame = addr->index.frame;
- load.base_type = addr->index_type ? addr->index_type
- : builtin_id(CFREE_CG_BUILTIN_I64);
+ load.base_type =
+ addr->index_type ? addr->index_type : builtin_id(CFREE_CG_BUILTIN_I64);
memset(&idx, 0, sizeof idx);
idx.kind = NATIVE_LOC_REG;
idx.cls = NATIVE_REG_INT;
@@ -1058,8 +1054,7 @@ static void aa_indirect_branch(NativeTarget* t, NativeLoc addr,
aa_emit32(t->mc, aa64_br(loc_reg(addr)));
}
-static void aa_load_label_addr(NativeTarget* t, NativeLoc dst,
- MCLabel target) {
+static void aa_load_label_addr(NativeTarget* t, NativeLoc dst, MCLabel target) {
aa_emit32(t->mc, aa64_adr(loc_reg(dst), 0, 0));
aa_emit32(t->mc, aa64_b(3));
aa_emit32(t->mc, 0);
@@ -1121,8 +1116,8 @@ static void aa_load_addr(NativeTarget* t, NativeLoc dst, NativeAddr addr) {
memset(&load, 0, sizeof load);
load.base_kind = NATIVE_ADDR_BASE_FRAME;
load.base.frame = addr.base.frame;
- load.base_type = addr.base_type ? addr.base_type
- : builtin_id(CFREE_CG_BUILTIN_I64);
+ load.base_type =
+ addr.base_type ? addr.base_type : builtin_id(CFREE_CG_BUILTIN_I64);
memset(&mem, 0, sizeof mem);
mem.type = load.base_type;
mem.size = 8;
@@ -1142,13 +1137,13 @@ static void aa_load_addr(NativeTarget* t, NativeLoc dst, NativeAddr addr) {
if (aa_use_got_for_sym(t, addr.base.global.sym)) {
aa_emit32(t->mc, aa64_adrp(rd, 0, 0));
t->mc->emit_reloc_at(t->mc, t->mc->section_id, pos,
- R_AARCH64_ADR_GOT_PAGE, addr.base.global.sym, 0,
- 0, 0);
+ R_AARCH64_ADR_GOT_PAGE, addr.base.global.sym, 0, 0,
+ 0);
pos = t->mc->pos(t->mc);
aa_emit32(t->mc, aa_ldr_uimm(3, rd, rd, 0));
t->mc->emit_reloc_at(t->mc, t->mc->section_id, pos,
- R_AARCH64_LD64_GOT_LO12_NC,
- addr.base.global.sym, 0, 0, 0);
+ R_AARCH64_LD64_GOT_LO12_NC, addr.base.global.sym,
+ 0, 0, 0);
if (addend) aa_emit_add_i64(a, rd, rd, addend);
aa_apply_index(a, rd, &addr);
return;
@@ -1189,8 +1184,8 @@ static void aa_tls_addr_of(NativeTarget* t, NativeLoc dst, ObjSymId sym,
if (obj_format_tls_via_descriptor(t->c)) {
aa_emit32(mc, aa64_adrp(0, 0, 0));
pos = mc->pos(mc) - 4u;
- mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGE21,
- sym, 0, 0, 0);
+ mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGE21, sym,
+ 0, 0, 0);
aa_emit32(mc, aa_ldr_uimm(3, 0, 0, 0));
pos = mc->pos(mc) - 4u;
mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGEOFF12,
@@ -1435,31 +1430,33 @@ static void aa_convert(NativeTarget* t, ConvKind op, NativeLoc dst,
}
case CV_ITOF_S:
aa_emit32(t->mc, aa_scvtf(type_size32(t, dst.type) == 8u,
- loc_is_64(t, src), loc_reg(dst),
- loc_reg(src)));
+ loc_is_64(t, src), loc_reg(dst), loc_reg(src)));
return;
case CV_ITOF_U:
aa_emit32(t->mc, aa_ucvtf(type_size32(t, dst.type) == 8u,
- loc_is_64(t, src), loc_reg(dst),
- loc_reg(src)));
+ loc_is_64(t, src), loc_reg(dst), loc_reg(src)));
return;
case CV_FTOI_S:
- aa_emit32(t->mc, aa_fcvtzs(loc_is_64(t, dst),
- type_size32(t, src.type) == 8u, loc_reg(dst),
- loc_reg(src)));
+ aa_emit32(t->mc,
+ aa_fcvtzs(loc_is_64(t, dst), type_size32(t, src.type) == 8u,
+ loc_reg(dst), loc_reg(src)));
return;
case CV_FTOI_U:
- aa_emit32(t->mc, aa_fcvtzu(loc_is_64(t, dst),
- type_size32(t, src.type) == 8u, loc_reg(dst),
- loc_reg(src)));
+ aa_emit32(t->mc,
+ aa_fcvtzu(loc_is_64(t, dst), type_size32(t, src.type) == 8u,
+ loc_reg(dst), loc_reg(src)));
return;
case CV_FEXT:
- if (dst_fp && src_fp) aa_emit32(t->mc, aa_fcvt_d_s(loc_reg(dst), loc_reg(src)));
- else aa_move(t, dst, src);
+ if (dst_fp && src_fp)
+ aa_emit32(t->mc, aa_fcvt_d_s(loc_reg(dst), loc_reg(src)));
+ else
+ aa_move(t, dst, src);
return;
case CV_FTRUNC:
- if (dst_fp && src_fp) aa_emit32(t->mc, aa_fcvt_s_d(loc_reg(dst), loc_reg(src)));
- else aa_move(t, dst, src);
+ if (dst_fp && src_fp)
+ aa_emit32(t->mc, aa_fcvt_s_d(loc_reg(dst), loc_reg(src)));
+ else
+ aa_move(t, dst, src);
return;
default:
aa_panic(aa_of(t), "unsupported conversion");
@@ -1491,7 +1488,8 @@ static void aa_alloca(NativeTarget* t, NativeLoc dst, NativeLoc size,
aa_emit32(t->mc, aa64_add_imm(1, loc_reg(dst), AA_SP, 0, 0));
}
-static MemAccess aa_mem_for_type(NativeTarget* t, CfreeCgTypeId type, u32 size) {
+static MemAccess aa_mem_for_type(NativeTarget* t, CfreeCgTypeId type,
+ u32 size) {
MemAccess mem;
memset(&mem, 0, sizeof mem);
mem.type = type;
@@ -1607,8 +1605,7 @@ static void aa_store_outgoing_part(NativeTarget* t, int tail_call,
aa_emit_mem(aa_of(t), 0, src, addr, mem);
}
-static const ABIArgInfo* aa_param_abi(NativeTarget* t,
- const ABIFuncInfo* abi,
+static const ABIArgInfo* aa_param_abi(NativeTarget* t, const ABIFuncInfo* abi,
const NativeCallDesc* desc, u32 i,
ABIArgInfo* scratch) {
if (abi && i < abi->nparams) return &abi->params[i];
@@ -1626,10 +1623,29 @@ static const ABIArgInfo* aa_param_abi(NativeTarget* t,
return scratch;
}
+/* Stack bytes used by one argument part: its size (8 if zero) rounded up to a
+ * multiple of 8, so a 16-byte FP part such as binary128 takes 16, not 8. */
+static u32 aa_part_stack_size(const ABIArgPart* part) {
+ return align_up_u32(part->size ? part->size : 8u, 8u);
+}
+
+/* Natural stack alignment of a part: at least 8, capped at 16 (binary128). */
+static u32 aa_part_stack_align(const ABIArgPart* part) {
+ u32 al = part->align ? part->align : 8u;
+ if (al < 8u) al = 8u;
+ if (al > 16u) al = 16u;
+ return al;
+}
+
static u32 aa_class_stack_size(const ABIArgInfo* ai) {
+ u32 total = 0;
if (!ai || ai->kind == ABI_ARG_IGNORE) return 0;
if (ai->kind == ABI_ARG_INDIRECT) return 8u;
- return align_up_u32(ai->nparts ? ai->nparts * 8u : 8u, 8u);
+ for (u32 p = 0; p < ai->nparts; ++p) {
+ total = align_up_u32(total, aa_part_stack_align(&ai->parts[p]));
+ total += aa_part_stack_size(&ai->parts[p]);
+ }
+ return align_up_u32(total ? total : 8u, 8u);
}
static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) {
@@ -1638,8 +1654,8 @@ static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) {
for (u32 i = 0; i < desc->nargs; ++i) {
ABIArgInfo tmp;
const ABIArgInfo* ai = aa_param_abi(t, abi, desc, i, &tmp);
- int force_stack = abi && abi->variadic && abi->vararg_on_stack &&
- i >= abi->nparams;
+ int force_stack =
+ abi && abi->variadic && abi->vararg_on_stack && i >= abi->nparams;
if (ai->kind == ABI_ARG_IGNORE) continue;
if (force_stack) {
stack += aa_class_stack_size(ai);
@@ -1657,13 +1673,17 @@ static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) {
if (part->cls == ABI_CLASS_FP) {
if (next_fp < 8u)
next_fp++;
- else
- stack += 8u;
+ else {
+ stack = align_up_u32(stack, aa_part_stack_align(part));
+ stack += aa_part_stack_size(part);
+ }
} else {
if (next_int < 8u)
next_int++;
- else
- stack += 8u;
+ else {
+ stack = align_up_u32(stack, aa_part_stack_align(part));
+ stack += aa_part_stack_size(part);
+ }
}
}
}
@@ -1688,12 +1708,11 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
u32 next_int = 0, next_fp = 0, stack = 0;
int tail_call = (desc->flags & CG_CALL_TAIL) != 0;
if (abi && abi->has_sret) {
- NativeLoc x8 = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64),
- NATIVE_REG_INT, 8u);
+ NativeLoc x8 =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, 8u);
if (desc->flags & CG_CALL_TAIL) {
AANativeTarget* a = aa_of(t);
- NativeLoc saved =
- aa_stack_loc(x8.type, a->sret_ptr_slot, 0);
+ NativeLoc saved = aa_stack_loc(x8.type, a->sret_ptr_slot, 0);
aa_load_part(t, x8, saved, 0, 8);
} else if (desc->nresults) {
aa_addr_of_loc(t, x8, desc->results[0]);
@@ -1702,8 +1721,8 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
for (u32 i = 0; i < desc->nargs; ++i) {
ABIArgInfo tmp;
const ABIArgInfo* ai = aa_param_abi(t, abi, desc, i, &tmp);
- int force_stack = abi && abi->variadic && abi->vararg_on_stack &&
- i >= abi->nparams;
+ int force_stack =
+ abi && abi->variadic && abi->vararg_on_stack && i >= abi->nparams;
if (ai->kind == ABI_ARG_IGNORE) continue;
if (force_stack) {
NativeLoc tmpreg =
@@ -1744,12 +1763,12 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
NativeLoc dst = aa_reg_loc(desc->args[i].type, cls, next_int++);
aa_load_part(t, dst, desc->args[i], part->src_offset, part->size);
} else {
- NativeLoc tmpreg =
- aa_reg_loc(desc->args[i].type, cls, cls == NATIVE_REG_FP ? 16u
- : AA_TMP0);
+ NativeLoc tmpreg = aa_reg_loc(desc->args[i].type, cls,
+ cls == NATIVE_REG_FP ? 16u : AA_TMP0);
aa_load_part(t, tmpreg, desc->args[i], part->src_offset, part->size);
+ stack = align_up_u32(stack, aa_part_stack_align(part));
aa_store_outgoing_part(t, tail_call, stack, tmpreg, part->size);
- stack += 8u;
+ stack += aa_part_stack_size(part);
}
}
}
@@ -1764,9 +1783,9 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
cls == NATIVE_REG_FP ? nf++ : ni++);
rets[nr].dst = desc->results[0];
if (rets[nr].dst.kind == NATIVE_LOC_FRAME)
- rets[nr].dst = aa_stack_loc(desc->results[0].type,
- desc->results[0].v.frame,
- (i32)part->src_offset);
+ rets[nr].dst =
+ aa_stack_loc(desc->results[0].type, desc->results[0].v.frame,
+ (i32)part->src_offset);
else if (rets[nr].dst.kind == NATIVE_LOC_STACK)
rets[nr].dst.v.stack.offset += (i32)part->src_offset;
else if (rets[nr].dst.kind == NATIVE_LOC_ADDR)
@@ -1801,11 +1820,10 @@ static void aa_emit_tail_site(NativeTarget* t, NativeLoc callee) {
a->ntail_sites++;
for (u32 i = 0; i < AA_TAIL_WORDS; ++i) aa_emit32(t->mc, 0xd503201fu);
if (callee.kind == NATIVE_LOC_GLOBAL) {
- t->mc->emit_reloc_at(t->mc, t->mc->section_id,
- a->tail_sites[a->ntail_sites - 1u].pos +
- (AA_TAIL_WORDS - 1u) * 4u,
- R_AARCH64_JUMP26, callee.v.global.sym,
- callee.v.global.addend, 0, 0);
+ t->mc->emit_reloc_at(
+ t->mc, t->mc->section_id,
+ a->tail_sites[a->ntail_sites - 1u].pos + (AA_TAIL_WORDS - 1u) * 4u,
+ R_AARCH64_JUMP26, callee.v.global.sym, callee.v.global.addend, 0, 0);
}
}
@@ -1842,8 +1860,8 @@ static void aa_plan_ret(NativeTarget* t, const CGFuncDesc* fd,
if (nvalues) rets = arena_zarray(t->c->tu, NativeCallPlanRet, 4);
if (nvalues && abi && abi->ret.kind == ABI_ARG_INDIRECT) {
AANativeTarget* a = aa_of(t);
- NativeLoc dstp = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64),
- NATIVE_REG_INT, AA_TMP1);
+ NativeLoc dstp =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP1);
NativeLoc saved = aa_stack_loc(dstp.type, a->sret_ptr_slot, 0);
NativeAddr dst_addr, src_addr;
AggregateAccess access;
@@ -1877,8 +1895,8 @@ static void aa_plan_ret(NativeTarget* t, const CGFuncDesc* fd,
rets[nr].src.v.stack.offset += (i32)part->src_offset;
else if (rets[nr].src.kind == NATIVE_LOC_ADDR)
rets[nr].src.v.addr.offset += (i32)part->src_offset;
- rets[nr].dst = aa_reg_loc(values[0].type, cls,
- cls == NATIVE_REG_FP ? nf++ : ni++);
+ rets[nr].dst =
+ aa_reg_loc(values[0].type, cls, cls == NATIVE_REG_FP ? nf++ : ni++);
rets[nr].mem = aa_mem_for_type(t, values[0].type, part->size);
nr++;
}
@@ -1953,8 +1971,8 @@ static void aa_bitfield_store(NativeTarget* t, NativeAddr addr, NativeLoc src,
u64 ones = width >= 64u ? ~(u64)0 : ((1ull << width) - 1ull);
u64 field_mask = ones << bf.bit_offset;
NativeAddr saddr = aa_addr_plus(addr, bf.storage_offset);
- NativeLoc word = aa_tmp_loc(bf.storage.type ? bf.storage.type : src.type,
- AA_TMP0);
+ NativeLoc word =
+ aa_tmp_loc(bf.storage.type ? bf.storage.type : src.type, AA_TMP0);
aa_load_native(t, word, saddr, bf.storage);
aa_emit_load_imm(t->mc, sf, AA_TMP1, (i64)~field_mask);
aa_emit32(t->mc, aa64_and(sf, AA_TMP0, AA_TMP0, AA_TMP1));
@@ -2028,9 +2046,9 @@ static void aa_atomic_load(NativeTarget* t, NativeLoc dst, NativeAddr addr,
u32 base = AA_TMP0;
u32 sz = size_idx(mem.size ? mem.size : type_size32(t, dst.type));
aa_atomic_addr_reg(t, addr, base);
- aa_emit32(t->mc, aa_order_acquire(order) ? aa_ldar(sz, loc_reg(dst), base)
- : aa_ldr_uimm(sz, loc_reg(dst),
- base, 0));
+ aa_emit32(t->mc, aa_order_acquire(order)
+ ? aa_ldar(sz, loc_reg(dst), base)
+ : aa_ldr_uimm(sz, loc_reg(dst), base, 0));
if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH));
}
@@ -2040,9 +2058,9 @@ static void aa_atomic_store(NativeTarget* t, NativeAddr addr, NativeLoc src,
u32 sz = size_idx(mem.size ? mem.size : type_size32(t, src.type));
if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH));
aa_atomic_addr_reg(t, addr, base);
- aa_emit32(t->mc, aa_order_release(order) ? aa_stlr(sz, loc_reg(src), base)
- : aa_str_uimm(sz, loc_reg(src),
- base, 0));
+ aa_emit32(t->mc, aa_order_release(order)
+ ? aa_stlr(sz, loc_reg(src), base)
+ : aa_str_uimm(sz, loc_reg(src), base, 0));
if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH));
}
@@ -2088,9 +2106,9 @@ static void aa_atomic_rmw(NativeTarget* t, AtomicOp op, NativeLoc dst,
default:
aa_panic(a, "unsupported atomic rmw op");
}
- aa_emit32(t->mc, aa_order_release(order) ? aa_stlxr(sz, status, next_reg, base)
- : aa_stxr(sz, status, next_reg,
- base));
+ aa_emit32(t->mc, aa_order_release(order)
+ ? aa_stlxr(sz, status, next_reg, base)
+ : aa_stxr(sz, status, next_reg, base));
aa_emit32(t->mc, aa64_cbnz_imm(0, status, 0));
t->mc->emit_label_ref(t->mc, retry, R_AARCH64_CONDBR19, 4, 0);
aa_saved_tmp_restore(a, status);
@@ -2138,8 +2156,8 @@ static void aa_fence(NativeTarget* t, MemOrder order) {
}
static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
- const NativeLoc* dsts, u32 ndst,
- const NativeLoc* args, u32 narg) {
+ const NativeLoc* dsts, u32 ndst, const NativeLoc* args,
+ u32 narg) {
AggregateAccess access;
NativeAddr dst_addr;
NativeAddr src_addr;
@@ -2202,8 +2220,8 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
aa_emit32(t->mc, aa_rev(sf, loc_reg(dsts[0]), loc_reg(args[0])));
if (kind == INTRIN_BSWAP16) {
aa_emit_load_imm(t->mc, 0, AA_TMP0, 16);
- aa_emit32(t->mc, aa64_lsrv(0, loc_reg(dsts[0]), loc_reg(dsts[0]),
- AA_TMP0));
+ aa_emit32(t->mc,
+ aa64_lsrv(0, loc_reg(dsts[0]), loc_reg(dsts[0]), AA_TMP0));
}
return;
}
@@ -2216,15 +2234,21 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
u32 sf = loc_is_64(t, dsts[0]);
u32 rd = loc_reg(dsts[0]);
if (kind == INTRIN_SADD_OVERFLOW || kind == INTRIN_UADD_OVERFLOW)
- aa_emit32(t->mc, aa64_addsubsr_pack((AA64AddSubSR){
- .sf = sf, .op = 0, .S = 1,
- .Rm = loc_reg(args[1]), .Rn = loc_reg(args[0]),
- .Rd = rd}));
+ aa_emit32(t->mc,
+ aa64_addsubsr_pack((AA64AddSubSR){.sf = sf,
+ .op = 0,
+ .S = 1,
+ .Rm = loc_reg(args[1]),
+ .Rn = loc_reg(args[0]),
+ .Rd = rd}));
else
- aa_emit32(t->mc, aa64_addsubsr_pack((AA64AddSubSR){
- .sf = sf, .op = 1, .S = 1,
- .Rm = loc_reg(args[1]), .Rn = loc_reg(args[0]),
- .Rd = rd}));
+ aa_emit32(t->mc,
+ aa64_addsubsr_pack((AA64AddSubSR){.sf = sf,
+ .op = 1,
+ .S = 1,
+ .Rm = loc_reg(args[1]),
+ .Rn = loc_reg(args[0]),
+ .Rd = rd}));
aa_emit32(t->mc,
aa_cset(loc_is_64(t, dsts[1]), loc_reg(dsts[1]),
(kind == INTRIN_SADD_OVERFLOW ||
@@ -2240,16 +2264,16 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
u32 sf = loc_is_64(t, dsts[0]);
if (sf) {
if (kind == INTRIN_SMUL_OVERFLOW) {
- aa_emit32(t->mc, aa_smulh(AA_TMP0, loc_reg(args[0]),
- loc_reg(args[1])));
+ aa_emit32(t->mc,
+ aa_smulh(AA_TMP0, loc_reg(args[0]), loc_reg(args[1])));
aa_emit32(t->mc, aa64_mul(1, loc_reg(dsts[0]), loc_reg(args[0]),
loc_reg(args[1])));
aa_emit32(t->mc, aa_sbfm(1, AA_TMP1, loc_reg(dsts[0]), 63, 63));
aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, AA_TMP0, AA_TMP1));
aa_emit32(t->mc, aa_cset(0, loc_reg(dsts[1]), cmp_cond(CMP_NE)));
} else {
- aa_emit32(t->mc, aa_umulh(AA_TMP0, loc_reg(args[0]),
- loc_reg(args[1])));
+ aa_emit32(t->mc,
+ aa_umulh(AA_TMP0, loc_reg(args[0]), loc_reg(args[1])));
aa_emit32(t->mc, aa64_mul(1, loc_reg(dsts[0]), loc_reg(args[0]),
loc_reg(args[1])));
aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, AA_TMP0, AA64_ZR));
@@ -2302,9 +2326,9 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
dst_addr.base.reg = args[0].v.reg;
src_addr.base_kind = NATIVE_ADDR_BASE_REG;
src_addr.base.reg = args[1].v.reg;
+ aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, args[0].v.reg, args[1].v.reg));
aa_emit32(t->mc,
- aa_subs_reg(1, AA64_ZR, args[0].v.reg, args[1].v.reg));
- aa_emit32(t->mc, aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_LT_U)}));
+ aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_LT_U)}));
t->mc->emit_label_ref(t->mc, forward, R_AARCH64_CONDBR19, 4, 0);
aa_copy_bytes_dir(t, dst_addr, src_addr, access, 1);
aa_jump(t, done);
@@ -2377,79 +2401,110 @@ static const Reg aa_int_scratch[] = {9u, 10u};
static const Reg aa_fp_allocable[] = {18u, 19u};
static const Reg aa_fp_scratch[] = {20u, 21u};
-#define AA_PHYS_INT_ALLOC(r) \
- {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \
+#define AA_PHYS_INT_ALLOC(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_INT, \
+ .abi_index = 0xffu, \
.flags = NATIVE_REG_ALLOCABLE | NATIVE_REG_CALLER_SAVED, \
- .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_INT_CALLER(r) \
- {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \
- .flags = NATIVE_REG_CALLER_SAVED, .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_INT_ARG(r) \
- {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = (r), \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_INT_CALLER(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_INT, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_CALLER_SAVED, \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_INT_ARG(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_INT, \
+ .abi_index = (r), \
.flags = NATIVE_REG_CALLER_SAVED | NATIVE_REG_ARG | \
- ((r) < 2u ? NATIVE_REG_RET : 0), \
- .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_INT_CALLEE(r) \
- {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \
- .flags = NATIVE_REG_CALLEE_SAVED, .spill_cost = 4u, .copy_cost = 1u}
-#define AA_PHYS_INT_RESERVED(r) \
- {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \
- .flags = NATIVE_REG_RESERVED, .spill_cost = 0u, .copy_cost = 0u}
+ ((r) < 2u ? NATIVE_REG_RET : 0), \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_INT_CALLEE(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_INT, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_CALLEE_SAVED, \
+ .spill_cost = 4u, \
+ .copy_cost = 1u}
+#define AA_PHYS_INT_RESERVED(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_INT, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_RESERVED, \
+ .spill_cost = 0u, \
+ .copy_cost = 0u}
static const NativePhysRegInfo aa_int_phys[] = {
- AA_PHYS_INT_ARG(0u), AA_PHYS_INT_ARG(1u),
- AA_PHYS_INT_ARG(2u), AA_PHYS_INT_ARG(3u),
- AA_PHYS_INT_ARG(4u), AA_PHYS_INT_ARG(5u),
- AA_PHYS_INT_ARG(6u), AA_PHYS_INT_ARG(7u),
- AA_PHYS_INT_ALLOC(8u), AA_PHYS_INT_RESERVED(9u),
+ AA_PHYS_INT_ARG(0u), AA_PHYS_INT_ARG(1u),
+ AA_PHYS_INT_ARG(2u), AA_PHYS_INT_ARG(3u),
+ AA_PHYS_INT_ARG(4u), AA_PHYS_INT_ARG(5u),
+ AA_PHYS_INT_ARG(6u), AA_PHYS_INT_ARG(7u),
+ AA_PHYS_INT_ALLOC(8u), AA_PHYS_INT_RESERVED(9u),
AA_PHYS_INT_RESERVED(10u), AA_PHYS_INT_ALLOC(11u),
- AA_PHYS_INT_ALLOC(12u), AA_PHYS_INT_ALLOC(13u),
- AA_PHYS_INT_ALLOC(14u), AA_PHYS_INT_ALLOC(15u),
+ AA_PHYS_INT_ALLOC(12u), AA_PHYS_INT_ALLOC(13u),
+ AA_PHYS_INT_ALLOC(14u), AA_PHYS_INT_ALLOC(15u),
AA_PHYS_INT_RESERVED(16u), AA_PHYS_INT_RESERVED(17u),
AA_PHYS_INT_RESERVED(18u), AA_PHYS_INT_CALLEE(19u),
- AA_PHYS_INT_CALLEE(20u), AA_PHYS_INT_CALLEE(21u),
- AA_PHYS_INT_CALLEE(22u), AA_PHYS_INT_CALLEE(23u),
- AA_PHYS_INT_CALLEE(24u), AA_PHYS_INT_CALLEE(25u),
- AA_PHYS_INT_CALLEE(26u), AA_PHYS_INT_CALLEE(27u),
- AA_PHYS_INT_CALLEE(28u), AA_PHYS_INT_RESERVED(29u),
+ AA_PHYS_INT_CALLEE(20u), AA_PHYS_INT_CALLEE(21u),
+ AA_PHYS_INT_CALLEE(22u), AA_PHYS_INT_CALLEE(23u),
+ AA_PHYS_INT_CALLEE(24u), AA_PHYS_INT_CALLEE(25u),
+ AA_PHYS_INT_CALLEE(26u), AA_PHYS_INT_CALLEE(27u),
+ AA_PHYS_INT_CALLEE(28u), AA_PHYS_INT_RESERVED(29u),
AA_PHYS_INT_RESERVED(30u), AA_PHYS_INT_RESERVED(31u),
};
-#define AA_PHYS_FP_ALLOC(r) \
- {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \
+#define AA_PHYS_FP_ALLOC(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_FP, \
+ .abi_index = 0xffu, \
.flags = NATIVE_REG_ALLOCABLE | NATIVE_REG_CALLER_SAVED, \
- .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_FP_CALLER(r) \
- {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \
- .flags = NATIVE_REG_CALLER_SAVED, .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_FP_ARG(r) \
- {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = (r), \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_FP_CALLER(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_FP, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_CALLER_SAVED, \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_FP_ARG(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_FP, \
+ .abi_index = (r), \
.flags = NATIVE_REG_CALLER_SAVED | NATIVE_REG_ARG | \
- ((r) < 4u ? NATIVE_REG_RET : 0), \
- .spill_cost = 1u, .copy_cost = 1u}
-#define AA_PHYS_FP_CALLEE(r) \
- {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \
- .flags = NATIVE_REG_CALLEE_SAVED, .spill_cost = 4u, .copy_cost = 1u}
-#define AA_PHYS_FP_RESERVED(r) \
- {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \
- .flags = NATIVE_REG_RESERVED, .spill_cost = 0u, .copy_cost = 0u}
+ ((r) < 4u ? NATIVE_REG_RET : 0), \
+ .spill_cost = 1u, \
+ .copy_cost = 1u}
+#define AA_PHYS_FP_CALLEE(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_FP, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_CALLEE_SAVED, \
+ .spill_cost = 4u, \
+ .copy_cost = 1u}
+#define AA_PHYS_FP_RESERVED(r) \
+ {.reg = (r), \
+ .cls = NATIVE_REG_FP, \
+ .abi_index = 0xffu, \
+ .flags = NATIVE_REG_RESERVED, \
+ .spill_cost = 0u, \
+ .copy_cost = 0u}
static const NativePhysRegInfo aa_fp_phys[] = {
- AA_PHYS_FP_ARG(0u), AA_PHYS_FP_ARG(1u),
- AA_PHYS_FP_ARG(2u), AA_PHYS_FP_ARG(3u),
- AA_PHYS_FP_ARG(4u), AA_PHYS_FP_ARG(5u),
- AA_PHYS_FP_ARG(6u), AA_PHYS_FP_ARG(7u),
- AA_PHYS_FP_CALLEE(8u), AA_PHYS_FP_CALLEE(9u),
- AA_PHYS_FP_CALLEE(10u), AA_PHYS_FP_CALLEE(11u),
- AA_PHYS_FP_CALLEE(12u), AA_PHYS_FP_CALLEE(13u),
- AA_PHYS_FP_CALLEE(14u), AA_PHYS_FP_CALLEE(15u),
- AA_PHYS_FP_CALLER(16u), AA_PHYS_FP_CALLER(17u),
- AA_PHYS_FP_ALLOC(18u), AA_PHYS_FP_ALLOC(19u),
- AA_PHYS_FP_RESERVED(20u), AA_PHYS_FP_RESERVED(21u),
- AA_PHYS_FP_CALLER(22u), AA_PHYS_FP_CALLER(23u),
- AA_PHYS_FP_CALLER(24u), AA_PHYS_FP_CALLER(25u),
- AA_PHYS_FP_CALLER(26u), AA_PHYS_FP_CALLER(27u),
- AA_PHYS_FP_CALLER(28u), AA_PHYS_FP_CALLER(29u),
+ AA_PHYS_FP_ARG(0u), AA_PHYS_FP_ARG(1u), AA_PHYS_FP_ARG(2u),
+ AA_PHYS_FP_ARG(3u), AA_PHYS_FP_ARG(4u), AA_PHYS_FP_ARG(5u),
+ AA_PHYS_FP_ARG(6u), AA_PHYS_FP_ARG(7u), AA_PHYS_FP_CALLEE(8u),
+ AA_PHYS_FP_CALLEE(9u), AA_PHYS_FP_CALLEE(10u), AA_PHYS_FP_CALLEE(11u),
+ AA_PHYS_FP_CALLEE(12u), AA_PHYS_FP_CALLEE(13u), AA_PHYS_FP_CALLEE(14u),
+ AA_PHYS_FP_CALLEE(15u), AA_PHYS_FP_CALLER(16u), AA_PHYS_FP_CALLER(17u),
+ AA_PHYS_FP_ALLOC(18u), AA_PHYS_FP_ALLOC(19u), AA_PHYS_FP_RESERVED(20u),
+ AA_PHYS_FP_RESERVED(21u), AA_PHYS_FP_CALLER(22u), AA_PHYS_FP_CALLER(23u),
+ AA_PHYS_FP_CALLER(24u), AA_PHYS_FP_CALLER(25u), AA_PHYS_FP_CALLER(26u),
+ AA_PHYS_FP_CALLER(27u), AA_PHYS_FP_CALLER(28u), AA_PHYS_FP_CALLER(29u),
AA_PHYS_FP_CALLER(30u), AA_PHYS_FP_CALLER(31u),
};
@@ -2465,8 +2520,8 @@ static const NativeAllocClassInfo aa_classes[] = {
.callee_saved_mask = 0x1ff80000u,
.arg_mask = 0x000000ffu,
.ret_mask = 0x00000003u,
- .reserved_mask = (1u << AA_TMP0) | (1u << AA_TMP1) | (1u << AA_FP) |
- (1u << AA_LR)},
+ .reserved_mask =
+ (1u << AA_TMP0) | (1u << AA_TMP1) | (1u << AA_FP) | (1u << AA_LR)},
{.cls = NATIVE_REG_FP,
.allocable = aa_fp_allocable,
.nallocable = sizeof aa_fp_allocable / sizeof aa_fp_allocable[0],
@@ -2547,12 +2602,13 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
NativeFrameSlot home) {
AANativeTarget* a = aa_of(t);
const ABIFuncInfo* abi = abi_cg_func_info(t->c->abi, a->func->fn_type);
- const ABIArgInfo* ai = p->index < abi->nparams ? &abi->params[p->index] : NULL;
+ const ABIArgInfo* ai =
+ p->index < abi->nparams ? &abi->params[p->index] : NULL;
if (!ai || ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
- NativeLoc src = aa_reg_loc(p->type, NATIVE_REG_INT,
- a->next_param_int < 8u ? a->next_param_int++
- : AA_TMP0);
+ NativeLoc src =
+ aa_reg_loc(p->type, NATIVE_REG_INT,
+ a->next_param_int < 8u ? a->next_param_int++ : AA_TMP0);
if (src.v.reg == AA_TMP0) {
NativeAddr saddr;
memset(&saddr, 0, sizeof saddr);
@@ -2591,16 +2647,18 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
} else {
src = aa_reg_loc(p->type, cls, cls == NATIVE_REG_FP ? 16u : AA_TMP0);
NativeAddr saddr;
+ a->next_param_stack =
+ align_up_u32(a->next_param_stack, aa_part_stack_align(part));
memset(&saddr, 0, sizeof saddr);
saddr.base_kind = NATIVE_ADDR_BASE_REG;
saddr.base.reg = AA_FP;
saddr.base_type = p->type;
saddr.offset = (i32)a->next_param_stack;
aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, part->size));
- a->next_param_stack += 8u;
+ a->next_param_stack += aa_part_stack_size(part);
}
- aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset),
- src, 0, part->size);
+ aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset), src, 0,
+ part->size);
}
a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u);
}
@@ -2715,8 +2773,8 @@ static NativeAddr aa_reg_addr(CfreeCgTypeId type, u32 reg, i32 offset) {
static void aa_load_ap_addr(NativeDirectTarget* d, Operand ap_addr,
u32 dst_reg) {
- NativeLoc dst = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64),
- NATIVE_REG_INT, dst_reg);
+ NativeLoc dst =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, dst_reg);
NativeAddr ap = aa_direct_pointer_addr(d, ap_addr);
d->native->load_addr(d->native, dst, ap);
}
@@ -2724,8 +2782,8 @@ static void aa_load_ap_addr(NativeDirectTarget* d, Operand ap_addr,
static void aa_va_start_(NativeDirectTarget* d, Operand ap_addr) {
AANativeTarget* a = aa_of(d->native);
ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi);
- NativeLoc ptr = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT,
- AA_TMP0);
+ NativeLoc ptr =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0);
NativeAddr dst = aa_direct_pointer_addr(d, ap_addr);
if (vai.kind == ABI_VA_LIST_POINTER) {
aa_emit_add_imm(a, AA_TMP0, AA_FP, (i32)a->next_param_stack);
@@ -2745,26 +2803,24 @@ static void aa_va_start_(NativeDirectTarget* d, Operand ap_addr) {
: vai.fp_reg_count;
aa_load_ap_addr(d, ap_addr, 15u);
aa_emit_add_imm(a, AA_TMP0, AA_FP, (i32)a->next_param_stack);
- aa_emit_mem(a, 0, ptr,
- aa_reg_addr(ptr.type, 15u, (i32)vai.stack_offset), ptr_mem);
+ aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.stack_offset),
+ ptr_mem);
aa_emit_add_imm(a, AA_TMP0, AA_FP,
- -(i32)gr->off +
- (i32)(vai.gp_reg_count * vai.gp_slot_size));
- aa_emit_mem(a, 0, ptr,
- aa_reg_addr(ptr.type, 15u, (i32)vai.gr_top_offset), ptr_mem);
+ -(i32)gr->off + (i32)(vai.gp_reg_count * vai.gp_slot_size));
+ aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.gr_top_offset),
+ ptr_mem);
aa_emit_add_imm(a, AA_TMP0, AA_FP,
- -(i32)vr->off +
- (i32)(vai.fp_reg_count * vai.fp_slot_size));
- aa_emit_mem(a, 0, ptr,
- aa_reg_addr(ptr.type, 15u, (i32)vai.vr_top_offset), ptr_mem);
+ -(i32)vr->off + (i32)(vai.fp_reg_count * vai.fp_slot_size));
+ aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.vr_top_offset),
+ ptr_mem);
aa_emit_load_imm(a->base.mc, 0, AA_TMP1,
-(i32)((vai.gp_reg_count - used_gr) * vai.gp_slot_size));
- aa_emit_mem(a, 0, i32tmp,
- aa_reg_addr(i32_ty, 15u, (i32)vai.gr_offs_offset), i32_mem);
+ aa_emit_mem(a, 0, i32tmp, aa_reg_addr(i32_ty, 15u, (i32)vai.gr_offs_offset),
+ i32_mem);
aa_emit_load_imm(a->base.mc, 0, AA_TMP1,
-(i32)((vai.fp_reg_count - used_vr) * vai.fp_slot_size));
- aa_emit_mem(a, 0, i32tmp,
- aa_reg_addr(i32_ty, 15u, (i32)vai.vr_offs_offset), i32_mem);
+ aa_emit_mem(a, 0, i32tmp, aa_reg_addr(i32_ty, 15u, (i32)vai.vr_offs_offset),
+ i32_mem);
return;
}
{
@@ -2777,15 +2833,15 @@ static void aa_va_arg_(NativeDirectTarget* d, Operand dst_op, Operand ap_addr,
CfreeCgTypeId type) {
AANativeTarget* a = aa_of(d->native);
ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi);
- NativeLoc cur = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT,
- AA_TMP0);
- NativeLoc val =
- aa_reg_loc(type, cg_type_is_float(d->base.c, type) ? NATIVE_REG_FP
- : NATIVE_REG_INT,
- cg_type_is_float(d->base.c, type) ? 16u : 9u);
+ NativeLoc cur =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0);
+ NativeLoc val = aa_reg_loc(
+ type, cg_type_is_float(d->base.c, type) ? NATIVE_REG_FP : NATIVE_REG_INT,
+ cg_type_is_float(d->base.c, type) ? 16u : 9u);
NativeAddr src, dst;
MemAccess ptr_mem = aa_mem_for_type(d->native, cur.type, 8);
- MemAccess val_mem = aa_mem_for_type(d->native, type, type_size32(d->native, type));
+ MemAccess val_mem =
+ aa_mem_for_type(d->native, type, type_size32(d->native, type));
if (vai.kind == ABI_VA_LIST_POINTER) {
NativeAddr ap = aa_direct_pointer_addr(d, ap_addr);
aa_emit_mem(a, 1, cur, ap, ptr_mem);
@@ -2814,8 +2870,7 @@ static void aa_va_arg_(NativeDirectTarget* d, Operand dst_op, Operand ap_addr,
aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_GE_S)}));
a->base.mc->emit_label_ref(a->base.mc, stack_label, R_AARCH64_CONDBR19, 4,
0);
- aa_emit_mem(a, 1, cur, aa_reg_addr(cur.type, 15u, (i32)top_field),
- ptr_mem);
+ aa_emit_mem(a, 1, cur, aa_reg_addr(cur.type, 15u, (i32)top_field), ptr_mem);
aa_emit32(a->base.mc, aa_sbfm(1, AA_TMP1, AA_TMP1, 0, 31));
aa_emit32(a->base.mc, aa64_add(1, AA_TMP0, AA_TMP0, AA_TMP1));
aa_emit_mem(a, 1, val, aa_reg_addr(type, AA_TMP0, 0), val_mem);
@@ -2848,8 +2903,8 @@ static void aa_va_copy_(NativeDirectTarget* d, Operand dst_ap_addr,
Operand src_ap_addr) {
AANativeTarget* a = aa_of(d->native);
ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi);
- NativeLoc tmp = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT,
- AA_TMP0);
+ NativeLoc tmp =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0);
MemAccess mem = aa_mem_for_type(d->native, tmp.type, 8);
if (vai.kind == ABI_VA_LIST_POINTER) {
NativeAddr src = aa_direct_pointer_addr(d, src_ap_addr);
@@ -2901,8 +2956,8 @@ AA_UNUSED_FN static void aa_asm_bound_reg(Operand* out, CfreeCgTypeId type,
NativeAllocClass cls, Reg reg) {
memset(out, 0, sizeof *out);
out->kind = AA64_INLINE_OPK_REG;
- out->pad[0] = (cls == NATIVE_REG_FP) ? AA64_INLINE_OPCLS_FP
- : AA64_INLINE_OPCLS_INT;
+ out->pad[0] =
+ (cls == NATIVE_REG_FP) ? AA64_INLINE_OPCLS_FP : AA64_INLINE_OPCLS_INT;
out->type = type;
out->v.local = (CGLocal)reg;
}
@@ -2962,7 +3017,7 @@ AA_UNUSED_FN static void aa_asm_clobber_masks(NativeDirectTarget* d,
AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d,
NativeAllocClass cls, u32* used_int,
u32* used_fp) {
- static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u,
+ static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u,
7u, 8u, 11u, 12u, 13u, 14u, 15u};
static const Reg fp_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,
16u, 17u, 18u, 19u, 22u, 23u, 24u, 25u,
@@ -2981,8 +3036,8 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d,
return REG_NONE;
}
-AA_UNUSED_FN static NativeAllocClass
-aa_asm_constraint_class(NativeDirectTarget* d, const char* body) {
+AA_UNUSED_FN static NativeAllocClass aa_asm_constraint_class(
+ NativeDirectTarget* d, const char* body) {
if (body[0] == 'r') return NATIVE_REG_INT;
if (body[0] == 'w') return NATIVE_REG_FP;
aa_asm_panic(d, "constraint is not a register constraint");
@@ -3084,9 +3139,8 @@ AA_UNUSED_FN static void aa_asm_restore_one(NativeDirectTarget* d,
aa_mem_for_type(d->native, s->type, 8));
}
-AA_UNUSED_FN static AAAsmSavedClobber*
-aa_asm_save_callee_clobbers(NativeDirectTarget* d, u32 int_mask, u32 fp_mask,
- u32* nsaved_out) {
+AA_UNUSED_FN static AAAsmSavedClobber* aa_asm_save_callee_clobbers(
+ NativeDirectTarget* d, u32 int_mask, u32 fp_mask, u32* nsaved_out) {
AAAsmSavedClobber* saved =
arena_zarray(d->base.c->tu, AAAsmSavedClobber, 20u);
u32 n = 0;
@@ -3141,8 +3195,8 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
}
} else if (body[0] == 'm') {
Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- NativeLoc loc = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64),
- NATIVE_REG_INT, reg);
+ NativeLoc loc =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
CfreeCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
aa_direct_load_address_to_reg(d, out_ops[i], loc);
aa_asm_bound_mem(&bound_outs[i], type, reg);
@@ -3176,16 +3230,15 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp);
CfreeCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
aa_asm_bound_reg(&bound_ins[i], type, cls, reg);
- aa_direct_load_operand_to_reg(d, in_ops[i],
- aa_reg_loc(type, cls, reg));
+ aa_direct_load_operand_to_reg(d, in_ops[i], aa_reg_loc(type, cls, reg));
} else if (body[0] == 'i') {
if (in_ops[i].kind != OPK_IMM)
aa_asm_panic(d, "immediate constraint requires immediate operand");
bound_ins[i] = in_ops[i];
} else if (body[0] == 'm') {
Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- NativeLoc loc = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64),
- NATIVE_REG_INT, reg);
+ NativeLoc loc =
+ aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, reg);
CfreeCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
aa_direct_load_address_to_reg(d, in_ops[i], loc);
aa_asm_bound_mem(&bound_ins[i], type, reg);