kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit bf99dd7c5a89cf727659b2500b110dec54164794
parent f60a16d14658662018ec245649772ee7990d67ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 17:53:47 -0700

aa64: convert native.c from CGTarget wrapper to NativeTarget implementation

Port the AA64 backend from the old opaque CGTarget wrapper to a direct
NativeTarget implementation, aligning it with the new opt pipeline intake.

Diffstat:
M src/arch/aa64/native.c | 523 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 288 insertions(+), 235 deletions(-)

diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -27,14 +27,13 @@ * register conflicts, memory barriers, outputs for aggregates/FP values, and * file-scope asm integration. */ -#include "arch/aa64/aa64.h" - #include <string.h> +#include "abi/abi.h" +#include "arch/aa64/aa64.h" #include "arch/aa64/asm.h" #include "arch/aa64/isa.h" #include "arch/aa64/regs.h" -#include "abi/abi.h" #include "asm/asm.h" #include "asm/asm_lex.h" #include "cg/native_direct_target.h" @@ -137,8 +136,9 @@ static u32 align_up_u32(u32 v, u32 align) { static u32 type_size32(NativeTarget* t, CfreeCgTypeId type) { u64 n = type ? cg_type_size(t->c, type) : 8u; if (n == 0) n = 8u; - if (n > 16u) compiler_panic(t->c, (SrcLoc){0, 0, 0}, - "aarch64 native target: scalar too large"); + if (n > 16u) + compiler_panic(t->c, (SrcLoc){0, 0, 0}, + "aarch64 native target: scalar too large"); return (u32)n; } @@ -306,8 +306,8 @@ static __attribute__((unused)) u32 aa_mrs_tpidr_el0(u32 rt) { } static u32 aa_fp_bin(u32 op, u32 is_double, u32 rd, u32 rn, u32 rm) { - return (is_double ? 0x1e600000u : 0x1e200000u) | op | - ((rm & 0x1fu) << 16) | ((rn & 0x1fu) << 5) | (rd & 0x1fu); + return (is_double ? 0x1e600000u : 0x1e200000u) | op | ((rm & 0x1fu) << 16) | + ((rn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_fcmp(u32 is_double, u32 rn, u32 rm) { @@ -327,26 +327,22 @@ static u32 aa_fmov_fp(u32 is_double, u32 rd, u32 rn) { static u32 aa_scvtf(u32 is_double_dst, u32 is64_src, u32 fd, u32 rn) { return (is64_src ? 0x9e220000u : 0x1e220000u) | - (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) | - (fd & 0x1fu); + (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) | (fd & 0x1fu); } static u32 aa_ucvtf(u32 is_double_dst, u32 is64_src, u32 fd, u32 rn) { return (is64_src ? 0x9e230000u : 0x1e230000u) | - (is_double_dst ? 0x00400000u : 0) | ((rn & 0x1fu) << 5) | - (fd & 0x1fu); + (is_double_dst ? 
0x00400000u : 0) | ((rn & 0x1fu) << 5) | (fd & 0x1fu); } static u32 aa_fcvtzs(u32 is64_dst, u32 is_double_src, u32 rd, u32 fn) { return (is64_dst ? 0x9e380000u : 0x1e380000u) | - (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | - (rd & 0x1fu); + (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_fcvtzu(u32 is64_dst, u32 is_double_src, u32 rd, u32 fn) { return (is64_dst ? 0x9e390000u : 0x1e390000u) | - (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | - (rd & 0x1fu); + (is_double_src ? 0x00400000u : 0) | ((fn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_fcvt_d_s(u32 rd, u32 rn) { @@ -368,18 +364,15 @@ static u32 aa_fmov_fp_to_gpr(u32 is64, u32 rd, u32 fn) { } static u32 aa_clz(u32 sf, u32 rd, u32 rn) { - return (sf ? 0xdac01000u : 0x5ac01000u) | ((rn & 0x1fu) << 5) | - (rd & 0x1fu); + return (sf ? 0xdac01000u : 0x5ac01000u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_rbit(u32 sf, u32 rd, u32 rn) { - return (sf ? 0xdac00000u : 0x5ac00000u) | ((rn & 0x1fu) << 5) | - (rd & 0x1fu); + return (sf ? 0xdac00000u : 0x5ac00000u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_rev(u32 sf, u32 rd, u32 rn) { - return (sf ? 0xdac00c00u : 0x5ac00800u) | ((rn & 0x1fu) << 5) | - (rd & 0x1fu); + return (sf ? 
0xdac00c00u : 0x5ac00800u) | ((rn & 0x1fu) << 5) | (rd & 0x1fu); } static u32 aa_sbfm(u32 sf, u32 rd, u32 rn, u32 immr, u32 imms) { @@ -440,14 +433,19 @@ static u32 aa_umulh(u32 rd, u32 rn, u32 rm) { } static u32 aa_subs_reg(u32 sf, u32 rd, u32 rn, u32 rm) { - return aa64_addsubsr_pack((AA64AddSubSR){ - .sf = sf, .op = 1, .S = 1, .Rm = rm, .Rn = rn, .Rd = rd}); + return aa64_addsubsr_pack( + (AA64AddSubSR){.sf = sf, .op = 1, .S = 1, .Rm = rm, .Rn = rn, .Rd = rd}); } static u32 aa_add_lsl(u32 rd, u32 rn, u32 rm, u32 shift) { - return aa64_addsubsr_pack((AA64AddSubSR){ - .sf = 1, .op = 0, .S = 0, .shift = 0, .Rm = rm, .imm6 = shift, - .Rn = rn, .Rd = rd}); + return aa64_addsubsr_pack((AA64AddSubSR){.sf = 1, + .op = 0, + .S = 0, + .shift = 0, + .Rm = rm, + .imm6 = shift, + .Rn = rn, + .Rd = rd}); } static u32 aa_cset(u32 sf, u32 rd, u32 cond) { @@ -556,14 +554,13 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg, i32 off; MCEmitter* mc = a->base.mc; rt = loc_reg(reg); - sz = size_idx(mem.size ? mem.size - : type_size32(&a->base, reg.type ? reg.type - : mem.type)); - if (loc_is_fp(reg) && (mem.size ? mem.size - : type_size32(&a->base, reg.type - ? reg.type - : mem.type)) == - 16u) { + sz = size_idx(mem.size + ? mem.size + : type_size32(&a->base, reg.type ? reg.type : mem.type)); + if (loc_is_fp(reg) && + (mem.size + ? mem.size + : type_size32(&a->base, reg.type ? reg.type : mem.type)) == 16u) { aa_emit_mem_q(a, load, reg, addr); return; } @@ -630,11 +627,10 @@ static void aa_emit_mem(AANativeTarget* a, int load, NativeLoc reg, } aa_emit_add_imm(a, AA_TMP1, base, off); aa_emit32(mc, load ? 
aa_ldur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0) - : aa_stur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0)); + : aa_stur_v(sz, loc_is_fp(reg), rt, AA_TMP1, 0)); } -static NativeAllocClass aa_class_for_type(NativeTarget* t, - CfreeCgTypeId type) { +static NativeAllocClass aa_class_for_type(NativeTarget* t, CfreeCgTypeId type) { if (type && cg_type_is_float(t->c, type) && cg_type_size(t->c, type) <= 8u) return NATIVE_REG_FP; return NATIVE_REG_INT; @@ -671,8 +667,8 @@ static void aa_materialize_frame_index(AANativeTarget* a, NativeAddr* addr, memset(&load, 0, sizeof load); load.base_kind = NATIVE_ADDR_BASE_FRAME; load.base.frame = addr->index.frame; - load.base_type = addr->index_type ? addr->index_type - : builtin_id(CFREE_CG_BUILTIN_I64); + load.base_type = + addr->index_type ? addr->index_type : builtin_id(CFREE_CG_BUILTIN_I64); memset(&idx, 0, sizeof idx); idx.kind = NATIVE_LOC_REG; idx.cls = NATIVE_REG_INT; @@ -1058,8 +1054,7 @@ static void aa_indirect_branch(NativeTarget* t, NativeLoc addr, aa_emit32(t->mc, aa64_br(loc_reg(addr))); } -static void aa_load_label_addr(NativeTarget* t, NativeLoc dst, - MCLabel target) { +static void aa_load_label_addr(NativeTarget* t, NativeLoc dst, MCLabel target) { aa_emit32(t->mc, aa64_adr(loc_reg(dst), 0, 0)); aa_emit32(t->mc, aa64_b(3)); aa_emit32(t->mc, 0); @@ -1121,8 +1116,8 @@ static void aa_load_addr(NativeTarget* t, NativeLoc dst, NativeAddr addr) { memset(&load, 0, sizeof load); load.base_kind = NATIVE_ADDR_BASE_FRAME; load.base.frame = addr.base.frame; - load.base_type = addr.base_type ? addr.base_type - : builtin_id(CFREE_CG_BUILTIN_I64); + load.base_type = + addr.base_type ? 
addr.base_type : builtin_id(CFREE_CG_BUILTIN_I64); memset(&mem, 0, sizeof mem); mem.type = load.base_type; mem.size = 8; @@ -1142,13 +1137,13 @@ static void aa_load_addr(NativeTarget* t, NativeLoc dst, NativeAddr addr) { if (aa_use_got_for_sym(t, addr.base.global.sym)) { aa_emit32(t->mc, aa64_adrp(rd, 0, 0)); t->mc->emit_reloc_at(t->mc, t->mc->section_id, pos, - R_AARCH64_ADR_GOT_PAGE, addr.base.global.sym, 0, - 0, 0); + R_AARCH64_ADR_GOT_PAGE, addr.base.global.sym, 0, 0, + 0); pos = t->mc->pos(t->mc); aa_emit32(t->mc, aa_ldr_uimm(3, rd, rd, 0)); t->mc->emit_reloc_at(t->mc, t->mc->section_id, pos, - R_AARCH64_LD64_GOT_LO12_NC, - addr.base.global.sym, 0, 0, 0); + R_AARCH64_LD64_GOT_LO12_NC, addr.base.global.sym, + 0, 0, 0); if (addend) aa_emit_add_i64(a, rd, rd, addend); aa_apply_index(a, rd, &addr); return; @@ -1189,8 +1184,8 @@ static void aa_tls_addr_of(NativeTarget* t, NativeLoc dst, ObjSymId sym, if (obj_format_tls_via_descriptor(t->c)) { aa_emit32(mc, aa64_adrp(0, 0, 0)); pos = mc->pos(mc) - 4u; - mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGE21, - sym, 0, 0, 0); + mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGE21, sym, + 0, 0, 0); aa_emit32(mc, aa_ldr_uimm(3, 0, 0, 0)); pos = mc->pos(mc) - 4u; mc->emit_reloc_at(mc, mc->section_id, pos, R_AARCH64_TLVP_LOAD_PAGEOFF12, @@ -1435,31 +1430,33 @@ static void aa_convert(NativeTarget* t, ConvKind op, NativeLoc dst, } case CV_ITOF_S: aa_emit32(t->mc, aa_scvtf(type_size32(t, dst.type) == 8u, - loc_is_64(t, src), loc_reg(dst), - loc_reg(src))); + loc_is_64(t, src), loc_reg(dst), loc_reg(src))); return; case CV_ITOF_U: aa_emit32(t->mc, aa_ucvtf(type_size32(t, dst.type) == 8u, - loc_is_64(t, src), loc_reg(dst), - loc_reg(src))); + loc_is_64(t, src), loc_reg(dst), loc_reg(src))); return; case CV_FTOI_S: - aa_emit32(t->mc, aa_fcvtzs(loc_is_64(t, dst), - type_size32(t, src.type) == 8u, loc_reg(dst), - loc_reg(src))); + aa_emit32(t->mc, + aa_fcvtzs(loc_is_64(t, dst), type_size32(t, 
src.type) == 8u, + loc_reg(dst), loc_reg(src))); return; case CV_FTOI_U: - aa_emit32(t->mc, aa_fcvtzu(loc_is_64(t, dst), - type_size32(t, src.type) == 8u, loc_reg(dst), - loc_reg(src))); + aa_emit32(t->mc, + aa_fcvtzu(loc_is_64(t, dst), type_size32(t, src.type) == 8u, + loc_reg(dst), loc_reg(src))); return; case CV_FEXT: - if (dst_fp && src_fp) aa_emit32(t->mc, aa_fcvt_d_s(loc_reg(dst), loc_reg(src))); - else aa_move(t, dst, src); + if (dst_fp && src_fp) + aa_emit32(t->mc, aa_fcvt_d_s(loc_reg(dst), loc_reg(src))); + else + aa_move(t, dst, src); return; case CV_FTRUNC: - if (dst_fp && src_fp) aa_emit32(t->mc, aa_fcvt_s_d(loc_reg(dst), loc_reg(src))); - else aa_move(t, dst, src); + if (dst_fp && src_fp) + aa_emit32(t->mc, aa_fcvt_s_d(loc_reg(dst), loc_reg(src))); + else + aa_move(t, dst, src); return; default: aa_panic(aa_of(t), "unsupported conversion"); @@ -1491,7 +1488,8 @@ static void aa_alloca(NativeTarget* t, NativeLoc dst, NativeLoc size, aa_emit32(t->mc, aa64_add_imm(1, loc_reg(dst), AA_SP, 0, 0)); } -static MemAccess aa_mem_for_type(NativeTarget* t, CfreeCgTypeId type, u32 size) { +static MemAccess aa_mem_for_type(NativeTarget* t, CfreeCgTypeId type, + u32 size) { MemAccess mem; memset(&mem, 0, sizeof mem); mem.type = type; @@ -1607,8 +1605,7 @@ static void aa_store_outgoing_part(NativeTarget* t, int tail_call, aa_emit_mem(aa_of(t), 0, src, addr, mem); } -static const ABIArgInfo* aa_param_abi(NativeTarget* t, - const ABIFuncInfo* abi, +static const ABIArgInfo* aa_param_abi(NativeTarget* t, const ABIFuncInfo* abi, const NativeCallDesc* desc, u32 i, ABIArgInfo* scratch) { if (abi && i < abi->nparams) return &abi->params[i]; @@ -1626,10 +1623,29 @@ static const ABIArgInfo* aa_param_abi(NativeTarget* t, return scratch; } +/* Stack footprint of a single argument part: each occupies a multiple of 8 + * bytes (so a 16-byte FP part such as binary128 takes 16, not 8). */ +static u32 aa_part_stack_size(const ABIArgPart* part) { + return align_up_u32(part->size ? 
part->size : 8u, 8u); +} + +/* Natural stack alignment of a part: at least 8, capped at 16 (binary128). */ +static u32 aa_part_stack_align(const ABIArgPart* part) { + u32 al = part->align ? part->align : 8u; + if (al < 8u) al = 8u; + if (al > 16u) al = 16u; + return al; +} + static u32 aa_class_stack_size(const ABIArgInfo* ai) { + u32 total = 0; if (!ai || ai->kind == ABI_ARG_IGNORE) return 0; if (ai->kind == ABI_ARG_INDIRECT) return 8u; - return align_up_u32(ai->nparts ? ai->nparts * 8u : 8u, 8u); + for (u32 p = 0; p < ai->nparts; ++p) { + total = align_up_u32(total, aa_part_stack_align(&ai->parts[p])); + total += aa_part_stack_size(&ai->parts[p]); + } + return align_up_u32(total ? total : 8u, 8u); } static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) { @@ -1638,8 +1654,8 @@ static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) { for (u32 i = 0; i < desc->nargs; ++i) { ABIArgInfo tmp; const ABIArgInfo* ai = aa_param_abi(t, abi, desc, i, &tmp); - int force_stack = abi && abi->variadic && abi->vararg_on_stack && - i >= abi->nparams; + int force_stack = + abi && abi->variadic && abi->vararg_on_stack && i >= abi->nparams; if (ai->kind == ABI_ARG_IGNORE) continue; if (force_stack) { stack += aa_class_stack_size(ai); @@ -1657,13 +1673,17 @@ static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) { if (part->cls == ABI_CLASS_FP) { if (next_fp < 8u) next_fp++; - else - stack += 8u; + else { + stack = align_up_u32(stack, aa_part_stack_align(part)); + stack += aa_part_stack_size(part); + } } else { if (next_int < 8u) next_int++; - else - stack += 8u; + else { + stack = align_up_u32(stack, aa_part_stack_align(part)); + stack += aa_part_stack_size(part); + } } } } @@ -1688,12 +1708,11 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc, u32 next_int = 0, next_fp = 0, stack = 0; int tail_call = (desc->flags & CG_CALL_TAIL) != 0; if (abi && abi->has_sret) { - NativeLoc x8 = 
aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), - NATIVE_REG_INT, 8u); + NativeLoc x8 = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, 8u); if (desc->flags & CG_CALL_TAIL) { AANativeTarget* a = aa_of(t); - NativeLoc saved = - aa_stack_loc(x8.type, a->sret_ptr_slot, 0); + NativeLoc saved = aa_stack_loc(x8.type, a->sret_ptr_slot, 0); aa_load_part(t, x8, saved, 0, 8); } else if (desc->nresults) { aa_addr_of_loc(t, x8, desc->results[0]); @@ -1702,8 +1721,8 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc, for (u32 i = 0; i < desc->nargs; ++i) { ABIArgInfo tmp; const ABIArgInfo* ai = aa_param_abi(t, abi, desc, i, &tmp); - int force_stack = abi && abi->variadic && abi->vararg_on_stack && - i >= abi->nparams; + int force_stack = + abi && abi->variadic && abi->vararg_on_stack && i >= abi->nparams; if (ai->kind == ABI_ARG_IGNORE) continue; if (force_stack) { NativeLoc tmpreg = @@ -1744,12 +1763,12 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc, NativeLoc dst = aa_reg_loc(desc->args[i].type, cls, next_int++); aa_load_part(t, dst, desc->args[i], part->src_offset, part->size); } else { - NativeLoc tmpreg = - aa_reg_loc(desc->args[i].type, cls, cls == NATIVE_REG_FP ? 16u - : AA_TMP0); + NativeLoc tmpreg = aa_reg_loc(desc->args[i].type, cls, + cls == NATIVE_REG_FP ? 16u : AA_TMP0); aa_load_part(t, tmpreg, desc->args[i], part->src_offset, part->size); + stack = align_up_u32(stack, aa_part_stack_align(part)); aa_store_outgoing_part(t, tail_call, stack, tmpreg, part->size); - stack += 8u; + stack += aa_part_stack_size(part); } } } @@ -1764,9 +1783,9 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc, cls == NATIVE_REG_FP ? 
nf++ : ni++); rets[nr].dst = desc->results[0]; if (rets[nr].dst.kind == NATIVE_LOC_FRAME) - rets[nr].dst = aa_stack_loc(desc->results[0].type, - desc->results[0].v.frame, - (i32)part->src_offset); + rets[nr].dst = + aa_stack_loc(desc->results[0].type, desc->results[0].v.frame, + (i32)part->src_offset); else if (rets[nr].dst.kind == NATIVE_LOC_STACK) rets[nr].dst.v.stack.offset += (i32)part->src_offset; else if (rets[nr].dst.kind == NATIVE_LOC_ADDR) @@ -1801,11 +1820,10 @@ static void aa_emit_tail_site(NativeTarget* t, NativeLoc callee) { a->ntail_sites++; for (u32 i = 0; i < AA_TAIL_WORDS; ++i) aa_emit32(t->mc, 0xd503201fu); if (callee.kind == NATIVE_LOC_GLOBAL) { - t->mc->emit_reloc_at(t->mc, t->mc->section_id, - a->tail_sites[a->ntail_sites - 1u].pos + - (AA_TAIL_WORDS - 1u) * 4u, - R_AARCH64_JUMP26, callee.v.global.sym, - callee.v.global.addend, 0, 0); + t->mc->emit_reloc_at( + t->mc, t->mc->section_id, + a->tail_sites[a->ntail_sites - 1u].pos + (AA_TAIL_WORDS - 1u) * 4u, + R_AARCH64_JUMP26, callee.v.global.sym, callee.v.global.addend, 0, 0); } } @@ -1842,8 +1860,8 @@ static void aa_plan_ret(NativeTarget* t, const CGFuncDesc* fd, if (nvalues) rets = arena_zarray(t->c->tu, NativeCallPlanRet, 4); if (nvalues && abi && abi->ret.kind == ABI_ARG_INDIRECT) { AANativeTarget* a = aa_of(t); - NativeLoc dstp = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), - NATIVE_REG_INT, AA_TMP1); + NativeLoc dstp = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP1); NativeLoc saved = aa_stack_loc(dstp.type, a->sret_ptr_slot, 0); NativeAddr dst_addr, src_addr; AggregateAccess access; @@ -1877,8 +1895,8 @@ static void aa_plan_ret(NativeTarget* t, const CGFuncDesc* fd, rets[nr].src.v.stack.offset += (i32)part->src_offset; else if (rets[nr].src.kind == NATIVE_LOC_ADDR) rets[nr].src.v.addr.offset += (i32)part->src_offset; - rets[nr].dst = aa_reg_loc(values[0].type, cls, - cls == NATIVE_REG_FP ? 
nf++ : ni++); + rets[nr].dst = + aa_reg_loc(values[0].type, cls, cls == NATIVE_REG_FP ? nf++ : ni++); rets[nr].mem = aa_mem_for_type(t, values[0].type, part->size); nr++; } @@ -1953,8 +1971,8 @@ static void aa_bitfield_store(NativeTarget* t, NativeAddr addr, NativeLoc src, u64 ones = width >= 64u ? ~(u64)0 : ((1ull << width) - 1ull); u64 field_mask = ones << bf.bit_offset; NativeAddr saddr = aa_addr_plus(addr, bf.storage_offset); - NativeLoc word = aa_tmp_loc(bf.storage.type ? bf.storage.type : src.type, - AA_TMP0); + NativeLoc word = + aa_tmp_loc(bf.storage.type ? bf.storage.type : src.type, AA_TMP0); aa_load_native(t, word, saddr, bf.storage); aa_emit_load_imm(t->mc, sf, AA_TMP1, (i64)~field_mask); aa_emit32(t->mc, aa64_and(sf, AA_TMP0, AA_TMP0, AA_TMP1)); @@ -2028,9 +2046,9 @@ static void aa_atomic_load(NativeTarget* t, NativeLoc dst, NativeAddr addr, u32 base = AA_TMP0; u32 sz = size_idx(mem.size ? mem.size : type_size32(t, dst.type)); aa_atomic_addr_reg(t, addr, base); - aa_emit32(t->mc, aa_order_acquire(order) ? aa_ldar(sz, loc_reg(dst), base) - : aa_ldr_uimm(sz, loc_reg(dst), - base, 0)); + aa_emit32(t->mc, aa_order_acquire(order) + ? aa_ldar(sz, loc_reg(dst), base) + : aa_ldr_uimm(sz, loc_reg(dst), base, 0)); if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); } @@ -2040,9 +2058,9 @@ static void aa_atomic_store(NativeTarget* t, NativeAddr addr, NativeLoc src, u32 sz = size_idx(mem.size ? mem.size : type_size32(t, src.type)); if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); aa_atomic_addr_reg(t, addr, base); - aa_emit32(t->mc, aa_order_release(order) ? aa_stlr(sz, loc_reg(src), base) - : aa_str_uimm(sz, loc_reg(src), - base, 0)); + aa_emit32(t->mc, aa_order_release(order) + ? 
aa_stlr(sz, loc_reg(src), base) + : aa_str_uimm(sz, loc_reg(src), base, 0)); if (order == MO_SEQ_CST) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); } @@ -2088,9 +2106,9 @@ static void aa_atomic_rmw(NativeTarget* t, AtomicOp op, NativeLoc dst, default: aa_panic(a, "unsupported atomic rmw op"); } - aa_emit32(t->mc, aa_order_release(order) ? aa_stlxr(sz, status, next_reg, base) - : aa_stxr(sz, status, next_reg, - base)); + aa_emit32(t->mc, aa_order_release(order) + ? aa_stlxr(sz, status, next_reg, base) + : aa_stxr(sz, status, next_reg, base)); aa_emit32(t->mc, aa64_cbnz_imm(0, status, 0)); t->mc->emit_label_ref(t->mc, retry, R_AARCH64_CONDBR19, 4, 0); aa_saved_tmp_restore(a, status); @@ -2138,8 +2156,8 @@ static void aa_fence(NativeTarget* t, MemOrder order) { } static void aa_intrinsic(NativeTarget* t, IntrinKind kind, - const NativeLoc* dsts, u32 ndst, - const NativeLoc* args, u32 narg) { + const NativeLoc* dsts, u32 ndst, const NativeLoc* args, + u32 narg) { AggregateAccess access; NativeAddr dst_addr; NativeAddr src_addr; @@ -2202,8 +2220,8 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind, aa_emit32(t->mc, aa_rev(sf, loc_reg(dsts[0]), loc_reg(args[0]))); if (kind == INTRIN_BSWAP16) { aa_emit_load_imm(t->mc, 0, AA_TMP0, 16); - aa_emit32(t->mc, aa64_lsrv(0, loc_reg(dsts[0]), loc_reg(dsts[0]), - AA_TMP0)); + aa_emit32(t->mc, + aa64_lsrv(0, loc_reg(dsts[0]), loc_reg(dsts[0]), AA_TMP0)); } return; } @@ -2216,15 +2234,21 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind, u32 sf = loc_is_64(t, dsts[0]); u32 rd = loc_reg(dsts[0]); if (kind == INTRIN_SADD_OVERFLOW || kind == INTRIN_UADD_OVERFLOW) - aa_emit32(t->mc, aa64_addsubsr_pack((AA64AddSubSR){ - .sf = sf, .op = 0, .S = 1, - .Rm = loc_reg(args[1]), .Rn = loc_reg(args[0]), - .Rd = rd})); + aa_emit32(t->mc, + aa64_addsubsr_pack((AA64AddSubSR){.sf = sf, + .op = 0, + .S = 1, + .Rm = loc_reg(args[1]), + .Rn = loc_reg(args[0]), + .Rd = rd})); else - aa_emit32(t->mc, 
aa64_addsubsr_pack((AA64AddSubSR){ - .sf = sf, .op = 1, .S = 1, - .Rm = loc_reg(args[1]), .Rn = loc_reg(args[0]), - .Rd = rd})); + aa_emit32(t->mc, + aa64_addsubsr_pack((AA64AddSubSR){.sf = sf, + .op = 1, + .S = 1, + .Rm = loc_reg(args[1]), + .Rn = loc_reg(args[0]), + .Rd = rd})); aa_emit32(t->mc, aa_cset(loc_is_64(t, dsts[1]), loc_reg(dsts[1]), (kind == INTRIN_SADD_OVERFLOW || @@ -2240,16 +2264,16 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind, u32 sf = loc_is_64(t, dsts[0]); if (sf) { if (kind == INTRIN_SMUL_OVERFLOW) { - aa_emit32(t->mc, aa_smulh(AA_TMP0, loc_reg(args[0]), - loc_reg(args[1]))); + aa_emit32(t->mc, + aa_smulh(AA_TMP0, loc_reg(args[0]), loc_reg(args[1]))); aa_emit32(t->mc, aa64_mul(1, loc_reg(dsts[0]), loc_reg(args[0]), loc_reg(args[1]))); aa_emit32(t->mc, aa_sbfm(1, AA_TMP1, loc_reg(dsts[0]), 63, 63)); aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, AA_TMP0, AA_TMP1)); aa_emit32(t->mc, aa_cset(0, loc_reg(dsts[1]), cmp_cond(CMP_NE))); } else { - aa_emit32(t->mc, aa_umulh(AA_TMP0, loc_reg(args[0]), - loc_reg(args[1]))); + aa_emit32(t->mc, + aa_umulh(AA_TMP0, loc_reg(args[0]), loc_reg(args[1]))); aa_emit32(t->mc, aa64_mul(1, loc_reg(dsts[0]), loc_reg(args[0]), loc_reg(args[1]))); aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, AA_TMP0, AA64_ZR)); @@ -2302,9 +2326,9 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind, dst_addr.base.reg = args[0].v.reg; src_addr.base_kind = NATIVE_ADDR_BASE_REG; src_addr.base.reg = args[1].v.reg; + aa_emit32(t->mc, aa_subs_reg(1, AA64_ZR, args[0].v.reg, args[1].v.reg)); aa_emit32(t->mc, - aa_subs_reg(1, AA64_ZR, args[0].v.reg, args[1].v.reg)); - aa_emit32(t->mc, aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_LT_U)})); + aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_LT_U)})); t->mc->emit_label_ref(t->mc, forward, R_AARCH64_CONDBR19, 4, 0); aa_copy_bytes_dir(t, dst_addr, src_addr, access, 1); aa_jump(t, done); @@ -2377,79 +2401,110 @@ static const Reg aa_int_scratch[] = {9u, 10u}; static const Reg 
aa_fp_allocable[] = {18u, 19u}; static const Reg aa_fp_scratch[] = {20u, 21u}; -#define AA_PHYS_INT_ALLOC(r) \ - {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \ +#define AA_PHYS_INT_ALLOC(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_INT, \ + .abi_index = 0xffu, \ .flags = NATIVE_REG_ALLOCABLE | NATIVE_REG_CALLER_SAVED, \ - .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_INT_CALLER(r) \ - {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \ - .flags = NATIVE_REG_CALLER_SAVED, .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_INT_ARG(r) \ - {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = (r), \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_INT_CALLER(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_INT, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_CALLER_SAVED, \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_INT_ARG(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_INT, \ + .abi_index = (r), \ .flags = NATIVE_REG_CALLER_SAVED | NATIVE_REG_ARG | \ - ((r) < 2u ? NATIVE_REG_RET : 0), \ - .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_INT_CALLEE(r) \ - {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \ - .flags = NATIVE_REG_CALLEE_SAVED, .spill_cost = 4u, .copy_cost = 1u} -#define AA_PHYS_INT_RESERVED(r) \ - {.reg = (r), .cls = NATIVE_REG_INT, .abi_index = 0xffu, \ - .flags = NATIVE_REG_RESERVED, .spill_cost = 0u, .copy_cost = 0u} + ((r) < 2u ? 
NATIVE_REG_RET : 0), \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_INT_CALLEE(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_INT, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_CALLEE_SAVED, \ + .spill_cost = 4u, \ + .copy_cost = 1u} +#define AA_PHYS_INT_RESERVED(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_INT, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_RESERVED, \ + .spill_cost = 0u, \ + .copy_cost = 0u} static const NativePhysRegInfo aa_int_phys[] = { - AA_PHYS_INT_ARG(0u), AA_PHYS_INT_ARG(1u), - AA_PHYS_INT_ARG(2u), AA_PHYS_INT_ARG(3u), - AA_PHYS_INT_ARG(4u), AA_PHYS_INT_ARG(5u), - AA_PHYS_INT_ARG(6u), AA_PHYS_INT_ARG(7u), - AA_PHYS_INT_ALLOC(8u), AA_PHYS_INT_RESERVED(9u), + AA_PHYS_INT_ARG(0u), AA_PHYS_INT_ARG(1u), + AA_PHYS_INT_ARG(2u), AA_PHYS_INT_ARG(3u), + AA_PHYS_INT_ARG(4u), AA_PHYS_INT_ARG(5u), + AA_PHYS_INT_ARG(6u), AA_PHYS_INT_ARG(7u), + AA_PHYS_INT_ALLOC(8u), AA_PHYS_INT_RESERVED(9u), AA_PHYS_INT_RESERVED(10u), AA_PHYS_INT_ALLOC(11u), - AA_PHYS_INT_ALLOC(12u), AA_PHYS_INT_ALLOC(13u), - AA_PHYS_INT_ALLOC(14u), AA_PHYS_INT_ALLOC(15u), + AA_PHYS_INT_ALLOC(12u), AA_PHYS_INT_ALLOC(13u), + AA_PHYS_INT_ALLOC(14u), AA_PHYS_INT_ALLOC(15u), AA_PHYS_INT_RESERVED(16u), AA_PHYS_INT_RESERVED(17u), AA_PHYS_INT_RESERVED(18u), AA_PHYS_INT_CALLEE(19u), - AA_PHYS_INT_CALLEE(20u), AA_PHYS_INT_CALLEE(21u), - AA_PHYS_INT_CALLEE(22u), AA_PHYS_INT_CALLEE(23u), - AA_PHYS_INT_CALLEE(24u), AA_PHYS_INT_CALLEE(25u), - AA_PHYS_INT_CALLEE(26u), AA_PHYS_INT_CALLEE(27u), - AA_PHYS_INT_CALLEE(28u), AA_PHYS_INT_RESERVED(29u), + AA_PHYS_INT_CALLEE(20u), AA_PHYS_INT_CALLEE(21u), + AA_PHYS_INT_CALLEE(22u), AA_PHYS_INT_CALLEE(23u), + AA_PHYS_INT_CALLEE(24u), AA_PHYS_INT_CALLEE(25u), + AA_PHYS_INT_CALLEE(26u), AA_PHYS_INT_CALLEE(27u), + AA_PHYS_INT_CALLEE(28u), AA_PHYS_INT_RESERVED(29u), AA_PHYS_INT_RESERVED(30u), AA_PHYS_INT_RESERVED(31u), }; -#define AA_PHYS_FP_ALLOC(r) \ - {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \ +#define AA_PHYS_FP_ALLOC(r) \ + {.reg = (r), 
\ + .cls = NATIVE_REG_FP, \ + .abi_index = 0xffu, \ .flags = NATIVE_REG_ALLOCABLE | NATIVE_REG_CALLER_SAVED, \ - .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_FP_CALLER(r) \ - {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \ - .flags = NATIVE_REG_CALLER_SAVED, .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_FP_ARG(r) \ - {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = (r), \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_FP_CALLER(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_FP, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_CALLER_SAVED, \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_FP_ARG(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_FP, \ + .abi_index = (r), \ .flags = NATIVE_REG_CALLER_SAVED | NATIVE_REG_ARG | \ - ((r) < 4u ? NATIVE_REG_RET : 0), \ - .spill_cost = 1u, .copy_cost = 1u} -#define AA_PHYS_FP_CALLEE(r) \ - {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \ - .flags = NATIVE_REG_CALLEE_SAVED, .spill_cost = 4u, .copy_cost = 1u} -#define AA_PHYS_FP_RESERVED(r) \ - {.reg = (r), .cls = NATIVE_REG_FP, .abi_index = 0xffu, \ - .flags = NATIVE_REG_RESERVED, .spill_cost = 0u, .copy_cost = 0u} + ((r) < 4u ? 
NATIVE_REG_RET : 0), \ + .spill_cost = 1u, \ + .copy_cost = 1u} +#define AA_PHYS_FP_CALLEE(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_FP, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_CALLEE_SAVED, \ + .spill_cost = 4u, \ + .copy_cost = 1u} +#define AA_PHYS_FP_RESERVED(r) \ + {.reg = (r), \ + .cls = NATIVE_REG_FP, \ + .abi_index = 0xffu, \ + .flags = NATIVE_REG_RESERVED, \ + .spill_cost = 0u, \ + .copy_cost = 0u} static const NativePhysRegInfo aa_fp_phys[] = { - AA_PHYS_FP_ARG(0u), AA_PHYS_FP_ARG(1u), - AA_PHYS_FP_ARG(2u), AA_PHYS_FP_ARG(3u), - AA_PHYS_FP_ARG(4u), AA_PHYS_FP_ARG(5u), - AA_PHYS_FP_ARG(6u), AA_PHYS_FP_ARG(7u), - AA_PHYS_FP_CALLEE(8u), AA_PHYS_FP_CALLEE(9u), - AA_PHYS_FP_CALLEE(10u), AA_PHYS_FP_CALLEE(11u), - AA_PHYS_FP_CALLEE(12u), AA_PHYS_FP_CALLEE(13u), - AA_PHYS_FP_CALLEE(14u), AA_PHYS_FP_CALLEE(15u), - AA_PHYS_FP_CALLER(16u), AA_PHYS_FP_CALLER(17u), - AA_PHYS_FP_ALLOC(18u), AA_PHYS_FP_ALLOC(19u), - AA_PHYS_FP_RESERVED(20u), AA_PHYS_FP_RESERVED(21u), - AA_PHYS_FP_CALLER(22u), AA_PHYS_FP_CALLER(23u), - AA_PHYS_FP_CALLER(24u), AA_PHYS_FP_CALLER(25u), - AA_PHYS_FP_CALLER(26u), AA_PHYS_FP_CALLER(27u), - AA_PHYS_FP_CALLER(28u), AA_PHYS_FP_CALLER(29u), + AA_PHYS_FP_ARG(0u), AA_PHYS_FP_ARG(1u), AA_PHYS_FP_ARG(2u), + AA_PHYS_FP_ARG(3u), AA_PHYS_FP_ARG(4u), AA_PHYS_FP_ARG(5u), + AA_PHYS_FP_ARG(6u), AA_PHYS_FP_ARG(7u), AA_PHYS_FP_CALLEE(8u), + AA_PHYS_FP_CALLEE(9u), AA_PHYS_FP_CALLEE(10u), AA_PHYS_FP_CALLEE(11u), + AA_PHYS_FP_CALLEE(12u), AA_PHYS_FP_CALLEE(13u), AA_PHYS_FP_CALLEE(14u), + AA_PHYS_FP_CALLEE(15u), AA_PHYS_FP_CALLER(16u), AA_PHYS_FP_CALLER(17u), + AA_PHYS_FP_ALLOC(18u), AA_PHYS_FP_ALLOC(19u), AA_PHYS_FP_RESERVED(20u), + AA_PHYS_FP_RESERVED(21u), AA_PHYS_FP_CALLER(22u), AA_PHYS_FP_CALLER(23u), + AA_PHYS_FP_CALLER(24u), AA_PHYS_FP_CALLER(25u), AA_PHYS_FP_CALLER(26u), + AA_PHYS_FP_CALLER(27u), AA_PHYS_FP_CALLER(28u), AA_PHYS_FP_CALLER(29u), AA_PHYS_FP_CALLER(30u), AA_PHYS_FP_CALLER(31u), }; @@ -2465,8 +2520,8 @@ static const NativeAllocClassInfo 
aa_classes[] = { .callee_saved_mask = 0x1ff80000u, .arg_mask = 0x000000ffu, .ret_mask = 0x00000003u, - .reserved_mask = (1u << AA_TMP0) | (1u << AA_TMP1) | (1u << AA_FP) | - (1u << AA_LR)}, + .reserved_mask = + (1u << AA_TMP0) | (1u << AA_TMP1) | (1u << AA_FP) | (1u << AA_LR)}, {.cls = NATIVE_REG_FP, .allocable = aa_fp_allocable, .nallocable = sizeof aa_fp_allocable / sizeof aa_fp_allocable[0], @@ -2547,12 +2602,13 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, NativeFrameSlot home) { AANativeTarget* a = aa_of(t); const ABIFuncInfo* abi = abi_cg_func_info(t->c->abi, a->func->fn_type); - const ABIArgInfo* ai = p->index < abi->nparams ? &abi->params[p->index] : NULL; + const ABIArgInfo* ai = + p->index < abi->nparams ? &abi->params[p->index] : NULL; if (!ai || ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { - NativeLoc src = aa_reg_loc(p->type, NATIVE_REG_INT, - a->next_param_int < 8u ? a->next_param_int++ - : AA_TMP0); + NativeLoc src = + aa_reg_loc(p->type, NATIVE_REG_INT, + a->next_param_int < 8u ? a->next_param_int++ : AA_TMP0); if (src.v.reg == AA_TMP0) { NativeAddr saddr; memset(&saddr, 0, sizeof saddr); @@ -2591,16 +2647,18 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, } else { src = aa_reg_loc(p->type, cls, cls == NATIVE_REG_FP ? 
16u : AA_TMP0); NativeAddr saddr; + a->next_param_stack = + align_up_u32(a->next_param_stack, aa_part_stack_align(part)); memset(&saddr, 0, sizeof saddr); saddr.base_kind = NATIVE_ADDR_BASE_REG; saddr.base.reg = AA_FP; saddr.base_type = p->type; saddr.offset = (i32)a->next_param_stack; aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, part->size)); - a->next_param_stack += 8u; + a->next_param_stack += aa_part_stack_size(part); } - aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset), - src, 0, part->size); + aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset), src, 0, + part->size); } a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u); } @@ -2715,8 +2773,8 @@ static NativeAddr aa_reg_addr(CfreeCgTypeId type, u32 reg, i32 offset) { static void aa_load_ap_addr(NativeDirectTarget* d, Operand ap_addr, u32 dst_reg) { - NativeLoc dst = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), - NATIVE_REG_INT, dst_reg); + NativeLoc dst = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, dst_reg); NativeAddr ap = aa_direct_pointer_addr(d, ap_addr); d->native->load_addr(d->native, dst, ap); } @@ -2724,8 +2782,8 @@ static void aa_load_ap_addr(NativeDirectTarget* d, Operand ap_addr, static void aa_va_start_(NativeDirectTarget* d, Operand ap_addr) { AANativeTarget* a = aa_of(d->native); ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi); - NativeLoc ptr = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, - AA_TMP0); + NativeLoc ptr = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0); NativeAddr dst = aa_direct_pointer_addr(d, ap_addr); if (vai.kind == ABI_VA_LIST_POINTER) { aa_emit_add_imm(a, AA_TMP0, AA_FP, (i32)a->next_param_stack); @@ -2745,26 +2803,24 @@ static void aa_va_start_(NativeDirectTarget* d, Operand ap_addr) { : vai.fp_reg_count; aa_load_ap_addr(d, ap_addr, 15u); aa_emit_add_imm(a, AA_TMP0, AA_FP, (i32)a->next_param_stack); - aa_emit_mem(a, 0, ptr, - aa_reg_addr(ptr.type, 
15u, (i32)vai.stack_offset), ptr_mem); + aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.stack_offset), + ptr_mem); aa_emit_add_imm(a, AA_TMP0, AA_FP, - -(i32)gr->off + - (i32)(vai.gp_reg_count * vai.gp_slot_size)); - aa_emit_mem(a, 0, ptr, - aa_reg_addr(ptr.type, 15u, (i32)vai.gr_top_offset), ptr_mem); + -(i32)gr->off + (i32)(vai.gp_reg_count * vai.gp_slot_size)); + aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.gr_top_offset), + ptr_mem); aa_emit_add_imm(a, AA_TMP0, AA_FP, - -(i32)vr->off + - (i32)(vai.fp_reg_count * vai.fp_slot_size)); - aa_emit_mem(a, 0, ptr, - aa_reg_addr(ptr.type, 15u, (i32)vai.vr_top_offset), ptr_mem); + -(i32)vr->off + (i32)(vai.fp_reg_count * vai.fp_slot_size)); + aa_emit_mem(a, 0, ptr, aa_reg_addr(ptr.type, 15u, (i32)vai.vr_top_offset), + ptr_mem); aa_emit_load_imm(a->base.mc, 0, AA_TMP1, -(i32)((vai.gp_reg_count - used_gr) * vai.gp_slot_size)); - aa_emit_mem(a, 0, i32tmp, - aa_reg_addr(i32_ty, 15u, (i32)vai.gr_offs_offset), i32_mem); + aa_emit_mem(a, 0, i32tmp, aa_reg_addr(i32_ty, 15u, (i32)vai.gr_offs_offset), + i32_mem); aa_emit_load_imm(a->base.mc, 0, AA_TMP1, -(i32)((vai.fp_reg_count - used_vr) * vai.fp_slot_size)); - aa_emit_mem(a, 0, i32tmp, - aa_reg_addr(i32_ty, 15u, (i32)vai.vr_offs_offset), i32_mem); + aa_emit_mem(a, 0, i32tmp, aa_reg_addr(i32_ty, 15u, (i32)vai.vr_offs_offset), + i32_mem); return; } { @@ -2777,15 +2833,15 @@ static void aa_va_arg_(NativeDirectTarget* d, Operand dst_op, Operand ap_addr, CfreeCgTypeId type) { AANativeTarget* a = aa_of(d->native); ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi); - NativeLoc cur = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, - AA_TMP0); - NativeLoc val = - aa_reg_loc(type, cg_type_is_float(d->base.c, type) ? NATIVE_REG_FP - : NATIVE_REG_INT, - cg_type_is_float(d->base.c, type) ? 
16u : 9u); + NativeLoc cur = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0); + NativeLoc val = aa_reg_loc( + type, cg_type_is_float(d->base.c, type) ? NATIVE_REG_FP : NATIVE_REG_INT, + cg_type_is_float(d->base.c, type) ? 16u : 9u); NativeAddr src, dst; MemAccess ptr_mem = aa_mem_for_type(d->native, cur.type, 8); - MemAccess val_mem = aa_mem_for_type(d->native, type, type_size32(d->native, type)); + MemAccess val_mem = + aa_mem_for_type(d->native, type, type_size32(d->native, type)); if (vai.kind == ABI_VA_LIST_POINTER) { NativeAddr ap = aa_direct_pointer_addr(d, ap_addr); aa_emit_mem(a, 1, cur, ap, ptr_mem); @@ -2814,8 +2870,7 @@ static void aa_va_arg_(NativeDirectTarget* d, Operand dst_op, Operand ap_addr, aa64_brcond_pack((AA64BrCond){.cond = cmp_cond(CMP_GE_S)})); a->base.mc->emit_label_ref(a->base.mc, stack_label, R_AARCH64_CONDBR19, 4, 0); - aa_emit_mem(a, 1, cur, aa_reg_addr(cur.type, 15u, (i32)top_field), - ptr_mem); + aa_emit_mem(a, 1, cur, aa_reg_addr(cur.type, 15u, (i32)top_field), ptr_mem); aa_emit32(a->base.mc, aa_sbfm(1, AA_TMP1, AA_TMP1, 0, 31)); aa_emit32(a->base.mc, aa64_add(1, AA_TMP0, AA_TMP0, AA_TMP1)); aa_emit_mem(a, 1, val, aa_reg_addr(type, AA_TMP0, 0), val_mem); @@ -2848,8 +2903,8 @@ static void aa_va_copy_(NativeDirectTarget* d, Operand dst_ap_addr, Operand src_ap_addr) { AANativeTarget* a = aa_of(d->native); ABIVaListInfo vai = abi_va_list_layout(d->base.c->abi); - NativeLoc tmp = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, - AA_TMP0); + NativeLoc tmp = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0); MemAccess mem = aa_mem_for_type(d->native, tmp.type, 8); if (vai.kind == ABI_VA_LIST_POINTER) { NativeAddr src = aa_direct_pointer_addr(d, src_ap_addr); @@ -2901,8 +2956,8 @@ AA_UNUSED_FN static void aa_asm_bound_reg(Operand* out, CfreeCgTypeId type, NativeAllocClass cls, Reg reg) { memset(out, 0, sizeof *out); out->kind = AA64_INLINE_OPK_REG; - out->pad[0] = (cls == 
NATIVE_REG_FP) ? AA64_INLINE_OPCLS_FP - : AA64_INLINE_OPCLS_INT; + out->pad[0] = + (cls == NATIVE_REG_FP) ? AA64_INLINE_OPCLS_FP : AA64_INLINE_OPCLS_INT; out->type = type; out->v.local = (CGLocal)reg; } @@ -2962,7 +3017,7 @@ AA_UNUSED_FN static void aa_asm_clobber_masks(NativeDirectTarget* d, AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls, u32* used_int, u32* used_fp) { - static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u, + static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 11u, 12u, 13u, 14u, 15u}; static const Reg fp_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 16u, 17u, 18u, 19u, 22u, 23u, 24u, 25u, @@ -2981,8 +3036,8 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d, return REG_NONE; } -AA_UNUSED_FN static NativeAllocClass -aa_asm_constraint_class(NativeDirectTarget* d, const char* body) { +AA_UNUSED_FN static NativeAllocClass aa_asm_constraint_class( + NativeDirectTarget* d, const char* body) { if (body[0] == 'r') return NATIVE_REG_INT; if (body[0] == 'w') return NATIVE_REG_FP; aa_asm_panic(d, "constraint is not a register constraint"); @@ -3084,9 +3139,8 @@ AA_UNUSED_FN static void aa_asm_restore_one(NativeDirectTarget* d, aa_mem_for_type(d->native, s->type, 8)); } -AA_UNUSED_FN static AAAsmSavedClobber* -aa_asm_save_callee_clobbers(NativeDirectTarget* d, u32 int_mask, u32 fp_mask, - u32* nsaved_out) { +AA_UNUSED_FN static AAAsmSavedClobber* aa_asm_save_callee_clobbers( + NativeDirectTarget* d, u32 int_mask, u32 fp_mask, u32* nsaved_out) { AAAsmSavedClobber* saved = arena_zarray(d->base.c->tu, AAAsmSavedClobber, 20u); u32 n = 0; @@ -3141,8 +3195,8 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl, } } else if (body[0] == 'm') { Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - NativeLoc loc = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), - NATIVE_REG_INT, reg); + NativeLoc loc = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, 
reg); CfreeCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type; aa_direct_load_address_to_reg(d, out_ops[i], loc); aa_asm_bound_mem(&bound_outs[i], type, reg); @@ -3176,16 +3230,15 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl, Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp); CfreeCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type; aa_asm_bound_reg(&bound_ins[i], type, cls, reg); - aa_direct_load_operand_to_reg(d, in_ops[i], - aa_reg_loc(type, cls, reg)); + aa_direct_load_operand_to_reg(d, in_ops[i], aa_reg_loc(type, cls, reg)); } else if (body[0] == 'i') { if (in_ops[i].kind != OPK_IMM) aa_asm_panic(d, "immediate constraint requires immediate operand"); bound_ins[i] = in_ops[i]; } else if (body[0] == 'm') { Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - NativeLoc loc = aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), - NATIVE_REG_INT, reg); + NativeLoc loc = + aa_reg_loc(builtin_id(CFREE_CG_BUILTIN_I64), NATIVE_REG_INT, reg); CfreeCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type; aa_direct_load_address_to_reg(d, in_ops[i], loc); aa_asm_bound_mem(&bound_ins[i], type, reg);