commit 5a7642085de670403406e9ad6a3f29cbd73ef3e1
parent c3ab7c37ff8954e1c3a358f844e8871cf30d7b7c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 11:40:53 -0700
Factor simple backend register allocator
Diffstat:
13 files changed, 359 insertions(+), 270 deletions(-)
diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c
@@ -14,28 +14,15 @@ AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
* ============================================================ */
+ /* Initialise pool `p` to manage `nregs` registers starting at register
+  * number `base`. This is now a thin forwarder to the shared
+  * cg_simple_regpool_init_range() helper.
+  * NOTE(review): the removed inline body reset hwm to 0 and marked all
+  * `nregs` registers free (saturating the mask at 32 registers); presumably
+  * the shared helper keeps that contract -- confirm against its definition. */
void regpool_init(RegPool* p, u8 base, u8 nregs) {
- p->base = base;
- p->nregs = nregs;
- p->hwm = 0;
- p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
+ cg_simple_regpool_init_range(p, base, nregs);
}
+ /* Take one register from pool `p`; forwards to cg_simple_regpool_alloc().
+  * NOTE(review): the removed inline body returned REG_NONE when the pool was
+  * empty, otherwise handed out the lowest-numbered free register and raised
+  * the high-water mark; callers presumably still rely on that -- confirm the
+  * shared helper behaves the same way. */
Reg regpool_alloc(RegPool* p) {
- if (p->free == 0) return (Reg)REG_NONE;
- u32 idx = (u32)__builtin_ctz(p->free);
- p->free &= ~(1u << idx);
- if (idx + 1u > p->hwm) p->hwm = idx + 1u;
- return (Reg)(p->base + idx);
+ return cg_simple_regpool_alloc(p);
}
+ /* Return register `r` to pool `p`; forwards to cg_simple_regpool_free().
+  * NOTE(review): the removed inline body had a tri-state result -- 0 when `r`
+  * lies outside the pool's range, -1 when `r` was already free (double free),
+  * 1 when it was released. Presumably the shared helper preserves these
+  * return codes; confirm before relying on them. */
int regpool_free(RegPool* p, Reg r) {
- u32 rn = (u32)r;
- if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0;
- u32 idx = rn - p->base;
- u32 bit = 1u << idx;
- if (p->free & bit) return -1;
- p->free |= bit;
- return 1;
+ return cg_simple_regpool_free(p, r);
}
/* ============================================================
@@ -176,13 +163,14 @@ void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
u32 imm12, sh;
if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) {
- u32 rn = aa64_force_reg_int(t, a_op, sf, 9);
+ u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
aa64_emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh));
return;
}
}
- u32 rn = aa64_force_reg_int(t, a_op, sf, 9);
- u32 rm = aa64_force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u);
+ u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
+ u32 rm =
+ aa64_force_reg_int(t, b_op, sf, (rn == AA_TMP0) ? AA_TMP1 : AA_TMP0);
aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, rn, rm));
}
@@ -225,7 +213,7 @@ static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) {
sc->else_label = t->mc->label_new(t->mc);
sc->end_label = t->mc->label_new(t->mc);
u32 sf = type_is_64(d->cond.type) ? 1u : 0u;
- u32 rn = aa64_force_reg_int(t, d->cond, sf, 9);
+ u32 rn = aa64_force_reg_int(t, d->cond, sf, AA_TMP0);
aa64_emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, rn, 0));
aa64_emit32(t->mc, aa64_b_cond(0x0u /*EQ*/));
t->mc->emit_label_ref(t->mc, sc->else_label, R_AARCH64_CONDBR19, 4, 0);
diff --git a/src/arch/aarch64/emit.c b/src/arch/aarch64/emit.c
@@ -382,29 +382,29 @@ void aa_param(CGTarget* t, const CGParamDesc* p) {
} else {
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
- aa64_emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off)));
- ptr_reg = 9;
+ aa64_emit32(t->mc, aa64_ldur(3, AA_TMP0, 29, (i32)(16 + caller_off)));
+ ptr_reg = AA_TMP0;
}
u32 nbytes = s->size;
u32 i = 0;
while (i + 8 <= nbytes) {
- aa64_emit32(t->mc, aa64_ldur(3, 10, ptr_reg, (i32)i));
- aa64_emit32(t->mc, aa64_stur(3, 10, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(t->mc, aa64_ldur(3, AA_TMP1, ptr_reg, (i32)i));
+ aa64_emit32(t->mc, aa64_stur(3, AA_TMP1, 29, -(i32)s->off + (i32)i));
i += 8;
}
while (i + 4 <= nbytes) {
- aa64_emit32(t->mc, aa64_ldur(2, 10, ptr_reg, (i32)i));
- aa64_emit32(t->mc, aa64_stur(2, 10, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(t->mc, aa64_ldur(2, AA_TMP1, ptr_reg, (i32)i));
+ aa64_emit32(t->mc, aa64_stur(2, AA_TMP1, 29, -(i32)s->off + (i32)i));
i += 4;
}
while (i + 2 <= nbytes) {
- aa64_emit32(t->mc, aa64_ldur(1, 10, ptr_reg, (i32)i));
- aa64_emit32(t->mc, aa64_stur(1, 10, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(t->mc, aa64_ldur(1, AA_TMP1, ptr_reg, (i32)i));
+ aa64_emit32(t->mc, aa64_stur(1, AA_TMP1, 29, -(i32)s->off + (i32)i));
i += 2;
}
while (i < nbytes) {
- aa64_emit32(t->mc, aa64_ldur(0, 10, ptr_reg, (i32)i));
- aa64_emit32(t->mc, aa64_stur(0, 10, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(t->mc, aa64_ldur(0, AA_TMP1, ptr_reg, (i32)i));
+ aa64_emit32(t->mc, aa64_stur(0, AA_TMP1, 29, -(i32)s->off + (i32)i));
i += 1;
}
return;
@@ -422,8 +422,10 @@ void aa_param(CGTarget* t, const CGParamDesc* p) {
} else {
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
- aa64_emit32(t->mc, aa64_ldur(sidx, 9, 29, (i32)(16 + caller_off)));
- aa64_emit32(t->mc, aa64_stur(sidx, 9, 29, -(i32)s->off + (i32)part_off));
+ aa64_emit32(t->mc, aa64_ldur(sidx, AA_TMP0, 29, (i32)(16 + caller_off)));
+ aa64_emit32(t->mc,
+ aa64_stur(sidx, AA_TMP0, 29,
+ -(i32)s->off + (i32)part_off));
}
} else if (pt->cls == ABI_CLASS_FP) {
if (a->next_param_fp < 8) {
@@ -433,8 +435,12 @@ void aa_param(CGTarget* t, const CGParamDesc* p) {
} else {
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
- aa64_emit32(t->mc, aa64_ldur_fp(sidx, 0, 29, (i32)(16 + caller_off)));
- aa64_emit32(t->mc, aa64_stur_fp(sidx, 0, 29, -(i32)s->off + (i32)part_off));
+ aa64_emit32(t->mc,
+ aa64_ldur_fp(sidx, AA_FP_TMP0, 29,
+ (i32)(16 + caller_off)));
+ aa64_emit32(t->mc,
+ aa64_stur_fp(sidx, AA_FP_TMP0, 29,
+ -(i32)s->off + (i32)part_off));
}
} else {
compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl",
diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h
@@ -8,6 +8,7 @@
#include "arch/aa64_isa.h"
#include "arch/aa64_regs.h"
#include "arch/arch.h"
+#include "arch/regalloc.h"
#include "core/arena.h"
#include "core/pool.h"
#include "obj/obj.h"
@@ -17,6 +18,16 @@
* ============================================================ */
#define AA64_NOP 0xD503201Fu
+
+/* Hidden backend temporaries. These must stay outside the allocable pools and
+ * outside optimizer scratch registers because CGTarget ops may clobber them
+ * while lowering a single operation. AA_FP_TMP0 names v31, not integer x31.
+ *
+ * The three integer entries are register numbers passed as Rt/Rn/Rm/Rd to the
+ * integer encoders; AA_FP_TMP0 is passed only to the FP/SIMD encoders. A
+ * lowering that needs more than three integer values live at the same time
+ * cannot take all of them from this set. */
+enum {
+ AA_TMP0 = 9u, /* integer scratch, register number 9 */
+ AA_TMP1 = 10u, /* integer scratch, register number 10 */
+ AA_TMP2 = 11u, /* integer scratch, register number 11 */
+ AA_FP_TMP0 = 31u, /* FP/SIMD scratch v31 */
+};
#define CG_BUILTIN_ID(k) ((CfreeCgTypeId)((1u << 6) | (u32)(k)))
static inline u32 aa64_stp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
@@ -191,13 +202,7 @@ static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
* RegPool
* ============================================================ */
-typedef struct RegPool {
- u32 free;
- u32 hwm;
- u8 base;
- u8 nregs;
- u8 pad[2];
-} RegPool;
+/* RegPool is now an alias for the shared CGSimpleRegPool type.
+ * NOTE(review): the removed struct exposed free/hwm/base/nregs; any code that
+ * still reads those fields through a RegPool depends on CGSimpleRegPool
+ * providing members with the same names -- confirm. */
+typedef CGSimpleRegPool RegPool;
/* ============================================================
* AAImpl types
diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c
@@ -53,13 +53,13 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
t->mc->set_section(t->mc, cur_section);
u32 adrp_pos = t->mc->pos(t->mc);
- aa64_emit32(t->mc, aa64_adrp_base(9));
+ aa64_emit32(t->mc, aa64_adrp_base(AA_TMP0));
t->mc->emit_reloc_at(t->mc, cur_section, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21,
sym, 0, 0, 0);
u32 ldr_pos = t->mc->pos(t->mc);
u32 sidx = (cb.size == 8) ? 3u : 2u;
- aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
+ aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), AA_TMP0, 0));
RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC
: R_AARCH64_LDST32_ABS_LO12_NC;
t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0);
@@ -137,30 +137,30 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
ObjSymId sym = addr.v.global.sym;
i64 add = addr.v.global.addend;
if (use_got_for_sym(t, sym)) {
- aa64_emit_got_load_addr(t, /*dst=*/9, sym);
+ aa64_emit_got_load_addr(t, AA_TMP0, sym);
if (dst.cls == RC_FP) {
- aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, (i32)add));
+ aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP0, (i32)add));
} else {
- aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, (i32)add));
+ aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP0, (i32)add));
}
return;
}
u32 adrp_pos = mc->pos(mc);
- aa64_emit32(mc, aa64_adrp_base(/*Rd=*/9));
+ aa64_emit32(mc, aa64_adrp_base(AA_TMP0));
mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
0, 0);
u32 ld_pos = mc->pos(mc);
if (dst.cls == RC_FP) {
- aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
+ aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), AA_TMP0, 0));
} else {
- aa64_emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), 9, 0));
+ aa64_emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), AA_TMP0, 0));
}
mc->emit_reloc_at(mc, sec, ld_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0);
return;
}
i32 off;
- u32 base = addr_base(t, addr, &off, 9);
+ u32 base = addr_base(t, addr, &off, AA_TMP0);
if (dst.cls == RC_FP) {
aa64_emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off));
} else {
@@ -182,15 +182,15 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
u32 src_is_fp = 0;
if (src.kind == OPK_IMM) {
u32 sf = (sz == 8) ? 1u : 0u;
- aa64_emit_load_imm(mc, sf, /*Rd=*/9, src.v.imm);
- src_reg = 9;
+ aa64_emit_load_imm(mc, sf, AA_TMP0, src.v.imm);
+ src_reg = AA_TMP0;
} else if (src.cls == RC_FP) {
src_reg = reg_num(src);
src_is_fp = 1;
} else {
src_reg = reg_num(src);
}
- u32 base = (src.kind == OPK_IMM) ? 10u : 9u;
+ u32 base = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0;
if (use_got_for_sym(t, sym)) {
aa64_emit_got_load_addr(t, base, sym);
if (src_is_fp) {
@@ -215,13 +215,13 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
}
i32 off;
- u32 addr_tmp = (src.kind == OPK_IMM) ? 10u : 9u;
+ u32 addr_tmp = (src.kind == OPK_IMM) ? AA_TMP1 : AA_TMP0;
u32 base = addr_base(t, addr, &off, addr_tmp);
if (src.kind == OPK_IMM) {
u32 sf = (sz == 8) ? 1u : 0u;
- aa64_emit_load_imm(t->mc, sf, 9, src.v.imm);
- aa64_emit32(t->mc, aa64_stur(sidx, 9, base, off));
+ aa64_emit_load_imm(t->mc, sf, AA_TMP0, src.v.imm);
+ aa64_emit32(t->mc, aa64_stur(sidx, AA_TMP0, base, off));
return;
}
if (src.cls == RC_FP) {
@@ -314,10 +314,10 @@ static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
return;
}
- aa64_emit32(mc, aa64_mrs_tpidr_el0(/*Rt=*/9));
+ aa64_emit32(mc, aa64_mrs_tpidr_el0(AA_TMP0));
u32 hi_pos = mc->pos(mc);
- aa64_emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/9, /*imm12=*/0, /*sh=*/1));
+ aa64_emit32(mc, aa64_add_imm(/*sf=*/1, rd, AA_TMP0, /*imm12=*/0, /*sh=*/1));
mc->emit_reloc_at(mc, sec, hi_pos, R_AARCH64_TLSLE_ADD_TPREL_HI12, sym,
addend, 0, 0);
@@ -347,28 +347,29 @@ static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
AggregateAccess agg) {
MCEmitter* mc = t->mc;
- u32 dr = agg_addr_reg(t, dst_addr, 9);
- u32 sr = agg_addr_reg(t, src_addr, (dr == 10) ? 11u : 10u);
+ u32 dr = agg_addr_reg(t, dst_addr, AA_TMP0);
+ u32 sr = agg_addr_reg(t, src_addr,
+ (dr == AA_TMP1) ? AA_TMP2 : AA_TMP1);
u32 nbytes = agg.size;
u32 i = 0;
while (i + 8 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP2, dr, (i32)i));
i += 8;
}
while (i + 4 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(2, AA_TMP2, dr, (i32)i));
i += 4;
}
while (i + 2 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(1, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(1, AA_TMP2, dr, (i32)i));
i += 2;
}
while (i < nbytes) {
- aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(0, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(0, AA_TMP2, dr, (i32)i));
i += 1;
}
}
@@ -376,7 +377,7 @@ static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
AggregateAccess agg) {
MCEmitter* mc = t->mc;
- u32 dr = agg_addr_reg(t, dst_addr, 9);
+ u32 dr = agg_addr_reg(t, dst_addr, AA_TMP0);
u32 byte;
if (byte_value.kind == OPK_IMM) {
@@ -412,23 +413,23 @@ static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
b64 |= b64 << 8;
b64 |= b64 << 16;
b64 |= b64 << 32;
- aa64_emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)b64);
+ aa64_emit_load_imm(mc, /*sf=*/1u, AA_TMP1, (i64)b64);
u32 i = 0;
while (i + 8 <= nbytes) {
- aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP1, dr, (i32)i));
i += 8;
}
while (i + 4 <= nbytes) {
- aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_stur(2, AA_TMP1, dr, (i32)i));
i += 4;
}
while (i + 2 <= nbytes) {
- aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_stur(1, AA_TMP1, dr, (i32)i));
i += 2;
}
while (i < nbytes) {
- aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_stur(0, AA_TMP1, dr, (i32)i));
i += 1;
}
}
@@ -440,7 +441,7 @@ static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
BitFieldAccess bf) {
MCEmitter* mc = t->mc;
- u32 base = agg_addr_reg(t, record_addr, 9);
+ u32 base = agg_addr_reg(t, record_addr, AA_TMP0);
u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
u32 sf = (storage_bytes == 8u) ? 1u : 0u;
u32 sidx = size_idx_for_bytes(storage_bytes);
@@ -460,17 +461,17 @@ static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
BitFieldAccess bf) {
MCEmitter* mc = t->mc;
- u32 base = agg_addr_reg(t, record_addr, 9);
+ u32 base = agg_addr_reg(t, record_addr, AA_TMP0);
u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
u32 sf = (storage_bytes == 8u) ? 1u : 0u;
u32 sidx = size_idx_for_bytes(storage_bytes);
- aa64_emit32(mc, aa64_ldur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset));
+ aa64_emit32(mc, aa64_ldur(sidx, AA_TMP1, base, (i32)bf.storage_offset));
u32 src_reg;
if (src.kind == OPK_IMM) {
- aa64_emit_load_imm(mc, sf, /*Rd=*/11u, src.v.imm);
- src_reg = 11u;
+ aa64_emit_load_imm(mc, sf, AA_TMP2, src.v.imm);
+ src_reg = AA_TMP2;
} else if (src.kind == OPK_REG) {
src_reg = reg_num(src);
} else {
@@ -484,9 +485,9 @@ static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
u32 width = bf.bit_width ? bf.bit_width : 1u;
u32 immr = (reg_size - lsb) % reg_size;
u32 imms = width - 1u;
- aa64_emit32(mc, aa64_bfm(sf, /*Rd=*/10u, src_reg, immr, imms));
+ aa64_emit32(mc, aa64_bfm(sf, AA_TMP1, src_reg, immr, imms));
- aa64_emit32(mc, aa64_stur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset));
+ aa64_emit32(mc, aa64_stur(sidx, AA_TMP1, base, (i32)bf.storage_offset));
}
/* ============================================================
@@ -610,8 +611,9 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
}
}
- u32 rn = aa64_force_reg_int(t, a_op, sf, 9);
- u32 rm = aa64_force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9);
+ u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
+ u32 rm =
+ aa64_force_reg_int(t, b_op, sf, (rn == AA_TMP0) ? AA_TMP1 : AA_TMP0);
u32 word;
switch (op) {
@@ -627,12 +629,12 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
case BO_UDIV: word = aa64_udiv(sf, rd, rn, rm); break;
case BO_SDIV: word = aa64_sdiv(sf, rd, rn, rm); break;
case BO_SREM:
- aa64_emit32(mc, aa64_sdiv(sf, 11, rn, rm));
- word = aa64_msub(sf, rd, 11, rm, rn);
+ aa64_emit32(mc, aa64_sdiv(sf, AA_TMP2, rn, rm));
+ word = aa64_msub(sf, rd, AA_TMP2, rm, rn);
break;
case BO_UREM:
- aa64_emit32(mc, aa64_udiv(sf, 11, rn, rm));
- word = aa64_msub(sf, rd, 11, rm, rn);
+ aa64_emit32(mc, aa64_udiv(sf, AA_TMP2, rn, rm));
+ word = aa64_msub(sf, rd, AA_TMP2, rm, rn);
break;
case BO_FADD:
case BO_FSUB:
@@ -649,7 +651,7 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
- u32 rn = aa64_force_reg_int(t, a_op, sf, 9);
+ u32 rn = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
u32 word;
switch (op) {
@@ -794,7 +796,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
if (!to_stack)
dst_reg = (*next_int)++;
else
- dst_reg = 9;
+ dst_reg = AA_TMP0;
if (av->storage.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot");
@@ -821,7 +823,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
if (pt->cls == ABI_CLASS_INT) {
int to_stack = (*next_int >= 8);
- u32 dst_reg = to_stack ? 9u : (*next_int)++;
+ u32 dst_reg = to_stack ? AA_TMP0 : (*next_int)++;
switch (av->storage.kind) {
case OPK_IMM: {
u32 sf = (sz == 8) ? 1u : 0u;
@@ -847,7 +849,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
src.v.ind.base = av->storage.v.ind.base;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
+ u32 base = addr_base(t, src, &off, AA_TMP0);
aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off));
break;
}
@@ -877,7 +879,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
src.v.ind.base = av->storage.v.ind.base;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
+ u32 base = addr_base(t, src, &off, AA_TMP0);
aa64_emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off));
break;
}
@@ -899,9 +901,9 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
src.v.ind.base = av->storage.v.ind.base;
src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
- aa64_emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off));
- aa64_emit32(t->mc, aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off));
+ u32 base = addr_base(t, src, &off, AA_TMP0);
+ aa64_emit32(t->mc, aa64_ldur_fp(sidx, AA_FP_TMP0, base, off));
+ aa64_emit32(t->mc, aa64_stur_fp(sidx, AA_FP_TMP0, 31, (i32)*stack_off));
break;
}
default:
@@ -1033,23 +1035,23 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) {
u32 nbytes = s->size;
u32 i = 0;
while (i + 8 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(3, 9, 29, -(i32)s->off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(3, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP0, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, 8, i));
i += 8;
}
while (i + 4 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(2, 9, 29, -(i32)s->off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(2, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP0, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(2, AA_TMP0, 8, i));
i += 4;
}
while (i + 2 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(1, 9, 29, -(i32)s->off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(1, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(1, AA_TMP0, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(1, AA_TMP0, 8, i));
i += 2;
}
while (i < nbytes) {
- aa64_emit32(mc, aa64_ldur(0, 9, 29, -(i32)s->off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(0, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(0, AA_TMP0, 29, -(i32)s->off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(0, AA_TMP0, 8, i));
i += 1;
}
} else if (val->storage.kind == OPK_INDIRECT) {
@@ -1066,23 +1068,23 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) {
i32 base_off = val->storage.v.ind.ofs;
u32 i = 0;
while (i + 8 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(3, 9, base_reg, base_off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(3, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP0, base_reg, base_off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, 8, i));
i += 8;
}
while (i + 4 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(2, 9, base_reg, base_off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(2, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP0, base_reg, base_off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(2, AA_TMP0, 8, i));
i += 4;
}
while (i + 2 <= nbytes) {
- aa64_emit32(mc, aa64_ldur(1, 9, base_reg, base_off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(1, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(1, AA_TMP0, base_reg, base_off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(1, AA_TMP0, 8, i));
i += 2;
}
while (i < nbytes) {
- aa64_emit32(mc, aa64_ldur(0, 9, base_reg, base_off + (i32)i));
- aa64_emit32(mc, aa64_str_uimm(0, 9, 8, i));
+ aa64_emit32(mc, aa64_ldur(0, AA_TMP0, base_reg, base_off + (i32)i));
+ aa64_emit32(mc, aa64_str_uimm(0, AA_TMP0, 8, i));
i += 1;
}
} else {
@@ -1167,10 +1169,10 @@ static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
aa64_emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=SP*/ 31, (u32)aligned, 0));
} else if (sz.kind == OPK_REG) {
u32 sz_reg = reg_num(sz);
- aa64_emit32(mc, aa64_add_imm(1, 9, sz_reg, 15u, 0));
- aa64_emit32(mc, aa64_ubfm(1, 9, 9, 4, 63));
- aa64_emit32(mc, aa64_ubfm(1, 9, 9, 60, 59));
- aa64_emit32(mc, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, 9));
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP0, sz_reg, 15u, 0));
+ aa64_emit32(mc, aa64_ubfm(1, AA_TMP0, AA_TMP0, 4, 63));
+ aa64_emit32(mc, aa64_ubfm(1, AA_TMP0, AA_TMP0, 60, 59));
+ aa64_emit32(mc, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, AA_TMP0));
} else {
compiler_panic(t->c, a->loc, "aarch64 alloca: size kind %d unsupported",
(int)sz.kind);
@@ -1223,21 +1225,23 @@ static void aa_va_start_(CGTarget* t, Operand ap_op) {
{
u32 ofs = 16u + a->next_param_stack;
if (ofs <= 0xfff)
- aa64_emit32(mc, aa64_add_imm(1, 9, 29, ofs, 0));
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP0, 29, ofs, 0));
else {
- aa64_emit_load_imm(mc, 1, 9, (i64)ofs);
- aa64_emit32(mc, aa64_add(1, 9, 29, 9));
+ aa64_emit_load_imm(mc, 1, AA_TMP0, (i64)ofs);
+ aa64_emit32(mc, aa64_add(1, AA_TMP0, 29, AA_TMP0));
}
- aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 0));
- }
- emit_fp_off(mc, 9, -(i32)gs->off + (i32)gs->size);
- aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 8));
- emit_fp_off(mc, 9, -(i32)fs->off + (i32)fs->size);
- aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 16));
- aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_int * 8u) - 64));
- aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 24));
- aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_fp * 16u) - 128));
- aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 28));
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 0));
+ }
+ emit_fp_off(mc, AA_TMP0, -(i32)gs->off + (i32)gs->size);
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 8));
+ emit_fp_off(mc, AA_TMP0, -(i32)fs->off + (i32)fs->size);
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 16));
+ aa64_emit_load_imm(mc, 0, AA_TMP0,
+ (i64)((i32)(a->next_param_int * 8u) - 64));
+ aa64_emit32(mc, aa64_str_uimm(2, AA_TMP0, ap, 24));
+ aa64_emit_load_imm(mc, 0, AA_TMP0,
+ (i64)((i32)(a->next_param_fp * 16u) - 128));
+ aa64_emit32(mc, aa64_str_uimm(2, AA_TMP0, ap, 28));
}
static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
@@ -1254,31 +1258,31 @@ static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
MCLabel L_stack = mc->label_new(mc);
MCLabel L_done = mc->label_new(mc);
- aa64_emit32(mc, aa64_ldur(2, 9, ap, (i32)offs_field));
- aa64_emit32(mc, aa64_subs_imm(0, 31, 9, 0));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP0, ap, (i32)offs_field));
+ aa64_emit32(mc, aa64_subs_imm(0, 31, AA_TMP0, 0));
aa64_emit32(mc, aa64_b_cond(0xa /*GE*/));
mc->emit_label_ref(mc, L_stack, R_AARCH64_CONDBR19, 4, 0);
- aa64_emit32(mc, aa64_ldur(3, 10, ap, (i32)top_field));
- aa64_emit32(mc, aa64_sbfm(1, 12, 9, 0, 31));
- aa64_emit32(mc, aa64_add(1, 11, 10, 12));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP1, ap, (i32)top_field));
+ aa64_emit32(mc, aa64_sbfm(1, AA_TMP2, AA_TMP0, 0, 31));
+ aa64_emit32(mc, aa64_add(1, AA_TMP2, AA_TMP1, AA_TMP2));
if (is_fp)
- aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 11, 0));
+ aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP2, 0));
else
- aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 11, 0));
- aa64_emit32(mc, aa64_add_imm(0, 9, 9, stride_reg, 0));
- aa64_emit32(mc, aa64_stur(2, 9, ap, (i32)offs_field));
+ aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP2, 0));
+ aa64_emit32(mc, aa64_add_imm(0, AA_TMP0, AA_TMP0, stride_reg, 0));
+ aa64_emit32(mc, aa64_stur(2, AA_TMP0, ap, (i32)offs_field));
aa64_emit32(mc, aa64_b_base());
mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0);
mc->label_place(mc, L_stack);
- aa64_emit32(mc, aa64_ldur(3, 10, ap, 0));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP1, ap, 0));
if (is_fp)
- aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 10, 0));
+ aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP1, 0));
else
- aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 10, 0));
- aa64_emit32(mc, aa64_add_imm(1, 10, 10, 8u, 0));
- aa64_emit32(mc, aa64_stur(3, 10, ap, 0));
+ aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP1, 0));
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP1, AA_TMP1, 8u, 0));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP1, ap, 0));
mc->label_place(mc, L_done);
}
@@ -1293,8 +1297,8 @@ static void aa_va_copy_(CGTarget* t, Operand d, Operand s) {
u32 dr = reg_num(d);
u32 sr = reg_num(s);
for (u32 i = 0; i < 32u; i += 8u) {
- aa64_emit32(mc, aa64_ldur(3, 9, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(3, 9, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP0, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, (i32)i));
}
}
@@ -1346,7 +1350,7 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
} else if (addr.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot");
- base = 9u;
+ base = AA_TMP0;
aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0));
} else {
compiler_panic(t->c, a->loc,
@@ -1369,7 +1373,7 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
u32 src_reg;
if (src.kind == OPK_IMM) {
- src_reg = 10u;
+ src_reg = AA_TMP1;
aa64_emit_load_imm(mc, sf, src_reg, src.v.imm);
} else if (src.kind == OPK_REG) {
src_reg = reg_num(src);
@@ -1384,7 +1388,7 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
} else if (addr.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot");
- base = 9u;
+ base = AA_TMP0;
aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0));
} else {
compiler_panic(t->c, a->loc,
@@ -1424,18 +1428,18 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
MCEmitter* mc = t->mc;
u32 sf = (ma.size == 8) ? 1u : 0u;
- u32 base = 9u;
+ u32 base = AA_TMP0;
if (addr.kind == OPK_REG) {
- aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
+ aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr)));
} else if (addr.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot");
- aa64_emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0));
+ aa64_emit32(mc, aa64_sub_imm(1, AA_TMP0, 29, s->off, 0));
} else {
compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported",
(int)addr.kind);
}
- u32 vreg = 10u;
+ u32 vreg = AA_TMP1;
if (val.kind == OPK_IMM) {
aa64_emit_load_imm(mc, sf, vreg, val.v.imm);
} else if (val.kind == OPK_REG) {
@@ -1456,15 +1460,15 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
else
aa64_emit32(mc, aa64_ldxr(sf, reg_num(dst), base));
- emit_rmw_combine(mc, op, sf, /*new=*/11u, /*prior=*/reg_num(dst), vreg);
+ emit_rmw_combine(mc, op, sf, AA_TMP2, reg_num(dst), vreg);
+ /* The store-exclusive status (Rs) needs a register of its own. It must not
+  * be vreg: a failed store writes a non-zero status there and branches back
+  * to L_retry, so the combine above would run again with the status in place
+  * of the operand value. It also cannot be AA_TMP2 (Rt) or base (Rn), which
+  * the architecture forbids as Rs.
+  * NOTE(review): x16 is an assumption -- it is only written by the store and
+  * read by the cbnz below, but confirm it is outside the allocable pools and
+  * the optimizer scratch set, or promote it to a named fourth temporary. */
+ const u32 status_reg = 16u;
if (do_rel)
- aa64_emit32(mc, aa64_stlxr(sf, /*Rs=*/12u, /*Rt=*/11u, base));
+ aa64_emit32(mc, aa64_stlxr(sf, status_reg, AA_TMP2, base));
else
- aa64_emit32(mc, aa64_stxr(sf, /*Rs=*/12u, /*Rt=*/11u, base));
+ aa64_emit32(mc, aa64_stxr(sf, status_reg, AA_TMP2, base));
u32 cbnz_pos = mc->pos(mc);
- aa64_emit32(mc, aa64_cbnz(0, /*Rt=*/12u));
+ aa64_emit32(mc, aa64_cbnz(0, status_reg));
mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
(void)cbnz_pos;
}
@@ -1477,28 +1481,28 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
u32 sf = (ma.size == 8) ? 1u : 0u;
(void)fail;
- u32 base = 9u;
+ u32 base = AA_TMP0;
if (addr.kind == OPK_REG)
- aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
+ aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(addr)));
else if (addr.kind == OPK_LOCAL) {
AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot");
- aa64_emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0));
+ aa64_emit32(mc, aa64_sub_imm(1, AA_TMP0, 29, s->off, 0));
} else {
compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported",
(int)addr.kind);
}
if (expected.kind == OPK_IMM)
- aa64_emit_load_imm(mc, sf, 10, expected.v.imm);
+ aa64_emit_load_imm(mc, sf, AA_TMP1, expected.v.imm);
else if (expected.kind == OPK_REG)
- aa64_emit32(mc, aa64_mov_reg(sf, 10, reg_num(expected)));
+ aa64_emit32(mc, aa64_mov_reg(sf, AA_TMP1, reg_num(expected)));
else
compiler_panic(t->c, a->loc, "aarch64 atomic_cas: exp kind %d unsupported",
(int)expected.kind);
if (desired.kind == OPK_IMM)
- aa64_emit_load_imm(mc, sf, 11, desired.v.imm);
+ aa64_emit_load_imm(mc, sf, AA_TMP2, desired.v.imm);
else if (desired.kind == OPK_REG)
- aa64_emit32(mc, aa64_mov_reg(sf, 11, reg_num(desired)));
+ aa64_emit32(mc, aa64_mov_reg(sf, AA_TMP2, reg_num(desired)));
else
compiler_panic(t->c, a->loc, "aarch64 atomic_cas: des kind %d unsupported",
(int)desired.kind);
@@ -1516,15 +1520,15 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
else
aa64_emit32(mc, aa64_ldxr(sf, reg_num(prior), base));
- aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), 10u));
+ aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), AA_TMP1));
aa64_emit32(mc, aa64_b_cond(0x1u /*NE*/));
mc->emit_label_ref(mc, L_fail, R_AARCH64_CONDBR19, 4, 0);
+ /* AA_TMP1 holds `expected` and is reused as the store-exclusive status
+  * register, so the store overwrites it. A plain cbnz back to L_retry would
+  * then compare the next load against the status instead of `expected`.
+  * Instead: capture the status in the flags, restore AA_TMP1 from `prior`
+  * (equal to `expected` here, since the NE branch above was not taken; a
+  * register move leaves the flags alone), then branch on the flags. */
if (do_rel)
- aa64_emit32(mc, aa64_stlxr(sf, 12u, 11u, base));
+ aa64_emit32(mc, aa64_stlxr(sf, AA_TMP1, AA_TMP2, base));
else
- aa64_emit32(mc, aa64_stxr(sf, 12u, 11u, base));
- aa64_emit32(mc, aa64_cbnz(0, 12u));
+ aa64_emit32(mc, aa64_stxr(sf, AA_TMP1, AA_TMP2, base));
+ aa64_emit32(mc, aa64_subs_imm(0, /*Rd=ZR*/ 31u, AA_TMP1, 0));
+ aa64_emit32(mc, aa64_mov_reg(sf, AA_TMP1, reg_num(prior)));
+ aa64_emit32(mc, aa64_b_cond(0x1u /*NE*/));
mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
aa64_emit_load_imm(mc, 0, reg_num(ok), 1);
@@ -1597,12 +1601,12 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
Operand dst = dsts[0];
u32 sz_in = type_byte_size(src.type);
if (sz_in == 8)
- aa64_emit32(mc, aa64_fmov_d_x(0, reg_num(src)));
+ aa64_emit32(mc, aa64_fmov_d_x(AA_FP_TMP0, reg_num(src)));
else
- aa64_emit32(mc, aa64_fmov_s_w(0, reg_num(src)));
- aa64_emit32(mc, aa64_cnt_8b(0, 0));
- aa64_emit32(mc, aa64_addv_b_8b(0, 0));
- aa64_emit32(mc, aa64_fmov_w_s(reg_num(dst), 0));
+ aa64_emit32(mc, aa64_fmov_s_w(AA_FP_TMP0, reg_num(src)));
+ aa64_emit32(mc, aa64_cnt_8b(AA_FP_TMP0, AA_FP_TMP0));
+ aa64_emit32(mc, aa64_addv_b_8b(AA_FP_TMP0, AA_FP_TMP0));
+ aa64_emit32(mc, aa64_fmov_w_s(reg_num(dst), AA_FP_TMP0));
return;
}
case INTRIN_CLZ: {
@@ -1646,46 +1650,46 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
if (kind == INTRIN_MEMCPY) {
u32 i = 0;
while (i + 8 <= n) {
- aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP2, dr, (i32)i));
i += 8;
}
while (i + 4 <= n) {
- aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(2, AA_TMP2, dr, (i32)i));
i += 4;
}
while (i + 2 <= n) {
- aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(1, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(1, AA_TMP2, dr, (i32)i));
i += 2;
}
while (i < n) {
- aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(0, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(0, AA_TMP2, dr, (i32)i));
i += 1;
}
} else {
u32 i = n;
while (i >= 8) {
i -= 8;
- aa64_emit32(mc, aa64_ldur(3, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(3, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP2, dr, (i32)i));
}
while (i >= 4) {
i -= 4;
- aa64_emit32(mc, aa64_ldur(2, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(2, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(2, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(2, AA_TMP2, dr, (i32)i));
}
while (i >= 2) {
i -= 2;
- aa64_emit32(mc, aa64_ldur(1, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(1, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(1, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(1, AA_TMP2, dr, (i32)i));
}
while (i >= 1) {
i -= 1;
- aa64_emit32(mc, aa64_ldur(0, 12, sr, (i32)i));
- aa64_emit32(mc, aa64_stur(0, 12, dr, (i32)i));
+ aa64_emit32(mc, aa64_ldur(0, AA_TMP2, sr, (i32)i));
+ aa64_emit32(mc, aa64_stur(0, AA_TMP2, dr, (i32)i));
}
}
return;
@@ -1710,13 +1714,13 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
b64 |= b64 << 8;
b64 |= b64 << 16;
b64 |= b64 << 32;
- aa64_emit_load_imm(mc, 1, 12, (i64)b64);
- src_reg = 12u;
+ aa64_emit_load_imm(mc, 1, AA_TMP2, (i64)b64);
+ src_reg = AA_TMP2;
}
} else if (bv.kind == OPK_REG) {
- aa64_emit_load_imm(mc, 1, 12, (i64)0x0101010101010101ll);
- aa64_emit32(mc, aa64_madd(1, 12, reg_num(bv), 12, AA64_ZR));
- src_reg = 12u;
+ aa64_emit_load_imm(mc, 1, AA_TMP2, (i64)0x0101010101010101ll);
+ aa64_emit32(mc, aa64_madd(1, AA_TMP2, reg_num(bv), AA_TMP2, AA64_ZR));
+ src_reg = AA_TMP2;
} else {
compiler_panic(t->c, a->loc,
"aarch64 intrinsic: memset byte kind %d unsupported",
@@ -1779,8 +1783,10 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
Operand a_op = args[0], b_op = args[1];
Operand dval = dsts[0], dovf = dsts[1];
u32 sf = type_is_64(dval.type) ? 1u : 0u;
- u32 ra = aa64_force_reg_int(t, a_op, sf, 9);
- u32 rb = aa64_force_reg_int(t, b_op, sf, (ra == 9) ? 10u : 9u);
+ u32 ra = aa64_force_reg_int(t, a_op, sf, AA_TMP0);
+ u32 rb =
+ aa64_force_reg_int(t, b_op, sf,
+ (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0);
u32 word = (kind == INTRIN_ADD_OVERFLOW)
? aa64_adds_reg(sf, reg_num(dval), ra, rb)
: aa64_subs_reg(sf, reg_num(dval), ra, rb);
@@ -1797,12 +1803,14 @@ static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
t->c, a->loc,
"aarch64 intrinsic: mul_overflow on i64 not yet supported");
}
- u32 ra = aa64_force_reg_int(t, a_op, 0, 9);
- u32 rb = aa64_force_reg_int(t, b_op, 0, (ra == 9) ? 10u : 9u);
- aa64_emit32(mc, aa64_smull(/*X*/ 11u, ra, rb));
- aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, /*Xn=*/11u, /*Wm=*/11u));
+ u32 ra = aa64_force_reg_int(t, a_op, 0, AA_TMP0);
+ u32 rb =
+ aa64_force_reg_int(t, b_op, 0,
+ (ra == AA_TMP0) ? AA_TMP1 : AA_TMP0);
+ aa64_emit32(mc, aa64_smull(AA_TMP2, ra, rb));
+ aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, AA_TMP2, AA_TMP2));
aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/));
- aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), 11u));
+ aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), AA_TMP2));
return;
}
default:
diff --git a/src/arch/aarch64/opt_coord.c b/src/arch/aarch64/opt_coord.c
@@ -79,8 +79,7 @@ static void aa_reserve_hard_regs(CGTarget* t, RegClass cls,
default: return;
}
for (u32 i = 0; i < n; ++i) {
- u32 idx = (u32)(regs[i] - p->base);
- if (idx < p->nregs && idx + 1u > p->hwm) p->hwm = idx + 1u;
+ cg_simple_regpool_reserve(p, regs[i]);
}
}
diff --git a/src/arch/regalloc.c b/src/arch/regalloc.c
@@ -0,0 +1,92 @@
+#include "arch/regalloc.h"
+
+#include <string.h>
+
+/* Build the initial free mask for a pool of `nregs` registers: the low
+ * `nregs` bits set, after clamping the count to CG_SIMPLE_REGALLOC_MAX_REGS.
+ * A count of 32 or more is answered with an all-ones mask instead of being
+ * shifted (a shift by the full width of a 32-bit value is undefined in C). */
+static u32 pool_mask(u32 nregs) {
+  u32 count = nregs;
+  if (count > CG_SIMPLE_REGALLOC_MAX_REGS) {
+    count = CG_SIMPLE_REGALLOC_MAX_REGS;
+  }
+  if (count >= 32u) {
+    return 0xFFFFFFFFu;
+  }
+  return (1u << count) - 1u;
+}
+
+/* Initialize `p` as a contiguous-range pool: index i maps to register
+ * base + i. The count is clamped to CG_SIMPLE_REGALLOC_MAX_REGS, every
+ * register starts out free, and the high-water mark is reset to 0. */
+void cg_simple_regpool_init_range(CGSimpleRegPool* p, Reg base, u32 nregs) {
+  const u32 count = (nregs > CG_SIMPLE_REGALLOC_MAX_REGS)
+                        ? CG_SIMPLE_REGALLOC_MAX_REGS
+                        : nregs;
+  p->base = base;
+  p->nregs = count;
+  p->order = NULL; /* NULL order selects the base + i mapping in reg_at */
+  p->hwm = 0;
+  p->free = pool_mask(count);
+}
+
+/* Initialize `p` as an ordered pool: index i maps to regs[i], so allocation
+ * preference follows the order of the table.
+ *
+ *   regs  - caller-owned table; only the pointer is stored, so the table
+ *           must stay valid for as long as the pool is used.
+ *   nregs - number of table entries; clamped to CG_SIMPLE_REGALLOC_MAX_REGS.
+ *
+ * A NULL table yields an empty pool. Without that guard the pool would keep
+ * a non-zero count with order == NULL, which reg_at() interprets as the
+ * contiguous range 0..nregs-1, handing out registers the caller never
+ * listed. */
+void cg_simple_regpool_init_ordered(CGSimpleRegPool* p, const Reg* regs,
+                                    u32 nregs) {
+  if (regs == NULL) nregs = 0;
+  if (nregs > CG_SIMPLE_REGALLOC_MAX_REGS) nregs = CG_SIMPLE_REGALLOC_MAX_REGS;
+  p->free = pool_mask(nregs);
+  p->hwm = 0;
+  p->order = regs;
+  p->base = 0; /* unused while order is non-NULL */
+  p->nregs = nregs;
+}
+
+/* Translate pool index `idx` into its register.
+ * Returns REG_NONE when `idx` is not below p->nregs. Pools with an order
+ * table read the table entry; pools without one compute base + idx. */
+Reg cg_simple_regpool_reg_at(const CGSimpleRegPool* p, u32 idx) {
+  if (idx >= p->nregs) {
+    return (Reg)REG_NONE;
+  }
+  if (p->order) {
+    return p->order[idx];
+  }
+  return (Reg)(p->base + idx);
+}
+
+/* Take the lowest-indexed free register out of the pool.
+ * Returns REG_NONE when nothing is free. On success the register's bit is
+ * cleared from the free mask and the high-water mark is raised to cover its
+ * index if it was lower. */
+Reg cg_simple_regpool_alloc(CGSimpleRegPool* p) {
+  const u32 avail = p->free;
+  if (avail == 0) {
+    return (Reg)REG_NONE;
+  }
+  /* Count of trailing zeros == index of the lowest set (free) bit. */
+  const u32 slot = (u32)__builtin_ctz(avail);
+  p->free = avail & ~(1u << slot);
+  if (p->hwm < slot + 1u) {
+    p->hwm = slot + 1u;
+  }
+  return cg_simple_regpool_reg_at(p, slot);
+}
+
+/* Return register `r` to the pool.
+ * Result:  1  r belonged to the pool and is now marked free
+ *         -1  r belongs to the pool but was already free (mask unchanged)
+ *          0  r is not one of this pool's registers
+ * The lookup is a linear scan over the pool's indices; the first index whose
+ * register equals `r` decides the outcome. */
+int cg_simple_regpool_free(CGSimpleRegPool* p, Reg r) {
+  for (u32 slot = 0; slot < p->nregs; ++slot) {
+    if (cg_simple_regpool_reg_at(p, slot) != r) continue;
+    const u32 mask = 1u << slot;
+    const int already_free = (p->free & mask) != 0;
+    if (!already_free) p->free |= mask;
+    return already_free ? -1 : 1;
+  }
+  return 0;
+}
+
+/* Note that register `r` is in use for high-water-mark purposes: hwm is
+ * raised to (index of r) + 1 if it was lower. The free mask is left alone,
+ * and a register that is not in the pool is ignored. */
+void cg_simple_regpool_reserve(CGSimpleRegPool* p, Reg r) {
+  u32 slot = 0;
+  while (slot < p->nregs && cg_simple_regpool_reg_at(p, slot) != r) {
+    ++slot;
+  }
+  if (slot < p->nregs && p->hwm < slot + 1u) {
+    p->hwm = slot + 1u;
+  }
+}
+
+/* Zero the whole allocator. Each pool ends up with nregs == 0 and an empty
+ * free mask, so allocation from it yields REG_NONE until a set_range /
+ * set_ordered call configures that class. */
+void cg_simple_regalloc_init(CGSimpleRegAlloc* a) {
+  memset(a, 0, sizeof(*a));
+}
+
+/* Configure the pool of register class `cls` as a contiguous range that
+ * starts at `base` and holds `nregs` registers (the pool initializer clamps
+ * the count). A class index outside a->pools is silently ignored. */
+void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base,
+                                  u32 nregs) {
+  const u32 npools = (u32)(sizeof a->pools / sizeof a->pools[0]);
+  if ((u32)cls < npools) {
+    cg_simple_regpool_init_range(&a->pools[cls], base, nregs);
+  }
+}
+
+/* Configure the pool of register class `cls` from the ordered table `regs`
+ * of `nregs` entries. A class index outside a->pools is silently ignored. */
+void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls,
+                                    const Reg* regs, u32 nregs) {
+  const u32 npools = (u32)(sizeof a->pools / sizeof a->pools[0]);
+  if ((u32)cls < npools) {
+    cg_simple_regpool_init_ordered(&a->pools[cls], regs, nregs);
+  }
+}
+
+/* Allocate a register of class `cls` from the matching pool.
+ * Returns REG_NONE when the class index is outside a->pools or when the pool
+ * has no free register. */
+Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls) {
+  const u32 npools = (u32)(sizeof a->pools / sizeof a->pools[0]);
+  if ((u32)cls < npools) {
+    return cg_simple_regpool_alloc(&a->pools[cls]);
+  }
+  return (Reg)REG_NONE;
+}
+
+/* Release register `r` back to the pool of class `cls`.
+ * Returns -2 when the class index is outside a->pools; otherwise forwards the
+ * pool result (1 freed, -1 already free, 0 not in that pool). */
+int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r) {
+  const u32 npools = (u32)(sizeof a->pools / sizeof a->pools[0]);
+  if ((u32)cls < npools) {
+    return cg_simple_regpool_free(&a->pools[cls], r);
+  }
+  return -2;
+}
+
+/* Raise the high-water mark of class `cls`'s pool so that it covers register
+ * `r`. A class index outside a->pools is silently ignored. */
+void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r) {
+  const u32 npools = (u32)(sizeof a->pools / sizeof a->pools[0]);
+  if ((u32)cls < npools) {
+    cg_simple_regpool_reserve(&a->pools[cls], r);
+  }
+}
diff --git a/src/arch/regalloc.h b/src/arch/regalloc.h
@@ -0,0 +1,37 @@
+#ifndef CFREE_ARCH_REGALLOC_H
+#define CFREE_ARCH_REGALLOC_H
+
+#include "arch/arch.h"
+
+/* Largest register count a single pool can track. The pool's free set is a
+ * u32 bitmask, and both pool initializers clamp larger counts to this. */
+#define CG_SIMPLE_REGALLOC_MAX_REGS 32u
+
+/* One pool of allocatable registers, tracked as a bitmask over pool indices.
+ * Index i names a register either through the `order` table or, when `order`
+ * is NULL, as base + i. */
+typedef struct CGSimpleRegPool {
+ u32 free; /* bit i set iff reg_at(i) is free */
+ u32 hwm; /* highest index+1 ever allocated/reserved */
+ const Reg* order; /* optional ordered hard-reg table */
+ Reg base; /* used when order is NULL: reg_at(i) = base + i */
+ u32 nregs; /* number of pool indices; reg_at(i) is REG_NONE for i >= nregs */
+} CGSimpleRegPool;
+
+/* Per-class register allocator: one independent pool for each register
+ * class. The cg_simple_regalloc_* functions reject class values >= 3. */
+typedef struct CGSimpleRegAlloc {
+ CGSimpleRegPool pools[3]; /* indexed by RegClass */
+} CGSimpleRegAlloc;
+
+/* Set up `p` as the contiguous range base .. base+nregs-1 with every register
+ * free and hwm == 0. nregs is clamped to CG_SIMPLE_REGALLOC_MAX_REGS. */
+void cg_simple_regpool_init_range(CGSimpleRegPool* p, Reg base, u32 nregs);
+/* Set up `p` from the table `regs` (index i -> regs[i]) with every register
+ * free and hwm == 0. Only the pointer is stored, not a copy of the table.
+ * nregs is clamped to CG_SIMPLE_REGALLOC_MAX_REGS. */
+void cg_simple_regpool_init_ordered(CGSimpleRegPool* p, const Reg* regs,
+ u32 nregs);
+/* Allocate the lowest-indexed free register and raise hwm to cover it.
+ * Returns REG_NONE when the pool has no free register. */
+Reg cg_simple_regpool_alloc(CGSimpleRegPool* p);
+/* Mark `r` free again. Returns 1 on success, -1 if `r` was already free,
+ * 0 if `r` is not a register of this pool. */
+int cg_simple_regpool_free(CGSimpleRegPool* p, Reg r);
+/* Raise hwm to cover `r` without changing the free mask; no effect when `r`
+ * is not a register of this pool. */
+void cg_simple_regpool_reserve(CGSimpleRegPool* p, Reg r);
+/* Register named by pool index `idx`, or REG_NONE when idx >= nregs. */
+Reg cg_simple_regpool_reg_at(const CGSimpleRegPool* p, u32 idx);
+
+/* Zero-fill the allocator; every pool starts with no registers. */
+void cg_simple_regalloc_init(CGSimpleRegAlloc* a);
+/* Configure class `cls` as a contiguous range (see
+ * cg_simple_regpool_init_range). No-op when cls >= 3. */
+void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base,
+ u32 nregs);
+/* Configure class `cls` from an ordered table (see
+ * cg_simple_regpool_init_ordered). No-op when cls >= 3. */
+void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls,
+ const Reg* regs, u32 nregs);
+/* Allocate from class `cls`. Returns REG_NONE when cls >= 3 or the pool has
+ * no free register. */
+Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls);
+/* Free `r` in class `cls`. Returns -2 when cls >= 3, otherwise the pool
+ * result: 1 freed, -1 already free, 0 not in that pool. */
+int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r);
+/* Raise the hwm of class `cls` to cover `r`. No-op when cls >= 3. */
+void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r);
+
+#endif
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -5,6 +5,7 @@
#include <string.h>
#include "arch/arch.h"
+#include "arch/regalloc.h"
#include "arch/rv64.h"
#include "arch/rv64_isa.h"
#include "core/arena.h"
@@ -14,13 +15,7 @@
#define RV_PROLOGUE_WORDS 32u
/* ---- RegPool ---- */
-typedef struct RegPool {
- u32 free;
- u32 hwm;
- u8 base;
- u8 nregs;
- u8 pad[2];
-} RegPool;
+typedef CGSimpleRegPool RegPool;
/* ---- RvSlot / RvScope ---- */
typedef struct RvSlot {
@@ -115,26 +110,13 @@ static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; }
/* ---- RegPool ops (inlined — identical in each caller) ---- */
static inline void regpool_init(RegPool* p, u8 base, u8 nregs) {
- p->base = base;
- p->nregs = nregs;
- p->hwm = 0;
- p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
+ cg_simple_regpool_init_range(p, base, nregs);
}
static inline Reg regpool_alloc(RegPool* p) {
- if (p->free == 0) return (Reg)REG_NONE;
- u32 idx = (u32)__builtin_ctz(p->free);
- p->free &= ~(1u << idx);
- if (idx + 1u > p->hwm) p->hwm = idx + 1u;
- return (Reg)(p->base + idx);
+ return cg_simple_regpool_alloc(p);
}
static inline int regpool_free(RegPool* p, Reg r) {
- u32 rn = (u32)r;
- if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0;
- u32 idx = rn - p->base;
- u32 bit = 1u << idx;
- if (p->free & bit) return -1;
- p->free |= bit;
- return 1;
+ return cg_simple_regpool_free(p, r);
}
/* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */
diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c
@@ -78,8 +78,7 @@ static void rv_reserve_hard_regs(CGTarget* t, RegClass cls,
default: return;
}
for (u32 i = 0; i < n; ++i) {
- u32 idx = (u32)(regs[i] - p->base);
- if (idx < p->nregs && idx + 1u > p->hwm) p->hwm = idx + 1u;
+ cg_simple_regpool_reserve(p, regs[i]);
}
}
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -19,32 +19,16 @@
/* ============================================================
* XRegPool implementation. */
-void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) {
- p->order = order;
- p->nregs = nregs;
- p->n_cs = n_cs;
- p->hwm = 0;
- p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
+void xpool_init(XRegPool* p, const Reg* order, u32 nregs) {
+ cg_simple_regpool_init_ordered(p, order, nregs);
}
static Reg xpool_alloc(XRegPool* p) {
- if (p->free == 0) return (Reg)REG_NONE;
- u32 idx = (u32)__builtin_ctz(p->free);
- p->free &= ~(1u << idx);
- if (idx + 1u > p->hwm) p->hwm = idx + 1u;
- return (Reg)p->order[idx];
+ return cg_simple_regpool_alloc(p);
}
static int xpool_free(XRegPool* p, Reg r) {
- for (u8 i = 0; i < p->nregs; ++i) {
- if (p->order[i] == (u8)r) {
- u32 bit = 1u << i;
- if (p->free & bit) return -1;
- p->free |= bit;
- return 1;
- }
- }
- return 0;
+ return cg_simple_regpool_free(p, r);
}
/* ============================================================
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -18,12 +18,12 @@
/* ============================================================
* Shared constant tables. */
-const u8 g_int_order[6] = {
+const Reg g_int_order[6] = {
X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */
X64_R10, /* caller-saved tail */
};
-const u8 g_fp_order[10] = {
+const Reg g_fp_order[10] = {
/* All xmm regs are caller-saved on SysV; preference order is xmm6
* upward to keep the low arg/return regs (xmm0..5) clear for calls. */
X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10,
@@ -478,8 +478,8 @@ void x_func_begin(CGTarget* t, const CGFuncDesc* fd) {
a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
- xpool_init(&a->int_pool, g_int_order, 6u, 5u);
- xpool_init(&a->fp_pool, g_fp_order, 10u, 0u);
+ xpool_init(&a->int_pool, g_int_order, 6u);
+ xpool_init(&a->fp_pool, g_fp_order, 10u);
a->nslots = 0;
a->nscopes = 0;
a->nalloca_patches = 0;
@@ -538,7 +538,7 @@ void x_func_end(CGTarget* t) {
MCEmitter* mc = t->mc;
u32 cs_used = a->int_pool.hwm;
- if (cs_used > a->int_pool.n_cs) cs_used = a->int_pool.n_cs;
+ if (cs_used > 5u) cs_used = 5u;
u32 cs_size = cs_used * 8u;
/* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call,
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -13,6 +13,7 @@
#include <string.h>
#include "arch/arch.h"
+#include "arch/regalloc.h"
#include "arch/x64.h"
#include "arch/x64_isa.h"
#include "core/arena.h"
@@ -24,14 +25,7 @@
/* ============================================================
* Custom register pool. */
-typedef struct XRegPool {
- u32 free; /* bit i set ⇔ alloc_order[i] is free */
- u32 hwm; /* highest index+1 ever allocated */
- const u8* order; /* alloc_order; first n_cs are callee-saved */
- u8 nregs;
- u8 n_cs;
- u8 pad[2];
-} XRegPool;
+typedef CGSimpleRegPool XRegPool;
/* ============================================================
* XImpl and friends. */
@@ -137,8 +131,8 @@ static inline _Noreturn void x_panic(CGTarget* t, const char* what) {
/* ============================================================
* Shared constant tables (defined in alloc.c, used in emit.c and ops.c). */
-extern const u8 g_int_order[6];
-extern const u8 g_fp_order[10];
+extern const Reg g_int_order[6];
+extern const Reg g_fp_order[10];
extern const u32 g_int_arg_regs[6];
/* ============================================================
@@ -196,7 +190,7 @@ void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
u32 src);
/* --- alloc.c exports (used by emit.c and/or ops.c) --- */
-void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs);
+void xpool_init(XRegPool* p, const Reg* order, u32 nregs);
XSlot* x64_slot_get(XImpl* a, FrameSlot fs);
FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
Reg x_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty);
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -16,11 +16,11 @@ static void x_get_allocable_regs(CGTarget* t, RegClass cls,
XImpl* a = impl_of(t);
switch (cls) {
case RC_INT:
- *out = (const Reg*)a->int_pool.order;
+ *out = a->int_pool.order;
*nregs = a->int_pool.nregs;
break;
case RC_FP:
- *out = (const Reg*)a->fp_pool.order;
+ *out = a->fp_pool.order;
*nregs = a->fp_pool.nregs;
break;
default:
@@ -75,12 +75,7 @@ static void x_reserve_hard_regs(CGTarget* t, RegClass cls,
default: return;
}
for (u32 i = 0; i < n; ++i) {
- for (u8 j = 0; j < p->nregs; ++j) {
- if (p->order[j] == (u8)regs[i]) {
- if (j + 1u > p->hwm) p->hwm = j + 1u;
- break;
- }
- }
+ cg_simple_regpool_reserve(p, regs[i]);
}
}