commit 3e695b43cadf39b052bc1d70f402d5fdfdf49292
parent ed3b81da82f373155f1790929e3c7a7d96cf2b62
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 14:36:53 -0700
cg/aa64: implement Groups D, E, F (control flow, conv, memory)
Brings the AArch64 CGTarget through the full cg test corpus:
- D: cmp/cmp_branch via SUBS XZR + CSET/B.cond, with a CmpOp→ARM cond
table; labels pass through to MCEmitter; SCOPE_IF via a per-function
AAScope table tracking else/end labels. mc.c learns to apply
R_AARCH64_CONDBR19 fixups for B.cond's imm19 displacement.
- E: convert covers SEXT/ZEXT (SBFM/UBFM), TRUNC (W-view MOV), ITOF_S/U
(SCVTF/UCVTF), FTOI_U (FCVTZU), FEXT/FTRUNC (FCVT S↔D), and BITCAST
(FMOV between GPR and FP, single and double).
- F: copy_bytes/set_bytes as unrolled 8/4/2/1-byte LDUR/STUR sequences
(XZR fast-path for zero-fill); bitfield_load uses LDUR + UBFX/SBFX;
bitfield_store is a read-modify-write with BFI.
All 288 cg cases (72 × D/R/E/J) now pass.
Diffstat:
2 files changed, 454 insertions(+), 18 deletions(-)
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -130,6 +130,53 @@ static inline u32 aa64_cset_eq(u32 sf, u32 Rd)
* sf: 0=W, 1=X. type: 0=S, 1=D. */
static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn)
{ return 0x1E380000u | (sf<<31) | ((type&3)<<22) | ((Rn&0x1f)<<5) | (Rd&0x1f); }
+/* FCVTZU — FP → unsigned integer. Same operand conventions as
+ * aa64_fcvtzs: sf selects the W/X destination, type the S/D source. */
+static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn)
+{
+    u32 insn = 0x1E390000u;
+    insn |= sf << 31;
+    insn |= (type & 3) << 22;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+/* SCVTF — signed integer → FP. sf is the width of the integer source
+ * (0=W, 1=X); type is the FP destination precision (0=S, 1=D). */
+static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn)
+{
+    u32 insn = 0x1E220000u;
+    insn |= sf << 31;
+    insn |= (type & 3) << 22;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+/* UCVTF — unsigned integer → FP. sf is the width of the integer source
+ * (0=W, 1=X); type is the FP destination precision (0=S, 1=D). */
+static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn)
+{
+    u32 insn = 0x1E230000u;
+    insn |= sf << 31;
+    insn |= (type & 3) << 22;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+
+/* FCVT — between FP precisions. S→D widens; D→S narrows. */
+static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn)
+{
+    /* Widening form: single-precision source Rn, double-precision result Rd. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x1E22C000u | src_field | dst_field;
+}
+static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn)
+{
+    /* Narrowing form: double-precision source Rn, single-precision result Rd. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x1E624000u | src_field | dst_field;
+}
+
+/* FMOV between FP and GPR (BITCAST). */
+static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn)
+{
+    /* GPR→FP, single: Rn is the integer source, Rd the FP destination. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x1E270000u | src_field | dst_field;
+}
+static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn)
+{
+    /* FP→GPR, single: Rn is the FP source, Rd the integer destination. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x1E260000u | src_field | dst_field;
+}
+static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn)
+{
+    /* GPR→FP, double: Rn is the integer source, Rd the FP destination. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x9E670000u | src_field | dst_field;
+}
+static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn)
+{
+    /* FP→GPR, double: Rn is the FP source, Rd the integer destination. */
+    u32 src_field = (Rn & 0x1f) << 5;
+    u32 dst_field = Rd & 0x1f;
+    return 0x9E660000u | src_field | dst_field;
+}
+
+/* SUBS (shifted register): Rd = Rn - Rm, setting the flags. Passing the
+ * zero register as Rd gives the CMP form. sf: 0=W, 1=X. */
+static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    u32 insn = 0x6B000000u;
+    insn |= sf << 31;
+    insn |= (Rm & 0x1f) << 16;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+
+/* B.cond — only the 4-bit condition is encoded here. The imm19
+ * displacement (bits 5..23) stays zero and is patched later by the
+ * linker / MCEmitter. */
+static inline u32 aa64_b_cond(u32 cond)
+{
+    u32 cond_field = cond & 0xfu;
+    return 0x54000000u | cond_field;
+}
+
+/* CSINC Rd, Rn, Rm, cond (CSEL family with op2=01). CSET Rd, cond
+ * is CSINC Rd, ZR, ZR, !cond. sf: 0=W, 1=X. */
+static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond)
+{
+    u32 insn = 0x1A800400u;
+    insn |= sf << 31;
+    insn |= (Rm & 0x1f) << 16;
+    insn |= (cond & 0xfu) << 12;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond)
+{
+    /* CSET is CSINC with both sources = ZR and the condition inverted;
+     * flipping bit 0 of the condition code performs the inversion. */
+    u32 inverted = cond ^ 1u;
+    return aa64_csinc(sf, Rd, 31u, 31u, inverted);
+}
+
+/* SBFM / UBFM / BFM (bitfield move family).
+ * sf opc(2) 100110 N immr(6) imms(6) Rn(5) Rd(5)
+ * opc: 00=SBFM, 01=BFM, 10=UBFM. N must equal sf. */
+static inline u32 aa64_sbfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms)
+{
+    /* opc=00 (SBFM). sf is written to both bit 31 and the N bit (22). */
+    u32 insn = 0x13000000u;
+    insn |= sf << 31;
+    insn |= sf << 22;
+    insn |= (immr & 0x3fu) << 16;
+    insn |= (imms & 0x3fu) << 10;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms)
+{
+    /* opc=10 (UBFM). sf is written to both bit 31 and the N bit (22). */
+    u32 insn = 0x53000000u;
+    insn |= sf << 31;
+    insn |= sf << 22;
+    insn |= (immr & 0x3fu) << 16;
+    insn |= (imms & 0x3fu) << 10;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
+static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms)
+{
+    /* opc=01 (BFM). sf is written to both bit 31 and the N bit (22). */
+    u32 insn = 0x33000000u;
+    insn |= sf << 31;
+    insn |= sf << 22;
+    insn |= (immr & 0x3fu) << 16;
+    insn |= (imms & 0x3fu) << 10;
+    insn |= (Rn & 0x1f) << 5;
+    insn |= Rd & 0x1f;
+    return insn;
+}
/* ============================================================
* AAImpl
@@ -145,6 +192,14 @@ typedef struct AASlot {
u8 pad[3];
} AASlot;
+typedef struct AAScope {
+ u8 kind; /* ScopeKind of the scope, narrowed to u8 by scope_begin */
+ u8 has_else; /* 0 after scope_begin; set to 1 once scope_else has run */
+ u8 pad[2];
+ MCLabel else_label; /* SCOPE_IF: false-branch target; placed by scope_else, or by scope_end when there is no else arm */
+ MCLabel end_label; /* SCOPE_IF: join point past the whole if/else; placed by scope_end */
+} AAScope;
+
typedef struct AAImpl {
CGTarget base;
SrcLoc loc;
@@ -172,6 +227,13 @@ typedef struct AAImpl {
/* Reg allocator (callee-saved prefix). */
u32 used_int; /* x19 + i, i in [0, used_int) */
u32 used_fp; /* v8 + i, i in [0, used_fp ) */
+
+ /* Structured-scope stack. Entries are not popped — IDs returned to
+ * the caller are stable indices into this array for the lifetime
+ * of the function. nscopes is reset at func_begin. */
+ AAScope* scopes;
+ u32 nscopes;
+ u32 scopes_cap;
} AAImpl;
static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
@@ -179,6 +241,7 @@ static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
/* Forward decls used before definition. */
static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d);
static AASlot* slot_get(AAImpl* a, FrameSlot fs);
+static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch);
/* ---- helpers ---- */
@@ -341,6 +404,7 @@ static void aa_func_begin(CGTarget* t, const CGFuncDesc* fd)
a->used_int = 0;
a->used_fp = 0;
a->nslots = 0;
+ a->nscopes = 0;
a->sret_ptr_slot = FRAME_SLOT_NONE;
a->epilogue_label = mc->label_new(mc);
@@ -598,18 +662,151 @@ static const Reg* aa_clobbers (CGTarget* t, RegClass c, u32* n) { (void)c;
static void aa_spill_reg (CGTarget* t, Operand s, FrameSlot f, MemAccess m) { (void)s; (void)f; (void)m; aa_panic(t, "spill_reg"); }
static void aa_reload_reg(CGTarget* t, Operand d, FrameSlot f, MemAccess m) { (void)d; (void)f; (void)m; aa_panic(t, "reload_reg"); }
-/* ---- labels / control flow (deferred for D-group; ret uses internal label) ---- */
+/* ---- labels / control flow ----
+ *
+ * Label is a transparent wrapper around MCLabel — the MCEmitter already
+ * tracks placement and applies pending fixups. Jumps emit a B with
+ * imm26=0 paired with R_AARCH64_JUMP26; conditional branches emit a
+ * B.cond with imm19=0 paired with R_AARCH64_CONDBR19. */
+
+/* Allocate a fresh label from the MCEmitter and hand it back as a Label. */
+static Label aa_label_new(CGTarget* t)
+{
+    MCEmitter* mc = t->mc;
+    return (Label)mc->label_new(mc);
+}
+
+/* Bind label `l` at the current emission point by forwarding to the
+ * MCEmitter. */
+static void aa_label_place(CGTarget* t, Label l)
+{
+    MCEmitter* mc = t->mc;
+    MCLabel target = (MCLabel)l;
+    mc->label_place(mc, target);
+}
+
+/* Unconditional jump to `l`: a B word from aa64_b_base() followed by a
+ * R_AARCH64_JUMP26 label reference recorded with the MCEmitter. */
+static void aa_jump(CGTarget* t, Label l)
+{
+    MCLabel target = (MCLabel)l;
+    emit32(t->mc, aa64_b_base());
+    t->mc->emit_label_ref(t->mc, target, R_AARCH64_JUMP26, 4, 0);
+}
+
+/* Translate a CmpOp into the 4-bit AArch64 condition code under which
+ * the comparison holds (branch taken / CSET produces 1). Ops without an
+ * entry below — the FP compares, which would go through FCMP and are
+ * not exercised yet — yield EQ. */
+static u32 cmp_to_cond(CmpOp op)
+{
+    u32 cond = 0x0u;
+    switch (op) {
+    case CMP_EQ:   cond = 0x0u; break; /* EQ */
+    case CMP_NE:   cond = 0x1u; break; /* NE */
+    case CMP_GE_U: cond = 0x2u; break; /* CS/HS */
+    case CMP_LT_U: cond = 0x3u; break; /* CC/LO */
+    case CMP_GT_U: cond = 0x8u; break; /* HI */
+    case CMP_LE_U: cond = 0x9u; break; /* LS */
+    case CMP_GE_S: cond = 0xau; break; /* GE */
+    case CMP_LT_S: cond = 0xbu; break; /* LT */
+    case CMP_GT_S: cond = 0xcu; break; /* GT */
+    case CMP_LE_S: cond = 0xdu; break; /* LE */
+    default:       break;
+    }
+    return cond;
+}
+
+/* Emit the flag-setting compare of `a_op` against `b_op` (SUBS with the
+ * zero register as destination). The operation width is taken from
+ * `a_op`; signedness is expressed only by the condition code the caller
+ * uses afterwards. Operands are brought into registers via
+ * force_reg_int with x9 / x10 as scratch. */
+static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op)
+{
+    MCEmitter* mc = t->mc;
+    u32 sf = type_is_64(a_op.type) ? 1u : 0u;
+    int rhs_is_zero = (b_op.kind == OPK_IMM && b_op.v.imm == 0);
+
+    if (rhs_is_zero && a_op.kind == OPK_REG) {
+        /* Register against literal zero: the immediate form needs no
+         * scratch register. */
+        emit32(mc, aa64_subs_imm(sf, /*Rd=ZR*/31u, reg_num(a_op), 0));
+    } else {
+        u32 lhs = force_reg_int(t, a_op, sf, 9);
+        /* Keep the second scratch distinct from whatever lhs ended up in. */
+        u32 rhs_scratch = (lhs == 9) ? 10u : 9u;
+        u32 rhs = force_reg_int(t, b_op, sf, rhs_scratch);
+        emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/31u, lhs, rhs));
+    }
+}
+
+/* Compare `a` with `b` and branch to `l` when `op` holds: the compare,
+ * then a B.cond word, then a R_AARCH64_CONDBR19 reference to the label. */
+static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l)
+{
+    emit_cmp_ab(t, a, b);
+
+    u32 cond = cmp_to_cond(op);
+    emit32(t->mc, aa64_b_cond(cond));
+    t->mc->emit_label_ref(t->mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0);
+}
+
+/* Materialize the result of `a op b` into `dst`: the compare followed by
+ * a CSET on the matching condition. The CSET width follows dst's type. */
+static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b)
+{
+    emit_cmp_ab(t, a, b);
+
+    u32 sf_dst = 0u;
+    if (type_is_64(dst.type)) {
+        sf_dst = 1u;
+    }
+    u32 set_insn = aa64_cset(sf_dst, reg_num(dst), cmp_to_cond(op));
+    emit32(t->mc, set_insn);
+}
+
+/* ---- structured scopes (SCOPE_IF only for v1) ---- */
-static Label aa_label_new (CGTarget* t) { aa_panic(t, "label_new"); }
-static void aa_label_place(CGTarget* t, Label l) { (void)l; aa_panic(t, "label_place"); }
-static void aa_jump (CGTarget* t, Label l) { (void)l; aa_panic(t, "jump"); }
-static void aa_cmp_branch (CGTarget* t, CmpOp op, Operand a, Operand b, Label l) { (void)op;(void)a;(void)b;(void)l; aa_panic(t, "cmp_branch"); }
+static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d)
+{
+ AAImpl* a = impl_of(t);
+ if (a->nscopes == a->scopes_cap) { /* table is full: grow before appending */
+ u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; /* double the capacity, starting at 4 */
+ AAScope* nb = arena_array(t->c->tu, AAScope, ncap);
+ if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes); /* carry existing entries over */
+ a->scopes = nb;
+ a->scopes_cap = ncap;
+ }
+ AAScope* sc = &a->scopes[a->nscopes]; /* entry for the scope being opened */
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = t->mc->label_new(t->mc); /* both labels are allocated before the kind check below */
+ sc->end_label = t->mc->label_new(t->mc);
+
+ if (d->kind == SCOPE_IF) {
+ /* Compare d->cond with zero and branch to else_label on EQ, i.e. when the condition is false. */
+ u32 sf = type_is_64(d->cond.type) ? 1u : 0u;
+ u32 rn = force_reg_int(t, d->cond, sf, 9);
+ emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/31u, rn, 0));
+ emit32(t->mc, aa64_b_cond(0x0u /*EQ*/));
+ t->mc->emit_label_ref(t->mc, sc->else_label,
+ R_AARCH64_CONDBR19, 4, 0);
+ } else {
+ /* BLOCK / LOOP not yet exercised by the corpus; any kind other than SCOPE_IF panics here. */
+ compiler_panic(t->c, a->loc,
+ "aarch64 scope_begin: kind %d not yet implemented", (int)d->kind);
+ }
-static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) { (void)d; aa_panic(t, "scope_begin"); }
-static void aa_scope_else (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "scope_else"); }
-static void aa_scope_end (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "scope_end"); }
-static void aa_break_to (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "break_to"); }
-static void aa_continue_to(CGTarget* t, CGScope s) { (void)s; aa_panic(t, "continue_to"); }
+ a->nscopes++;
+ return (CGScope)a->nscopes; /* 1-based id: scope_else / scope_end index the table with s - 1 */
+}
+
+/* Close the then-arm of scope `s` and open its else-arm.
+ *
+ * Emits a jump to the scope's end_label (so the then-arm skips the else
+ * body), places else_label at the start of the else body, and records
+ * that the scope now has an else arm.
+ *
+ * Panics when `s` is not a valid scope id, when the scope is not a
+ * SCOPE_IF, or when scope_else was already called for it — a second
+ * call would place else_label twice. */
+static void aa_scope_else(CGTarget* t, CGScope s)
+{
+    AAImpl* a = impl_of(t);
+    if (s == CG_SCOPE_NONE || s > a->nscopes) {
+        compiler_panic(t->c, a->loc, "aarch64 scope_else: bad scope %u",
+                       (unsigned)s);
+    }
+    AAScope* sc = &a->scopes[s - 1];
+    if (sc->kind != SCOPE_IF || sc->has_else) {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 scope_else: scope %u is not an if without an else arm",
+                       (unsigned)s);
+    }
+    /* End of the then-arm: jump past the else body. */
+    emit32(t->mc, aa64_b_base());
+    t->mc->emit_label_ref(t->mc, sc->end_label, R_AARCH64_JUMP26, 4, 0);
+    /* Begin of the else-arm. */
+    t->mc->label_place(t->mc, sc->else_label);
+    sc->has_else = 1;
+}
+
+/* Close scope `s`. For an if without an else arm the false branch lands
+ * here, so else_label is placed first; end_label is always placed.
+ * Panics on an invalid scope id. */
+static void aa_scope_end(CGTarget* t, CGScope s)
+{
+    AAImpl* a = impl_of(t);
+    MCEmitter* mc = t->mc;
+
+    if (!(s != CG_SCOPE_NONE && s <= a->nscopes)) {
+        compiler_panic(t->c, a->loc, "aarch64 scope_end: bad scope %u",
+                       (unsigned)s);
+    }
+
+    AAScope* sc = &a->scopes[s - 1];
+    int if_without_else = (sc->kind == SCOPE_IF) && !sc->has_else;
+    if (if_without_else) {
+        mc->label_place(mc, sc->else_label);
+    }
+    mc->label_place(mc, sc->end_label);
+}
+
+/* Not implemented for this target yet: always panics. */
+static void aa_break_to(CGTarget* t, CGScope s)
+{
+    (void)s;
+    aa_panic(t, "break_to");
+}
+/* Not implemented for this target yet: always panics. */
+static void aa_continue_to(CGTarget* t, CGScope s)
+{
+    (void)s;
+    aa_panic(t, "continue_to");
+}
/* ---- data movement ---- */
@@ -779,10 +976,155 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv)
}
static void aa_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) { (void)d;(void)s;(void)a; aa_panic(t, "tls_addr_of"); }
-static void aa_copy_bytes(CGTarget* t, Operand d, Operand s, AggregateAccess g) { (void)d;(void)s;(void)g; aa_panic(t, "copy_bytes"); }
-static void aa_set_bytes (CGTarget* t, Operand d, Operand b, AggregateAccess g) { (void)d;(void)b;(void)g; aa_panic(t, "set_bytes"); }
-static void aa_bitfield_load (CGTarget* t, Operand d, Operand a, BitFieldAccess f) { (void)d;(void)a;(void)f; aa_panic(t, "bitfield_load"); }
-static void aa_bitfield_store(CGTarget* t, Operand a, Operand s, BitFieldAccess f) { (void)a;(void)s;(void)f; aa_panic(t, "bitfield_store"); }
+
+/* Return the number of a register holding the address described by `op`,
+ * for the aggregate ops below.
+ *   OPK_REG   — the operand's own register is returned; nothing is emitted.
+ *   OPK_LOCAL — the slot address (fp - off) is computed into `scratch`,
+ *               which is then returned.
+ * Any other operand kind, or an unknown frame slot, panics. */
+static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch)
+{
+    AAImpl* impl = impl_of(t);
+
+    if (op.kind == OPK_REG) {
+        return reg_num(op);
+    }
+    if (op.kind != OPK_LOCAL) {
+        compiler_panic(t->c, impl->loc,
+                       "aarch64 agg: address kind %d unsupported", (int)op.kind);
+    }
+
+    AASlot* slot = slot_get(impl, op.v.frame_slot);
+    if (!slot) {
+        compiler_panic(t->c, impl->loc, "aarch64 agg: bad slot");
+    }
+    /* scratch = x29 - slot->off */
+    emit32(t->mc, aa64_sub_imm(1, scratch, 29, slot->off, 0));
+    return scratch;
+}
+
+/* Copy agg.size bytes from [src_addr] to [dst_addr] as a fully unrolled
+ * LDUR/STUR sequence through scratch x12, widest chunks first
+ * (8, 4, 2, then 1 byte).
+ *
+ * Unscaled LDUR/STUR are used so legality does not depend on agg.align.
+ * Their immediate is a signed 9-bit byte offset, so the largest offset
+ * that can be encoded is 255; an aggregate larger than 256 bytes would
+ * need offsets beyond that and is rejected with a panic instead of
+ * being encoded with a wrapped offset.
+ *
+ * dst_addr / src_addr: OPK_REG or OPK_LOCAL (see agg_addr_reg); local
+ * addresses are materialized into x9 / x10 (x11 when dst already lives
+ * in x10). */
+static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
+                          AggregateAccess agg)
+{
+    MCEmitter* mc = t->mc;
+    const u32 max_bytes = 256u; /* last byte sits at offset 255 */
+    u32 nbytes = agg.size;
+    if (nbytes > max_bytes) {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "aarch64 copy_bytes: size %u exceeds LDUR/STUR offset range",
+                       (unsigned)nbytes);
+    }
+
+    u32 dr = agg_addr_reg(t, dst_addr, 9);
+    u32 sr = agg_addr_reg(t, src_addr, (dr == 10) ? 11u : 10u);
+
+    /* sidx is the load/store size index: 3, 2, 1, 0 = 8, 4, 2, 1 bytes. */
+    u32 off = 0;
+    for (u32 sidx = 4; sidx-- > 0; ) {
+        u32 step = 1u << sidx;
+        while (off + step <= nbytes) {
+            emit32(mc, aa64_ldur(sidx, 12, sr, (i32)off));
+            emit32(mc, aa64_stur(sidx, 12, dr, (i32)off));
+            off += step;
+        }
+    }
+}
+
+/* Emit the STUR sequence that writes `nbytes` bytes starting at [base],
+ * taking the data from register `rt`: widest stores first (size index
+ * 3, 2, 1, 0 = 8, 4, 2, 1 bytes). */
+static void set_bytes_emit_stores(MCEmitter* mc, u32 rt, u32 base, u32 nbytes)
+{
+    u32 off = 0;
+    for (u32 sidx = 4; sidx-- > 0; ) {
+        u32 step = 1u << sidx;
+        while (off + step <= nbytes) {
+            emit32(mc, aa64_stur(sidx, rt, base, (i32)off));
+            off += step;
+        }
+    }
+}
+
+/* Fill agg.size bytes at [dst_addr] with the low 8 bits of byte_value.
+ *
+ * Only an immediate byte_value is supported; anything else panics. A
+ * zero byte is stored straight from XZR/WZR; any other value is
+ * broadcast to all eight bytes of x12 first. The stores are unscaled
+ * STURs whose immediate is a signed 9-bit byte offset (maximum 255), so
+ * a fill larger than 256 bytes is rejected with a panic instead of being
+ * encoded with a wrapped offset. */
+static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
+                         AggregateAccess agg)
+{
+    MCEmitter* mc = t->mc;
+    const u32 max_bytes = 256u; /* last byte sits at offset 255 */
+    u32 nbytes = agg.size;
+    if (nbytes > max_bytes) {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "aarch64 set_bytes: size %u exceeds STUR offset range",
+                       (unsigned)nbytes);
+    }
+
+    u32 dr = agg_addr_reg(t, dst_addr, 9);
+
+    if (byte_value.kind != OPK_IMM) {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "aarch64 set_bytes: REG byte not yet supported");
+    }
+    u32 byte = (u32)(byte_value.v.imm & 0xffu);
+
+    if (byte == 0) {
+        /* Use XZR/WZR directly — no broadcast register needed. */
+        set_bytes_emit_stores(mc, 31u, dr, nbytes);
+        return;
+    }
+
+    /* Broadcast the byte into every byte lane of x12, then store. */
+    u64 b64 = byte;
+    b64 |= b64 << 8;
+    b64 |= b64 << 16;
+    b64 |= b64 << 32;
+    emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)b64);
+    set_bytes_emit_stores(mc, 12u, dr, nbytes);
+}
+
+/* Load a bit-field into `dst`.
+ *
+ * The whole storage unit (bf.storage.size bytes, 4 when that is zero) is
+ * loaded from record_addr + bf.storage_offset into dst's register. The
+ * field — bf.bit_width bits (1 when that is zero) starting at
+ * bf.bit_offset — is then extracted in place, sign-extending when
+ * bf.signed_ is set and zero-extending otherwise. The extract runs at
+ * 64-bit width only for an 8-byte storage unit. */
+static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
+                             BitFieldAccess bf)
+{
+    MCEmitter* mc = t->mc;
+    u32 addr_reg = agg_addr_reg(t, record_addr, 9);
+
+    u32 unit_bytes = bf.storage.size;
+    if (unit_bytes == 0) {
+        unit_bytes = 4u;
+    }
+    u32 sf = (unit_bytes == 8u) ? 1u : 0u;
+    u32 size_idx = size_idx_for_bytes(unit_bytes);
+    u32 rd = reg_num(dst);
+
+    emit32(mc, aa64_ldur(size_idx, rd, addr_reg, (i32)bf.storage_offset));
+
+    /* UBFX / SBFX #lsb, #width are UBFM / SBFM with immr = lsb and
+     * imms = lsb + width - 1. */
+    u32 first_bit = bf.bit_offset;
+    u32 nbits = bf.bit_width ? bf.bit_width : 1u;
+    u32 last_bit = first_bit + nbits - 1u;
+    u32 extract = bf.signed_ ? aa64_sbfm(sf, rd, rd, first_bit, last_bit)
+                             : aa64_ubfm(sf, rd, rd, first_bit, last_bit);
+    emit32(mc, extract);
+}
+
+/* Store `src` into a bit-field as a read-modify-write of its storage unit.
+ *
+ * The storage unit (bf.storage.size bytes, 4 when that is zero) is
+ * loaded into scratch x10, the low bf.bit_width bits of the source
+ * (1 when that is zero) are inserted at bf.bit_offset, and x10 is
+ * written back. An immediate source is first materialized into scratch
+ * x11; a register source is used directly; any other kind panics. */
+static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
+                              BitFieldAccess bf)
+{
+    MCEmitter* mc = t->mc;
+    const u32 unit_reg = 10u; /* holds the storage unit */
+    const u32 imm_reg = 11u;  /* holds an immediate source */
+
+    u32 addr_reg = agg_addr_reg(t, record_addr, 9);
+    u32 unit_bytes = bf.storage.size;
+    if (unit_bytes == 0) {
+        unit_bytes = 4u;
+    }
+    u32 sf = (unit_bytes == 8u) ? 1u : 0u;
+    u32 size_idx = size_idx_for_bytes(unit_bytes);
+
+    emit32(mc, aa64_ldur(size_idx, unit_reg, addr_reg, (i32)bf.storage_offset));
+
+    u32 value_reg = imm_reg;
+    if (src.kind == OPK_REG) {
+        value_reg = reg_num(src);
+    } else if (src.kind == OPK_IMM) {
+        emit_load_imm(mc, sf, imm_reg, src.v.imm);
+    } else {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "aarch64 bitfield_store: src kind %d unsupported", (int)src.kind);
+    }
+
+    /* BFI Rd, Rn, #lsb, #width is BFM with
+     * immr = (RegSize - lsb) mod RegSize and imms = width - 1. */
+    u32 reg_bits = sf ? 64u : 32u;
+    u32 first_bit = bf.bit_offset;
+    u32 nbits = bf.bit_width ? bf.bit_width : 1u;
+    u32 rotate = (reg_bits - first_bit) % reg_bits;
+    emit32(mc, aa64_bfm(sf, unit_reg, value_reg, rotate, nbits - 1u));
+
+    emit32(mc, aa64_stur(size_idx, unit_reg, addr_reg, (i32)bf.storage_offset));
+}
/* ---- arithmetic ---- */
@@ -865,20 +1207,97 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op)
emit32(mc, word);
}
-static void aa_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, Operand b)
-{ (void)op;(void)d;(void)a;(void)b; aa_panic(t, "cmp"); }
-
static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src)
{
AAImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+ u32 rd = reg_num(dst);
+ u32 rn = reg_num(src);
+
switch (k) {
+ case CV_SEXT: {
+ if (src.cls != RC_INT || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes");
+ }
+ u32 src_bits = type_byte_size(src.type) * 8u;
+ u32 sf_dst = type_is_64(dst.type) ? 1u : 0u;
+ emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
+ return;
+ }
+ case CV_ZEXT: {
+ if (src.cls != RC_INT || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes");
+ }
+ u32 src_bits = type_byte_size(src.type) * 8u;
+ if (src_bits == 32u) {
+ /* MOV Wd, Wn auto-zero-extends into the X register. */
+ emit32(mc, aa64_mov_reg(0, rd, rn));
+ } else {
+ emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
+ }
+ return;
+ }
+ case CV_TRUNC: {
+ /* Reading the W view of any X register zeros the upper 32 bits.
+ * For narrower truncations the consumer (store / ret) selects
+ * the byte width — leaving extra high bits is harmless. */
+ emit32(mc, aa64_mov_reg(0, rd, rn));
+ return;
+ }
+ case CV_ITOF_S: {
+ u32 sf_src = type_is_64(src.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ emit32(mc, aa64_scvtf(sf_src, type, rd, rn));
+ return;
+ }
+ case CV_ITOF_U: {
+ u32 sf_src = type_is_64(src.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ emit32(mc, aa64_ucvtf(sf_src, type, rd, rn));
+ return;
+ }
case CV_FTOI_S: {
if (src.cls != RC_FP || dst.cls != RC_INT) {
compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes");
}
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 type = type_is_fp_double(src.type) ? 1u : 0u;
- emit32(t->mc, aa64_fcvtzs(sf, type, reg_num(dst), reg_num(src)));
+ emit32(mc, aa64_fcvtzs(sf, type, rd, rn));
+ return;
+ }
+ case CV_FTOI_U: {
+ if (src.cls != RC_FP || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes");
+ }
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(src.type) ? 1u : 0u;
+ emit32(mc, aa64_fcvtzu(sf, type, rd, rn));
+ return;
+ }
+ case CV_FEXT: {
+ /* float (S) → double (D). */
+ emit32(mc, aa64_fcvt_d_s(rd, rn));
+ return;
+ }
+ case CV_FTRUNC: {
+ /* double (D) → float (S). */
+ emit32(mc, aa64_fcvt_s_d(rd, rn));
+ return;
+ }
+ case CV_BITCAST: {
+ /* Same-size cross-class reinterpret (i32↔f32, i64↔f64). */
+ if (src.cls == RC_INT && dst.cls == RC_FP) {
+ u32 sz = type_byte_size(dst.type);
+ emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn)
+ : aa64_fmov_s_w(rd, rn));
+ } else if (src.cls == RC_FP && dst.cls == RC_INT) {
+ u32 sz = type_byte_size(src.type);
+ emit32(mc, sz == 8 ? aa64_fmov_x_d(rd, rn)
+ : aa64_fmov_w_s(rd, rn));
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64 convert BITCAST: same-class not yet supported");
+ }
return;
}
default:
diff --git a/src/arch/mc.c b/src/arch/mc.c
@@ -119,6 +119,23 @@ static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset)
obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
break;
}
+ case R_AARCH64_CONDBR19: {
+ /* imm19 at bits 5..23 of B.cond; word-aligned displacement. */
+ i64 idisp = disp >> 2;
+ u32 imm19 = (u32)(idisp & 0x7ffffu);
+ const Section* s = obj_section_get(mc->base.obj, fx->sec_id);
+ if (!s) break;
+ u8 cur[4];
+ buf_read(&s->bytes, fx->offset, cur, 4);
+ u32 word = (u32)cur[0] | ((u32)cur[1] << 8) | ((u32)cur[2] << 16) | ((u32)cur[3] << 24);
+ word = (word & ~(0x7ffffu << 5)) | (imm19 << 5);
+ cur[0] = (u8)(word & 0xff);
+ cur[1] = (u8)((word >> 8) & 0xff);
+ cur[2] = (u8)((word >> 16)& 0xff);
+ cur[3] = (u8)((word >> 24)& 0xff);
+ obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
+ break;
+ }
default:
compiler_panic(mc->base.c, mc->loc,
"MCEmitter: unsupported label-ref reloc kind %d",