kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 3e695b43cadf39b052bc1d70f402d5fdfdf49292
parent ed3b81da82f373155f1790929e3c7a7d96cf2b62
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 14:36:53 -0700

cg/aa64: implement Groups D, E, F (control flow, conv, memory)

Brings the AArch64 CGTarget through the full cg test corpus:

- D: cmp/cmp_branch via SUBS XZR + CSET/B.cond, with a CmpOp→ARM cond
  table; labels pass straight through to MCEmitter; SCOPE_IF via a
  per-function AAScope table tracking else/end labels. mc.c learns to
  apply R_AARCH64_CONDBR19 fixups for B.cond's imm19 displacement.
- E: convert covers SEXT/ZEXT (SBFM/UBFM), TRUNC (W-view MOV), ITOF_S/U
  (SCVTF/UCVTF), FTOI_U (FCVTZU), FEXT/FTRUNC (FCVT S↔D), and BITCAST
  (FMOV between GPR and FP, single and double).
- F: copy_bytes/set_bytes emit fully unrolled 8/4/2/1-byte LDUR/STUR
  sequences (XZR fast-path for zero-fill); bitfield_load uses LDUR +
  UBFX/SBFX; bitfield_store is a read-modify-write with BFI.

All 288 cg cases (72 × D/R/E/J) now pass.

Diffstat:
M src/arch/aarch64.c | 455 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/arch/mc.c      |  17 +++++++++++++++++
2 files changed, 454 insertions(+), 18 deletions(-)

diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c @@ -130,6 +130,53 @@ static inline u32 aa64_cset_eq(u32 sf, u32 Rd) * sf: 0=W, 1=X. type: 0=S, 1=D. */ static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn) { return 0x1E380000u | (sf<<31) | ((type&3)<<22) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn) +{ return 0x1E390000u | (sf<<31) | ((type&3)<<22) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn) +{ return 0x1E220000u | (sf<<31) | ((type&3)<<22) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn) +{ return 0x1E230000u | (sf<<31) | ((type&3)<<22) | ((Rn&0x1f)<<5) | (Rd&0x1f); } + +/* FCVT — between FP precisions. S→D widens; D→S narrows. */ +static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn) +{ return 0x1E22C000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn) +{ return 0x1E624000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } + +/* FMOV between FP and GPR (BITCAST). */ +static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn) /* GPR→FP, single */ +{ return 0x1E270000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn) /* FP→GPR, single */ +{ return 0x1E260000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) /* GPR→FP, double */ +{ return 0x9E670000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) /* FP→GPR, double */ +{ return 0x9E660000u | ((Rn&0x1f)<<5) | (Rd&0x1f); } + +/* SUBS shifted register (Rd=ZR encodes CMP). */ +static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) +{ return 0x6B000000u | (sf<<31) | ((Rm&0x1f)<<16) | ((Rn&0x1f)<<5) | (Rd&0x1f); } + +/* B.cond — imm19 at bits 5..23 left as zero; patched by linker / MCEmitter. */ +static inline u32 aa64_b_cond(u32 cond) +{ return 0x54000000u | (cond & 0xfu); } + +/* CSINC Rd, Rn, Rm, cond (CSEL family with op2=01). 
CSET Rd, cond + * is CSINC Rd, ZR, ZR, !cond. */ +static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) +{ return 0x1A800400u | (sf<<31) | ((Rm&0x1f)<<16) | ((cond&0xfu)<<12) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond) +{ return aa64_csinc(sf, Rd, 31u, 31u, cond ^ 1u); } + +/* SBFM / UBFM / BFM (bitfield move family). + * sf opc(2) 100110 N immr(6) imms(6) Rn(5) Rd(5) + * opc: 00=SBFM, 01=BFM, 10=UBFM. N must equal sf. */ +static inline u32 aa64_sbfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) +{ return 0x13000000u | (sf<<31) | (sf<<22) | ((immr&0x3fu)<<16) | ((imms&0x3fu)<<10) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) +{ return 0x53000000u | (sf<<31) | (sf<<22) | ((immr&0x3fu)<<16) | ((imms&0x3fu)<<10) | ((Rn&0x1f)<<5) | (Rd&0x1f); } +static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) +{ return 0x33000000u | (sf<<31) | (sf<<22) | ((immr&0x3fu)<<16) | ((imms&0x3fu)<<10) | ((Rn&0x1f)<<5) | (Rd&0x1f); } /* ============================================================ * AAImpl @@ -145,6 +192,14 @@ typedef struct AASlot { u8 pad[3]; } AASlot; +typedef struct AAScope { + u8 kind; /* ScopeKind */ + u8 has_else; + u8 pad[2]; + MCLabel else_label; /* SCOPE_IF: false branch target / end-of-then */ + MCLabel end_label; /* SCOPE_IF: join point past the whole if/else */ +} AAScope; + typedef struct AAImpl { CGTarget base; SrcLoc loc; @@ -172,6 +227,13 @@ typedef struct AAImpl { /* Reg allocator (callee-saved prefix). */ u32 used_int; /* x19 + i, i in [0, used_int) */ u32 used_fp; /* v8 + i, i in [0, used_fp ) */ + + /* Structured-scope stack. Entries are not popped — IDs returned to + * the caller are stable indices into this array for the lifetime + * of the function. nscopes is reset at func_begin. 
*/ + AAScope* scopes; + u32 nscopes; + u32 scopes_cap; } AAImpl; static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; } @@ -179,6 +241,7 @@ static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; } /* Forward decls used before definition. */ static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d); static AASlot* slot_get(AAImpl* a, FrameSlot fs); +static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch); /* ---- helpers ---- */ @@ -341,6 +404,7 @@ static void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) a->used_int = 0; a->used_fp = 0; a->nslots = 0; + a->nscopes = 0; a->sret_ptr_slot = FRAME_SLOT_NONE; a->epilogue_label = mc->label_new(mc); @@ -598,18 +662,151 @@ static const Reg* aa_clobbers (CGTarget* t, RegClass c, u32* n) { (void)c; static void aa_spill_reg (CGTarget* t, Operand s, FrameSlot f, MemAccess m) { (void)s; (void)f; (void)m; aa_panic(t, "spill_reg"); } static void aa_reload_reg(CGTarget* t, Operand d, FrameSlot f, MemAccess m) { (void)d; (void)f; (void)m; aa_panic(t, "reload_reg"); } -/* ---- labels / control flow (deferred for D-group; ret uses internal label) ---- */ +/* ---- labels / control flow ---- + * + * Label is a transparent wrapper around MCLabel — the MCEmitter already + * tracks placement and applies pending fixups. Jumps emit a B with + * imm26=0 paired with R_AARCH64_JUMP26; conditional branches emit a + * B.cond with imm19=0 paired with R_AARCH64_CONDBR19. */ + +static Label aa_label_new(CGTarget* t) +{ + return (Label)t->mc->label_new(t->mc); +} + +static void aa_label_place(CGTarget* t, Label l) +{ + t->mc->label_place(t->mc, (MCLabel)l); +} + +static void aa_jump(CGTarget* t, Label l) +{ + MCEmitter* mc = t->mc; + emit32(mc, aa64_b_base()); + mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0); +} + +/* Map CmpOp → AArch64 condition code. Boolean (i1) "true" means take the + * branch / set 1. 
*/ +static u32 cmp_to_cond(CmpOp op) +{ + switch (op) { + case CMP_EQ: return 0x0u; /* EQ */ + case CMP_NE: return 0x1u; /* NE */ + case CMP_LT_U: return 0x3u; /* CC/LO */ + case CMP_LE_U: return 0x9u; /* LS */ + case CMP_GT_U: return 0x8u; /* HI */ + case CMP_GE_U: return 0x2u; /* CS/HS */ + case CMP_LT_S: return 0xbu; /* LT */ + case CMP_LE_S: return 0xdu; /* LE */ + case CMP_GT_S: return 0xcu; /* GT */ + case CMP_GE_S: return 0xau; /* GE */ + /* FP compares route through FCMP, not yet exercised here. */ + default: return 0x0u; + } +} + +/* Emit CMP a, b (= SUBS ZR, a, b). Materializes IMM operands through + * scratch x9/x10. Width comes from `a`; signedness lives in the cond. */ +static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) +{ + MCEmitter* mc = t->mc; + u32 sf = type_is_64(a_op.type) ? 1u : 0u; + /* Special-case CMP Rn, #0 so a literal zero compare doesn't need + * a scratch register. */ + if (b_op.kind == OPK_IMM && b_op.v.imm == 0 && a_op.kind == OPK_REG) { + emit32(mc, aa64_subs_imm(sf, /*Rd=ZR*/31u, reg_num(a_op), 0)); + return; + } + u32 rn = force_reg_int(t, a_op, sf, 9); + u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u); + emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/31u, rn, rm)); +} + +static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) +{ + MCEmitter* mc = t->mc; + emit_cmp_ab(t, a, b); + emit32(mc, aa64_b_cond(cmp_to_cond(op))); + mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0); +} + +static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) +{ + emit_cmp_ab(t, a, b); + u32 sf_dst = type_is_64(dst.type) ? 
1u : 0u; + emit32(t->mc, aa64_cset(sf_dst, reg_num(dst), cmp_to_cond(op))); +} + +/* ---- structured scopes (SCOPE_IF only for v1) ---- */ -static Label aa_label_new (CGTarget* t) { aa_panic(t, "label_new"); } -static void aa_label_place(CGTarget* t, Label l) { (void)l; aa_panic(t, "label_place"); } -static void aa_jump (CGTarget* t, Label l) { (void)l; aa_panic(t, "jump"); } -static void aa_cmp_branch (CGTarget* t, CmpOp op, Operand a, Operand b, Label l) { (void)op;(void)a;(void)b;(void)l; aa_panic(t, "cmp_branch"); } +static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) +{ + AAImpl* a = impl_of(t); + if (a->nscopes == a->scopes_cap) { + u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u; + AAScope* nb = arena_array(t->c->tu, AAScope, ncap); + if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes); + a->scopes = nb; + a->scopes_cap = ncap; + } + AAScope* sc = &a->scopes[a->nscopes]; + sc->kind = (u8)d->kind; + sc->has_else = 0; + sc->else_label = t->mc->label_new(t->mc); + sc->end_label = t->mc->label_new(t->mc); + + if (d->kind == SCOPE_IF) { + /* Test cond against zero, branch to else_label on EQ (false). */ + u32 sf = type_is_64(d->cond.type) ? 1u : 0u; + u32 rn = force_reg_int(t, d->cond, sf, 9); + emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/31u, rn, 0)); + emit32(t->mc, aa64_b_cond(0x0u /*EQ*/)); + t->mc->emit_label_ref(t->mc, sc->else_label, + R_AARCH64_CONDBR19, 4, 0); + } else { + /* BLOCK / LOOP not yet exercised by the corpus. 
*/ + compiler_panic(t->c, a->loc, + "aarch64 scope_begin: kind %d not yet implemented", (int)d->kind); + } -static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) { (void)d; aa_panic(t, "scope_begin"); } -static void aa_scope_else (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "scope_else"); } -static void aa_scope_end (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "scope_end"); } -static void aa_break_to (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "break_to"); } -static void aa_continue_to(CGTarget* t, CGScope s) { (void)s; aa_panic(t, "continue_to"); } + a->nscopes++; + return (CGScope)a->nscopes; /* 1-based */ +} + +static void aa_scope_else(CGTarget* t, CGScope s) +{ + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 scope_else: bad scope %u", + (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + /* End of the then-arm: jump past the else body. */ + emit32(t->mc, aa64_b_base()); + t->mc->emit_label_ref(t->mc, sc->end_label, R_AARCH64_JUMP26, 4, 0); + /* Begin of the else-arm. */ + t->mc->label_place(t->mc, sc->else_label); + sc->has_else = 1; +} + +static void aa_scope_end(CGTarget* t, CGScope s) +{ + AAImpl* a = impl_of(t); + if (s == CG_SCOPE_NONE || s > a->nscopes) { + compiler_panic(t->c, a->loc, "aarch64 scope_end: bad scope %u", + (unsigned)s); + } + AAScope* sc = &a->scopes[s - 1]; + if (sc->kind == SCOPE_IF && !sc->has_else) { + /* No else body — false-branch lands at scope_end. 
*/ + t->mc->label_place(t->mc, sc->else_label); + } + t->mc->label_place(t->mc, sc->end_label); +} + +static void aa_break_to (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "break_to"); } +static void aa_continue_to(CGTarget* t, CGScope s) { (void)s; aa_panic(t, "continue_to"); } /* ---- data movement ---- */ @@ -779,10 +976,155 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) } static void aa_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) { (void)d;(void)s;(void)a; aa_panic(t, "tls_addr_of"); } -static void aa_copy_bytes(CGTarget* t, Operand d, Operand s, AggregateAccess g) { (void)d;(void)s;(void)g; aa_panic(t, "copy_bytes"); } -static void aa_set_bytes (CGTarget* t, Operand d, Operand b, AggregateAccess g) { (void)d;(void)b;(void)g; aa_panic(t, "set_bytes"); } -static void aa_bitfield_load (CGTarget* t, Operand d, Operand a, BitFieldAccess f) { (void)d;(void)a;(void)f; aa_panic(t, "bitfield_load"); } -static void aa_bitfield_store(CGTarget* t, Operand a, Operand s, BitFieldAccess f) { (void)a;(void)s;(void)f; aa_panic(t, "bitfield_store"); } + +/* Resolve a dst/src address operand for the aggregate ops below. + * Accepts OPK_REG (already a pointer) and OPK_LOCAL (= fp - off); + * for OPK_LOCAL we materialize the address into a scratch register. */ +static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) +{ + if (op.kind == OPK_REG) return reg_num(op); + if (op.kind == OPK_LOCAL) { + AAImpl* a = impl_of(t); + AASlot* s = slot_get(a, op.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot"); + emit32(t->mc, aa64_sub_imm(1, scratch, 29, s->off, 0)); + return scratch; + } + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 agg: address kind %d unsupported", (int)op.kind); +} + +static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr, + AggregateAccess agg) +{ + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, 9); + u32 sr = agg_addr_reg(t, src_addr, (dr == 10) ? 
11u : 10u); + u32 nbytes = agg.size; + u32 i = 0; + /* Unrolled per-element copy through scratch x12. We use unscaled + * LDUR/STUR so we don't depend on `agg.align` for legality. */ + while (i + 8 <= nbytes) { + emit32(mc, aa64_ldur(3, 12, sr, (i32)i)); + emit32(mc, aa64_stur(3, 12, dr, (i32)i)); + i += 8; + } + while (i + 4 <= nbytes) { + emit32(mc, aa64_ldur(2, 12, sr, (i32)i)); + emit32(mc, aa64_stur(2, 12, dr, (i32)i)); + i += 4; + } + while (i + 2 <= nbytes) { + emit32(mc, aa64_ldur(1, 12, sr, (i32)i)); + emit32(mc, aa64_stur(1, 12, dr, (i32)i)); + i += 2; + } + while (i < nbytes) { + emit32(mc, aa64_ldur(0, 12, sr, (i32)i)); + emit32(mc, aa64_stur(0, 12, dr, (i32)i)); + i += 1; + } +} + +static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value, + AggregateAccess agg) +{ + MCEmitter* mc = t->mc; + u32 dr = agg_addr_reg(t, dst_addr, 9); + + u32 byte; + if (byte_value.kind == OPK_IMM) { + byte = (u32)(byte_value.v.imm & 0xffu); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 set_bytes: REG byte not yet supported"); + } + u32 nbytes = agg.size; + + if (byte == 0) { + /* Use XZR/WZR directly — no broadcast register needed. */ + u32 i = 0; + while (i + 8 <= nbytes) { emit32(mc, aa64_stur(3, 31, dr, (i32)i)); i += 8; } + while (i + 4 <= nbytes) { emit32(mc, aa64_stur(2, 31, dr, (i32)i)); i += 4; } + while (i + 2 <= nbytes) { emit32(mc, aa64_stur(1, 31, dr, (i32)i)); i += 2; } + while (i < nbytes) { emit32(mc, aa64_stur(0, 31, dr, (i32)i)); i += 1; } + return; + } + + /* Broadcast byte into x12 then strided-store. 
*/ + u64 b64 = byte; + b64 |= b64 << 8; + b64 |= b64 << 16; + b64 |= b64 << 32; + emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)b64); + + u32 i = 0; + while (i + 8 <= nbytes) { emit32(mc, aa64_stur(3, 12, dr, (i32)i)); i += 8; } + while (i + 4 <= nbytes) { emit32(mc, aa64_stur(2, 12, dr, (i32)i)); i += 4; } + while (i + 2 <= nbytes) { emit32(mc, aa64_stur(1, 12, dr, (i32)i)); i += 2; } + while (i < nbytes) { emit32(mc, aa64_stur(0, 12, dr, (i32)i)); i += 1; } +} + +static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr, + BitFieldAccess bf) +{ + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, 9); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + u32 sf = (storage_bytes == 8u) ? 1u : 0u; + u32 sidx = size_idx_for_bytes(storage_bytes); + u32 rd = reg_num(dst); + + /* Load the entire storage unit, then extract bf.bit_width bits at + * bf.bit_offset. UBFX (zero-extend) or SBFX (sign-extend) per the + * field's signedness. */ + emit32(mc, aa64_ldur(sidx, rd, base, (i32)bf.storage_offset)); + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + u32 imms = lsb + width - 1u; + if (bf.signed_) { + emit32(mc, aa64_sbfm(sf, rd, rd, lsb, imms)); + } else { + emit32(mc, aa64_ubfm(sf, rd, rd, lsb, imms)); + } +} + +static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src, + BitFieldAccess bf) +{ + MCEmitter* mc = t->mc; + u32 base = agg_addr_reg(t, record_addr, 9); + u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u; + u32 sf = (storage_bytes == 8u) ? 1u : 0u; + u32 sidx = size_idx_for_bytes(storage_bytes); + + /* Read-modify-write through scratch registers x10 (storage) and x11 + * (the source value). 
*/ + emit32(mc, aa64_ldur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); + + u32 src_reg; + if (src.kind == OPK_IMM) { + emit_load_imm(mc, sf, /*Rd=*/11u, src.v.imm); + src_reg = 11u; + } else if (src.kind == OPK_REG) { + src_reg = reg_num(src); + } else { + compiler_panic(t->c, impl_of(t)->loc, + "aarch64 bitfield_store: src kind %d unsupported", (int)src.kind); + } + + /* BFI Rd, Rn, #lsb, #width — insert width bits of Rn[0..width-1] + * starting at bit lsb of Rd. Encoded as BFM with + * immr = (RegSize - lsb) mod RegSize, imms = width - 1. */ + u32 reg_size = sf ? 64u : 32u; + u32 lsb = bf.bit_offset; + u32 width = bf.bit_width ? bf.bit_width : 1u; + u32 immr = (reg_size - lsb) % reg_size; + u32 imms = width - 1u; + emit32(mc, aa64_bfm(sf, /*Rd=*/10u, src_reg, immr, imms)); + + emit32(mc, aa64_stur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset)); +} /* ---- arithmetic ---- */ @@ -865,20 +1207,97 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) emit32(mc, word); } -static void aa_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, Operand b) -{ (void)op;(void)d;(void)a;(void)b; aa_panic(t, "cmp"); } - static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 rd = reg_num(dst); + u32 rn = reg_num(src); + switch (k) { + case CV_SEXT: { + if (src.cls != RC_INT || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes"); + } + u32 src_bits = type_byte_size(src.type) * 8u; + u32 sf_dst = type_is_64(dst.type) ? 1u : 0u; + emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); + return; + } + case CV_ZEXT: { + if (src.cls != RC_INT || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes"); + } + u32 src_bits = type_byte_size(src.type) * 8u; + if (src_bits == 32u) { + /* MOV Wd, Wn auto-zero-extends into the X register. 
*/ + emit32(mc, aa64_mov_reg(0, rd, rn)); + } else { + emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u)); + } + return; + } + case CV_TRUNC: { + /* Reading the W view of any X register zeros the upper 32 bits. + * For narrower truncations the consumer (store / ret) selects + * the byte width — leaving extra high bits is harmless. */ + emit32(mc, aa64_mov_reg(0, rd, rn)); + return; + } + case CV_ITOF_S: { + u32 sf_src = type_is_64(src.type) ? 1u : 0u; + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + emit32(mc, aa64_scvtf(sf_src, type, rd, rn)); + return; + } + case CV_ITOF_U: { + u32 sf_src = type_is_64(src.type) ? 1u : 0u; + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + emit32(mc, aa64_ucvtf(sf_src, type, rd, rn)); + return; + } case CV_FTOI_S: { if (src.cls != RC_FP || dst.cls != RC_INT) { compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes"); } u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 type = type_is_fp_double(src.type) ? 1u : 0u; - emit32(t->mc, aa64_fcvtzs(sf, type, reg_num(dst), reg_num(src))); + emit32(mc, aa64_fcvtzs(sf, type, rd, rn)); + return; + } + case CV_FTOI_U: { + if (src.cls != RC_FP || dst.cls != RC_INT) { + compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes"); + } + u32 sf = type_is_64(dst.type) ? 1u : 0u; + u32 type = type_is_fp_double(src.type) ? 1u : 0u; + emit32(mc, aa64_fcvtzu(sf, type, rd, rn)); + return; + } + case CV_FEXT: { + /* float (S) → double (D). */ + emit32(mc, aa64_fcvt_d_s(rd, rn)); + return; + } + case CV_FTRUNC: { + /* double (D) → float (S). */ + emit32(mc, aa64_fcvt_s_d(rd, rn)); + return; + } + case CV_BITCAST: { + /* Same-size cross-class reinterpret (i32↔f32, i64↔f64). */ + if (src.cls == RC_INT && dst.cls == RC_FP) { + u32 sz = type_byte_size(dst.type); + emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn) + : aa64_fmov_s_w(rd, rn)); + } else if (src.cls == RC_FP && dst.cls == RC_INT) { + u32 sz = type_byte_size(src.type); + emit32(mc, sz == 8 ? 
aa64_fmov_x_d(rd, rn) + : aa64_fmov_w_s(rd, rn)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 convert BITCAST: same-class not yet supported"); + } return; } default: diff --git a/src/arch/mc.c b/src/arch/mc.c @@ -119,6 +119,23 @@ static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4); break; } + case R_AARCH64_CONDBR19: { + /* imm19 at bits 5..23 of B.cond; word-aligned displacement. */ + i64 idisp = disp >> 2; + u32 imm19 = (u32)(idisp & 0x7ffffu); + const Section* s = obj_section_get(mc->base.obj, fx->sec_id); + if (!s) break; + u8 cur[4]; + buf_read(&s->bytes, fx->offset, cur, 4); + u32 word = (u32)cur[0] | ((u32)cur[1] << 8) | ((u32)cur[2] << 16) | ((u32)cur[3] << 24); + word = (word & ~(0x7ffffu << 5)) | (imm19 << 5); + cur[0] = (u8)(word & 0xff); + cur[1] = (u8)((word >> 8) & 0xff); + cur[2] = (u8)((word >> 16)& 0xff); + cur[3] = (u8)((word >> 24)& 0xff); + obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4); + break; + } default: compiler_panic(mc->base.c, mc->loc, "MCEmitter: unsupported label-ref reloc kind %d",