kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 94d7d4bd6cfe1d24cebed41aec3450f1f059cc09
parent 1b9c9e4a655a8f6ae719cea49851d3c5b8a61783
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 07:04:26 -0700

x64: add asm disasm parity slice

Diffstat:
Msrc/arch/x64/alloc.c | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
Msrc/arch/x64/arch.c | 17++++++++++++-----
Msrc/arch/x64/asm.c | 300++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Msrc/arch/x64/asm.h | 8++++++++
Asrc/arch/x64/disasm.c | 333+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/x64/disasm.h | 8++++++++
Msrc/arch/x64/internal.h | 141+++++++++++++++++++++++++++++++++++++++----------------------------------------
Msrc/arch/x64/ops.c | 219++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Asrc/arch/x64/regs.c | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/x64/regs.h | 13+++++++++++++
Atest/asm/decode/nop_ret.targets | 1+
Atest/asm/decode/x64_mov_ret.expected.txt | 2++
Atest/asm/decode/x64_mov_ret.hex | 1+
Atest/asm/decode/x64_mov_ret.targets | 1+
Atest/asm/decode/x64_nop_ret.expected.txt | 2++
Atest/asm/decode/x64_nop_ret.hex | 1+
Atest/asm/decode/x64_nop_ret.targets | 1+
Atest/asm/encode/aa64_stp_ldp_q.targets | 1+
Atest/asm/encode/exit_zero.targets | 1+
Atest/asm/encode/x64_exit_42.expected | 1+
Atest/asm/encode/x64_exit_42.expected.hex | 1+
Atest/asm/encode/x64_exit_42.s | 6++++++
Atest/asm/encode/x64_exit_42.targets | 1+
Atest/asm/listing/nop_ret.targets | 1+
Mtest/asm/run.sh | 36++++++++++++++++++++++++++++++++++++
Atest/parse/cases/cg_x64_inline_asm_mov.c | 9+++++++++
Atest/parse/cases/cg_x64_inline_asm_mov.expected | 1+
27 files changed, 1063 insertions(+), 242 deletions(-)

diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -8,17 +8,39 @@ #include <string.h> #include "arch/arch.h" -#include "arch/x64/x64.h" +#include "arch/x64/internal.h" #include "arch/x64/isa.h" +#include "arch/x64/regs.h" +#include "arch/x64/x64.h" #include "core/arena.h" #include "core/pool.h" #include "obj/obj.h" -#include "arch/x64/internal.h" - /* ============================================================ * Registers / frame */ +int x_resolve_reg_name(CGTarget* t, Sym name, Reg* out, RegClass* cls_out) { + size_t len = 0; + const char* s = pool_str(t->c->global, name, &len); + char buf[16]; + u32 idx; + if (!s || !len || len >= sizeof buf) return 1; + memcpy(buf, s, len); + buf[len] = '\0'; + if (x64_register_hw_index(buf, &idx) == 0) { + if (out) *out = (Reg)idx; + if (cls_out) *cls_out = RC_INT; + return 0; + } + if (x64_register_index(buf, &idx) != 0) return 1; + if (idx >= 17u && idx <= 32u) { + if (out) *out = (Reg)(idx - 17u); + if (cls_out) *cls_out = RC_FP; + return 0; + } + return 1; +} + FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) { XImpl* a = impl_of(t); if (a->nslots == a->slots_cap) { @@ -183,8 +205,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { if (pt->cls == ABI_CLASS_INT) { if (a->next_param_int < 6) { u32 reg = g_int_arg_regs[a->next_param_int++]; - emit_mov_store(t->mc, sz, reg, X64_RBP, - -(i32)s->off + (i32)part_off); + emit_mov_store(t->mc, sz, reg, X64_RBP, -(i32)s->off + (i32)part_off); } else { u32 caller_off = a->next_param_stack; a->next_param_stack += 8; @@ -216,8 +237,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { return st; } -void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, - MemAccess ma) { +void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma) { XImpl* a = impl_of(t); if (src.kind != OPK_REG) compiler_panic(t->c, a->loc, "x64 spill_reg: src is not OPK_REG"); @@ -230,8 +250,7 @@ void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, x_store(t, addr, src, ma); } -void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, - MemAccess ma) { +void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma) { XImpl* a = impl_of(t); if (dst.kind != OPK_REG) compiler_panic(t->c, a->loc, "x64 reload_reg: dst is not OPK_REG"); @@ -247,9 +266,7 @@ void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, /* ============================================================ * Labels / control flow */ -Label x_label_new(CGTarget* t) { - return (Label)t->mc->label_new(t->mc); -} +Label x_label_new(CGTarget* t) { return (Label)t->mc->label_new(t->mc); } void x_label_place(CGTarget* t, Label l) { t->mc->label_place(t->mc, (MCLabel)l); } @@ -298,8 +315,7 @@ void x_indirect_branch(CGTarget* t, Operand addr, const Label* targets, (void)targets; (void)ntargets; if (addr.kind != OPK_REG) { - compiler_panic(t->c, mc->loc, - "x64: indirect_branch expects REG operand"); + compiler_panic(t->c, mc->loc, "x64: indirect_branch expects REG operand"); } reg = addr.v.reg & 0xFu; /* REX.B if reg >= 8 (no REX.W needed for jmpq *) */ @@ -315,27 +331,46 @@ void x_indirect_branch(CGTarget* t, Operand addr, const Label* targets, static u32 cmp_to_cc(CmpOp op) { switch (op) { - case CMP_EQ: return X64_CC_E; - case CMP_NE: return X64_CC_NE; - case CMP_LT_U: return X64_CC_B; - case CMP_LE_U: return X64_CC_BE; - case CMP_GT_U: return X64_CC_A; - case CMP_GE_U: return X64_CC_AE; - case CMP_LT_S: return X64_CC_L; - case CMP_LE_S: return X64_CC_LE; - case CMP_GT_S: return X64_CC_G; - case CMP_GE_S: return X64_CC_GE; - default: return X64_CC_E; + case CMP_EQ: + return X64_CC_E; + case CMP_NE: + return X64_CC_NE; + case CMP_LT_U: + return X64_CC_B; + case CMP_LE_U: + return X64_CC_BE; + case CMP_GT_U: + return X64_CC_A; + case CMP_GE_U: + return X64_CC_AE; + case CMP_LT_S: + return X64_CC_L; + case CMP_LE_S: + return X64_CC_LE; + case CMP_GT_S: + return X64_CC_G; + case CMP_GE_S: + return X64_CC_GE; + default: + return X64_CC_E; } } static void emit_fp_setcc_ordered(CGTarget* t, CmpOp op, u32 dst) { u32 primary; switch (op) { - case CMP_EQ: primary = X64_CC_E; break; - case CMP_LT_F: primary = X64_CC_B; break; - case CMP_LE_F: primary = X64_CC_BE; break; - default: primary = cmp_to_cc(op); break; + case CMP_EQ: + primary = X64_CC_E; + break; + case CMP_LT_F: + primary = X64_CC_B; + break; + case CMP_LE_F: + primary = X64_CC_BE; + break; + default: + primary = cmp_to_cc(op); + break; } emit_setcc(t->mc, primary, dst); emit_movzx_r32_r8(t->mc, dst, dst); @@ -383,8 +418,7 @@ static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { emit_alu_rr(t->mc, w, 0x39, ra, rb); } -void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, - Label l) { +void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) { emit_cmp_ab(t, a, b); emit_jcc_label(t->mc, cmp_to_cc(op), (MCLabel)l); } @@ -450,8 +484,7 @@ CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) { } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) { /* Bookkeeping only. */ } else { - compiler_panic(t->c, a->loc, - "x64 scope_begin: kind %d not yet implemented", + compiler_panic(t->c, a->loc, "x64 scope_begin: kind %d not yet implemented", (int)d->kind); } a->nscopes++; diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c @@ -2,6 +2,8 @@ #include "abi/abi_internal.h" #include "arch/x64/asm.h" +#include "arch/x64/disasm.h" +#include "arch/x64/regs.h" #include "arch/x64/x64.h" #include "core/bytes.h" #include "link/link_arch.h" @@ -56,13 +58,18 @@ static const CfreePredefinedMacro x64_predefined_macros[] = { {"__LITTLE_ENDIAN__", "1"}, }; +static int x64_register_at_public(uint32_t idx, CfreeArchReg* out) { + if (!out) return 1; + return x64_register_iter_get(idx, &out->dwarf_idx, &out->name); +} + const ArchImpl arch_impl_x64 = { .kind = CFREE_ARCH_X86_64, .name = "x64", .abi_vtable = x64_abi_vtable, .cgtarget_new = x64_cgtarget_new, .asm_new = x64_arch_asm_new, - .disasm_new = NULL, + .disasm_new = x64_disasm_new, .apply_label_fixup = x64_apply_label_fixup, .link = &link_arch_x64, .elf = &x64_elf_ops, @@ -70,8 +77,8 @@ const ArchImpl arch_impl_x64 = { .predefined_macros = x64_predefined_macros, .npredefined_macros = (u32)(sizeof x64_predefined_macros / sizeof x64_predefined_macros[0]), - .register_name = NULL, - .register_index = NULL, - .register_count = NULL, - .register_at = NULL, + .register_name = x64_register_name, + .register_index = x64_register_index, + .register_count = x64_register_iter_size, + .register_at = x64_register_at_public, }; diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c @@ -3,13 +3,25 @@ #include <string.h> #include "arch/x64/internal.h" +#include "arch/x64/regs.h" #include "asm/asm_helpers.h" #include "core/arena.h" +#include "core/pool.h" +#include "core/strbuf.h" -typedef struct X64Asm { +struct X64Asm { ArchAsm base; Compiler* c; -} X64Asm; + + const AsmConstraint* outs; + Operand* out_ops; + const AsmConstraint* ins; + const Operand* in_ops; + const Sym* clobbers; + u32 nout; + u32 nin; + u32 nclob; +}; typedef enum X64AsmOperandKind { X64_ASM_OP_REG, @@ -37,41 +49,15 @@ static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, u32* width_out) { size_t n = 0; const char* p = pool_str(asm_driver_pool(d), s, &n); - u32 width = 8; + char buf[16]; u32 reg; - if (!p || n < 2) return 0; - if (p[0] == 'e') { - width = 4; - ++p; - --n; - } else if (p[0] == 'r') { - width = 8; - ++p; - --n; - } else { - return 0; - } - if (n == 2 && p[0] == 'a' && p[1] == 'x') - reg = X64_RAX; - else if (n == 2 && p[0] == 'c' && p[1] == 'x') - reg = X64_RCX; - else if (n == 2 && p[0] == 'd' && p[1] == 'x') - reg = X64_RDX; - else if (n == 2 && p[0] == 'b' && p[1] == 'x') - reg = X64_RBX; - else if (n == 2 && p[0] == 's' && p[1] == 'p') - reg = X64_RSP; - else if (n == 2 && p[0] == 'b' && p[1] == 'p') - reg = X64_RBP; - else if (n == 2 && p[0] == 's' && p[1] == 'i') - reg = X64_RSI; - else if (n == 2 && p[0] == 'd' && p[1] == 'i') - reg = X64_RDI; - else if (n >= 2 && p[0] == '1' && p[1] >= '2' && p[1] <= '5' && - (n == 2 || (n == 3 && p[2] == 'd'))) - reg = X64_R12 + (u32)(p[1] - '2'); - else - return 0; + u32 width = 8; + if (!p || n < 2 || n >= sizeof buf) return 0; + memcpy(buf, p, n); + buf[n] = '\0'; + if (buf[n - 1] == 'd' || buf[0] == 'e') width = 4; + if (x64_register_hw_index(buf, &reg) != 0) return 0; + if (reg > 15u) return 0; if (reg_out) *reg_out = reg; if (width_out) *width_out = width; return 1; @@ -80,7 +66,8 @@ static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, static u32 parse_reg(AsmDriver* d, u32* width_out) { AsmTok t; u32 reg; - if (!asm_driver_eat_punct(d, '%')) asm_driver_panic(d, "x64 asm: expected register"); + if (!asm_driver_eat_punct(d, '%')) + asm_driver_panic(d, "x64 asm: expected register"); t = asm_driver_next(d); if (t.kind != ASM_TOK_IDENT || !x64_reg_from_name(d, t.v.ident, &reg, width_out)) { @@ -159,6 +146,11 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { (void)a; (void)asm_driver_cur_section(d); + if (sym_eq(d, mnemonic, "nop")) { + u8 op = X64_NOP1; + mc->emit_bytes(mc, &op, 1); + return; + } if (sym_eq(d, mnemonic, "ret")) { emit_ret(mc); return; @@ -170,12 +162,14 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { src = parse_operand(d); if (sym_eq(d, mnemonic, "jmpq")) { - if (src.kind != X64_ASM_OP_IND_REG) asm_driver_panic(d, "x64 asm: jmpq form"); + if (src.kind != X64_ASM_OP_IND_REG) + asm_driver_panic(d, "x64 asm: jmpq form"); emit_indirect_branch(mc, 4u, src.reg); return; } if (sym_eq(d, mnemonic, "callq")) { - if (src.kind != X64_ASM_OP_IND_REG) asm_driver_panic(d, "x64 asm: callq form"); + if (src.kind != X64_ASM_OP_IND_REG) + asm_driver_panic(d, "x64 asm: callq form"); emit_indirect_branch(mc, 2u, src.reg); return; } @@ -241,13 +235,235 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { asm_driver_panic(d, "x64 asm: unsupported instruction form"); } -static void x64_arch_asm_destroy(ArchAsm* base) { (void)base; } +static void x64_arch_asm_destroy(ArchAsm* base) { + x64_asm_close((X64Asm*)base); +} -ArchAsm* x64_arch_asm_new(Compiler* c) { +X64Asm* x64_asm_open(Compiler* c) { X64Asm* a = arena_new(c->tu, X64Asm); memset(a, 0, sizeof *a); a->base.insn = x64_arch_asm_insn; a->base.destroy = x64_arch_asm_destroy; a->c = c; - return &a->base; + return a; +} + +void x64_asm_close(X64Asm* a) { (void)a; } + +ArchAsm* x64_arch_asm_new(Compiler* c) { return &x64_asm_open(c)->base; } + +void x64_inline_bind(X64Asm* a, const AsmConstraint* outs, u32 nout, + Operand* out_ops, const AsmConstraint* ins, u32 nin, + const Operand* in_ops, const Sym* clobbers, u32 nclob) { + a->outs = outs; + a->out_ops = out_ops; + a->ins = ins; + a->in_ops = in_ops; + a->clobbers = clobbers; + a->nout = nout; + a->nin = nin; + a->nclob = nclob; +} + +#define X64_INLINE_LINE_CAP 1024 + +_Noreturn static void inline_panic(X64Asm* a, const char* msg) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(a->c, loc, "x64 inline asm: %s", msg); +} + +static const char* x64_reg_spelling(u32 reg, int width32) { + static const char* r64[16] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + }; + static const char* r32[16] = { + "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", + "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", + }; + return width32 ? r32[reg & 15u] : r64[reg & 15u]; +} + +static int x64_type_prefers_32(CfreeCgTypeId type) { + if (type == 0) return 0; + return !type_is_64(type); +} + +static void render_reg(StrBuf* sb, u32 reg, int width32) { + strbuf_putc(sb, '%'); + strbuf_puts(sb, x64_reg_spelling(reg, width32)); +} + +static void render_imm(StrBuf* sb, i64 v) { + strbuf_putc(sb, '$'); + strbuf_put_i64(sb, v); +} + +static void render_indirect(StrBuf* sb, Reg base, i32 ofs) { + if (ofs) strbuf_put_i64(sb, (i64)ofs); + strbuf_putc(sb, '('); + render_reg(sb, (u32)base, 0); + strbuf_putc(sb, ')'); +} + +static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) { + u32 ntot = a->nout + a->nin; + const Operand* op; + if (idx >= ntot) inline_panic(a, "operand index out of range"); + op = (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout]; + if (form == 3) { + if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); + render_indirect(sb, op->v.ind.base, op->v.ind.ofs); + return; + } + if (op->kind == OPK_REG) { + int width32 = form == 1 ? 1 : form == 2 ? 0 : x64_type_prefers_32(op->type); + render_reg(sb, (u32)op->v.reg, width32); + return; + } + if (op->kind == OPK_IMM) { + render_imm(sb, op->v.imm); + return; + } + if (op->kind == OPK_INDIRECT) { + render_indirect(sb, op->v.ind.base, op->v.ind.ofs); + return; + } + inline_panic(a, "unsupported operand kind"); +} + +static u32 find_named_operand(X64Asm* a, const char* name, size_t len) { + Sym needle = pool_intern(a->c->global, name, len); + u32 i; + for (i = 0; i < a->nout; ++i) { + if (a->outs[i].name == needle) return i; + } + for (i = 0; i < a->nin; ++i) { + if (a->ins[i].name == needle) return a->nout + i; + } + inline_panic(a, "%[name] does not match any constraint"); +} + +static void run_one_line(X64Asm* a, MCEmitter* mc, const char* text, + size_t len) { + size_t i; + AsmLexer* lx; + AsmDriver* d; + AsmTok t; + for (i = 0; i < len; ++i) { + if (text[i] != ' ' && text[i] != '\t') break; + } + if (i == len) return; + lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len); + d = asm_driver_open_inline(a->c, mc, lx); + t = asm_driver_peek(d); + while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) { + (void)asm_driver_next(d); + if (t.kind == ASM_TOK_HASH) { + while (!asm_driver_at_eol(d)) (void)asm_driver_next(d); + } + t = asm_driver_peek(d); + } + if (t.kind == ASM_TOK_EOF) { + asm_driver_close_inline(d); + asm_lex_close(lx); + return; + } + if (t.kind != ASM_TOK_IDENT) inline_panic(a, "expected mnemonic"); + (void)asm_driver_next(d); + x64_arch_asm_insn(&a->base, d, t.v.ident); + asm_driver_close_inline(d); + asm_lex_close(lx); +} + +static void render_and_run_line(X64Asm* a, MCEmitter* mc, StrBuf* sb, + const char* start, const char* end) { + strbuf_reset(sb); + for (const char* p = start; p < end; ++p) { + char c = *p; + char n; + int form = 0; + if (c != '%') { + strbuf_putc(sb, c); + continue; + } + if (p + 1 >= end) inline_panic(a, "trailing '%' in template"); + n = *(p + 1); + if (n == '%') { + strbuf_putc(sb, '%'); + ++p; + continue; + } + if (n == 'w' || n == 'x' || n == 'a') { + form = (n == 'w') ? 1 : (n == 'x') ? 2 : 3; + ++p; + if (p + 1 >= end) inline_panic(a, "trailing '%' modifier"); + n = *(p + 1); + } + if (n == '[') { + const char* nbeg = p + 2; + const char* nend = nbeg; + u32 idx; + while (nend < end && *nend != ']') ++nend; + if (nend == end) inline_panic(a, "unterminated %[name]"); + idx = find_named_operand(a, nbeg, (size_t)(nend - nbeg)); + p = nend; + render_operand(a, sb, idx, form); + continue; + } + if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'"); + { + u32 idx = (u32)(n - '0'); + ++p; + if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') { + idx = idx * 10u + (u32)(*(p + 1) - '0'); + ++p; + } + render_operand(a, sb, idx, form); + } + } + if (sb->truncated) inline_panic(a, "inline asm line buffer overflow"); + run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb)); +} + +void x64_asm_run_template(X64Asm* a, MCEmitter* mc, const char* tmpl) { + char buf[X64_INLINE_LINE_CAP]; + StrBuf sb; + const char* line_start; + int bracket = 0; + char quote = 0; + if (!tmpl || !*tmpl) return; + strbuf_init(&sb, buf, sizeof buf); + line_start = tmpl; + for (const char* p = tmpl;; ++p) { + char c = *p; + if (c == '\0') { + render_and_run_line(a, mc, &sb, line_start, p); + break; + } + if (quote) { + if (c == '\\' && *(p + 1)) { + ++p; + continue; + } + if (c == quote) quote = 0; + continue; + } + if (c == '"' || c == '\'') { + quote = c; + continue; + } + if (c == '[') { + ++bracket; + continue; + } + if (c == ']') { + if (bracket) --bracket; + continue; + } + if (bracket == 0 && (c == '\n' || c == ';')) { + render_and_run_line(a, mc, &sb, line_start, p); + line_start = p + 1; + } + } } diff --git a/src/arch/x64/asm.h b/src/arch/x64/asm.h @@ -3,6 +3,14 @@ #include "arch/arch.h" +typedef struct X64Asm X64Asm; + +X64Asm* x64_asm_open(Compiler*); +void x64_asm_close(X64Asm*); ArchAsm* x64_arch_asm_new(Compiler*); +void x64_inline_bind(X64Asm*, const AsmConstraint* outs, u32 nout, + Operand* out_ops, const AsmConstraint* ins, u32 nin, + const Operand* in_ops, const Sym* clobbers, u32 nclob); +void x64_asm_run_template(X64Asm*, MCEmitter* mc, const char* tmpl); #endif diff --git a/src/arch/x64/disasm.c b/src/arch/x64/disasm.c @@ -0,0 +1,333 @@ +/* Small x86-64 disassembler for the instruction subset cfree can assemble. */ + +#include "arch/x64/disasm.h" + +#include <string.h> + +#include "arch/x64/isa.h" +#include "core/bytes.h" +#include "core/heap.h" +#include "core/strbuf.h" + +#define X64_DASM_MNEM_CAP 16u +#define X64_DASM_OPS_CAP 128u +#define X64_DASM_ANN_CAP 96u +#define X64_REG_RIP 16u + +typedef struct X64Disasm { + ArchDisasm base; + Compiler* c; + Heap* heap; + char mnem_buf[X64_DASM_MNEM_CAP]; + char ops_buf[X64_DASM_OPS_CAP]; + char ann_buf[X64_DASM_ANN_CAP]; + StrBuf mnem; + StrBuf ops; + StrBuf ann; +} X64Disasm; + +typedef struct X64Rex { + u8 present; + u8 w; + u8 r; + u8 x; + u8 b; +} X64Rex; + +static const char* x64_reg_name(u32 reg, u32 width) { + static const char* r64[16] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + }; + static const char* r32[16] = { + "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", + "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", + }; + return width == 4u ? r32[reg & 15u] : r64[reg & 15u]; +} + +static void put_reg(StrBuf* sb, u32 reg, u32 width) { + strbuf_putc(sb, '%'); + strbuf_puts(sb, x64_reg_name(reg, width)); +} + +static void put_imm(StrBuf* sb, i64 imm) { + strbuf_putc(sb, '$'); + strbuf_put_i64(sb, imm); +} + +static u32 parse_rex(const u8* bytes, size_t len, X64Rex* rex) { + memset(rex, 0, sizeof *rex); + if (len && bytes[0] >= 0x40u && bytes[0] <= 0x4fu) { + u8 b = bytes[0]; + rex->present = 1; + rex->w = (u8)((b >> 3) & 1u); + rex->r = (u8)((b >> 2) & 1u); + rex->x = (u8)((b >> 1) & 1u); + rex->b = (u8)(b & 1u); + return 1; + } + return 0; +} + +static void put_mem(StrBuf* sb, u32 base, i32 disp, int has_base) { + if (disp != 0 || !has_base) strbuf_put_i64(sb, (i64)disp); + if (has_base) { + strbuf_putc(sb, '('); + if (base == X64_REG_RIP) { + strbuf_puts(sb, "%rip"); + } else { + put_reg(sb, base, 8); + } + strbuf_putc(sb, ')'); + } +} + +static u32 read_disp(const u8* bytes, size_t len, u32 off, u32 n, i32* out) { + if (off + n > len) return 0; + if (n == 1u) { + *out = (i32)(i8)bytes[off]; + } else if (n == 4u) { + *out = (i32)rd_u32_le(bytes + off); + } else { + *out = 0; + } + return n; +} + +static u32 put_rm_operand(StrBuf* sb, const u8* bytes, size_t len, u32 off, + X64Rex rex, u32 rm, u32 mod, u32 width) { + if (mod == 3u) { + put_reg(sb, rm | ((u32)rex.b << 3), width); + return 0; + } + + if ((rm & 7u) == 4u) { + u8 sib; + u32 base; + i32 disp = 0; + u32 used = 1; + if (off >= len) return (u32)-1; + sib = bytes[off]; + base = (sib & 7u) | ((u32)rex.b << 3); + if (mod == 0u && (sib & 7u) == 5u) { + if (!read_disp(bytes, len, off + used, 4, &disp)) return (u32)-1; + used += 4; + put_mem(sb, X64_REG_RIP, disp, 0); + return used; + } + if (mod == 1u) { + if (!read_disp(bytes, len, off + used, 1, &disp)) return (u32)-1; + used += 1; + } else if (mod == 2u) { + if (!read_disp(bytes, len, off + used, 4, &disp)) return (u32)-1; + used += 4; + } + put_mem(sb, base, disp, 1); + return used; + } + + { + u32 base = rm | ((u32)rex.b << 3); + i32 disp = 0; + u32 used = 0; + if (mod == 0u && (rm & 7u) == 5u) { + if (!read_disp(bytes, len, off, 4, &disp)) return (u32)-1; + put_mem(sb, X64_REG_RIP, disp, 1); + return 4; + } + if (mod == 1u) { + if (!read_disp(bytes, len, off, 1, &disp)) return (u32)-1; + used = 1; + } else if (mod == 2u) { + if (!read_disp(bytes, len, off, 4, &disp)) return (u32)-1; + used = 4; + } + put_mem(sb, base, disp, 1); + return used; + } +} + +static void x64_unknown(X64Disasm* d, u8 byte) { + strbuf_reset(&d->mnem); + strbuf_puts(&d->mnem, ".byte"); + strbuf_reset(&d->ops); + strbuf_put_hex_u64(&d->ops, byte); +} + +static void set_mnemonic(X64Disasm* d, const char* s) { + strbuf_reset(&d->mnem); + strbuf_puts(&d->mnem, s); + strbuf_reset(&d->ops); +} + +static u32 decode_modrm_two_operand(X64Disasm* d, const u8* bytes, size_t len, + u32 off, X64Rex rex, const char* mnem, + u32 width, int reg_is_src) { + u8 mr; + u32 mod; + u32 reg; + u32 rm; + u32 used; + if (off >= len) return 0; + mr = bytes[off++]; + mod = (mr >> 6) & 3u; + reg = ((mr >> 3) & 7u) | ((u32)rex.r << 3); + rm = mr & 7u; + set_mnemonic(d, mnem); + if (reg_is_src) { + put_reg(&d->ops, reg, width); + strbuf_puts(&d->ops, ", "); + used = put_rm_operand(&d->ops, bytes, len, off, rex, rm, mod, width); + } else { + used = put_rm_operand(&d->ops, bytes, len, off, rex, rm, mod, width); + strbuf_puts(&d->ops, ", "); + put_reg(&d->ops, reg, width); + } + if (used == (u32)-1) return 0; + return 1u + used; +} + +static u32 x64_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, + CfreeInsn* out) { + X64Disasm* d = (X64Disasm*)base; + X64Rex rex; + u32 off; + u8 op; + u32 consumed = 1; + + if (!len) return 0; + off = parse_rex(bytes, len, &rex); + if (off >= len) return 0; + op = bytes[off++]; + + if (op == 0x90u) { + set_mnemonic(d, "nop"); + } else if (op == 0xc3u) { + set_mnemonic(d, "ret"); + } else if (op >= 0xb8u && op <= 0xbfu) { + u32 reg = (op & 7u) | ((u32)rex.b << 3); + set_mnemonic(d, rex.w ? "movq" : "movl"); + if (rex.w) { + if (off + 8u > len) return 0; + put_imm(&d->ops, (i64)rd_u64_le(bytes + off)); + off += 8u; + } else { + if (off + 4u > len) return 0; + put_imm(&d->ops, (i64)(i32)rd_u32_le(bytes + off)); + off += 4u; + } + strbuf_puts(&d->ops, ", "); + put_reg(&d->ops, reg, rex.w ? 8u : 4u); + } else if (op == 0x89u) { + consumed = decode_modrm_two_operand( + d, bytes, len, off, rex, rex.w ? "movq" : "movl", rex.w ? 8u : 4u, 1); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x8bu) { + consumed = decode_modrm_two_operand( + d, bytes, len, off, rex, rex.w ? "movq" : "movl", rex.w ? 8u : 4u, 0); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x8du) { + consumed = decode_modrm_two_operand(d, bytes, len, off, rex, "leaq", 8u, 0); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x31u) { + consumed = decode_modrm_two_operand(d, bytes, len, off, rex, "xorl", 4u, 1); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x85u) { + consumed = decode_modrm_two_operand( + d, bytes, len, off, rex, rex.w ? "testq" : "testl", rex.w ? 8u : 4u, 1); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x63u && rex.w) { + consumed = + decode_modrm_two_operand(d, bytes, len, off, rex, "movslq", 4u, 0); + if (!consumed) return 0; + off += consumed; + } else if (op == 0x0fu) { + u8 op2; + if (off >= len) return 0; + op2 = bytes[off++]; + if (op2 == 0x0bu) { + set_mnemonic(d, "ud2"); + } else if (op2 == 0x44u) { + consumed = decode_modrm_two_operand(d, bytes, len, off, rex, "cmoveq", + rex.w ? 8u : 4u, 0); + if (!consumed) return 0; + off += consumed; + } else { + x64_unknown(d, op2); + } + } else if (op == 0xffu) { + u8 mr; + u32 sub; + u32 rm; + if (off >= len) return 0; + mr = bytes[off++]; + sub = (mr >> 3) & 7u; + rm = (mr & 7u) | ((u32)rex.b << 3); + if (((mr >> 6) & 3u) == 3u && (sub == 2u || sub == 4u)) { + set_mnemonic(d, sub == 2u ? "callq" : "jmpq"); + strbuf_putc(&d->ops, '*'); + put_reg(&d->ops, rm, 8); + } else { + x64_unknown(d, op); + } + } else if ((op == 0x81u || op == 0x83u) && off < len) { + u8 mr = bytes[off++]; + u32 mod = (mr >> 6) & 3u; + u32 sub = (mr >> 3) & 7u; + u32 rm = (mr & 7u) | ((u32)rex.b << 3); + if (mod == 3u && sub == 4u) { + i64 imm; + set_mnemonic(d, rex.w ? "andq" : "andl"); + if (op == 0x83u) { + if (off >= len) return 0; + imm = (i64)(i8)bytes[off++]; + } else { + if (off + 4u > len) return 0; + imm = (i64)(i32)rd_u32_le(bytes + off); + off += 4u; + } + put_imm(&d->ops, imm); + strbuf_puts(&d->ops, ", "); + put_reg(&d->ops, rm, rex.w ? 8u : 4u); + } else { + x64_unknown(d, op); + } + } else { + x64_unknown(d, op); + } + + strbuf_reset(&d->ann); + out->vaddr = vaddr; + out->bytes = bytes; + out->nbytes = off; + out->mnemonic = strbuf_cstr(&d->mnem); + out->operands = strbuf_cstr(&d->ops); + out->annotation = strbuf_cstr(&d->ann); + return off; +} + +static void x64_destroy(ArchDisasm* base) { + X64Disasm* d = (X64Disasm*)base; + d->heap->free(d->heap, d, sizeof *d); +} + +ArchDisasm* x64_disasm_new(Compiler* c) { + Heap* h = (Heap*)c->ctx->heap; + X64Disasm* d = (X64Disasm*)h->alloc(h, sizeof *d, _Alignof(X64Disasm)); + if (!d) return NULL; + memset(d, 0, sizeof *d); + d->c = c; + d->heap = h; + d->base.decode = x64_decode; + d->base.destroy = x64_destroy; + strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); + strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); + strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf); + return &d->base; +} diff --git a/src/arch/x64/disasm.h b/src/arch/x64/disasm.h @@ -0,0 +1,8 @@ +#ifndef CFREE_ARCH_X64_DISASM_H +#define CFREE_ARCH_X64_DISASM_H + +#include "arch/arch.h" + +ArchDisasm* x64_disasm_new(Compiler*); + +#endif diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -55,14 +55,14 @@ typedef struct XAllocaPatch { typedef struct XImpl { CGTarget base; SrcLoc loc; - const CGFuncDesc *fd; + const CGFuncDesc* fd; u32 func_start; u32 prologue_pos; u32 prologue_nbytes; MCLabel epilogue_label; - XSlot *slots; + XSlot* slots; u32 nslots; u32 slots_cap; u32 cum_off; @@ -87,19 +87,19 @@ typedef struct XImpl { u8 has_planned_regs; u8 pad1[3]; - XScope *scopes; + XScope* scopes; u32 nscopes; u32 scopes_cap; - XAllocaPatch *alloca_patches; + XAllocaPatch* alloca_patches; u32 nalloca_patches; u32 alloca_patches_cap; } XImpl; -static inline XImpl *impl_of(CGTarget *t) { return (XImpl *)t; } +static inline XImpl* impl_of(CGTarget* t) { return (XImpl*)t; } -extern void debug_emit_row(Debug *, ObjSecId text_section, u32 offset, SrcLoc); -extern void debug_func_pc_range(Debug *, ObjSecId text_section, u32 begin_ofs, +extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc); +extern void debug_func_pc_range(Debug*, ObjSecId text_section, u32 begin_ofs, u32 end_ofs); /* ============================================================ @@ -118,13 +118,11 @@ static inline u32 type_byte_size(CfreeCgTypeId t) { if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I8) || t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_BOOL)) return 1; - if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I16)) - return 2; + if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I16)) return 2; if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I32) || t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) return 4; - if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F128)) - return 16; + if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F128)) return 16; return 8; } static inline int type_is_signed(CfreeCgTypeId t) { @@ -132,7 +130,7 @@ static inline int type_is_signed(CfreeCgTypeId t) { return 0; } -static inline _Noreturn void x_panic(CGTarget *t, const char *what) { +static inline _Noreturn void x_panic(CGTarget* t, const char* what) { SrcLoc loc = impl_of(t)->loc; compiler_panic(t->c, loc, "x64: %s not implemented", what); } @@ -152,79 +150,80 @@ extern const u32 g_int_arg_regs[6]; /* --- emit.c exports (lifecycle used by ops.c vtable constructor, * encoding helpers used by alloc.c and ops.c) --- */ -void x_func_begin(CGTarget *t, const CGFuncDesc *fd); -void x_func_begin_known_frame(CGTarget *t, const CGFuncDesc *fd, - const CGKnownFrameDesc *frame, - FrameSlot *out_slots); -void x_func_end(CGTarget *t); +void x_func_begin(CGTarget* t, const CGFuncDesc* fd); +void x_func_begin_known_frame(CGTarget* t, const CGFuncDesc* fd, + const CGKnownFrameDesc* frame, + FrameSlot* out_slots); +void x_func_end(CGTarget* t); -void x_coord_vtable_init(CGTarget *t); +void x_coord_vtable_init(CGTarget* t); /* encoding helpers */ -void emit_u32le(MCEmitter *mc, u32 v); -void emit_rex(MCEmitter *mc, int w, u32 reg, u32 index, u32 rm); -void emit_rex_force(MCEmitter *mc, int w, u32 reg, u32 index, u32 rm); +void emit_u32le(MCEmitter* mc, u32 v); +void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm); +void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm); u8 modrm(u32 mod, u32 reg, u32 rm); u8 sib(u32 scale, u32 index, u32 base); -void emit_mem_operand(MCEmitter *mc, u32 reg, u32 base, i32 disp); -void emit_rm_reg(MCEmitter *mc, u32 reg, u32 rm); -void emit_mov_rr(MCEmitter *mc, int w, u32 dst, u32 src); -void emit_mov_load(MCEmitter *mc, u32 size, int signed_ext, u32 dst, u32 base, +void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp); +void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm); +void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src); +void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base, i32 disp); -void emit_mov_store(MCEmitter *mc, u32 size, u32 src, u32 base, i32 disp); -void emit_lea(MCEmitter *mc, u32 dst, u32 base, i32 disp); -void emit_ret(MCEmitter *mc); -void x64_emit_load_imm(MCEmitter *mc, int is64, u32 dst, i64 imm); -void emit_alu_rr(MCEmitter *mc, int w, u8 op, u32 dst, u32 src); -void emit_imul_rr(MCEmitter *mc, int w, u32 dst, u32 src); -void emit_f7_rm(MCEmitter *mc, int w, u32 sub, u32 reg); -void emit_shift_cl(MCEmitter *mc, int w, u32 sub, u32 reg); -void emit_shift_imm(MCEmitter *mc, int w, u32 sub, u32 reg, u8 imm); -void emit_cqo_or_cdq(MCEmitter *mc, int w); -void emit_xor_self(MCEmitter *mc, int w, u32 r); -void emit_cmp_imm8(MCEmitter *mc, int w, u32 reg, i8 imm); -void emit_alu_imm8(MCEmitter *mc, int w, u32 sub, u32 reg, i8 imm); -void emit_alu_imm32(MCEmitter *mc, int w, u32 sub, u32 reg, i32 imm); -void emit_imul_imm8(MCEmitter *mc, int w, u32 dst, u32 src, i8 imm); -void emit_imul_imm32(MCEmitter *mc, int w, u32 dst, u32 src, i32 imm); +void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp); +void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp); +void emit_ret(MCEmitter* mc); +void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm); +void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src); +void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src); +void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg); +void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg); +void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm); +void emit_cqo_or_cdq(MCEmitter* mc, int w); +void emit_xor_self(MCEmitter* mc, int w, u32 r); +void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm); +void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm); +void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm); +void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm); +void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm); int imm_fits_i8(i64 imm); int imm_fits_i32(i64 imm); -void emit_test_self(MCEmitter *mc, int w, u32 reg); -void emit_setcc(MCEmitter *mc, u32 cc, u32 reg); -void emit_movzx_r32_r8(MCEmitter *mc, u32 dst, u32 src); -void emit_extend_rr(MCEmitter *mc, int w, int signed_ext, u32 src_size, u32 dst, +void emit_test_self(MCEmitter* mc, int w, u32 reg); +void emit_setcc(MCEmitter* mc, u32 cc, u32 reg); +void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src); +void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size, u32 dst, u32 src); -void emit_sse_rr(MCEmitter *mc, u8 prefix, u8 opcode, u32 dst, u32 src); -void emit_sse_load(MCEmitter *mc, u8 prefix, u8 opcode, u32 dst, u32 base, +void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src); +void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, i32 disp); -void emit_sse_store(MCEmitter *mc, u8 prefix, u8 opcode, u32 src, u32 base, +void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, i32 disp); -void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst, +void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, u32 src); /* --- alloc.c exports (used by emit.c and/or ops.c) --- */ -XSlot *x64_slot_get(XImpl *a, FrameSlot fs); -FrameSlot x_frame_slot(CGTarget *t, const FrameSlotDesc *d); -CGLocalStorage x_param(CGTarget *t, const CGParamDesc *p); -void x_spill_reg(CGTarget *t, Operand src, FrameSlot slot, MemAccess ma); -void x_reload_reg(CGTarget *t, Operand dst, FrameSlot slot, MemAccess ma); -Label x_label_new(CGTarget *t); -void x_label_place(CGTarget *t, Label l); -void emit_jmp_label(MCEmitter *mc, MCLabel l); -void emit_jcc_label(MCEmitter *mc, u32 cc, MCLabel l); -void x_jump(CGTarget *t, Label l); -void x_cmp_branch(CGTarget *t, CmpOp op, Operand a, Operand b, Label l); -void x_load_label_addr(CGTarget *t, Operand dst, Label l); -void x_indirect_branch(CGTarget *t, Operand addr, const Label *targets, +XSlot* x64_slot_get(XImpl* a, FrameSlot fs); +int x_resolve_reg_name(CGTarget* t, Sym name, Reg* out, RegClass* cls_out); +FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); +CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p); +void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma); +void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); +Label x_label_new(CGTarget* t); +void x_label_place(CGTarget* t, Label l); +void emit_jmp_label(MCEmitter* mc, MCLabel l); +void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l); +void x_jump(CGTarget* t, Label l); +void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l); +void x_load_label_addr(CGTarget* t, Operand dst, Label l); +void x_indirect_branch(CGTarget* t, Operand addr, const Label* targets, u32 ntargets); -void x_cmp(CGTarget *t, CmpOp op, Operand dst, Operand a, Operand b); -CGScope x_scope_begin(CGTarget *t, const CGScopeDesc *d); -void x_scope_else(CGTarget *t, CGScope s); -void x_scope_end(CGTarget *t, CGScope s); -void x_break_to(CGTarget *t, CGScope s); -void x_continue_to(CGTarget *t, CGScope s); -u32 x64_force_reg_int(CGTarget *t, Operand op, int w, u32 scratch); +void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b); +CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d); +void x_scope_else(CGTarget* t, CGScope s); +void x_scope_end(CGTarget* t, CGScope s); +void x_break_to(CGTarget* t, CGScope s); +void x_continue_to(CGTarget* t, CGScope s); +u32 x64_force_reg_int(CGTarget* t, Operand op, int w, u32 scratch); /* --- ops.c exports (used by alloc.c) --- */ -void x_load(CGTarget *t, Operand dst, Operand addr, MemAccess ma); -void x_store(CGTarget *t, Operand addr, Operand src, MemAccess ma); +void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma); +void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -13,14 +13,14 @@ #include <string.h> #include "arch/arch.h" -#include "arch/x64/x64.h" +#include "arch/x64/asm.h" +#include "arch/x64/internal.h" #include "arch/x64/isa.h" +#include "arch/x64/x64.h" #include "core/arena.h" #include "core/pool.h" #include "obj/obj.h" -#include "arch/x64/internal.h" - /* ============================================================ * Data movement */ @@ -75,7 +75,7 @@ static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) { emit_rex(t->mc, 0, dst_x, 0, 0); u8 op[2] = {0x0F, 0x10}; t->mc->emit_bytes(t->mc, op, 2); - u8 mr = modrm(0u, (dst_x & 7u), 5u); /* [RIP + disp32] */ + u8 mr = modrm(0u, (dst_x & 7u), 5u); /* [RIP + disp32] */ t->mc->emit_bytes(t->mc, &mr, 1); u32 disp_pos = t->mc->pos(t->mc); emit_u32le(t->mc, 0); @@ -177,7 +177,7 @@ static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym, emit_rex(t->mc, 1, dst_reg, 0, 0); u8 op = 0x8D; t->mc->emit_bytes(t->mc, &op, 1); - u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ + u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ t->mc->emit_bytes(t->mc, &mr, 1); u32 disp_pos = t->mc->pos(t->mc); emit_u32le(t->mc, 0); @@ -425,8 +425,8 @@ static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { emit_lea(t->mc, scratch, X64_RBP, -(i32)s->off); return scratch; } - compiler_panic(t->c, impl_of(t)->loc, - "x64 agg: address kind %d unsupported", (int)op.kind); + compiler_panic(t->c, impl_of(t)->loc, "x64 agg: address kind %d unsupported", + (int)op.kind); } static void x_copy_bytes(CGTarget* t, Operand da, Operand sa, @@ -524,7 +524,7 @@ static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src, emit_mov_load(t->mc, storage_bytes, 0, X64_RAX, base, (i32)bf.storage_offset); x64_emit_load_imm(t->mc, w, X64_RCX, (i64)~mask); - emit_alu_rr(t->mc, w, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */ + emit_alu_rr(t->mc, w, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */ if (src.kind == OPK_IMM) { u64 v = ((u64)src.v.imm & ones) << lsb; @@ -532,14 +532,14 @@ static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src, } else if (src.kind == OPK_REG) { emit_mov_rr(t->mc, w, X64_RCX, src.v.reg & 0xFu); x64_emit_load_imm(t->mc, w, X64_RDX, (i64)ones); - emit_alu_rr(t->mc, w, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */ + emit_alu_rr(t->mc, w, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */ if (lsb) emit_shift_imm(t->mc, w, 4u, X64_RCX, (u8)lsb); } else { compiler_panic(t->c, impl_of(t)->loc, "x64 bitfield_store: src kind %d unsupported", (int)src.kind); } - emit_alu_rr(t->mc, w, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */ + emit_alu_rr(t->mc, w, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */ emit_mov_store(t->mc, storage_bytes, X64_RAX, base, (i32)bf.storage_offset); } @@ -559,11 +559,21 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra); u8 opcode; switch (op) { - case BO_FADD: opcode = 0x58; break; - case BO_FSUB: opcode = 0x5C; break; - case BO_FMUL: opcode = 0x59; break; - case BO_FDIV: opcode = 0x5E; break; - default: opcode = 0x58; break; + case BO_FADD: + opcode = 0x58; + break; + case BO_FSUB: + opcode = 0x5C; + break; + case BO_FMUL: + opcode = 0x59; + break; + case BO_FDIV: + opcode = 0x5E; + break; + default: + opcode = 0x58; + break; } emit_sse_rr(mc, prefix2, opcode, rd, rb); return; @@ -593,10 +603,10 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, } if (op == BO_SDIV || op == BO_SREM) { emit_cqo_or_cdq(mc, w); - emit_f7_rm(mc, w, 7u, rb); /* idiv */ + emit_f7_rm(mc, w, 7u, rb); /* idiv */ } else { emit_xor_self(mc, w, X64_RDX); - emit_f7_rm(mc, w, 6u, rb); /* div */ + emit_f7_rm(mc, w, 6u, rb); /* div */ } u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX; if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg); @@ -636,11 +646,14 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, case BO_XOR: case BO_IMUL: { if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { - Operand t_op = a_op; a_op = b_op; b_op = t_op; + Operand t_op = a_op; + a_op = b_op; + b_op = t_op; } break; } - default: break; + default: + break; } /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding @@ -664,12 +677,24 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, } else { u32 sub; switch (op) { - case BO_IADD: sub = 0u; break; - case BO_OR: sub = 1u; break; - case BO_AND: sub = 4u; break; - case BO_ISUB: sub = 5u; break; - case BO_XOR: sub = 6u; break; - default: sub = 0u; break; /* unreachable */ + case BO_IADD: + sub = 0u; + break; + case BO_OR: + sub = 1u; + break; + case BO_AND: + sub = 4u; + break; + case BO_ISUB: + sub = 5u; + break; + case BO_XOR: + sub = 6u; + break; + default: + sub = 0u; + break; /* unreachable */ } if (imm_fits_i8(imm)) { if (rd != ra) emit_mov_rr(mc, w, rd, ra); @@ -690,15 +715,26 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, if (rd != ra) emit_mov_rr(mc, w, rd, ra); u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); switch (op) { - case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break; - case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break; - case BO_AND: emit_alu_rr(mc, w, 0x21, rd, rb); break; - case BO_OR: emit_alu_rr(mc, w, 0x09, rd, rb); break; - case BO_XOR: emit_alu_rr(mc, w, 0x31, rd, rb); break; - case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break; + case BO_IADD: + emit_alu_rr(mc, w, 0x01, rd, rb); + break; + case BO_ISUB: + emit_alu_rr(mc, w, 0x29, rd, rb); + break; + case BO_AND: + emit_alu_rr(mc, w, 0x21, rd, rb); + break; + case BO_OR: + emit_alu_rr(mc, w, 0x09, rd, rb); + break; + case BO_XOR: + emit_alu_rr(mc, w, 0x31, rd, rb); + break; + case BO_IMUL: + emit_imul_rr(mc, w, rd, rb); + break; default: - compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl", - (int)op); + compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl", (int)op); } } @@ -715,8 +751,8 @@ static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { "x64 unop: FP neg requires FP REG operand"); } ra = a_op.v.reg & 0xFu; - if (rd != ra) emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0xF2 : 0xF3, - 0x10, rd, ra); + if (rd != ra) + emit_sse_rr(mc, type_is_fp_double(dst.type) ? 0xF2 : 0xF3, 0x10, rd, ra); memset(mask_bytes, 0, sizeof mask_bytes); if (type_is_fp_double(dst.type)) { mask_bytes[7] = 0x80u; @@ -762,8 +798,7 @@ static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { emit_movzx_r32_r8(mc, rd, rd); return; default: - compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl", - (int)op); + compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl", (int)op); } } @@ -796,8 +831,7 @@ static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { int w_src = type_is_64(src.type) ? 1 : 0; u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; if (k == CV_ITOF_U && w_src == 1) { - compiler_panic(t->c, a->loc, - "x64 convert: u64→fp not yet implemented"); + compiler_panic(t->c, a->loc, "x64 convert: u64→fp not yet implemented"); } if (k == CV_ITOF_U) { /* u32→fp: zero-extend to 64-bit, then signed cvtsi2sd works. */ @@ -813,8 +847,7 @@ static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { int w_dst = type_is_64(dst.type) ? 1 : 0; u8 prefix2 = type_is_fp_double(src.type) ? 0xF2 : 0xF3; if (k == CV_FTOI_U && w_dst == 1) { - compiler_panic(t->c, a->loc, - "x64 convert: fp→u64 not yet implemented"); + compiler_panic(t->c, a->loc, "x64 convert: fp→u64 not yet implemented"); } emit_sse_rr_w(mc, prefix2, 0x2C, w_dst, rd, rs); return; @@ -847,8 +880,7 @@ static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { /* ============================================================ * Calls / return */ -static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset, - int tail) { +static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset, int tail) { XImpl* a = impl_of(t); Operand addr; memset(&addr, 0, sizeof addr); @@ -925,8 +957,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } if (to_stack) { Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); - emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu, - addr.v.ind.ofs); + emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu, addr.v.ind.ofs); *stack_off += 8; } return; @@ -1034,8 +1065,8 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } } -static void count_arg_stack(const CGABIValue* av, u32* next_int, - u32* next_fp, u32* stack_off) { +static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp, + u32* stack_off) { ABIArgInfo va_ai; ABIArgPart va_pt; const ABIArgInfo* ai = av->abi; @@ -1089,8 +1120,7 @@ static u32 x_tail_collect_cs_regs(const XImpl* a, Reg* cs_regs) { u32 cs_used = 0; for (u32 i = 0; i < 5u; ++i) { Reg r = g_int_order[i]; - if (a->used_cs_int_mask & (1u << r)) - cs_regs[cs_used++] = r; + if (a->used_cs_int_mask & (1u << r)) cs_regs[cs_used++] = r; } return cs_used; } @@ -1118,8 +1148,7 @@ static void x_tail_branch(CGTarget* t, Operand callee) { MCEmitter* mc = t->mc; if (callee.kind == OPK_REG) { u32 r = callee.v.reg & 0xFu; - if (r != X64_R11) - emit_mov_rr(mc, 1, X64_R11, r); + if (r != X64_R11) emit_mov_rr(mc, 1, X64_R11, r); x_tail_restore_frame(t); emit_rex(mc, 0, 0, 0, X64_R11); u8 buf[2] = {0xFF, modrm(3u, 4u, X64_R11)}; @@ -1186,8 +1215,8 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { u32 disp_pos = mc->pos(mc); emit_u32le(mc, 0); mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, - d->callee.v.global.sym, - d->callee.v.global.addend - 4, 1, 0); + d->callee.v.global.sym, d->callee.v.global.addend - 4, 1, + 0); } else if (d->callee.kind == OPK_REG) { u32 r = d->callee.v.reg & 0xFu; emit_rex(mc, 0, 0, 0, r); @@ -1209,15 +1238,16 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { for (u16 i = 0; i < ri->nparts; ++i) { const ABIArgPart* p = &ri->parts[i]; u32 src_reg; - if (p->cls == ABI_CLASS_INT) src_reg = ret_int_regs[next_int_ret++]; - else if (p->cls == ABI_CLASS_FP) src_reg = (u32)(X64_XMM0 + next_fp_ret++); - else compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl", - (int)p->cls); + if (p->cls == ABI_CLASS_INT) + src_reg = ret_int_regs[next_int_ret++]; + else if (p->cls == ABI_CLASS_FP) + src_reg = (u32)(X64_XMM0 + next_fp_ret++); + else + compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl", (int)p->cls); if (rs.kind == OPK_REG) { if (ri->nparts != 1) { - compiler_panic(t->c, a->loc, - "x64 call: REG ret_storage with %u parts", + compiler_panic(t->c, a->loc, "x64 call: REG ret_storage with %u parts", (unsigned)ri->nparts); } if (p->cls == ABI_CLASS_INT) { @@ -1248,11 +1278,11 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off); } - } else if (rs.kind == OPK_IMM && rs.type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_VOID)) { + } else if (rs.kind == OPK_IMM && + rs.type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_VOID)) { /* void ret placeholder — nothing to do. */ } else { - compiler_panic(t->c, a->loc, - "x64 call: ret_storage kind %d unsupported", + compiler_panic(t->c, a->loc, "x64 call: ret_storage kind %d unsupported", (int)rs.kind); } } @@ -1266,8 +1296,7 @@ static void x_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (p->flags & CG_CALL_TAIL) { if (p->has_sret) - compiler_panic(t->c, impl_of(t)->loc, - "x64 tail call: sret unsupported"); + compiler_panic(t->c, impl_of(t)->loc, "x64 tail call: sret unsupported"); x_check_tail_stack_args(t, x_call_plan_stack_raw_size(p)); x_tail_branch(t, p->callee); return; @@ -1279,8 +1308,8 @@ static void x_emit_call_plan(CGTarget* t, const CGCallPlan* p) { u32 disp_pos = mc->pos(mc); emit_u32le(mc, 0); mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, - p->callee.v.global.sym, - p->callee.v.global.addend - 4, 1, 0); + p->callee.v.global.sym, p->callee.v.global.addend - 4, 1, + 0); } else if (p->callee.kind == OPK_REG) { u32 r = p->callee.v.reg & 0xFu; emit_rex(mc, 0, 0, 0, r); @@ -1329,8 +1358,7 @@ static void x_load_call_arg(CGTarget* t, Operand dst, const CGCallPlanMove* m) { x_load(t, dst, src, m->mem); } -static void x_store_call_ret(CGTarget* t, const CGCallPlanRet* r, - Operand src) { +static void x_store_call_ret(CGTarget* t, const CGCallPlanRet* r, Operand src) { Operand dst = r->dst; if (dst.kind == OPK_INDIRECT) dst.v.ind.ofs += (i32)r->dst_offset; if (dst.kind == OPK_LOCAL) { @@ -1477,8 +1505,8 @@ static void x_ret(CGTarget* t, const CGABIValue* val) { const ABIArgPart* pt = &ri2->parts[i]; i32 off = base_off + (i32)pt->src_offset; if (pt->cls == ABI_CLASS_INT) { - emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++], - base_reg, off); + emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++], base_reg, + off); } else if (pt->cls == ABI_CLASS_FP) { u8 prefix2 = (pt->size == 8) ? 0xF2 : 0xF3; emit_sse_load(mc, prefix2, 0x10, (u32)(X64_XMM0 + next_fp_ret++), @@ -1533,8 +1561,8 @@ static void x_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { if (d.kind != OPK_REG) compiler_panic(t->c, a->loc, "x64 alloca: dst must be REG"); if (align > 16) { - compiler_panic(t->c, a->loc, - "x64 alloca: align %u > 16 not yet supported", align); + compiler_panic(t->c, a->loc, "x64 alloca: align %u > 16 not yet supported", + align); } if (sz.kind == OPK_IMM) { @@ -1847,7 +1875,7 @@ static u32 atomic_addr_base(CGTarget* t, Operand addr, i32* out_disp) { static void x_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, MemOrder ord) { MCEmitter* mc = t->mc; - (void)ord; /* x86: plain MOV satisfies all orders for loads. */ + (void)ord; /* x86: plain MOV satisfies all orders for loads. */ u32 sz = ma.size ? ma.size : type_byte_size(dst.type); i32 disp; u32 base = atomic_addr_base(t, addr, &disp); @@ -1891,7 +1919,7 @@ static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, Operand val, MemAccess ma, MemOrder ord) { XImpl* a = impl_of(t); MCEmitter* mc = t->mc; - (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */ + (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */ u32 sz = ma.size ? ma.size : type_byte_size(dst.type); int w = (sz == 8) ? 1 : 0; i32 disp; @@ -1907,7 +1935,7 @@ static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, } else if (val.kind == OPK_REG) { u32 vr = val.v.reg & 0xFu; if (vr != X64_R11) emit_mov_rr(mc, w, X64_R11, vr); - if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */ + if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */ } else { compiler_panic(t->c, a->loc, "x64 atomic_rmw: val kind %d unsupported", (int)val.kind); @@ -1953,7 +1981,7 @@ static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, break; case AO_NAND: emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11); - emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */ + emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */ break; default: compiler_panic(t->c, a->loc, "x64 atomic_rmw: op %d unimpl", (int)op); @@ -2156,7 +2184,8 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, b64 |= b64 << 32; x64_emit_load_imm(mc, 1, X64_RAX, (i64)b64); } else if (bv.kind == OPK_REG) { - /* Broadcast low byte of bv across 8 bytes: rax = bv * 0x0101010101010101. */ + /* Broadcast low byte of bv across 8 bytes: rax = bv * + * 0x0101010101010101. */ x64_emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll); emit_mov_rr(mc, 1, X64_RAX, bv.v.reg & 0xFu); emit_imul_rr(mc, 1, X64_RAX, X64_R11); @@ -2230,12 +2259,10 @@ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX); if (rd != ra) emit_mov_rr(mc, w, rd, ra); u32 rb = x64_force_reg_int(t, b_op, w, X64_R11); - u8 op = (kind == INTRIN_SADD_OVERFLOW || - kind == INTRIN_UADD_OVERFLOW) + u8 op = (kind == INTRIN_SADD_OVERFLOW || kind == INTRIN_UADD_OVERFLOW) ? 0x01 : 0x29; - u32 cc = (kind == INTRIN_UADD_OVERFLOW || - kind == INTRIN_USUB_OVERFLOW) + u32 cc = (kind == INTRIN_UADD_OVERFLOW || kind == INTRIN_USUB_OVERFLOW) ? X64_CC_B : X64_CC_O; emit_alu_rr(mc, w, op, rd, rb); @@ -2291,16 +2318,25 @@ static void x_asm_block(CGTarget* t, const char* tmpl, const AsmConstraint* outs, u32 no, Operand* oo, const AsmConstraint* ins, u32 ni, const Operand* io, const Sym* clobs, u32 nc) { - (void)tmpl; - (void)outs; - (void)no; - (void)oo; - (void)ins; - (void)ni; - (void)io; - (void)clobs; - (void)nc; - x_panic(t, "asm_block"); + XImpl* a_impl = impl_of(t); + u32 i; + X64Asm* a; + for (i = 0; i < nc; ++i) { + Reg phys; + RegClass cls; + if (!t->resolve_reg_name || + t->resolve_reg_name(t, clobs[i], &phys, &cls) != 0) + continue; + if (cls == RC_INT) { + if (phys == X64_RBX || phys == X64_RBP || phys == X64_R12 || + phys == X64_R13 || phys == X64_R14 || phys == X64_R15) + a_impl->used_cs_int_mask |= 1u << phys; + } + } + a = x64_asm_open(t->c); + x64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc); + x64_asm_run_template(a, t->mc, tmpl); + x64_asm_close(a); } static void x_set_loc(CGTarget* t, SrcLoc l) { @@ -2328,6 +2364,7 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->frame_slot = x_frame_slot; t->param = x_param; + t->resolve_reg_name = x_resolve_reg_name; t->spill_reg = x_spill_reg; t->reload_reg = x_reload_reg; diff --git a/src/arch/x64/regs.c b/src/arch/x64/regs.c @@ -0,0 +1,100 @@ +/* x86-64 register name table - DWARF index <-> assembler name. */ + +#include "arch/x64/regs.h" + +#include <string.h> + +typedef struct X64Reg { + uint32_t dwarf_idx; + const char* name; +} X64Reg; + +static const X64Reg X64_REGS[] = { + {0, "rax"}, {1, "rdx"}, {2, "rcx"}, {3, "rbx"}, {4, "rsi"}, + {5, "rdi"}, {6, "rbp"}, {7, "rsp"}, {8, "r8"}, {9, "r9"}, + {10, "r10"}, {11, "r11"}, {12, "r12"}, {13, "r13"}, {14, "r14"}, + {15, "r15"}, {16, "rip"}, {17, "xmm0"}, {18, "xmm1"}, {19, "xmm2"}, + {20, "xmm3"}, {21, "xmm4"}, {22, "xmm5"}, {23, "xmm6"}, {24, "xmm7"}, + {25, "xmm8"}, {26, "xmm9"}, {27, "xmm10"}, {28, "xmm11"}, {29, "xmm12"}, + {30, "xmm13"}, {31, "xmm14"}, {32, "xmm15"}, +}; + +static const uint32_t X64_REGS_N = + (uint32_t)(sizeof X64_REGS / sizeof X64_REGS[0]); + +static int gpr_alias_index(const char* name, const uint32_t* map, + uint32_t* idx_out) { + static const char* aliases[16][5] = { + {"rax", "eax", "ax", "al", NULL}, + {"rcx", "ecx", "cx", "cl", NULL}, + {"rdx", "edx", "dx", "dl", NULL}, + {"rbx", "ebx", "bx", "bl", NULL}, + {"rsp", "esp", "sp", "spl", NULL}, + {"rbp", "ebp", "bp", "bpl", NULL}, + {"rsi", "esi", "si", "sil", NULL}, + {"rdi", "edi", "di", "dil", NULL}, + {"r8", "r8d", "r8w", "r8b", NULL}, + {"r9", "r9d", "r9w", "r9b", NULL}, + {"r10", "r10d", "r10w", "r10b", NULL}, + {"r11", "r11d", "r11w", "r11b", NULL}, + {"r12", "r12d", "r12w", "r12b", NULL}, + {"r13", "r13d", "r13w", "r13b", NULL}, + {"r14", "r14d", "r14w", "r14b", NULL}, + {"r15", "r15d", "r15w", "r15b", NULL}, + }; + uint32_t i; + if (!name) return 1; + for (i = 0; i < 16u; ++i) { + uint32_t j; + for (j = 0; aliases[i][j]; ++j) { + if (!strcmp(name, aliases[i][j])) { + if (idx_out) *idx_out = map[i]; + return 0; + } + } + } + return 1; +} + +const char* x64_register_name(uint32_t dwarf_idx) { + uint32_t i; + for (i = 0; i < X64_REGS_N; ++i) { + if (X64_REGS[i].dwarf_idx == dwarf_idx) return X64_REGS[i].name; + } + return NULL; +} + +int x64_register_index(const char* name, uint32_t* idx_out) { + uint32_t i; + static const uint32_t dwarf[16] = { + 0, 2, 1, 3, 7, 6, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, + }; + if (!name) return 1; + if (name[0] == '%') ++name; + for (i = 0; i < X64_REGS_N; ++i) { + if (!strcmp(X64_REGS[i].name, name)) { + if (idx_out) *idx_out = X64_REGS[i].dwarf_idx; + return 0; + } + } + return gpr_alias_index(name, dwarf, idx_out); +} + +int x64_register_hw_index(const char* name, uint32_t* idx_out) { + static const uint32_t hw[16] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + }; + if (!name) return 1; + if (name[0] == '%') ++name; + return gpr_alias_index(name, hw, idx_out); +} + +uint32_t x64_register_iter_size(void) { return X64_REGS_N; } + +int x64_register_iter_get(uint32_t i, uint32_t* dwarf_out, + const char** name_out) { + if (i >= X64_REGS_N) return 1; + if (dwarf_out) *dwarf_out = X64_REGS[i].dwarf_idx; + if (name_out) *name_out = X64_REGS[i].name; + return 0; +} diff --git a/src/arch/x64/regs.h b/src/arch/x64/regs.h @@ -0,0 +1,13 @@ +#ifndef CFREE_ARCH_X64_REGS_H +#define CFREE_ARCH_X64_REGS_H + +#include <stdint.h> + +const char* x64_register_name(uint32_t dwarf_idx); +int x64_register_index(const char* name, uint32_t* idx_out); +int x64_register_hw_index(const char* name, uint32_t* idx_out); +uint32_t x64_register_iter_size(void); +int x64_register_iter_get(uint32_t i, uint32_t* dwarf_out, + const char** name_out); + +#endif diff --git a/test/asm/decode/nop_ret.targets b/test/asm/decode/nop_ret.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/decode/x64_mov_ret.expected.txt b/test/asm/decode/x64_mov_ret.expected.txt @@ -0,0 +1,2 @@ +0: movl $42, %eax +5: ret diff --git a/test/asm/decode/x64_mov_ret.hex b/test/asm/decode/x64_mov_ret.hex @@ -0,0 +1 @@ +b82a000000c3 diff --git a/test/asm/decode/x64_mov_ret.targets b/test/asm/decode/x64_mov_ret.targets @@ -0,0 +1 @@ +x64 diff --git a/test/asm/decode/x64_nop_ret.expected.txt b/test/asm/decode/x64_nop_ret.expected.txt @@ -0,0 +1,2 @@ +0: nop +1: ret diff --git a/test/asm/decode/x64_nop_ret.hex b/test/asm/decode/x64_nop_ret.hex @@ -0,0 +1 @@ +90c3 diff --git a/test/asm/decode/x64_nop_ret.targets b/test/asm/decode/x64_nop_ret.targets @@ -0,0 +1 @@ +x64 diff --git a/test/asm/encode/aa64_stp_ldp_q.targets b/test/asm/encode/aa64_stp_ldp_q.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/exit_zero.targets b/test/asm/encode/exit_zero.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/x64_exit_42.expected b/test/asm/encode/x64_exit_42.expected @@ -0,0 +1 @@ +42 diff --git a/test/asm/encode/x64_exit_42.expected.hex b/test/asm/encode/x64_exit_42.expected.hex @@ -0,0 +1 @@ +b82a000000c3 diff --git a/test/asm/encode/x64_exit_42.s b/test/asm/encode/x64_exit_42.s @@ -0,0 +1,6 @@ +// x64 smoke case: defines test_main returning 42. +.text +.globl test_main +test_main: + movl $42, %eax + ret diff --git a/test/asm/encode/x64_exit_42.targets b/test/asm/encode/x64_exit_42.targets @@ -0,0 +1 @@ +x64 diff --git a/test/asm/listing/nop_ret.targets b/test/asm/listing/nop_ret.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/run.sh b/test/asm/run.sh @@ -95,18 +95,25 @@ color_yel() { printf '\033[33m%s\033[0m' "$1"; } note_pass() { PASS=$((PASS+1)); printf ' %s %s\n' "$(color_grn PASS)" "$1"; } note_fail() { FAIL=$((FAIL+1)); FAIL_NAMES+=("$1"); printf ' %s %s\n' "$(color_red FAIL)" "$1"; } note_skip() { SKIP=$((SKIP+1)); SKIP_NAMES+=("$1"); printf ' %s %s — %s\n' "$(color_yel SKIP)" "$1" "$2"; } +note_na() { printf ' %s %s — not applicable to %s\n' "$(color_yel SKIP-NA)" "$1" "$TEST_ARCH"; } # ---- tool detection (mirrors test/parse/run.sh) ---------------------------- have_clang_cross=0 have_exe_runner=0 have_jit_runner=0 +have_qemu=0 +have_podman=0 is_aarch64=0 if clang $CLANG_TARGET -c -x c - -o /dev/null < /dev/null 2>/dev/null; then have_clang_cross=1 fi +QEMU_BIN="$(command -v qemu-aarch64-static 2>/dev/null || command -v qemu-aarch64 2>/dev/null || true)" +[ -n "$QEMU_BIN" ] && have_qemu=1 +command -v podman >/dev/null 2>&1 && have_podman=1 + arch_raw="$(uname -m 2>/dev/null || true)" { [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_aarch64=1 @@ -201,6 +208,20 @@ diff_case() { fi } +case_applies() { + local dir="$1" name="$2" targets tuple + targets="$dir/$name.targets" + [ -f "$targets" ] || return 0 + for tuple in $(cat "$targets"); do + case "$tuple:$TEST_ARCH" in + aa64:aa64|aarch64:aa64|arm64:aa64) return 0 ;; + x64:x64|x86_64:x64|amd64:x64) return 0 ;; + rv64:rv64|riscv64:rv64) return 0 ;; + esac + done + return 1 +} + # ---- decode and listing loops — single-path, golden-driven only ----------- if [ $RUN_T -eq 1 ] && [ -d "$TEST_DIR/decode" ]; then @@ -208,6 +229,10 @@ if [ $RUN_T -eq 1 ] && [ -d "$TEST_DIR/decode" ]; then [ -e "$in_path" ] || continue name="$(basename "$in_path" .hex)" [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue + if ! case_applies "$TEST_DIR/decode" "$name"; then + note_na "$name/T" + continue + fi work="$BUILD_DIR/asm/decode/$name" mkdir -p "$work" if [ -e "$TEST_DIR/decode/$name.skip" ]; then @@ -237,6 +262,10 @@ if [ $RUN_L -eq 1 ] && [ -d "$TEST_DIR/listing" ]; then [ -e "$in_path" ] || continue name="$(basename "$in_path" .in.bin)" [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue + if ! case_applies "$TEST_DIR/listing" "$name"; then + note_na "$name/L" + continue + fi work="$BUILD_DIR/asm/listing/$name" mkdir -p "$work" if [ -e "$TEST_DIR/listing/$name.skip" ]; then @@ -274,6 +303,13 @@ if [ -d "$TEST_DIR/encode" ]; then [ -e "$src" ] || continue name="$(basename "$src" .s)" [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue + if ! case_applies "$TEST_DIR/encode" "$name"; then + [ $RUN_H -eq 1 ] && note_na "$name/H" + [ $RUN_D -eq 1 ] && note_na "$name/D" + [ $RUN_J -eq 1 ] && note_na "$name/J" + [ $RUN_E -eq 1 ] && note_na "$name/E" + continue + fi work="$BUILD_DIR/asm/encode/$name" mkdir -p "$work" diff --git a/test/parse/cases/cg_x64_inline_asm_mov.c b/test/parse/cases/cg_x64_inline_asm_mov.c @@ -0,0 +1,9 @@ +int test_main(void) { +#if defined(__x86_64__) + long out; + __asm__ volatile("movq %1, %0" : "=r"(out) : "r"(42)); + return (int)out; +#else + return 42; +#endif +} diff --git a/test/parse/cases/cg_x64_inline_asm_mov.expected b/test/parse/cases/cg_x64_inline_asm_mov.expected @@ -0,0 +1 @@ +42