kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ca3d78eacf3e52586542094ebca90c079b9d9edd
parent 3499424fbf006dd515ad568fbe3e6f5907119437
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 16:08:15 -0700

src/opt src/cg breakup into multiple files

Diffstat:
Msrc/abi/abi.c | 2+-
Msrc/abi/abi_aapcs64.c | 2+-
Msrc/abi/abi_rv64.c | 2+-
Msrc/abi/abi_sysv_x64.c | 2+-
Dsrc/api/cg.c | 7005-------------------------------------------------------------------------------
Dsrc/api/cg_api.h | 23-----------------------
Dsrc/api/cg_type.h | 77-----------------------------------------------------------------------------
Asrc/cg/arith.c | 896+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/asm.c | 321+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/atomic.c | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/call.c | 317+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/control.c | 698+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/data.c | 292+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/debug.c | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/internal.h | 490+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/local.c | 171+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/memory.c | 597+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/session.c | 217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/symbol.c | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/type.c | 965+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/type.h | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/value.c | 1425+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/wide.c | 192+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/opt/opt.c | 1071+------------------------------------------------------------------------------
Asrc/opt/opt_internal.h | 35+++++++++++++++++++++++++++++++++++
Asrc/opt/opt_util.c | 5+++++
Asrc/opt/pass_combine.c | 552+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/opt/pass_dce.c | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/opt/pass_emit.c | 1043+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/opt/pass_hard_live.c | 269+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/opt/pass_loop.c | 150+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/opt/pass_lower.c | 1343++-----------------------------------------------------------------------------
Asrc/opt/pass_machinize.c | 198+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
33 files changed, 9450 insertions(+), 9483 deletions(-)

diff --git a/src/abi/abi.c b/src/abi/abi.c @@ -14,8 +14,8 @@ #include <string.h> #include "abi/abi_internal.h" -#include "api/cg_type.h" #include "arch/arch.h" +#include "cg/type.h" #include "core/arena.h" #include "core/core.h" diff --git a/src/abi/abi_aapcs64.c b/src/abi/abi_aapcs64.c @@ -14,7 +14,7 @@ #include <string.h> #include "abi/abi_internal.h" -#include "api/cg_type.h" +#include "cg/type.h" #include "core/arena.h" #include "core/core.h" diff --git a/src/abi/abi_rv64.c b/src/abi/abi_rv64.c @@ -14,7 +14,7 @@ #include <string.h> #include "abi/abi_internal.h" -#include "api/cg_type.h" +#include "cg/type.h" #include "core/arena.h" #include "core/core.h" diff --git a/src/abi/abi_sysv_x64.c b/src/abi/abi_sysv_x64.c @@ -16,7 +16,7 @@ #include <string.h> #include "abi/abi_internal.h" -#include "api/cg_type.h" +#include "cg/type.h" #include "core/arena.h" #include "core/core.h" diff --git a/src/api/cg.c b/src/api/cg.c @@ -1,7005 +0,0 @@ -#include <cfree/cg.h> -#include <stdarg.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -#include "abi/abi.h" -#include "api/cg_api.h" -#include "api/cg_type.h" -#include "arch/arch.h" -#include "arch/regalloc.h" -#include "asm/asm.h" -#include "asm/asm_lex.h" -#include "core/arena.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/segvec.h" -#include "debug/debug.h" -#include "obj/obj.h" -#include "opt/opt.h" - -typedef enum CgApiTypeKind { - CG_API_TYPE_PTR, - CG_API_TYPE_ARRAY, - CG_API_TYPE_ALIAS, - CG_API_TYPE_RECORD, - CG_API_TYPE_ENUM, - CG_API_TYPE_FUNC, -} CgApiTypeKind; - -typedef struct CgApiType { - CgType cg; - CfreeCgTypeId base; - CfreeSym name; - u32 count; - u32 flags; - u32 address_space; - u64 array_count; - const CfreeCgField *fields; - const CfreeCgEnumValue *values; - const CfreeCgFuncParam *params; - CfreeCgAbiAttrs ret_attrs; - CfreeCgCallConv call_conv; - u8 kind; - u8 abi_variadic; - u8 pad[2]; -} CgApiType; - -SEGVEC_DEFINE(CgApiTypes, CgApiType, CG_API_TYPE_SEG_SHIFT); 
- -typedef struct CgApiState { - Heap *heap; - CgApiTypes types; - CgType builtins[CFREE_CG_BUILTIN_COUNT]; - u8 builtins_init; - u8 pad[3]; -} CgApiState; - -static CfreeCgTypeId type_id_from_tuple(u32 seg, u32 index) { - return (CfreeCgTypeId)((seg << CG_API_TYPE_SEG_SHIFT) | - (index & CG_API_TYPE_SEG_MASK)); -} - -static CfreeCgTypeId builtin_id(CfreeCgBuiltinType t) { - return type_id_from_tuple(CG_API_TYPE_BUILTIN_SEG, (u32)t); -} - -static int decode_user_id(CfreeCgTypeId id, u32 *index_out) { - u32 seg = id >> CG_API_TYPE_SEG_SHIFT; - u32 off = id & CG_API_TYPE_SEG_MASK; - if (seg < CG_API_TYPE_USER_SEG_BIAS) - return 0; - *index_out = - ((seg - CG_API_TYPE_USER_SEG_BIAS) << CG_API_TYPE_SEG_SHIFT) | off; - return 1; -} - -static CfreeCgTypeId user_id_from_index(u32 index) { - u32 raw_seg = index >> CG_API_TYPE_SEG_SHIFT; - u32 off = index & CG_API_TYPE_SEG_MASK; - u32 seg_limit = UINT32_MAX >> CG_API_TYPE_SEG_SHIFT; - if (raw_seg > seg_limit - CG_API_TYPE_USER_SEG_BIAS) { - return CFREE_CG_TYPE_NONE; - } - return type_id_from_tuple(raw_seg + CG_API_TYPE_USER_SEG_BIAS, off); -} - -static CfreeCgTypeId type_id_for_user_index(u32 index) { - return user_id_from_index(index); -} - -static u64 cg_align_to(u64 n, u32 align) { - u64 a = align ? 
(u64)align : 1u; - return ((n + a - 1u) / a) * a; -} - -static void builtin_cg_type_init(Compiler *c, CgType *out, - CfreeCgBuiltinType t) { - memset(out, 0, sizeof(*out)); - switch (t) { - case CFREE_CG_BUILTIN_VOID: - out->kind = CFREE_CG_TYPE_VOID; - out->align = 1; - break; - case CFREE_CG_BUILTIN_BOOL: - out->kind = CFREE_CG_TYPE_BOOL; - out->size = 1; - out->align = 1; - out->integer.width = 8; - break; - case CFREE_CG_BUILTIN_I8: - out->kind = CFREE_CG_TYPE_INT; - out->size = 1; - out->align = 1; - out->integer.width = 8; - break; - case CFREE_CG_BUILTIN_I16: - out->kind = CFREE_CG_TYPE_INT; - out->size = 2; - out->align = 2; - out->integer.width = 16; - break; - case CFREE_CG_BUILTIN_I32: - out->kind = CFREE_CG_TYPE_INT; - out->size = 4; - out->align = 4; - out->integer.width = 32; - break; - case CFREE_CG_BUILTIN_I64: - out->kind = CFREE_CG_TYPE_INT; - out->size = 8; - out->align = 8; - out->integer.width = 64; - break; - case CFREE_CG_BUILTIN_I128: - out->kind = CFREE_CG_TYPE_INT; - out->size = 16; - out->align = 16; - out->integer.width = 128; - break; - case CFREE_CG_BUILTIN_F32: - out->kind = CFREE_CG_TYPE_FLOAT; - out->size = 4; - out->align = 4; - out->fp.width = 32; - break; - case CFREE_CG_BUILTIN_F64: - out->kind = CFREE_CG_TYPE_FLOAT; - out->size = 8; - out->align = 8; - out->fp.width = 64; - break; - case CFREE_CG_BUILTIN_F128: - out->kind = CFREE_CG_TYPE_FLOAT; - out->size = 16; - out->align = 16; - out->fp.width = 128; - break; - case CFREE_CG_BUILTIN_VARARG_STATE: { - ABITypeInfo info = abi_va_list_info(c->abi); - out->kind = CFREE_CG_TYPE_VARARG_STATE; - out->size = info.size; - out->align = info.align ? 
info.align : 1; - break; - } - case CFREE_CG_BUILTIN_COUNT: - break; - } -} - -static void cg_api_init_builtins(Compiler *c, CgApiState *s) { - if (s->builtins_init) - return; - for (u32 i = 0; i < CFREE_CG_BUILTIN_COUNT; ++i) { - builtin_cg_type_init(c, &s->builtins[i], (CfreeCgBuiltinType)i); - } - s->builtins_init = 1; -} - -static CgApiState *cg_api_get(Compiler *c) { - Heap *h; - CgApiState *s; - if (!c) - return NULL; - if (c->cg_api) - return (CgApiState *)c->cg_api; - h = (Heap *)c->ctx->heap; - s = (CgApiState *)h->alloc(h, sizeof(*s), _Alignof(CgApiState)); - if (!s) - return NULL; - memset(s, 0, sizeof(*s)); - s->heap = h; - CgApiTypes_init(&s->types, h); - c->cg_api = s; - c->cg_api_free = cg_api_fini; - cg_api_init_builtins(c, s); - return s; -} - -static CgApiType *api_type_from_id(Compiler *c, CfreeCgTypeId id); - -const CgType *cg_type_get(Compiler *c, CfreeCgTypeId id) { - u32 seg; - u32 off; - CgApiState *s; - CgApiType *e; - if (!c || id == CFREE_CG_TYPE_NONE) - return NULL; - seg = id >> CG_API_TYPE_SEG_SHIFT; - off = id & CG_API_TYPE_SEG_MASK; - if (seg == CG_API_TYPE_BUILTIN_SEG) { - if (off >= CFREE_CG_BUILTIN_COUNT) - return NULL; - s = cg_api_get(c); - if (!s) - return NULL; - cg_api_init_builtins(c, s); - return &s->builtins[off]; - } - e = api_type_from_id(c, id); - return e ? &e->cg : NULL; -} - -uint64_t cg_type_size(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return ty ? ty->size : 0; -} - -uint32_t cg_type_align(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return ty ? 
ty->align : 0; -} - -int cg_type_is_int(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { - return cg_type_is_int(c, ty->alias.base); - } - return ty && - (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL || - ty->kind == CFREE_CG_TYPE_ENUM); -} - -int cg_type_is_float(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { - return cg_type_is_float(c, ty->alias.base); - } - return ty && ty->kind == CFREE_CG_TYPE_FLOAT; -} - -int cg_type_is_ptr(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { - return cg_type_is_ptr(c, ty->alias.base); - } - return ty && ty->kind == CFREE_CG_TYPE_PTR; -} - -int cg_type_is_record(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { - return cg_type_is_record(c, ty->alias.base); - } - return ty && ty->kind == CFREE_CG_TYPE_RECORD; -} - -static int cg_type_is_void(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) - return cg_type_is_void(c, ty->alias.base); - return ty && ty->kind == CFREE_CG_TYPE_VOID; -} - -static int cg_type_is_aggregate(Compiler *c, CfreeCgTypeId id) { - return cg_type_is_record(c, id); -} - -static CfreeCgTypeId cg_type_ptr_to(Compiler *c, CfreeCgTypeId pointee) { - return cfree_cg_type_ptr(c, pointee, 0); -} - -static CfreeCgTypeId cg_type_pointee(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) - return cg_type_pointee(c, ty->alias.base); - return ty && ty->kind == CFREE_CG_TYPE_PTR ? 
ty->ptr.pointee - : CFREE_CG_TYPE_NONE; -} - -static CfreeCgTypeId cg_type_func_ret_id(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) - return cg_type_func_ret_id(c, ty->alias.base); - return ty && ty->kind == CFREE_CG_TYPE_FUNC ? ty->func.ret - : CFREE_CG_TYPE_NONE; -} - -static CfreeCgTypeId cg_type_func_param_id(Compiler *c, CfreeCgTypeId id, - u32 index) { - const CgType *ty = cg_type_get(c, id); - if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) - return cg_type_func_param_id(c, ty->alias.base, index); - if (!ty || ty->kind != CFREE_CG_TYPE_FUNC || index >= ty->func.nparams) - return CFREE_CG_TYPE_NONE; - return ty->func.params[index].type; -} - -static CgApiType *type_alloc(Compiler *c, CfreeCgTypeId *id_out) { - CgApiState *s = cg_api_get(c); - CgApiType *e; - u32 index; - if (!s) - return NULL; - e = CgApiTypes_push(&s->types, &index); - if (!e) - return NULL; - *id_out = user_id_from_index(index); - if (*id_out == CFREE_CG_TYPE_NONE) - return NULL; - return e; -} - -static CfreeCgTypeId find_ptr_type_id(Compiler *c, CfreeCgTypeId pointee, - u32 address_space) { - CgApiState *s; - u32 n; - if (!c || !c->cg_api) - return CFREE_CG_TYPE_NONE; - s = (CgApiState *)c->cg_api; - n = CgApiTypes_count(&s->types); - for (u32 i = 0; i < n; ++i) { - CgApiType *e = CgApiTypes_at(&s->types, i); - if (e && e->kind == CG_API_TYPE_PTR && e->base == pointee && - e->address_space == address_space) - return type_id_for_user_index(i); - } - return CFREE_CG_TYPE_NONE; -} - -static CfreeCgTypeId find_array_type_id(Compiler *c, CfreeCgTypeId elem, - u64 count) { - CgApiState *s; - u32 n; - if (!c || !c->cg_api) - return CFREE_CG_TYPE_NONE; - s = (CgApiState *)c->cg_api; - n = CgApiTypes_count(&s->types); - for (u32 i = 0; i < n; ++i) { - CgApiType *e = CgApiTypes_at(&s->types, i); - if (e && e->kind == CG_API_TYPE_ARRAY && e->base == elem && - e->array_count == count) - return type_id_for_user_index(i); - } - return 
CFREE_CG_TYPE_NONE; -} - -static int cg_params_eq(const CfreeCgFuncParam *a, const CfreeCgFuncParam *b, u32 n) { - for (u32 i = 0; i < n; ++i) - if (a[i].type != b[i].type || - memcmp(&a[i].attrs, &b[i].attrs, sizeof(a[i].attrs)) != 0) { - return 0; - } - return 1; -} - -static CfreeCgTypeId find_func_type_id(Compiler *c, CfreeCgFuncSig sig) { - CgApiState *s; - u32 n; - if (!c || !c->cg_api) - return CFREE_CG_TYPE_NONE; - s = (CgApiState *)c->cg_api; - n = CgApiTypes_count(&s->types); - for (u32 i = 0; i < n; ++i) { - CgApiType *e = CgApiTypes_at(&s->types, i); - if (!e || e->kind != CG_API_TYPE_FUNC) - continue; - if (e->base != sig.ret || e->count != sig.nparams) - continue; - if (e->abi_variadic != (sig.abi_variadic != 0)) - continue; - if (e->call_conv != sig.call_conv) - continue; - if (memcmp(&e->ret_attrs, &sig.ret_attrs, sizeof(e->ret_attrs)) != 0) { - continue; - } - if (sig.nparams && !cg_params_eq(e->params, sig.params, sig.nparams)) { - continue; - } - return type_id_for_user_index(i); - } - return CFREE_CG_TYPE_NONE; -} - -static CgApiType *api_type_from_id(Compiler *c, CfreeCgTypeId id) { - u32 index; - CgApiState *s; - CgApiType *e; - if (!c || id == CFREE_CG_TYPE_NONE) - return NULL; - if ((id >> CG_API_TYPE_SEG_SHIFT) == CG_API_TYPE_BUILTIN_SEG) - return NULL; - if (!decode_user_id(id, &index)) - return NULL; - s = (CgApiState *)c->cg_api; - if (!s) - return NULL; - e = CgApiTypes_at(&s->types, index); - return e; -} - -static CfreeCgTypeId resolve_type(Compiler *c, CfreeCgTypeId id) { - return cg_type_get(c, id) ? id : CFREE_CG_TYPE_NONE; -} - -static CfreeCgTypeId api_unalias_type(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - while (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { - id = ty->alias.base; - ty = cg_type_get(c, id); - } - return ty ? 
id : CFREE_CG_TYPE_NONE; -} - -static CfreeCgFuncParam *copy_cg_params(Compiler *c, const CfreeCgFuncParam *src, - u32 n) { - CfreeCgFuncParam *dst; - if (!n) - return NULL; - if (!src) - return NULL; - dst = arena_array(&c->global->arena, CfreeCgFuncParam, n); - if (!dst) - return NULL; - memcpy(dst, src, sizeof(*dst) * n); - return dst; -} - -static CgTypeField *copy_cg_fields(Compiler *c, const CfreeCgField *src, - u32 n) { - CgTypeField *dst; - if (!n) - return NULL; - if (!src) - return NULL; - dst = arena_array(&c->global->arena, CgTypeField, n); - if (!dst) - return NULL; - memset(dst, 0, sizeof(*dst) * n); - for (u32 i = 0; i < n; ++i) { - dst[i].name = src[i].name; - dst[i].type = src[i].type; - dst[i].align_override = src[i].align_override; - dst[i].flags = src[i].flags; - dst[i].bit_width = src[i].bit_width; - dst[i].bit_signed = src[i].bit_signed != 0; - } - return dst; -} - -static int cg_type_layout_record(Compiler *c, CgType *cg) { - u32 max_align = 1; - u64 size = 0; - if (!c || !cg || cg->kind != CFREE_CG_TYPE_RECORD) - return 0; - if (cg->record.nfields && !cg->record.fields) - return 0; - if (cg->record.is_union) { - for (u32 i = 0; i < cg->record.nfields; ++i) { - CgTypeField *f = &cg->record.fields[i]; - u64 fsize = cg_type_size(c, f->type); - u32 falign = cg_type_align(c, f->type); - if (!falign) - return 0; - if (f->align_override == 1u) { - falign = 1; - } else if (f->align_override > falign) { - falign = f->align_override; - } - if (falign > max_align) - max_align = falign; - if ((f->flags & CFREE_CG_FIELD_BITFIELD) != 0) { - f->offset = 0; - f->bit_offset = 0; - f->bit_storage_size = (u32)fsize; - if (f->bit_width == 0) - continue; - } - if (fsize > size) - size = fsize; - f->offset = 0; - } - } else { - u64 off = 0; - int active_bitfield_unit = 0; - u64 unit_off = 0; - u32 unit_bits = 0; - u32 unit_size = 0; - u32 next_bit = 0; - for (u32 i = 0; i < cg->record.nfields; ++i) { - CgTypeField *f = &cg->record.fields[i]; - u64 fsize = 
cg_type_size(c, f->type); - u32 falign = cg_type_align(c, f->type); - if (!falign) - return 0; - if (f->align_override == 1u) { - falign = 1; - } else if (f->align_override > falign) { - falign = f->align_override; - } - if (falign > max_align) - max_align = falign; - if ((f->flags & CFREE_CG_FIELD_BITFIELD) != 0) { - if (fsize > UINT32_MAX / 8u) - return 0; - if (f->bit_width == 0) { - if (active_bitfield_unit) - off = unit_off + unit_size; - off = cg_align_to(off, falign); - f->offset = off; - f->bit_offset = 0; - f->bit_storage_size = (u32)fsize; - active_bitfield_unit = 0; - next_bit = 0; - continue; - } - if (f->bit_width > fsize * 8u) - return 0; - if (!active_bitfield_unit || unit_size != (u32)fsize || - next_bit + f->bit_width > unit_bits) { - if (active_bitfield_unit) - off = unit_off + unit_size; - off = cg_align_to(off, falign); - unit_off = off; - unit_size = (u32)fsize; - unit_bits = unit_size * 8u; - next_bit = 0; - active_bitfield_unit = 1; - } - f->offset = unit_off; - f->bit_offset = (u16)next_bit; - f->bit_storage_size = unit_size; - next_bit += f->bit_width; - off = unit_off + unit_size; - continue; - } - active_bitfield_unit = 0; - off = cg_align_to(off, falign); - f->offset = off; - off += fsize; - } - size = off; - } - if (cg->record.align_override > max_align) { - max_align = cg->record.align_override; - } - cg->align = max_align; - cg->size = cg_align_to(size, max_align); - return 1; -} - -static int cg_type_set_ptr(Compiler *c, CgApiType *e, CfreeCgTypeId pointee, - u32 address_space) { - u32 ptr_size; - u32 ptr_align; - if (!cg_type_get(c, pointee)) - return 0; - memset(&e->cg, 0, sizeof(e->cg)); - ptr_size = c->target.ptr_size ? c->target.ptr_size : 8; - ptr_align = c->target.ptr_align ? 
c->target.ptr_align : ptr_size; - e->cg.kind = CFREE_CG_TYPE_PTR; - e->cg.size = ptr_size; - e->cg.align = ptr_align; - e->cg.ptr.pointee = pointee; - e->cg.ptr.address_space = address_space; - return 1; -} - -static int cg_type_set_array(Compiler *c, CgApiType *e, CfreeCgTypeId elem, - u64 count) { - const CgType *ety = cg_type_get(c, elem); - if (!ety) - return 0; - memset(&e->cg, 0, sizeof(e->cg)); - e->cg.kind = CFREE_CG_TYPE_ARRAY; - e->cg.size = ety->size * count; - e->cg.align = ety->align; - e->cg.array.elem = elem; - e->cg.array.count = count; - return 1; -} - -static int cg_type_set_alias(Compiler *c, CgApiType *e, CfreeSym name, - CfreeCgTypeId base) { - const CgType *bty = cg_type_get(c, base); - if (!bty) - return 0; - memset(&e->cg, 0, sizeof(e->cg)); - e->cg.kind = CFREE_CG_TYPE_ALIAS; - e->cg.size = bty->size; - e->cg.align = bty->align; - e->cg.alias.name = name; - e->cg.alias.base = base; - return 1; -} - -static int cg_type_set_record(Compiler *c, CgApiType *e, CfreeSym tag, - const CfreeCgField *fields, u32 nfields, - int is_union, u32 align_override, u32 flags) { - CgTypeField *copied = copy_cg_fields(c, fields, nfields); - if (nfields && !copied) - return 0; - memset(&e->cg, 0, sizeof(e->cg)); - e->cg.kind = CFREE_CG_TYPE_RECORD; - e->cg.record.tag = tag; - e->cg.record.fields = copied; - e->cg.record.nfields = nfields; - e->cg.record.is_union = is_union != 0; - e->cg.record.align_override = align_override; - e->cg.record.flags = flags; - return cg_type_layout_record(c, &e->cg); -} - -static int cg_type_set_enum(Compiler *c, CgApiType *e, CfreeSym tag, - CfreeCgTypeId base, CfreeCgEnumValue *values, - u32 nvalues) { - const CgType *bty; - if (base == CFREE_CG_TYPE_NONE) - base = builtin_id(CFREE_CG_BUILTIN_I32); - bty = cg_type_get(c, base); - if (!bty || - !(bty->kind == CFREE_CG_TYPE_INT || bty->kind == CFREE_CG_TYPE_BOOL)) { - return 0; - } - memset(&e->cg, 0, sizeof(e->cg)); - e->cg.kind = CFREE_CG_TYPE_ENUM; - e->cg.size = bty->size; - 
e->cg.align = bty->align; - e->cg.enum_.tag = tag; - e->cg.enum_.base = base; - e->cg.enum_.values = values; - e->cg.enum_.nvalues = nvalues; - return 1; -} - -static int cg_type_set_func(Compiler *c, CgApiType *e, CfreeCgFuncSig sig, - CfreeCgFuncParam *params) { - if (!cg_type_get(c, sig.ret)) - return 0; - for (u32 i = 0; i < sig.nparams; ++i) { - if (!cg_type_get(c, sig.params[i].type)) - return 0; - } - memset(&e->cg, 0, sizeof(e->cg)); - e->cg.kind = CFREE_CG_TYPE_FUNC; - e->cg.size = 1; - e->cg.align = 1; - e->cg.func.ret = sig.ret; - e->cg.func.params = params; - e->cg.func.nparams = sig.nparams; - e->cg.func.call_conv = sig.call_conv; - e->cg.func.abi_variadic = sig.abi_variadic != 0; - e->cg.func.ret_attrs = sig.ret_attrs; - return 1; -} - -CfreeCgBuiltinTypes cfree_cg_builtin_types(CfreeCompiler *c) { - CfreeCgBuiltinTypes out; - (void)c; - memset(&out, 0, sizeof(out)); - for (u32 i = 0; i < CFREE_CG_BUILTIN_COUNT; ++i) { - out.id[i] = builtin_id((CfreeCgBuiltinType)i); - } - return out; -} - -CfreeCgTypeId cfree_cg_type_ptr(CfreeCompiler *c, CfreeCgTypeId pointee, - uint32_t address_space) { - CfreeCgTypeId id; - CgApiType *e; - if (!cg_type_get(c, pointee)) - return CFREE_CG_TYPE_NONE; - id = find_ptr_type_id(c, pointee, address_space); - if (id != CFREE_CG_TYPE_NONE) - return id; - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->base = pointee; - e->address_space = address_space; - e->kind = CG_API_TYPE_PTR; - if (!cg_type_set_ptr(c, e, pointee, address_space)) { - return CFREE_CG_TYPE_NONE; - } - return id; -} - -CfreeCgTypeId cfree_cg_type_array(CfreeCompiler *c, CfreeCgTypeId elem, - uint64_t count) { - CfreeCgTypeId id; - CgApiType *e; - if (!cg_type_get(c, elem) || count > UINT32_MAX) - return CFREE_CG_TYPE_NONE; - id = find_array_type_id(c, elem, count); - if (id != CFREE_CG_TYPE_NONE) - return id; - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->base = elem; - e->array_count = count; - e->kind = 
CG_API_TYPE_ARRAY; - if (!cg_type_set_array(c, e, elem, count)) { - return CFREE_CG_TYPE_NONE; - } - return id; -} - -CfreeCgTypeId cfree_cg_type_alias(CfreeCompiler *c, CfreeSym name, - CfreeCgTypeId base) { - CfreeCgTypeId id; - CgApiType *e; - if (!cg_type_get(c, base)) - return CFREE_CG_TYPE_NONE; - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->base = base; - e->name = name; - e->kind = CG_API_TYPE_ALIAS; - return cg_type_set_alias(c, e, name, base) ? id : CFREE_CG_TYPE_NONE; -} - -CfreeCgTypeId cfree_cg_type_record(CfreeCompiler *c, CfreeSym tag, - const CfreeCgField *fields, - uint32_t nfields) { - CfreeCgRecordDesc desc; - memset(&desc, 0, sizeof desc); - desc.tag = tag; - desc.fields = fields; - desc.nfields = nfields; - return cfree_cg_type_record_ex(c, &desc); -} - -CfreeCgTypeId cfree_cg_type_record_ex(CfreeCompiler *c, - const CfreeCgRecordDesc *desc) { - CfreeCgTypeId id; - CgApiType *e; - CfreeCgField *copied = NULL; - if (!c || !desc || (desc->nfields && !desc->fields) || - desc->nfields > UINT16_MAX) { - return CFREE_CG_TYPE_NONE; - } - if (desc->nfields) { - copied = arena_array(&c->global->arena, CfreeCgField, desc->nfields); - if (!copied) - return CFREE_CG_TYPE_NONE; - } - - for (u32 i = 0; i < desc->nfields; ++i) { - if (!cg_type_get(c, desc->fields[i].type)) - return CFREE_CG_TYPE_NONE; - copied[i] = desc->fields[i]; - } - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->name = desc->tag; - e->count = desc->nfields; - e->fields = copied; - e->kind = CG_API_TYPE_RECORD; - if (!cg_type_set_record(c, e, desc->tag, desc->fields, desc->nfields, - desc->is_union, desc->align_override, 0)) { - return CFREE_CG_TYPE_NONE; - } - return id; -} - -CfreeCgTypeId cfree_cg_type_enum(CfreeCompiler *c, CfreeSym tag, - CfreeCgTypeId base, - const CfreeCgEnumValue *values, - uint32_t nvalues) { - CfreeCgEnumValue *copied = NULL; - CfreeCgTypeId id; - CgApiType *e; - if (!c || (nvalues && !values)) - return 
CFREE_CG_TYPE_NONE; - if (base == CFREE_CG_TYPE_NONE) - base = builtin_id(CFREE_CG_BUILTIN_I32); - if (!cg_type_is_int(c, base)) - return CFREE_CG_TYPE_NONE; - if (nvalues) { - copied = arena_array(&c->global->arena, CfreeCgEnumValue, nvalues); - if (!copied) - return CFREE_CG_TYPE_NONE; - memcpy(copied, values, sizeof(*copied) * nvalues); - } - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->base = base; - e->name = tag; - e->count = nvalues; - e->values = copied; - e->kind = CG_API_TYPE_ENUM; - if (!cg_type_set_enum(c, e, tag, base, copied, nvalues)) { - return CFREE_CG_TYPE_NONE; - } - return id; -} - -CfreeCgTypeId cfree_cg_type_func(CfreeCompiler *c, CfreeCgFuncSig sig) { - CfreeCgFuncParam *copied = NULL; - CfreeCgTypeId id; - CgApiType *e; - if (!c || !cg_type_get(c, sig.ret) || (sig.nparams && !sig.params) || - sig.nparams > UINT16_MAX) { - return CFREE_CG_TYPE_NONE; - } - id = find_func_type_id(c, sig); - if (id != CFREE_CG_TYPE_NONE) - return id; - if (sig.nparams) { - copied = copy_cg_params(c, sig.params, sig.nparams); - if (!copied) - return CFREE_CG_TYPE_NONE; - for (u32 i = 0; i < sig.nparams; ++i) { - if (!cg_type_get(c, sig.params[i].type)) - return CFREE_CG_TYPE_NONE; - } - } - e = type_alloc(c, &id); - if (!e) - return CFREE_CG_TYPE_NONE; - e->base = sig.ret; - e->count = sig.nparams; - e->params = copied; - e->ret_attrs = sig.ret_attrs; - e->call_conv = sig.call_conv; - e->abi_variadic = sig.abi_variadic != 0; - e->kind = CG_API_TYPE_FUNC; - if (!cg_type_set_func(c, e, sig, copied)) { - return CFREE_CG_TYPE_NONE; - } - return id; -} - -uint64_t cfree_cg_type_size(CfreeCompiler *c, CfreeCgTypeId id) { - return cg_type_size(c, id); -} - -uint32_t cfree_cg_type_align(CfreeCompiler *c, CfreeCgTypeId id) { - return cg_type_align(c, id); -} - -CfreeCgTypeKind cfree_cg_type_kind(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return ty ? 
ty->kind : CFREE_CG_TYPE_VOID; -} - -uint32_t cfree_cg_type_int_width(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (!ty) - return 0; - if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) { - return ty->integer.width; - } - if (ty->kind == CFREE_CG_TYPE_ENUM) { - return (uint32_t)ty->size * 8u; - } - if (ty->kind == CFREE_CG_TYPE_ALIAS) { - return cfree_cg_type_int_width(c, ty->alias.base); - } - return 0; -} - -uint32_t cfree_cg_type_float_width(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (!ty) - return 0; - if (ty->kind == CFREE_CG_TYPE_FLOAT) - return ty->fp.width; - if (ty->kind == CFREE_CG_TYPE_ALIAS) { - return cfree_cg_type_float_width(c, ty->alias.base); - } - return 0; -} - -CfreeCgTypeId cfree_cg_type_ptr_pointee(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_PTR) ? ty->ptr.pointee - : CFREE_CG_TYPE_NONE; -} - -CfreeCgTypeId cfree_cg_type_array_elem(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_ARRAY) ? ty->array.elem - : CFREE_CG_TYPE_NONE; -} - -uint32_t cfree_cg_type_ptr_address_space(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_PTR) ? ty->ptr.address_space : 0u; -} - -uint64_t cfree_cg_type_array_count(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_ARRAY) ? ty->array.count : 0u; -} - -CfreeCgTypeId cfree_cg_type_func_ret(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.ret - : CFREE_CG_TYPE_NONE; -} - -uint32_t cfree_cg_type_func_nparams(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? 
ty->func.nparams : 0; -} - -CfreeCgAbiAttrs cfree_cg_type_func_ret_attrs(CfreeCompiler *c, - CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - CfreeCgAbiAttrs empty; - memset(&empty, 0, sizeof(empty)); - return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.ret_attrs : empty; -} - -CfreeCgFuncParam cfree_cg_type_func_param(CfreeCompiler *c, CfreeCgTypeId id, - uint32_t index) { - const CgType *ty = cg_type_get(c, id); - CfreeCgFuncParam empty; - memset(&empty, 0, sizeof(empty)); - if (!ty || ty->kind != CFREE_CG_TYPE_FUNC || index >= ty->func.nparams) { - return empty; - } - return ty->func.params[index]; -} - -CfreeCgCallConv cfree_cg_type_func_call_conv(CfreeCompiler *c, - CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.call_conv - : CFREE_CG_CC_TARGET_C; -} - -int cfree_cg_type_func_is_variadic(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return ty && ty->kind == CFREE_CG_TYPE_FUNC && ty->func.abi_variadic; -} - -uint32_t cfree_cg_type_record_nfields(CfreeCompiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - return (ty && ty->kind == CFREE_CG_TYPE_RECORD) ? 
ty->record.nfields : 0; -} - -CfreeStatus cfree_cg_type_record_field(CfreeCompiler *c, CfreeCgTypeId id, - uint32_t index, CfreeCgField *out, - uint64_t *offset_out) { - const CgType *ty = cg_type_get(c, id); - const CgTypeField *f; - if (!ty || ty->kind != CFREE_CG_TYPE_RECORD || index >= ty->record.nfields) { - return CFREE_NOT_FOUND; - } - f = &ty->record.fields[index]; - if (out) { - out->name = f->name; - out->type = f->type; - out->align_override = f->align_override; - out->flags = f->flags; - out->bit_width = f->bit_width; - out->bit_offset = f->bit_offset; - out->bit_storage_size = f->bit_storage_size; - out->bit_signed = f->bit_signed; - } - if (offset_out) - *offset_out = f->offset; - return CFREE_OK; -} - -int cfree_cg_target_supports_call_conv(CfreeCompiler *c, CfreeCgCallConv cc) { - if (!c) - return 0; - switch (cc) { - case CFREE_CG_CC_TARGET_C: - return 1; - case CFREE_CG_CC_SYSV: - return c->target.arch == CFREE_ARCH_X86_64 && - c->target.os != CFREE_OS_WINDOWS; - case CFREE_CG_CC_WIN64: - return c->target.arch == CFREE_ARCH_X86_64 && - c->target.os == CFREE_OS_WINDOWS; - case CFREE_CG_CC_AAPCS: - return c->target.arch == CFREE_ARCH_ARM_32 || - c->target.arch == CFREE_ARCH_ARM_64; - case CFREE_CG_CC_WASM: - return c->target.arch == CFREE_ARCH_WASM; - case CFREE_CG_CC_INTERRUPT: - return 0; - } - return 0; -} - -int cfree_cg_target_supports_symbol_feature(CfreeCompiler *c, - CfreeCgSymbolFeature feat) { - if (!c) - return 0; - switch (feat) { - case CFREE_CG_SYMFEAT_WEAK: - case CFREE_CG_SYMFEAT_PROTECTED_VISIBILITY: - case CFREE_CG_SYMFEAT_COMDAT: - case CFREE_CG_SYMFEAT_COMMON: - return 1; - case CFREE_CG_SYMFEAT_TLS_LOCAL_EXEC: - case CFREE_CG_SYMFEAT_TLS_INITIAL_EXEC: - case CFREE_CG_SYMFEAT_TLS_LOCAL_DYNAMIC: - case CFREE_CG_SYMFEAT_TLS_GENERAL_DYNAMIC: - return c->target.obj == CFREE_OBJ_ELF || c->target.obj == CFREE_OBJ_MACHO; - case CFREE_CG_SYMFEAT_DLLIMPORT: - case CFREE_CG_SYMFEAT_DLLEXPORT: - case CFREE_CG_SYMFEAT_MERGE_SECTIONS: - case 
CFREE_CG_SYMFEAT_CONSTRUCTOR_PRIORITY: - return 0; - } - return 0; -} - -uint64_t cfree_cg_target_backend_features(CfreeCompiler *c) { - uint64_t out = 0; - if (!c) - return 0; - if (c->target.arch == CFREE_ARCH_X86_64 || - c->target.arch == CFREE_ARCH_X86_32) { - out |= CFREE_CG_BACKEND_UNALIGNED_MEMORY; - out |= CFREE_CG_BACKEND_RED_ZONE; - out |= CFREE_CG_BACKEND_SIMD; - } else { - out |= CFREE_CG_BACKEND_STRICT_ALIGNMENT; - } - return out; -} - -void cg_api_fini(Compiler *c) { - CgApiState *s; - if (!c || !c->cg_api) - return; - s = (CgApiState *)c->cg_api; - CgApiTypes_fini(&s->types); - s->heap->free(s->heap, s, sizeof(*s)); - c->cg_api = NULL; - c->cg_api_free = NULL; -} - -/* ============================================================ - * CfreeCg: public codegen API implementation - * - * Drives CGTarget directly with its own value stack. - * ============================================================ */ - -typedef enum SResidency { - RES_INHERENT, - RES_REG, - RES_SPILLED, - RES_FIXED_REG, -} SResidency; - -typedef enum ApiSValueKind { - SV_OPERAND, - SV_CMP, - SV_ARITH, -} ApiSValueKind; - -typedef enum ApiDelayedArithKind { - API_DELAYED_UNOP, - API_DELAYED_BINOP, -} ApiDelayedArithKind; - -typedef struct ApiDelayedCmp { - Operand a; - Operand b; - CmpOp op; - u8 a_owned; - u8 b_owned; - u8 pad[2]; -} ApiDelayedCmp; - -typedef struct ApiDelayedArith { - Operand a; - Operand b; - BinOp bin_op; - UnOp un_op; - u8 kind; - u8 a_owned; - u8 b_owned; - u8 pad; -} ApiDelayedArith; - -typedef struct ApiSValue { - Operand op; - union { - ApiDelayedCmp cmp; - ApiDelayedArith arith; - BitFieldAccess bitfield; - } delayed; - CfreeCgTypeId type; - u8 kind; - u8 res; - u8 pinned; - u8 lvalue; - u8 bitfield_lvalue; - FrameSlot spill_slot; - CfreeCgLocal source_local; -} ApiSValue; - -#define API_CG_STACK_INITIAL 16u - -typedef struct ApiCgScope { - Label break_lbl; - Label continue_lbl; - CGScope target_scope; - CfreeCgTypeId result_type; - FrameSlot result_slot; - 
u32 generation; - u8 active; - u8 pad[3]; -} ApiCgScope; - -#define API_CG_MAX_SCOPES 64 - -typedef enum ApiSourceLocalKind { - API_SOURCE_LOCAL_AUTO, - API_SOURCE_LOCAL_PARAM, -} ApiSourceLocalKind; - -typedef struct ApiSourceLocal { - CfreeCgTypeId type; - CfreeSym name; - CfreeCgLocalAttrs attrs; - SrcLoc loc; - CGLocalDesc desc; - CGLocalStorage storage; - i64 const_value; - u32 param_index; - u8 kind; - u8 const_valid; - u8 pad[2]; -} ApiSourceLocal; - -struct CfreeCg { - Compiler *c; - ObjBuilder *obj; - CGTarget *target; - MCEmitter *mc; - Debug *debug; - CGSimpleRegAlloc regalloc; - - ApiSValue *stack; - u32 sp; - u32 cap; - - ApiSourceLocal *locals; - u32 nlocals; - u32 locals_cap; - - struct { - FrameSlot *free; - u32 n; - u32 cap; - } slot_pools[3]; - - CGABIValue *avs_in_flight; - u32 avs_in_flight_n; - - CfreeCgTypeId fn_ret_type; - const ABIFuncInfo *fn_abi; - SrcLoc cur_loc; - - CGFuncDesc fn_desc; - CGParamDesc fn_params[64]; - - CfreeCgTypeId *sym_types; - CfreeCgDecl *sym_attrs; - u32 sym_cap; - - ApiCgScope scopes[API_CG_MAX_SCOPES]; - u32 nscopes; - u32 scope_generation; - - u32 rodata_counter; - - ObjSecId data_sec; - ObjSymId data_sym; - u32 data_base; - u64 data_size; -}; - -static DebugTypeId api_debug_type(CfreeCg *g, CfreeCgTypeId id) { - const CgType *ty; - if (!g || !g->debug) - return DEBUG_TYPE_NONE; - ty = cg_type_get(g->c, id); - if (!ty) - return DEBUG_TYPE_NONE; - switch (ty->kind) { - case CFREE_CG_TYPE_VOID: - return debug_type_void(g->debug); - case CFREE_CG_TYPE_BOOL: - return debug_type_base(g->debug, pool_intern_cstr(g->c->global, "_Bool"), - DEBUG_BE_BOOL, 1); - case CFREE_CG_TYPE_INT: { - const char *name = "long long"; - if (ty->integer.width <= 8) - name = "char"; - else if (ty->integer.width <= 16) - name = "short"; - else if (ty->integer.width <= 32) - name = "int"; - return debug_type_base(g->debug, pool_intern_cstr(g->c->global, name), - DEBUG_BE_SIGNED, - (u32)((ty->integer.width + 7u) / 8u)); - } - case 
CFREE_CG_TYPE_FLOAT: { - const char *name = ty->fp.width <= 32 ? "float" : "double"; - return debug_type_base(g->debug, pool_intern_cstr(g->c->global, name), - DEBUG_BE_FLOAT, (u32)((ty->fp.width + 7u) / 8u)); - } - case CFREE_CG_TYPE_PTR: { - DebugTypeId pointee = api_debug_type(g, ty->ptr.pointee); - if (pointee == DEBUG_TYPE_NONE) - pointee = debug_type_void(g->debug); - return debug_type_ptr(g->debug, pointee); - } - case CFREE_CG_TYPE_ARRAY: { - DebugTypeId elem = api_debug_type(g, ty->array.elem); - u32 count = ty->array.count > UINT32_MAX ? 0u : (u32)ty->array.count; - if (elem == DEBUG_TYPE_NONE) - elem = debug_type_void(g->debug); - return debug_type_array(g->debug, elem, count); - } - case CFREE_CG_TYPE_FUNC: { - Heap *h = (Heap *)g->c->ctx->heap; - DebugTypeId ret = api_debug_type(g, ty->func.ret); - DebugTypeId *params = NULL; - DebugTypeId fn; - if (ret == DEBUG_TYPE_NONE) - ret = debug_type_void(g->debug); - if (ty->func.nparams) { - params = (DebugTypeId *)h->alloc(h, sizeof(*params) * ty->func.nparams, - _Alignof(DebugTypeId)); - if (!params) - return DEBUG_TYPE_NONE; - for (u32 i = 0; i < ty->func.nparams; ++i) { - params[i] = api_debug_type(g, ty->func.params[i].type); - if (params[i] == DEBUG_TYPE_NONE) - params[i] = debug_type_void(g->debug); - } - } - fn = debug_type_func(g->debug, ret, params, ty->func.nparams, - ty->func.abi_variadic); - if (params) - h->free(h, params, sizeof(*params) * ty->func.nparams); - return fn; - } - case CFREE_CG_TYPE_RECORD: { - DebugTypeBuilder *b = - debug_type_record_begin(g->debug, (Sym)ty->record.tag, - ty->record.is_union, (u32)ty->size, ty->align); - if (!b) - return DEBUG_TYPE_NONE; - return debug_type_record_end(b); - } - case CFREE_CG_TYPE_ENUM: - return debug_type_base(g->debug, pool_intern_cstr(g->c->global, "int"), - DEBUG_BE_SIGNED, ty->size ? 
(u32)ty->size : 4u); - case CFREE_CG_TYPE_ALIAS: { - DebugTypeId base = api_debug_type(g, ty->alias.base); - if (base == DEBUG_TYPE_NONE) - base = debug_type_void(g->debug); - return debug_type_typedef(g->debug, (Sym)ty->alias.name, base); - } - case CFREE_CG_TYPE_VARARG_STATE: - return debug_type_void(g->debug); - } - return DEBUG_TYPE_NONE; -} - -/* ---- value stack helpers ---- */ - -static u8 api_type_class(CfreeCgTypeId ty) { - if (ty == builtin_id(CFREE_CG_BUILTIN_F32) || - ty == builtin_id(CFREE_CG_BUILTIN_F64) || - ty == builtin_id(CFREE_CG_BUILTIN_F128)) { - return RC_FP; - } - return RC_INT; -} - -static int api_is_f128_type(Compiler *c, CfreeCgTypeId ty) { - const CgType *cg; - ty = api_unalias_type(c, ty); - cg = cg_type_get(c, ty); - return cg && cg->kind == CFREE_CG_TYPE_FLOAT && cg->fp.width == 128; -} - -static int api_is_i128_type(Compiler *c, CfreeCgTypeId ty) { - const CgType *cg; - ty = api_unalias_type(c, ty); - cg = cg_type_get(c, ty); - return cg && cg->kind == CFREE_CG_TYPE_INT && cg->integer.width == 128; -} - -static int api_is_wide16_scalar_type(Compiler *c, CfreeCgTypeId ty) { - return api_is_f128_type(c, ty) || api_is_i128_type(c, ty); -} - -/* Whether a CGABIValue.storage for `ty` must be an address operand (pointing - * to a memory image of the value) rather than a value operand. Today this is - * driven by the type shape — aggregates and wide16 scalars cannot fit in a - * single Operand. A future refactor will key this off ABIArgInfo so a - * trivial-DIRECT ABI (e.g. for a C-source backend) can keep aggregates as - * value operands. See doc/CBACKEND.md. 
*/ -static int api_arg_storage_must_be_addr(Compiler *c, CfreeCgTypeId ty) { - return cg_type_is_aggregate(c, ty) || api_is_wide16_scalar_type(c, ty); -} - -static Operand api_op_imm(i64 v, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_IMM; - o.cls = api_type_class(ty); - o.type = ty; - o.v.imm = v; - return o; -} - -static Operand api_op_reg(Reg r, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_REG; - o.cls = api_type_class(ty); - o.type = ty; - o.v.reg = r; - return o; -} - -static Operand api_op_local(FrameSlot s, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_LOCAL; - o.cls = RC_INT; - o.type = ty; - o.v.frame_slot = s; - return o; -} - -static Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_GLOBAL; - o.cls = RC_INT; - o.type = ty; - o.v.global.sym = sym; - o.v.global.addend = addend; - return o; -} - -static Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_INDIRECT; - o.cls = RC_INT; - o.type = ty; - o.v.ind.base = base; - o.v.ind.ofs = ofs; - return o; -} - -static u8 api_residency_for(const Operand *o) { - if (o->kind == OPK_REG || o->kind == OPK_INDIRECT) - return RES_REG; - return RES_INHERENT; -} - -static ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_OPERAND; - sv.op = op; - sv.type = ty; - sv.res = api_residency_for(&op); - sv.spill_slot = FRAME_SLOT_NONE; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -static ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty) { - ApiSValue sv = api_make_sv(op, ty); - sv.lvalue = 1; - return sv; -} - -static ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, - CfreeCgTypeId result_ty, int a_owned, - int b_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_CMP; - sv.type = result_ty; - 
sv.delayed.cmp.op = op; - sv.delayed.cmp.a = a; - sv.delayed.cmp.b = b; - sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; - sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; - sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -static ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, - int a_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_ARITH; - sv.delayed.arith.kind = API_DELAYED_UNOP; - sv.type = ty; - sv.delayed.arith.un_op = op; - sv.delayed.arith.a = a; - sv.delayed.arith.a_owned = a_owned ? 1u : 0u; - sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -static ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, - CfreeCgTypeId ty, int a_owned, - int b_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_ARITH; - sv.delayed.arith.kind = API_DELAYED_BINOP; - sv.type = ty; - sv.delayed.arith.bin_op = op; - sv.delayed.arith.a = a; - sv.delayed.arith.b = b; - sv.delayed.arith.a_owned = a_owned ? 1u : 0u; - sv.delayed.arith.b_owned = b_owned ? 1u : 0u; - sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -static ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, - int owned) { - ApiSValue sv = api_make_sv(op, ty); - if (op.kind == OPK_REG && !owned) - sv.res = RES_FIXED_REG; - return sv; -} - -static CfreeCgTypeId api_sv_type(const ApiSValue *sv) { - return sv->type ? 
sv->type : sv->op.type; -} - -static int api_operand_can_address(const Operand *o) { - return o->kind == OPK_LOCAL || o->kind == OPK_GLOBAL || - o->kind == OPK_INDIRECT; -} - -static int api_sv_op_is(const ApiSValue *sv, OpKind kind) { - return sv->kind == SV_OPERAND && sv->op.kind == kind; -} - -static int api_sv_op_is_reg_or_imm(const ApiSValue *sv) { - return sv->kind == SV_OPERAND && - (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG); -} - -static int api_is_lvalue_sv(const ApiSValue *sv) { - return sv->lvalue && - (sv->bitfield_lvalue || api_operand_can_address(&sv->op) || - (sv->source_local != CFREE_CG_LOCAL_NONE && - sv->op.kind == OPK_REG)); -} - -static void api_stack_grow(CfreeCg *g, u32 want) { - Heap *h = g->c->ctx->heap; - u32 cap = g->cap; - ApiSValue *nb; - if (cap >= want) - return; - while (cap < want) - cap = cap ? cap * 2u : API_CG_STACK_INITIAL; - nb = (ApiSValue *)h->alloc(h, sizeof(ApiSValue) * cap, _Alignof(ApiSValue)); - if (g->stack) { - memcpy(nb, g->stack, sizeof(ApiSValue) * g->sp); - h->free(h, g->stack, sizeof(ApiSValue) * g->cap); - } - g->stack = nb; - g->cap = cap; -} - -static void api_push(CfreeCg *g, ApiSValue v) { - api_stack_grow(g, g->sp + 1); - g->stack[g->sp++] = v; -} - -static ApiSValue api_pop(CfreeCg *g) { - if (g->sp == 0) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: stack underflow"); - } - return g->stack[--g->sp]; -} - -/* ---- register class helpers ---- */ - -static u8 api_class_of_sv(const ApiSValue *sv) { - if (sv->kind == SV_CMP || sv->kind == SV_ARITH) - return RC_INT; - if (sv->op.kind == OPK_INDIRECT) - return RC_INT; - if (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG) - return sv->op.cls; - return api_type_class(api_sv_type(sv)); -} - -static Reg api_reg_of_sv(const ApiSValue *sv) { - if (sv->kind == SV_ARITH || sv->kind == SV_CMP) - return (Reg)REG_NONE; - if (sv->op.kind == OPK_REG) - return sv->op.v.reg; - if (sv->op.kind == OPK_INDIRECT) - return sv->op.v.ind.base; - return (Reg)REG_NONE; -} - 
-static void api_set_owned_reg(ApiSValue *sv, Reg r) { - if (sv->op.kind == OPK_REG) - sv->op.v.reg = r; - else if (sv->op.kind == OPK_INDIRECT) - sv->op.v.ind.base = r; -} - -static CfreeCgTypeId api_owned_reg_type(CfreeCg *g, const ApiSValue *sv) { - if (sv->op.kind == OPK_INDIRECT) { - CfreeCgTypeId base = - sv->type ? sv->type : builtin_id(CFREE_CG_BUILTIN_VOID); - return cg_type_ptr_to(g->c, base); - } - return api_sv_type(sv); -} - -/* ---- spill slot management ---- */ - -static void api_take_spill_slot_alloc(CfreeCg *g, u8 cls, FrameSlot *out) { - CGTarget *T = g->target; - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.kind = FS_SPILL; - fsd.size = (cls == RC_FP) ? 16 : 8; - fsd.align = fsd.size; - *out = T->frame_slot(T, &fsd); -} - -static FrameSlot api_take_spill_slot(CfreeCg *g, u8 cls) { - if (cls < 3 && g->slot_pools[cls].n > 0) { - return g->slot_pools[cls].free[--g->slot_pools[cls].n]; - } - FrameSlot s; - api_take_spill_slot_alloc(g, cls, &s); - return s; -} - -static void api_return_spill_slot(CfreeCg *g, FrameSlot s, u8 cls) { - Heap *h; - if (s == FRAME_SLOT_NONE) - return; - if (cls >= 3) - return; - h = g->c->ctx->heap; - if (g->slot_pools[cls].n >= g->slot_pools[cls].cap) { - u32 new_cap = g->slot_pools[cls].cap ? 
g->slot_pools[cls].cap * 2 : 8; - FrameSlot *nb = (FrameSlot *)h->alloc(h, sizeof(FrameSlot) * new_cap, - _Alignof(FrameSlot)); - if (g->slot_pools[cls].free) { - memcpy(nb, g->slot_pools[cls].free, - sizeof(FrameSlot) * g->slot_pools[cls].n); - h->free(h, g->slot_pools[cls].free, - sizeof(FrameSlot) * g->slot_pools[cls].cap); - } - g->slot_pools[cls].free = nb; - g->slot_pools[cls].cap = new_cap; - } - g->slot_pools[cls].free[g->slot_pools[cls].n++] = s; -} - -/* ---- register allocation / spill ---- */ - -static ApiSValue *api_pick_victim(CfreeCg *g, u8 cls) { - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue *sv = &g->stack[i]; - if (sv->res != RES_REG || sv->pinned) - continue; - if (api_class_of_sv(sv) != cls) - continue; - return sv; - } - return NULL; -} - -static MemAccess api_mem_for_spill(CfreeCg *g, const ApiSValue *sv); -static u8 api_type_class(CfreeCgTypeId ty); - -static void api_regalloc_begin(CfreeCg *g) { - CGTarget *T = g->target; - if (T->virtual_regs) { - cg_simple_regalloc_init_virtual(&g->regalloc); - return; - } - cg_simple_regalloc_init(&g->regalloc); - for (u32 c = 0; c < 3u; ++c) { - const Reg *regs = NULL; - u32 nregs = 0; - if (T->get_allocable_regs) - T->get_allocable_regs(T, (RegClass)c, &regs, &nregs); - if (regs && nregs) - cg_simple_regalloc_set_ordered(&g->regalloc, (RegClass)c, regs, nregs); - } -} - -static void api_regalloc_finish(CfreeCg *g) { - if (cg_simple_regalloc_is_virtual(&g->regalloc)) - return; - if (!g->target->reserve_hard_regs) - return; - for (u32 c = 0; c < 3u; ++c) { - Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; - u32 nused = cg_simple_regalloc_used_regs(&g->regalloc, (RegClass)c, used, - CG_SIMPLE_REGALLOC_MAX_REGS); - if (nused) - g->target->reserve_hard_regs(g->target, (RegClass)c, used, nused); - } -} - -static Reg api_alloc_reg(CfreeCg *g, u8 cls) { - Reg r = cg_simple_regalloc_alloc(&g->regalloc, (RegClass)cls); - if (r == (Reg)REG_NONE && cg_simple_regalloc_is_virtual(&g->regalloc)) { - compiler_panic(g->c, 
g->cur_loc, "CfreeCg: virtual regalloc exhausted"); - } - return r; -} - -static void api_free_reg(CfreeCg *g, Reg r, u8 cls) { - int rc; - if (r == (Reg)REG_NONE) - return; - rc = cg_simple_regalloc_free(&g->regalloc, (RegClass)cls, r); - if (rc == 1) - return; - if (rc == -1) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - reg %u already free in class %u", - (unsigned)r, (unsigned)cls); - } - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - reg %u not in class %u pool", (unsigned)r, - (unsigned)cls); -} - -static int api_spill_avs_victim(CfreeCg *g, u8 cls) { - CGTarget *T = g->target; - if (!g->avs_in_flight) - return 0; - for (u32 i = 0; i < g->avs_in_flight_n; ++i) { - CGABIValue *av = &g->avs_in_flight[i]; - if (av->storage.kind != OPK_REG) - continue; - if (av->storage.cls != cls) - continue; - FrameSlot slot = api_take_spill_slot(g, cls); - ApiSValue tmp = api_make_sv(av->storage, av->type); - T->spill_reg(T, av->storage, slot, api_mem_for_spill(g, &tmp)); - api_free_reg(g, av->storage.v.reg, cls); - Operand local = api_op_local(slot, av->type); - local.cls = cls; - av->storage = local; - return 1; - } - return 0; -} - -static MemAccess api_mem_for_lvalue(CfreeCg *g, const Operand *lv, - CfreeCgTypeId ty) { - MemAccess m; - memset(&m, 0, sizeof m); - m.type = ty; - m.size = ty ? abi_cg_sizeof(g->c->abi, ty) : 0; - m.align = ty ? 
abi_cg_alignof(g->c->abi, ty) : 0; - m.flags = MF_NONE; - if (lv->kind == OPK_LOCAL) { - m.alias.kind = (u8)ALIAS_LOCAL; - m.alias.v.local_id = (i32)lv->v.frame_slot; - } else if (lv->kind == OPK_GLOBAL) { - m.alias.kind = (u8)ALIAS_GLOBAL; - } else { - m.alias.kind = (u8)ALIAS_UNKNOWN; - } - return m; -} - -static MemAccess api_mem_from_access(CfreeCg *g, const Operand *lv, - CfreeCgMemAccess access) { - CfreeCgTypeId ty = resolve_type(g->c, access.type); - MemAccess m = api_mem_for_lvalue(g, lv, ty); - if (access.align) - m.align = access.align; - m.addr_space = (u16)access.address_space; - if (access.flags & CFREE_CG_MEM_VOLATILE) - m.flags |= MF_VOLATILE; - if (!access.align || (ty && access.align < abi_cg_alignof(g->c->abi, ty))) { - m.flags |= MF_UNALIGNED; - } - return m; -} - -static CfreeCgTypeId api_mem_access_type(CfreeCg *g, CfreeCgMemAccess access, - CfreeCgTypeId fallback, - const char *who) { - CfreeCgTypeId ty = resolve_type(g->c, access.type); - if (!ty) - ty = resolve_type(g->c, fallback); - if (!ty) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: %s has no value type", who); - } - return ty; -} - -static u32 api_mem_type_size(CfreeCg *g, CfreeCgTypeId ty, const char *who) { - ty = resolve_type(g->c, ty); - if (!ty) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: %s has invalid type", who); - } - if (cg_type_is_void(g->c, ty)) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: %s uses void type", who); - } - return abi_cg_sizeof(g->c->abi, ty); -} - -static void api_require_scalar_mem_type(CfreeCg *g, const char *who, - CfreeCgTypeId ty) { - if (cg_type_is_aggregate(g->c, ty)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: %s cannot use aggregate value type (size %u); " - "copy fields or use byte memory operations", - who, (unsigned)api_mem_type_size(g, ty, who)); - } - (void)api_mem_type_size(g, ty, who); -} - -static void api_require_pointer_value(CfreeCg *g, const char *who, - CfreeCgTypeId ty) { - if (!cg_type_pointee(g->c, ty)) { - 
compiler_panic(g->c, g->cur_loc, - "CfreeCg: %s operand must be a pointer", who); - } -} - -static void api_validate_memory_value(CfreeCg *g, const char *who, - CfreeCgTypeId access_ty, - CfreeCgTypeId value_ty) { - u32 access_size; - u32 value_size; - access_ty = resolve_type(g->c, access_ty); - value_ty = resolve_type(g->c, value_ty); - api_require_scalar_mem_type(g, who, access_ty); - if (!value_ty) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: %s value has no type", who); - } - if (cg_type_is_aggregate(g->c, value_ty)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: %s value is aggregate (size %u); copy fields or " - "use byte memory operations", - who, (unsigned)api_mem_type_size(g, value_ty, who)); - } - access_size = api_mem_type_size(g, access_ty, who); - value_size = api_mem_type_size(g, value_ty, who); - if (access_size != value_size || - api_type_class(access_ty) != api_type_class(value_ty)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: %s value type/size mismatch: access size %u, " - "value size %u", - who, (unsigned)access_size, (unsigned)value_size); - } -} - -static MemAccess api_mem_for_spill(CfreeCg *g, const ApiSValue *sv) { - CfreeCgTypeId ty = api_owned_reg_type(g, sv); - MemAccess m; - memset(&m, 0, sizeof m); - m.type = ty; - m.size = ty ? abi_cg_sizeof(g->c->abi, ty) : 8; - m.align = ty ? 
abi_cg_alignof(g->c->abi, ty) : 8; - m.alias.kind = (u8)ALIAS_UNKNOWN; - return m; -} - -static void api_release_operand_reg(CfreeCg *g, Operand op) { - if (op.kind == OPK_REG) - api_free_reg(g, op.v.reg, op.cls); -} - -static int api_sv_owns_operand_reg(const ApiSValue *sv, const Operand *op) { - return sv->res == RES_REG && op->kind == OPK_REG && sv->op.kind == OPK_REG && - sv->op.v.reg == op->v.reg && sv->op.cls == op->cls; -} - -static void api_release_cmp(CfreeCg *g, ApiSValue *sv) { - if (sv->delayed.cmp.a_owned) - api_release_operand_reg(g, sv->delayed.cmp.a); - if (sv->delayed.cmp.b_owned && - (sv->delayed.cmp.b.kind != OPK_REG || sv->delayed.cmp.a.kind != OPK_REG || - sv->delayed.cmp.b.v.reg != sv->delayed.cmp.a.v.reg || - sv->delayed.cmp.b.cls != sv->delayed.cmp.a.cls || - !sv->delayed.cmp.a_owned)) { - api_release_operand_reg(g, sv->delayed.cmp.b); - } - memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); - memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); - sv->delayed.cmp.a_owned = 0; - sv->delayed.cmp.b_owned = 0; - sv->kind = SV_OPERAND; -} - -static void api_release_arith(CfreeCg *g, ApiSValue *sv) { - if (sv->delayed.arith.a_owned) - api_release_operand_reg(g, sv->delayed.arith.a); - if (sv->delayed.arith.b_owned && - (sv->delayed.arith.b.kind != OPK_REG || - sv->delayed.arith.a.kind != OPK_REG || - sv->delayed.arith.b.v.reg != sv->delayed.arith.a.v.reg || - sv->delayed.arith.b.cls != sv->delayed.arith.a.cls || - !sv->delayed.arith.a_owned)) { - api_release_operand_reg(g, sv->delayed.arith.b); - } - memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); - memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); - sv->delayed.arith.a_owned = 0; - sv->delayed.arith.b_owned = 0; - sv->kind = SV_OPERAND; -} - -static void api_materialize_cmp_to(CfreeCg *g, ApiSValue *sv, Operand dst) { - g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a, - sv->delayed.cmp.b); - if (sv->delayed.cmp.a_owned && 
sv->delayed.cmp.a.kind == OPK_REG && - (sv->delayed.cmp.a.v.reg != dst.v.reg || - sv->delayed.cmp.a.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.cmp.a); - } - if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - (sv->delayed.cmp.b.v.reg != dst.v.reg || - sv->delayed.cmp.b.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.cmp.b); - } - memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); - memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); - sv->delayed.cmp.a_owned = 0; - sv->delayed.cmp.b_owned = 0; - sv->kind = SV_OPERAND; - sv->op = dst; - sv->type = dst.type; - sv->res = RES_REG; - sv->lvalue = 0; -} - -static void api_materialize_arith_to(CfreeCg *g, ApiSValue *sv, Operand dst) { - if (sv->delayed.arith.kind == API_DELAYED_UNOP) { - g->target->unop(g->target, sv->delayed.arith.un_op, dst, - sv->delayed.arith.a); - } else { - g->target->binop(g->target, sv->delayed.arith.bin_op, dst, - sv->delayed.arith.a, - sv->delayed.arith.b); - } - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && - (sv->delayed.arith.a.v.reg != dst.v.reg || - sv->delayed.arith.a.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.arith.a); - } - if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_REG && - (sv->delayed.arith.b.v.reg != dst.v.reg || - sv->delayed.arith.b.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.arith.b); - } - memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); - memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); - sv->delayed.arith.a_owned = 0; - sv->delayed.arith.b_owned = 0; - sv->kind = SV_OPERAND; - sv->op = dst; - sv->type = dst.type; - sv->res = RES_REG; - sv->lvalue = 0; -} - -static int api_arith_rhs_reusable(const ApiSValue *sv) { - if (sv->delayed.arith.kind == API_DELAYED_UNOP) - return 0; - switch (sv->delayed.arith.bin_op) { - case BO_IADD: - case BO_IMUL: - case BO_AND: - case BO_OR: - case BO_XOR: - return 1; - default: - return 
0; - } -} - -static int api_materialize_cmp_victim(CfreeCg *g, u8 cls) { - if (cls != RC_INT) - return 0; - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue *sv = &g->stack[i]; - Operand dst; - if (sv->kind != SV_CMP || sv->pinned) - continue; - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && - sv->delayed.cmp.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.a.v.reg, api_sv_type(sv)); - } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - sv->delayed.cmp.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.b.v.reg, api_sv_type(sv)); - } else { - continue; - } - api_materialize_cmp_to(g, sv, dst); - return 1; - } - return 0; -} - -static int api_materialize_arith_victim(CfreeCg *g, u8 cls) { - if (cls != RC_INT) - return 0; - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue *sv = &g->stack[i]; - Operand dst; - if (sv->kind != SV_ARITH || sv->pinned) - continue; - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && - sv->delayed.arith.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.a.v.reg, api_sv_type(sv)); - } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && - sv->delayed.arith.b.kind == OPK_REG && - sv->delayed.arith.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.b.v.reg, api_sv_type(sv)); - } else { - continue; - } - api_materialize_arith_to(g, sv, dst); - return 1; - } - return 0; -} - -static Reg api_alloc_reg_or_spill(CfreeCg *g, u8 cls, CfreeCgTypeId ty) { - CGTarget *T = g->target; - Reg r; - (void)ty; - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) - return r; - - ApiSValue *victim = api_pick_victim(g, cls); - if (!victim && api_materialize_cmp_victim(g, cls)) { - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) - return r; - victim = api_pick_victim(g, cls); - } - if (!victim && api_materialize_arith_victim(g, cls)) { - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) - return r; - victim = api_pick_victim(g, cls); - } - if (victim) { - 
FrameSlot slot = api_take_spill_slot(g, cls); - CfreeCgTypeId rty = api_owned_reg_type(g, victim); - Operand victim_reg = api_op_reg((Reg)api_reg_of_sv(victim), rty); - T->spill_reg(T, victim_reg, slot, api_mem_for_spill(g, victim)); - api_free_reg(g, victim_reg.v.reg, cls); - victim->spill_slot = slot; - victim->res = RES_SPILLED; - api_set_owned_reg(victim, (Reg)REG_NONE); - } else if (!api_spill_avs_victim(g, cls)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - no spillable victim (class %u)", - (unsigned)cls); - } - - r = api_alloc_reg(g, cls); - if (r == (Reg)REG_NONE) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - class %u still empty after spill", - (unsigned)cls); - } - return r; -} - -static void api_ensure_reg(CfreeCg *g, ApiSValue *sv) { - if (sv->kind == SV_CMP) { - CfreeCgTypeId ty = api_sv_type(sv); - Operand dst; - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && - sv->delayed.cmp.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty); - } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - sv->delayed.cmp.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty); - } else { - Reg r = - api_alloc_reg_or_spill(g, RC_INT, - ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); - dst = api_op_reg(r, ty); - } - api_materialize_cmp_to(g, sv, dst); - return; - } - if (sv->kind == SV_ARITH) { - CfreeCgTypeId ty = api_sv_type(sv); - Operand dst; - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && - sv->delayed.arith.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.a.v.reg, ty); - } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && - sv->delayed.arith.b.kind == OPK_REG && - sv->delayed.arith.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.b.v.reg, ty); - } else { - Reg r = - api_alloc_reg_or_spill(g, RC_INT, - ty ? 
ty : builtin_id(CFREE_CG_BUILTIN_I32)); - dst = api_op_reg(r, ty); - } - api_materialize_arith_to(g, sv, dst); - return; - } - if (sv->res != RES_SPILLED) - return; - CGTarget *T = g->target; - u8 cls = api_class_of_sv(sv); - CfreeCgTypeId ty = api_owned_reg_type(g, sv); - Reg r = api_alloc_reg_or_spill(g, cls, - ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); - T->reload_reg(T, api_op_reg(r, ty), sv->spill_slot, api_mem_for_spill(g, sv)); - api_return_spill_slot(g, sv->spill_slot, cls); - sv->spill_slot = FRAME_SLOT_NONE; - if (sv->op.kind == OPK_INDIRECT) { - sv->op.v.ind.base = r; - } else { - sv->op = api_op_reg(r, api_sv_type(sv)); - } - sv->res = RES_REG; -} - -static Operand api_force_reg(CfreeCg *g, ApiSValue *v, CfreeCgTypeId ty) { - CGTarget *T = g->target; - ty = api_unalias_type(g->c, ty); - api_ensure_reg(g, v); - if (v->op.kind == OPK_REG) { - if (ty) { - v->op.type = ty; - v->type = ty; - } - return v->op; - } - Reg r = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - Operand dst = api_op_reg(r, ty); - if (v->op.kind == OPK_IMM) { - T->load_imm(T, dst, v->op.v.imm); - } else if (api_is_lvalue_sv(v)) { - T->load(T, dst, v->op, api_mem_for_lvalue(g, &v->op, ty)); - if (v->op.kind == OPK_INDIRECT) { - api_free_reg(g, v->op.v.ind.base, RC_INT); - } - } else if (v->op.kind == OPK_GLOBAL) { - T->addr_of(T, dst, v->op); - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: cannot force operand to register"); - } - v->op = dst; - v->res = RES_REG; - return dst; -} - -static Operand api_force_reg_unless_imm(CfreeCg *g, ApiSValue *v, - CfreeCgTypeId ty) { - if (api_sv_op_is(v, OPK_IMM)) - return v->op; - return api_force_reg(g, v, ty); -} - -static void api_release(CfreeCg *g, ApiSValue *sv) { - if (sv->kind == SV_CMP) { - api_release_cmp(g, sv); - } else if (sv->kind == SV_ARITH) { - api_release_arith(g, sv); - } else if (sv->res == RES_REG) { - api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); - } else if (sv->res == RES_SPILLED) { - 
api_return_spill_slot(g, sv->spill_slot, api_class_of_sv(sv)); - sv->spill_slot = FRAME_SLOT_NONE; - } - sv->res = RES_INHERENT; -} - -static void api_release_arg_storage(CfreeCg *g, Operand *storage) { - if (storage->kind == OPK_REG) { - api_free_reg(g, storage->v.reg, storage->cls); - } else if (storage->kind == OPK_LOCAL && storage->cls < 3) { - CfreeCgTypeId ty = storage->type; - if (api_arg_storage_must_be_addr(g->c, ty)) - return; - api_return_spill_slot(g, storage->v.frame_slot, storage->cls); - } else if (storage->kind == OPK_INDIRECT) { - api_free_reg(g, storage->v.ind.base, RC_INT); - } -} - -/* ---- BinOp / UnOp / CmpOp mapping ---- */ - -static BinOp api_map_int_binop(CfreeCgIntBinOp op) { - switch (op) { - case CFREE_CG_INT_ADD: - return BO_IADD; - case CFREE_CG_INT_SUB: - return BO_ISUB; - case CFREE_CG_INT_MUL: - return BO_IMUL; - case CFREE_CG_INT_SDIV: - return BO_SDIV; - case CFREE_CG_INT_UDIV: - return BO_UDIV; - case CFREE_CG_INT_SREM: - return BO_SREM; - case CFREE_CG_INT_UREM: - return BO_UREM; - case CFREE_CG_INT_AND: - return BO_AND; - case CFREE_CG_INT_OR: - return BO_OR; - case CFREE_CG_INT_XOR: - return BO_XOR; - case CFREE_CG_INT_SHL: - return BO_SHL; - case CFREE_CG_INT_LSHR: - return BO_SHR_U; - case CFREE_CG_INT_ASHR: - return BO_SHR_S; - } - return BO_IADD; -} - -static BinOp api_map_fp_binop(CfreeCgFpBinOp op) { - switch (op) { - case CFREE_CG_FP_ADD: - return BO_FADD; - case CFREE_CG_FP_SUB: - return BO_FSUB; - case CFREE_CG_FP_MUL: - return BO_FMUL; - case CFREE_CG_FP_DIV: - return BO_FDIV; - case CFREE_CG_FP_REM: - return BO_FDIV; - } - return BO_FADD; -} - -static UnOp api_map_int_unop(CfreeCgIntUnOp op) { - switch (op) { - case CFREE_CG_INT_NEG: - return UO_NEG; - case CFREE_CG_INT_NOT: - return UO_NOT; - case CFREE_CG_INT_BNOT: - return UO_BNOT; - } - return UO_NEG; -} - -static CmpOp api_map_int_cmp(CfreeCgIntCmpOp op) { - switch (op) { - case CFREE_CG_INT_EQ: - return CMP_EQ; - case CFREE_CG_INT_NE: - return CMP_NE; - case 
CFREE_CG_INT_LT_S: - return CMP_LT_S; - case CFREE_CG_INT_LE_S: - return CMP_LE_S; - case CFREE_CG_INT_GT_S: - return CMP_GT_S; - case CFREE_CG_INT_GE_S: - return CMP_GE_S; - case CFREE_CG_INT_LT_U: - return CMP_LT_U; - case CFREE_CG_INT_LE_U: - return CMP_LE_U; - case CFREE_CG_INT_GT_U: - return CMP_GT_U; - case CFREE_CG_INT_GE_U: - return CMP_GE_U; - } - return CMP_EQ; -} - -static CmpOp api_map_fp_cmp(CfreeCgFpCmpOp op) { - switch (op) { - case CFREE_CG_FP_OEQ: - case CFREE_CG_FP_UEQ: - return CMP_EQ; - case CFREE_CG_FP_ONE: - case CFREE_CG_FP_UNE: - return CMP_NE; - case CFREE_CG_FP_OLT: - case CFREE_CG_FP_ULT: - return CMP_LT_F; - case CFREE_CG_FP_OLE: - case CFREE_CG_FP_ULE: - return CMP_LE_F; - case CFREE_CG_FP_OGT: - case CFREE_CG_FP_UGT: - return CMP_GT_F; - case CFREE_CG_FP_OGE: - case CFREE_CG_FP_UGE: - return CMP_GE_F; - } - return CMP_EQ; -} - -static CmpOp api_invert_cmp(CmpOp op) { - switch (op) { - case CMP_EQ: - return CMP_NE; - case CMP_NE: - return CMP_EQ; - case CMP_LT_S: - return CMP_GE_S; - case CMP_LE_S: - return CMP_GT_S; - case CMP_GT_S: - return CMP_LE_S; - case CMP_GE_S: - return CMP_LT_S; - case CMP_LT_U: - return CMP_GE_U; - case CMP_LE_U: - return CMP_GT_U; - case CMP_GT_U: - return CMP_LE_U; - case CMP_GE_U: - return CMP_LT_U; - case CMP_LT_F: - return CMP_GE_F; - case CMP_LE_F: - return CMP_GT_F; - case CMP_GT_F: - return CMP_LE_F; - case CMP_GE_F: - return CMP_LT_F; - } - return CMP_EQ; -} - -static AtomicOp api_map_atomic_op(CfreeCgAtomicOp op) { - switch (op) { - case CFREE_CG_ATOMIC_XCHG: - return AO_XCHG; - case CFREE_CG_ATOMIC_ADD: - return AO_ADD; - case CFREE_CG_ATOMIC_SUB: - return AO_SUB; - case CFREE_CG_ATOMIC_AND: - return AO_AND; - case CFREE_CG_ATOMIC_OR: - return AO_OR; - case CFREE_CG_ATOMIC_XOR: - return AO_XOR; - case CFREE_CG_ATOMIC_NAND: - return AO_NAND; - } - return AO_XCHG; -} - -static MemOrder api_map_mem_order(CfreeCgMemOrder order) { - switch (order) { - case CFREE_CG_MO_RELAXED: - return MO_RELAXED; - case 
CFREE_CG_MO_CONSUME: - return MO_CONSUME; - case CFREE_CG_MO_ACQUIRE: - return MO_ACQUIRE; - case CFREE_CG_MO_RELEASE: - return MO_RELEASE; - case CFREE_CG_MO_ACQ_REL: - return MO_ACQ_REL; - case CFREE_CG_MO_SEQ_CST: - return MO_SEQ_CST; - } - return MO_RELAXED; -} - -static AsmDir api_map_asm_dir(uint8_t dir) { - switch ((CfreeCgAsmDir)dir) { - case CFREE_CG_ASM_IN: - return ASM_IN; - case CFREE_CG_ASM_OUT: - return ASM_OUT; - case CFREE_CG_ASM_INOUT: - return ASM_INOUT; - } - return ASM_IN; -} - -/* ---- immediate integer folding ---- */ - -static u32 api_int_like_width(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (!ty) - return 0; - if (ty->kind == CFREE_CG_TYPE_ALIAS) - return api_int_like_width(c, ty->alias.base); - if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) - return ty->integer.width; - if (ty->kind == CFREE_CG_TYPE_ENUM) - return (u32)(ty->size * 8u); - if (ty->kind == CFREE_CG_TYPE_PTR) - return (u32)(ty->size * 8u); - return 0; -} - -static int api_type_is_bool(Compiler *c, CfreeCgTypeId id) { - const CgType *ty = cg_type_get(c, id); - if (!ty) - return 0; - if (ty->kind == CFREE_CG_TYPE_ALIAS) - return api_type_is_bool(c, ty->alias.base); - return ty->kind == CFREE_CG_TYPE_BOOL; -} - -static u64 api_width_mask(u32 width) { - if (width >= 64) - return UINT64_MAX; - return (1ull << width) - 1ull; -} - -static u64 api_mask_width(u64 v, u32 width) { - return v & api_width_mask(width); -} - -static i64 api_sign_extend_width(u64 v, u32 width) { - v = api_mask_width(v, width); - if (width >= 64) - return (i64)v; - u64 sign = 1ull << (width - 1u); - return (i64)((v ^ sign) - sign); -} - -static int api_foldable_int_like_type(Compiler *c, CfreeCgTypeId ty, - u32 *width_out) { - u32 width = api_int_like_width(c, ty); - if (!width || width > 64) - return 0; - *width_out = width; - return 1; -} - -static int api_foldable_int_type(Compiler *c, CfreeCgTypeId ty, - u32 *width_out) { - if (!cg_type_is_int(c, 
ty)) - return 0; - return api_foldable_int_like_type(c, ty, width_out); -} - -static i64 api_fold_result(Compiler *c, CfreeCgTypeId ty, u64 v, u32 width) { - v = api_mask_width(v, width); - if (api_type_is_bool(c, ty)) - v = v != 0; - return (i64)v; -} - -static int api_try_fold_int_binop(CfreeCg *g, BinOp op, CfreeCgTypeId ty, - i64 a, i64 b, i64 *out) { - u32 width; - u64 ua, ub, r; - if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) - return 0; - ua = api_mask_width((u64)a, width); - ub = api_mask_width((u64)b, width); - r = 0; - switch (op) { - case BO_IADD: - r = ua + ub; - break; - case BO_ISUB: - r = ua - ub; - break; - case BO_IMUL: - r = ua * ub; - break; - case BO_AND: - r = ua & ub; - break; - case BO_OR: - r = ua | ub; - break; - case BO_XOR: - r = ua ^ ub; - break; - case BO_SHL: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - r = ua << sh; - break; - } - case BO_SHR_U: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - r = ua >> sh; - break; - } - case BO_SHR_S: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - if (!sh) { - r = ua; - } else { - u64 sign = 1ull << (width - 1u); - r = ua >> sh; - if (ua & sign) - r |= api_width_mask(width) << (width - sh); - } - break; - } - default: - return 0; - } - *out = api_fold_result(g->c, ty, r, width); - return 1; -} - -static int api_try_fold_int_unop(CfreeCg *g, UnOp op, CfreeCgTypeId ty, i64 a, - i64 *out) { - u32 width; - u64 ua, r; - if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) - return 0; - ua = api_mask_width((u64)a, width); - switch (op) { - case UO_NEG: - r = 0u - ua; - break; - case UO_NOT: - r = ua == 0; - break; - case UO_BNOT: - r = ~ua; - break; - default: - return 0; - } - *out = api_fold_result(g->c, ty, r, width); - return 1; -} - -static int api_try_fold_int_cmp(CfreeCg *g, CmpOp op, CfreeCgTypeId ty, i64 a, - i64 b, i64 *out) { - u32 width; - u64 ua, ub; - i64 sa, sb; - int r; - if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) - return 0; - ua = 
api_mask_width((u64)a, width); - ub = api_mask_width((u64)b, width); - sa = api_sign_extend_width(ua, width); - sb = api_sign_extend_width(ub, width); - switch (op) { - case CMP_EQ: - r = ua == ub; - break; - case CMP_NE: - r = ua != ub; - break; - case CMP_LT_S: - r = sa < sb; - break; - case CMP_LE_S: - r = sa <= sb; - break; - case CMP_GT_S: - r = sa > sb; - break; - case CMP_GE_S: - r = sa >= sb; - break; - case CMP_LT_U: - r = ua < ub; - break; - case CMP_LE_U: - r = ua <= ub; - break; - case CMP_GT_U: - r = ua > ub; - break; - case CMP_GE_U: - r = ua >= ub; - break; - default: - return 0; - } - *out = r ? 1 : 0; - return 1; -} - -static int api_source_flags_addr_taken(u32 flags); -static ApiSourceLocal *api_local_from_handle(CfreeCg *g, CfreeCgLocal local); - -static void api_local_const_clear(ApiSourceLocal *rec) { - if (!rec) - return; - rec->const_valid = 0; - rec->const_value = 0; -} - -static void api_local_const_clear_all(CfreeCg *g) { - if (!g) - return; - for (u32 i = 0; i < g->nlocals; ++i) - api_local_const_clear(&g->locals[i]); -} - -static void api_local_const_memory_boundary(CfreeCg *g) { - api_local_const_clear_all(g); -} - -static void api_local_const_control_boundary(CfreeCg *g) { - api_local_const_clear_all(g); -} - -static void api_local_const_address_taken(CfreeCg *g, CfreeCgLocal local) { - api_local_const_clear_all(g); - api_local_const_clear(api_local_from_handle(g, local)); -} - -static Operand api_lvalue_addr(CfreeCg *g, ApiSValue *v, CfreeCgTypeId pty) { - CGTarget *T; - ApiSourceLocal *rec; - Reg r; - Operand dst; - api_local_const_address_taken(g, v->source_local); - api_ensure_reg(g, v); - if (!api_is_lvalue_sv(v)) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: addr operand is not an lvalue"); - } - T = g->target; - r = api_alloc_reg_or_spill(g, RC_INT, pty); - dst = api_op_reg(r, pty); - rec = v->source_local != CFREE_CG_LOCAL_NONE - ? 
api_local_from_handle(g, v->source_local) - : NULL; - if (rec && rec->storage.kind == CG_LOCAL_STORAGE_REG && T->local_addr) - T->local_addr(T, dst, &rec->desc, rec->storage); - else - T->addr_of(T, dst, v->op); - return dst; -} - -static int api_local_const_can_track(CfreeCg *g, const ApiSourceLocal *rec, - CfreeCgMemAccess access) { - u32 width; - CfreeCgTypeId ty; - u64 access_size; - u64 local_size; - if (!g || !rec) - return 0; - if (rec->kind != API_SOURCE_LOCAL_AUTO) - return 0; - if (api_source_flags_addr_taken(rec->attrs.flags)) - return 0; - if (access.flags & CFREE_CG_MEM_VOLATILE) - return 0; - ty = resolve_type(g->c, access.type); - if (!ty) - ty = rec->type; - if (ty != rec->type) - return 0; - access_size = abi_cg_sizeof(g->c->abi, ty); - local_size = abi_cg_sizeof(g->c->abi, rec->type); - if (access_size != local_size) - return 0; - return api_foldable_int_like_type(g->c, ty, &width); -} - -static void api_local_const_store(CfreeCg *g, CfreeCgLocal local, - CfreeCgMemAccess access, i64 value) { - ApiSourceLocal *rec = api_local_from_handle(g, local); - CfreeCgTypeId ty; - u32 width; - if (!api_local_const_can_track(g, rec, access)) { - api_local_const_clear(rec); - return; - } - ty = resolve_type(g->c, access.type); - if (!ty) - ty = rec->type; - if (!api_foldable_int_like_type(g->c, ty, &width)) { - api_local_const_clear(rec); - return; - } - rec->const_value = api_fold_result(g->c, ty, (u64)value, width); - rec->const_valid = 1; -} - -static int api_local_const_load(CfreeCg *g, CfreeCgLocal local, - CfreeCgMemAccess access, Operand *out) { - ApiSourceLocal *rec = api_local_from_handle(g, local); - CfreeCgTypeId ty; - u32 width; - if (!out || !api_local_const_can_track(g, rec, access)) - return 0; - if (!rec->const_valid) - return 0; - ty = resolve_type(g->c, access.type); - if (!ty) - ty = rec->type; - if (!api_foldable_int_like_type(g->c, ty, &width)) - return 0; - *out = - api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); 
- return 1; -} - -static int api_can_delay_int_arith(CfreeCg *g, CfreeCgTypeId ty, u32 flags) { - u32 width; - return g && !flags && api_foldable_int_type(g->c, ty, &width); -} - -static int api_op_is_int_identity(CfreeCg *g, BinOp op, CfreeCgTypeId ty, - i64 imm) { - u32 width; - u64 v; - if (!api_foldable_int_type(g->c, ty, &width)) - return 0; - v = api_mask_width((u64)imm, width); - switch (op) { - case BO_IADD: - case BO_ISUB: - case BO_OR: - case BO_XOR: - return v == 0; - case BO_AND: - return v == api_width_mask(width); - default: - return 0; - } -} - -static int api_try_collapse_binop_identity(CfreeCg *g, BinOp op, - CfreeCgTypeId ty, ApiSValue *a, - ApiSValue *b, ApiSValue *out) { - if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && - a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { - *out = api_make_sv_with_reg_ownership(a->op, ty, - api_sv_owns_operand_reg(a, &a->op)); - a->res = RES_INHERENT; - return 1; - } - if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && - b->op.kind != OPK_IMM && - (op == BO_IADD || op == BO_OR || op == BO_XOR || op == BO_AND) && - api_op_is_int_identity(g, op, ty, a->op.v.imm)) { - *out = api_make_sv_with_reg_ownership(b->op, ty, - api_sv_owns_operand_reg(b, &b->op)); - b->res = RES_INHERENT; - return 1; - } - return 0; -} - -static int api_try_fold_arith_chain(CfreeCg *g, BinOp op, CfreeCgTypeId ty, - ApiSValue *a, ApiSValue *b, - ApiSValue *out) { - i64 folded; - BinOp result_op; - if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || - a->delayed.arith.a.kind != OPK_REG || - a->delayed.arith.b.kind != OPK_IMM || - b->kind != SV_OPERAND || b->op.kind != OPK_IMM) { - return 0; - } - result_op = a->delayed.arith.bin_op; - switch (a->delayed.arith.bin_op) { - case BO_IADD: - if (op == BO_IADD) { - if (!api_try_fold_int_binop(g, BO_IADD, ty, - a->delayed.arith.b.v.imm, b->op.v.imm, - &folded)) - return 0; - result_op = BO_IADD; - 
} else if (op == BO_ISUB) { - if (!api_try_fold_int_binop(g, BO_ISUB, ty, - a->delayed.arith.b.v.imm, b->op.v.imm, - &folded)) - return 0; - result_op = BO_IADD; - } else { - return 0; - } - break; - case BO_ISUB: - if (op == BO_IADD) { - if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, - a->delayed.arith.b.v.imm, - &folded)) - return 0; - result_op = BO_IADD; - } else if (op == BO_ISUB) { - if (!api_try_fold_int_binop(g, BO_IADD, ty, - a->delayed.arith.b.v.imm, b->op.v.imm, - &folded)) - return 0; - result_op = BO_ISUB; - } else { - return 0; - } - break; - case BO_XOR: - if (op != BO_XOR || !api_try_fold_int_binop(g, BO_XOR, ty, - a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_XOR; - break; - case BO_AND: - if (op != BO_AND || !api_try_fold_int_binop(g, BO_AND, ty, - a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_AND; - break; - case BO_OR: - if (op != BO_OR || !api_try_fold_int_binop(g, BO_OR, ty, - a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_OR; - break; - default: - return 0; - } - if (api_op_is_int_identity(g, result_op, ty, folded)) { - *out = api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, - a->delayed.arith.a_owned); - a->delayed.arith.a_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - return 1; - } - a->delayed.arith.bin_op = result_op; - a->delayed.arith.b.v.imm = folded; - *out = *a; - a->delayed.arith.a_owned = 0; - a->delayed.arith.b_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - memset(&a->delayed.arith.b, 0, sizeof a->delayed.arith.b); - return 1; -} - -static int api_try_fold_unary_chain(ApiSValue *a, UnOp op, CfreeCgTypeId ty, - ApiSValue *out) { - if (op != UO_BNOT || a->kind != SV_ARITH || - a->delayed.arith.kind != API_DELAYED_UNOP || - a->delayed.arith.un_op != UO_BNOT || - a->delayed.arith.a.kind != OPK_REG) { - return 0; - } - *out = 
api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, - a->delayed.arith.a_owned); - a->delayed.arith.a_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - return 1; -} - -/* ---- C-symbol mangling ---- */ - -static SymBind api_map_bind(CfreeSymBind b) { - switch (b) { - case CFREE_SB_LOCAL: - return SB_LOCAL; - case CFREE_SB_GLOBAL: - return SB_GLOBAL; - case CFREE_SB_WEAK: - return SB_WEAK; - } - return SB_LOCAL; -} - -static SymVis api_map_vis(CfreeCgVisibility v) { - switch (v) { - case CFREE_CG_VIS_DEFAULT: - return SV_DEFAULT; - case CFREE_CG_VIS_HIDDEN: - return SV_HIDDEN; - case CFREE_CG_VIS_PROTECTED: - return SV_PROTECTED; - } - return SV_DEFAULT; -} - -static SymKind api_decl_sym_kind(CfreeCgDecl decl) { - if (decl.kind == CFREE_CG_DECL_FUNC) { - if (decl.as.func.flags & CFREE_CG_FUNC_IFUNC) - return SK_IFUNC; - return SK_FUNC; - } - if (decl.as.object.flags & CFREE_CG_OBJ_TLS) - return SK_TLS; - return SK_OBJ; -} - -static void api_remember_sym(CfreeCg *g, ObjSymId sym, CfreeCgTypeId ty, - CfreeCgDecl decl) { - Heap *h; - CfreeCgTypeId *nts; - CfreeCgDecl *nas; - u32 cap; - if (!g || sym == OBJ_SYM_NONE) - return; - if (sym < g->sym_cap) { - g->sym_types[sym] = ty; - g->sym_attrs[sym] = decl; - return; - } - h = g->c->ctx->heap; - cap = g->sym_cap ? 
g->sym_cap : 16u; - while (cap <= sym) - cap *= 2u; - nts = - (CfreeCgTypeId *)h->alloc(h, sizeof(*nts) * cap, _Alignof(CfreeCgTypeId)); - nas = (CfreeCgDecl *)h->alloc(h, sizeof(*nas) * cap, _Alignof(CfreeCgDecl)); - if (!nts || !nas) { - if (nts) - h->free(h, nts, sizeof(*nts) * cap); - if (nas) - h->free(h, nas, sizeof(*nas) * cap); - return; - } - memset(nts, 0, sizeof(*nts) * cap); - memset(nas, 0, sizeof(*nas) * cap); - if (g->sym_types) { - memcpy(nts, g->sym_types, sizeof(*nts) * g->sym_cap); - h->free(h, g->sym_types, sizeof(*g->sym_types) * g->sym_cap); - } - if (g->sym_attrs) { - memcpy(nas, g->sym_attrs, sizeof(*nas) * g->sym_cap); - h->free(h, g->sym_attrs, sizeof(*g->sym_attrs) * g->sym_cap); - } - g->sym_types = nts; - g->sym_attrs = nas; - g->sym_cap = cap; - g->sym_types[sym] = ty; - g->sym_attrs[sym] = decl; -} - -static CfreeCgTypeId api_sym_type(CfreeCg *g, CfreeCgSym sym) { - if (!g || sym == CFREE_CG_SYM_NONE || sym >= g->sym_cap) { - return CFREE_CG_TYPE_NONE; - } - return g->sym_types[sym]; -} - -static CfreeCgDecl api_sym_attrs(CfreeCg *g, CfreeCgSym sym) { - CfreeCgDecl decl; - memset(&decl, 0, sizeof(decl)); - decl.kind = CFREE_CG_DECL_OBJECT; - decl.sym.bind = CFREE_SB_GLOBAL; - decl.sym.visibility = CFREE_CG_VIS_DEFAULT; - if (!g || sym == CFREE_CG_SYM_NONE || sym >= g->sym_cap) - return decl; - return g->sym_attrs[sym]; -} - -static int api_sym_is_tls(CfreeCg *g, CfreeCgSym sym) { - CfreeCgDecl decl = api_sym_attrs(g, sym); - return decl.kind == CFREE_CG_DECL_OBJECT && - (decl.as.object.flags & CFREE_CG_OBJ_TLS); -} - -static RelocKind api_data_reloc_kind(int pcrel, uint32_t width) { - if (pcrel) { - if (width == 4) - return R_PC32; - if (width == 8) - return R_PC64; - } else { - if (width == 4) - return R_ABS32; - if (width == 8) - return R_ABS64; - } - return R_NONE; -} - -/* ============================================================ - * Public API: CfreeCg lifecycle - * ============================================================ 
*/ - -static SrcLoc api_no_loc(void) { - SrcLoc loc; - loc.file_id = 0; - loc.line = 0; - loc.col = 0; - return loc; -} - -CfreeStatus cfree_cg_new(CfreeCompiler *c, CfreeObjBuilder *out, - const CfreeCodeOptions *opts, CfreeCg **cg_out) { - Heap *h; - CfreeCg *g; - MCEmitter *mc; - CGTarget *target; - Debug *debug = NULL; - int opt_level = opts ? opts->opt_level : 0; - if (!cg_out) - return CFREE_INVALID; - *cg_out = NULL; - if (!c || !out) - return CFREE_INVALID; - if (opt_level < 0 || opt_level > 2) { - compiler_panic((Compiler *)c, api_no_loc(), - "CfreeCg: unsupported opt_level %d", opt_level); - } - h = (Heap *)c->ctx->heap; - mc = mc_new((Compiler *)c, (ObjBuilder *)out); - if (!mc) - return CFREE_NOMEM; - if (opts && opts->debug_info) { - debug = debug_new((Compiler *)c, (ObjBuilder *)out); - if (!debug) { - mc_free(mc); - return CFREE_NOMEM; - } - mc->debug = debug; - } - target = cgtarget_new((Compiler *)c, (ObjBuilder *)out, mc); - if (!target) { - if (debug) - debug_free(debug); - mc_free(mc); - return CFREE_UNSUPPORTED; - } - target->debug = debug; - if (opt_level > 0) { - target = opt_cgtarget_new((Compiler *)c, target, opt_level); - if (target) - target->debug = debug; - } - g = (CfreeCg *)h->alloc(h, sizeof(CfreeCg), _Alignof(CfreeCg)); - if (!g) { - if (debug) - debug_free(debug); - cgtarget_free(target); - mc_free(mc); - return CFREE_NOMEM; - } - memset(g, 0, sizeof *g); - g->c = (Compiler *)c; - g->obj = (ObjBuilder *)out; - g->target = target; - g->mc = mc; - g->debug = debug; - *cg_out = g; - return CFREE_OK; -} - -void cfree_cg_free(CfreeCg *g) { - Heap *h; - if (!g) - return; - cgtarget_finalize(g->target); - if (g->debug) { - debug_emit(g->debug); - debug_free(g->debug); - } - cgtarget_free(g->target); - mc_free(g->mc); - h = g->c->ctx->heap; - if (g->stack) - h->free(h, g->stack, sizeof(ApiSValue) * g->cap); - if (g->locals) { - h->free(h, g->locals, sizeof(*g->locals) * g->locals_cap); - } - if (g->sym_types) { - h->free(h, g->sym_types, 
sizeof(*g->sym_types) * g->sym_cap); - } - if (g->sym_attrs) { - h->free(h, g->sym_attrs, sizeof(*g->sym_attrs) * g->sym_cap); - } - for (u32 c = 0; c < 3; ++c) { - if (g->slot_pools[c].free) { - h->free(h, g->slot_pools[c].free, - sizeof(FrameSlot) * g->slot_pools[c].cap); - } - } - h->free(h, g, sizeof *g); -} - -/* ============================================================ - * Source location - * ============================================================ */ - -void cfree_cg_set_loc(CfreeCg *g, CfreeSrcLoc loc) { - if (!g) - return; - g->cur_loc = *(SrcLoc *)&loc; - if (g->debug) - debug_set_pending_loc(g->debug, *(SrcLoc *)&loc); - if (g->target->set_loc) - g->target->set_loc(g->target, *(SrcLoc *)&loc); -} - -/* ============================================================ - * Function lifecycle - * ============================================================ */ - -CfreeCgSym cfree_cg_decl(CfreeCg *g, CfreeCgDecl decl) { - Compiler *c; - ObjBuilder *ob; - ObjSymId sym; - CfreeCgTypeId ty; - if (!g || !decl.linkage_name) - return CFREE_CG_SYM_NONE; - c = g->c; - ob = g->obj; - ty = resolve_type(c, decl.type); - if (!ty) - return CFREE_CG_SYM_NONE; - sym = obj_symbol_find(ob, (Sym)decl.linkage_name); - if (sym == OBJ_SYM_NONE) { - sym = obj_symbol_ex(ob, (Sym)decl.linkage_name, api_map_bind(decl.sym.bind), - api_map_vis(decl.sym.visibility), - api_decl_sym_kind(decl), OBJ_SEC_NONE, 0, 0, 0); - } - if (decl.sym.flags) { - obj_symbol_set_flags(ob, sym, (u16)decl.sym.flags); - } - api_remember_sym(g, sym, ty, decl); - return (CfreeCgSym)sym; -} - -CfreeCgSym cfree_cg_alias(CfreeCg *g, CfreeCgAlias alias) { - ObjBuilder *ob; - ObjSymId sym; - const ObjSym *ts; - CfreeCgDecl decl_attrs; - if (!g || !alias.linkage_name || alias.target == CFREE_CG_SYM_NONE) { - return CFREE_CG_SYM_NONE; - } - ob = g->obj; - sym = obj_symbol_find(ob, (Sym)alias.linkage_name); - ts = obj_symbol_get(ob, (ObjSymId)alias.target); - if (!ts) - return CFREE_CG_SYM_NONE; - if (sym == 
OBJ_SYM_NONE) { - sym = - obj_symbol_ex(ob, (Sym)alias.linkage_name, api_map_bind(alias.sym.bind), - api_map_vis(alias.sym.visibility), (SymKind)ts->kind, - ts->section_id, ts->value, ts->size, ts->common_align); - } else if (ts->section_id != OBJ_SEC_NONE) { - obj_symbol_define(ob, sym, ts->section_id, ts->value, ts->size); - } - if (alias.sym.flags) - obj_symbol_set_flags(ob, sym, (u16)alias.sym.flags); - decl_attrs = api_sym_attrs(g, alias.target); - decl_attrs.sym = alias.sym; - api_remember_sym(g, sym, api_sym_type(g, alias.target), decl_attrs); - return (CfreeCgSym)sym; -} - -void cfree_cg_func_begin(CfreeCg *g, CfreeCgSym cg_sym) { - Compiler *c; - ObjBuilder *ob; - CGTarget *T; - ObjSymId sym; - ObjSecId text_sec; - CfreeCgTypeId fty; - const ABIFuncInfo *abi; - CfreeCgDecl attrs; - if (!g) - return; - c = g->c; - ob = g->obj; - T = g->target; - sym = (ObjSymId)cg_sym; - fty = api_sym_type(g, cg_sym); - if (!fty) - return; - attrs = api_sym_attrs(g, cg_sym); - abi = abi_cg_func_info(c->abi, fty); - - text_sec = obj_section(ob, pool_intern_cstr(c->global, ".text"), SEC_TEXT, - SF_EXEC | SF_ALLOC, 4); - - if (sym != OBJ_SYM_NONE) { - obj_symbol_define(ob, sym, text_sec, 0, 0); - } - - memset(&g->fn_desc, 0, sizeof g->fn_desc); - g->fn_desc.sym = sym; - g->fn_desc.text_section_id = text_sec; - g->fn_desc.group_id = OBJ_GROUP_NONE; - g->fn_desc.fn_type = fty; - g->fn_desc.abi = abi; - g->fn_desc.loc = g->cur_loc; - if (attrs.as.func.flags & CFREE_CG_FUNC_NORETURN) { - g->fn_desc.flags |= CGFD_NORETURN; - } - - g->fn_ret_type = cg_type_func_ret_id(c, fty); - g->fn_abi = abi; - g->nlocals = 0; - g->sp = 0; - for (u32 i = 0; i < 3; ++i) - g->slot_pools[i].n = 0; - g->avs_in_flight = NULL; - g->avs_in_flight_n = 0; - - if (g->debug) { - DebugTypeId dt = api_debug_type(g, fty); - if (dt != DEBUG_TYPE_NONE) - debug_func_begin(g->debug, sym, dt, g->cur_loc); - } - T->func_begin(T, &g->fn_desc); - api_regalloc_begin(g); -} - -void cfree_cg_func_end(CfreeCg *g) { - if 
(!g) - return; - api_regalloc_finish(g); - g->target->func_end(g->target); - if (g->debug) - debug_func_end(g->debug); - g->fn_abi = NULL; - g->fn_ret_type = CFREE_CG_TYPE_NONE; - g->nscopes = 0; - memset(g->scopes, 0, sizeof g->scopes); -} - -static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, - CfreeCgCallAttrs attrs); - -static FrameSlot api_f128_temp_slot(CfreeCg *g, CfreeCgTypeId ty) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty; - fsd.size = 16; - fsd.align = 16; - fsd.kind = FS_LOCAL; - fsd.flags = FSF_ADDR_TAKEN; - return g->target->frame_slot(g->target, &fsd); -} - -static u64 api_u64_from_target_bytes(CfreeCg *g, const u8 *bytes) { - u64 v = 0; - for (u32 i = 0; i < 8; ++i) { - u32 shift = g->c->target.big_endian ? (7u - i) * 8u : i * 8u; - v |= (u64)bytes[i] << shift; - } - return v; -} - -static void api_store_f128_bytes(CfreeCg *g, FrameSlot slot, - CfreeCgTypeId ty, const u8 bytes[16]) { - CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Reg ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand base = api_op_reg(ar, ptr_ty); - MemAccess ma; - memset(&ma, 0, sizeof ma); - ma.type = i64_ty; - ma.size = 8; - ma.align = 8; - g->target->addr_of(g->target, base, api_op_local(slot, ty)); - g->target->store(g->target, api_op_indirect(ar, 0, i64_ty), - api_op_imm((i64)api_u64_from_target_bytes(g, bytes), - i64_ty), - ma); - g->target->store(g->target, api_op_indirect(ar, 8, i64_ty), - api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), - i64_ty), - ma); - api_free_reg(g, ar, RC_INT); -} - -static void api_encode_binary128_from_double(CfreeCg *g, double value, - u8 out[16]) { - union { - double d; - u64 u; - } in; - u64 lo = 0; - u64 hi = 0; - u64 frac; - u32 sign; - u32 exp; - in.d = value; - sign = (u32)(in.u >> 63); - exp = (u32)((in.u >> 52) & 0x7ffu); - frac = in.u & 0x000fffffffffffffull; - if (sign) - hi |= 1ull << 63; - if (exp == 
0x7ffu) { - hi |= (u64)0x7fffu << 48; - if (frac) { - lo |= (frac & 0xfu) << 60; - hi |= frac >> 4; - hi |= 1ull << 47; - } - } else if (exp != 0 || frac != 0) { - i32 e; - u64 sig; - if (exp == 0) { - e = -1022; - sig = frac; - while ((sig & (1ull << 52)) == 0) { - sig <<= 1; - --e; - } - frac = sig & 0x000fffffffffffffull; - } else { - e = (i32)exp - 1023; - } - hi |= (u64)(u32)(e + 16383) << 48; - lo |= (frac & 0xfu) << 60; - hi |= frac >> 4; - } - for (u32 i = 0; i < 16; ++i) { - if (g->c->target.big_endian) { - u64 lane = i < 8u ? hi : lo; - u32 shift = (7u - (i & 7u)) * 8u; - out[i] = (u8)(lane >> shift); - } else { - u64 lane = i < 8u ? lo : hi; - u32 shift = (i & 7u) * 8u; - out[i] = (u8)(lane >> shift); - } - } -} - -static ApiSValue api_make_f128_const(CfreeCg *g, double value, - CfreeCgTypeId ty) { - u8 bytes[16]; - FrameSlot slot; - api_encode_binary128_from_double(g, value, bytes); - slot = api_f128_temp_slot(g, ty); - api_store_f128_bytes(g, slot, ty, bytes); - return api_make_lv(api_op_local(slot, ty), ty); -} - -static ApiSValue api_wide16_materialize_lvalue(CfreeCg *g, ApiSValue *v, - CfreeCgTypeId ty) { - if (v->op.kind == OPK_LOCAL || v->op.kind == OPK_INDIRECT) { - v->type = ty; - v->op.type = ty; - v->lvalue = 1; - return *v; - } - if (v->op.kind == OPK_GLOBAL) { - FrameSlot slot = api_f128_temp_slot(g, ty); - Operand dst_lv = api_op_local(slot, ty); - Operand dst_addr; - Operand src_addr; - AggregateAccess agg; - ApiSValue tmp = api_make_lv(dst_lv, ty); - ApiSValue src = api_make_lv(v->op, ty); - dst_addr = api_lvalue_addr(g, &tmp, cg_type_ptr_to(g->c, ty)); - src_addr = api_lvalue_addr(g, &src, cg_type_ptr_to(g->c, ty)); - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = 16; - g->target->copy_bytes(g->target, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - return api_make_lv(dst_lv, ty); - } - if (v->op.kind == OPK_REG) { - FrameSlot slot = api_f128_temp_slot(g, ty); 
- Operand dst = api_op_local(slot, ty); - g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty)); - return api_make_lv(dst, ty); - } - if (v->op.kind == OPK_IMM) { - u8 bytes[16]; - u64 lo = (u64)v->op.v.imm; - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 idx = g->c->target.big_endian ? 15u - i : i; - bytes[idx] = (u8)(lo >> (i * 8u)); - } - FrameSlot slot = api_f128_temp_slot(g, ty); - api_store_f128_bytes(g, slot, ty, bytes); - return api_make_lv(api_op_local(slot, ty), ty); - } - compiler_panic(g->c, g->cur_loc, - "CfreeCg: 16-byte scalar value is not addressable (kind %u, op %u)", - (unsigned)v->kind, (unsigned)v->op.kind); - return *v; -} - -static CfreeCgSym api_runtime_helper(CfreeCg *g, const char *name, - CfreeCgTypeId ret, - const CfreeCgTypeId *params, - u32 nparams) { - CfreeCgFuncParam ps[3]; - CfreeCgFuncSig sig; - CfreeCgDecl decl; - if (nparams > 3) - return CFREE_CG_SYM_NONE; - memset(ps, 0, sizeof ps); - for (u32 i = 0; i < nparams; ++i) - ps[i].type = params[i]; - memset(&sig, 0, sizeof sig); - sig.ret = ret; - sig.params = ps; - sig.nparams = nparams; - sig.call_conv = CFREE_CG_CC_TARGET_C; - memset(&decl, 0, sizeof decl); - decl.kind = CFREE_CG_DECL_FUNC; - decl.linkage_name = pool_intern_cstr(g->c->global, name); - decl.display_name = decl.linkage_name; - decl.type = cfree_cg_type_func((CfreeCompiler *)g->c, sig); - decl.sym.bind = CFREE_SB_GLOBAL; - decl.sym.visibility = CFREE_CG_VIS_DEFAULT; - return cfree_cg_decl(g, decl); -} - -static void api_runtime_call_values(CfreeCg *g, const char *name, - CfreeCgTypeId ret, - const CfreeCgTypeId *params, - u32 nparams, ApiSValue *args) { - CfreeCgCallAttrs attrs; - CfreeCgSym sym = api_runtime_helper(g, name, ret, params, nparams); - memset(&attrs, 0, sizeof attrs); - for (u32 i = 0; i < nparams; ++i) - api_push(g, args[i]); - api_call_symbol_common(g, sym, nparams, attrs); -} - -/* ============================================================ - * Locals and 
params - * ============================================================ */ - -static int api_source_flags_addr_taken(u32 flags) { - return (flags & CFREE_CG_LOCAL_ADDRESS_TAKEN) != 0; -} - -static int api_local_requires_memory(CfreeCg *g, CfreeCgTypeId ty, - CfreeCgLocalAttrs attrs) { - if (api_source_flags_addr_taken(attrs.flags)) - return 1; - if (api_is_wide16_scalar_type(g->c, ty)) - return 1; - return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) || - cg_type_is_ptr(g->c, ty)); -} - -static CfreeCgLocal api_local_handle(u32 index) { - u32 raw = index + 1u; - if (!raw) - return CFREE_CG_LOCAL_NONE; - return raw; -} - -static int api_grow_locals(CfreeCg *g, u32 want) { - Heap *h = g->c->ctx->heap; - ApiSourceLocal *nb; - u32 cap; - if (g->locals_cap >= want) - return 1; - cap = g->locals_cap ? g->locals_cap : 16u; - while (cap < want) - cap *= 2u; - nb = (ApiSourceLocal *)h->alloc(h, sizeof(*nb) * cap, - _Alignof(ApiSourceLocal)); - if (!nb) - return 0; - memset(nb, 0, sizeof(*nb) * cap); - if (g->locals) { - memcpy(nb, g->locals, sizeof(*nb) * g->nlocals); - h->free(h, g->locals, sizeof(*g->locals) * g->locals_cap); - } - g->locals = nb; - g->locals_cap = cap; - return 1; -} - -static ApiSourceLocal *api_local_from_handle(CfreeCg *g, CfreeCgLocal local) { - u32 index; - if (local == CFREE_CG_LOCAL_NONE) - return NULL; - index = local - 1u; - if (index >= g->nlocals) { - return NULL; - } - return &g->locals[index]; -} - -static CGLocalStorage api_frame_local_storage(CfreeCg *g, - const CGLocalDesc *d) { - FrameSlotDesc fsd; - CGLocalStorage st; - memset(&fsd, 0, sizeof fsd); - fsd.type = d->type; - fsd.name = d->name; - fsd.loc = d->loc; - fsd.size = d->size; - fsd.align = d->align; - fsd.kind = FS_LOCAL; - if (d->flags & CG_LOCAL_ADDR_TAKEN) - fsd.flags |= FSF_ADDR_TAKEN; - st.kind = CG_LOCAL_STORAGE_FRAME; - st.v.frame_slot = g->target->frame_slot(g->target, &fsd); - return st; -} - -CfreeCgLocal cfree_cg_local(CfreeCg *g, CfreeCgTypeId type, - 
CfreeCgLocalAttrs attrs) { - CfreeCgTypeId ty; - CGLocalDesc desc; - CGLocalStorage storage; - ApiSourceLocal *rec; - CfreeCgLocal handle; - if (!g) - return CFREE_CG_LOCAL_NONE; - ty = resolve_type(g->c, type); - if (!ty) - return CFREE_CG_LOCAL_NONE; - handle = api_local_handle(g->nlocals); - if (handle == CFREE_CG_LOCAL_NONE || !api_grow_locals(g, g->nlocals + 1u)) - return CFREE_CG_LOCAL_NONE; - memset(&desc, 0, sizeof desc); - desc.type = ty; - desc.name = (Sym)attrs.name; - desc.loc = g->cur_loc; - desc.size = abi_cg_sizeof(g->c->abi, type); - desc.align = attrs.align ? attrs.align : abi_cg_alignof(g->c->abi, type); - if (api_source_flags_addr_taken(attrs.flags)) - desc.flags |= CG_LOCAL_ADDR_TAKEN; - if (api_local_requires_memory(g, ty, attrs)) - desc.flags |= CG_LOCAL_MEMORY_REQUIRED; - if (g->target->local) - storage = g->target->local(g->target, &desc); - else - storage = api_frame_local_storage(g, &desc); - if (storage.kind == CG_LOCAL_STORAGE_REG) { - cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), - storage.v.reg); - } - rec = &g->locals[g->nlocals++]; - memset(rec, 0, sizeof *rec); - rec->type = ty; - rec->name = attrs.name; - rec->attrs = attrs; - rec->loc = g->cur_loc; - rec->desc = desc; - rec->storage = storage; - rec->param_index = 0; - rec->kind = API_SOURCE_LOCAL_AUTO; - return handle; -} - -CfreeCgLocal cfree_cg_param(CfreeCg *g, uint32_t index, CfreeCgTypeId type, - CfreeCgLocalAttrs attrs) { - CfreeCgTypeId ty; - CGParamDesc pd; - ApiSourceLocal *rec; - CfreeCgLocal handle; - CGLocalStorage storage; - u32 size; - u32 align; - if (!g) - return CFREE_CG_LOCAL_NONE; - ty = resolve_type(g->c, type); - if (!ty) - return CFREE_CG_LOCAL_NONE; - if (index != g->nlocals) - return CFREE_CG_LOCAL_NONE; - handle = api_local_handle(g->nlocals); - if (handle == CFREE_CG_LOCAL_NONE || !api_grow_locals(g, g->nlocals + 1u)) - return CFREE_CG_LOCAL_NONE; - - size = abi_cg_sizeof(g->c->abi, type); - align = attrs.align ? 
attrs.align : abi_cg_alignof(g->c->abi, type); - - memset(&pd, 0, sizeof pd); - pd.index = index; - pd.name = (Sym)attrs.name; - pd.type = ty; - pd.size = size; - pd.align = align; - if (api_source_flags_addr_taken(attrs.flags)) - pd.flags |= CG_LOCAL_ADDR_TAKEN; - if (api_local_requires_memory(g, ty, attrs)) - pd.flags |= CG_LOCAL_MEMORY_REQUIRED; - if (g->fn_abi && index < g->fn_abi->nparams) { - pd.abi = &g->fn_abi->params[index]; - } - pd.loc = g->cur_loc; - storage = g->target->param(g->target, &pd); - if (storage.kind == CG_LOCAL_STORAGE_REG) { - cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), - storage.v.reg); - } - - rec = &g->locals[g->nlocals++]; - memset(rec, 0, sizeof *rec); - rec->type = ty; - rec->name = attrs.name; - rec->attrs = attrs; - rec->loc = g->cur_loc; - memset(&rec->desc, 0, sizeof rec->desc); - rec->desc.type = ty; - rec->desc.name = (Sym)attrs.name; - rec->desc.loc = g->cur_loc; - rec->desc.size = size; - rec->desc.align = align; - rec->desc.flags = pd.flags; - rec->storage = storage; - rec->param_index = index; - rec->kind = API_SOURCE_LOCAL_PARAM; - return handle; -} - -/* ============================================================ - * Push operations - * ============================================================ */ - -void cfree_cg_push_int(CfreeCg *g, uint64_t value, CfreeCgTypeId type) { - CfreeCgTypeId ty; - if (!g) - return; - ty = resolve_type(g->c, type); - if (!ty) - return; - api_push(g, api_make_sv(api_op_imm((i64)value, ty), ty)); -} - -void cfree_cg_push_float(CfreeCg *g, double value, CfreeCgTypeId type) { - CfreeCgTypeId ty; - CGTarget *T; - ConstBytes cb; - union { - double d; - float f; - uint8_t b[8]; - } u; - Reg r; - Operand dst; - if (!g) - return; - ty = resolve_type(g->c, type); - if (!ty) - return; - if (api_is_f128_type(g->c, ty)) { - api_push(g, api_make_f128_const(g, value, ty)); - return; - } - T = g->target; - cb.type = ty; - cb.size = (u32)abi_cg_sizeof(g->c->abi, type); - cb.align 
= (u32)abi_cg_alignof(g->c->abi, type); - if (ty == builtin_id(CFREE_CG_BUILTIN_F32)) - u.f = (float)value; - else - u.d = value; - cb.bytes = u.b; - r = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(r, ty); - T->load_const(T, dst, cb); - api_push(g, api_make_sv(dst, ty)); -} - -void cfree_cg_push_null(CfreeCg *g, CfreeCgTypeId ptr_type) { - CfreeCgTypeId ty; - if (!g) - return; - ty = resolve_type(g->c, ptr_type); - if (!ty) - return; - api_push(g, api_make_sv(api_op_imm(0, ty), ty)); -} - -CfreeCgSym cfree_cg_const_data(CfreeCg *g, const uint8_t *data, size_t len, - uint32_t align, CfreeCgTypeId pointee_type) { - Compiler *c; - ObjBuilder *ob; - CfreeCgTypeId pty; - Sym sec_name; - ObjSecId sec; - u32 base; - char name_buf[32]; - Sym anon_name; - ObjSymId sym; - CfreeCgDecl attrs; - if (!g) - return CFREE_CG_SYM_NONE; - c = g->c; - ob = g->obj; - pty = resolve_type(c, pointee_type); - if (!pty) - return CFREE_CG_SYM_NONE; - sec_name = pool_intern_cstr(c->global, ".rodata"); - sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, - align ? align : (u32)abi_cg_alignof(c->abi, pointee_type)); - base = obj_align_to( - ob, sec, align ? 
align : (u32)abi_cg_alignof(c->abi, pointee_type)); - obj_write(ob, sec, data, len); - snprintf(name_buf, sizeof(name_buf), ".Lcfree_ro.%u", g->rodata_counter++); - anon_name = pool_intern_cstr(c->global, name_buf); - sym = obj_symbol(ob, anon_name, SB_LOCAL, SK_OBJ, sec, base, (u64)len); - memset(&attrs, 0, sizeof(attrs)); - attrs.kind = CFREE_CG_DECL_OBJECT; - attrs.sym.bind = CFREE_SB_LOCAL; - attrs.sym.visibility = CFREE_CG_VIS_DEFAULT; - attrs.as.object.flags = CFREE_CG_OBJ_READONLY; - api_remember_sym(g, sym, pty, attrs); - return (CfreeCgSym)sym; -} - -static void api_push_frame_lvalue(CfreeCg *g, FrameSlot slot, - CfreeCgTypeId type) { - if (!g) - return; - api_push(g, api_make_lv(api_op_local(slot, type), type)); -} - -static void api_push_source_frame_lvalue(CfreeCg *g, CfreeCgLocal local, - FrameSlot slot, CfreeCgTypeId type) { - ApiSValue sv; - if (!g) - return; - sv = api_make_lv(api_op_local(slot, type), type); - sv.source_local = local; - api_push(g, sv); -} - -static void api_push_source_reg_lvalue(CfreeCg *g, CfreeCgLocal local, Reg reg, - CfreeCgTypeId type) { - ApiSValue sv; - if (!g) - return; - sv = api_make_lv(api_op_reg(reg, type), type); - sv.res = RES_FIXED_REG; - sv.source_local = local; - api_push(g, sv); -} - -void cfree_cg_push_local(CfreeCg *g, CfreeCgLocal local) { - ApiSourceLocal *rec; - if (!g) - return; - rec = api_local_from_handle(g, local); - if (!rec) - return; - if (rec->storage.kind == CG_LOCAL_STORAGE_REG) { - api_push_source_reg_lvalue(g, local, rec->storage.v.reg, rec->type); - } else if (rec->kind == API_SOURCE_LOCAL_AUTO) { - api_push_source_frame_lvalue(g, local, rec->storage.v.frame_slot, - rec->type); - } else { - api_push_frame_lvalue(g, rec->storage.v.frame_slot, rec->type); - } -} - -void cfree_cg_push_local_addr(CfreeCg *g, CfreeCgLocal local) { - cfree_cg_push_local(g, local); - cfree_cg_addr(g); -} - -void cfree_cg_push_symbol_addr(CfreeCg *g, CfreeCgSym sym, int64_t addend) { - CfreeCgTypeId ty; - 
CfreeCgTypeId ptr_ty; - if (!g) - return; - ty = api_sym_type(g, sym); - if (!ty) - ty = builtin_id(CFREE_CG_BUILTIN_VOID); - ptr_ty = cg_type_ptr_to(g->c, ty); - if (api_sym_is_tls(g, sym)) { - Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand dst = api_op_reg(r, ptr_ty); - g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend); - api_push(g, api_make_sv(dst, ptr_ty)); - } else { - api_push(g, - api_make_sv(api_op_global((ObjSymId)sym, addend, ptr_ty), ptr_ty)); - } -} - -void cfree_cg_push_symbol_lvalue(CfreeCg *g, CfreeCgSym sym, int64_t addend) { - CfreeCgTypeId ty; - if (!g) - return; - ty = api_sym_type(g, sym); - if (!ty) - return; - if (api_sym_is_tls(g, sym)) { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand dst = api_op_reg(r, ptr_ty); - g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend); - api_push(g, api_make_lv(api_op_indirect(r, 0, ty), ty)); - } else { - api_push(g, api_make_lv(api_op_global((ObjSymId)sym, addend, ty), ty)); - } -} - -void cfree_cg_addr_offset(CfreeCg *g, int64_t byte_offset, - CfreeCgTypeId result_type) { - ApiSValue v; - CfreeCgTypeId rty; - CfreeCgTypeId ptr_ty; - Operand base; - Operand result; - Reg rr; - int want_ptr; - int base_is_lvalue; - int free_base = 0; - if (!g) - return; - rty = resolve_type(g->c, result_type); - if (!rty) - return; - v = api_pop(g); - want_ptr = cg_type_is_ptr(g->c, rty); - base_is_lvalue = api_is_lvalue_sv(&v); - if (v.source_local != CFREE_CG_LOCAL_NONE) - api_local_const_clear(api_local_from_handle(g, v.source_local)); - api_ensure_reg(g, &v); - if (v.op.kind == OPK_GLOBAL) { - result = api_op_global(v.op.v.global.sym, - v.op.v.global.addend + byte_offset, rty); - api_push(g, want_ptr ? 
api_make_sv(result, rty) : api_make_lv(result, rty)); - return; - } - if (!want_ptr && v.op.kind == OPK_INDIRECT) { - i64 ofs = (i64)v.op.v.ind.ofs + byte_offset; - if (ofs >= INT32_MIN && ofs <= INT32_MAX) { - result = api_op_indirect(v.op.v.ind.base, (i32)ofs, rty); - api_push(g, api_make_lv(result, rty)); - return; - } - } - ptr_ty = want_ptr ? rty : cg_type_ptr_to(g->c, rty); - if (!base_is_lvalue && cg_type_is_ptr(g->c, api_sv_type(&v))) - ptr_ty = api_sv_type(&v); - if (base_is_lvalue) { - base = api_lvalue_addr(g, &v, ptr_ty); - free_base = 1; - } else { - base = api_force_reg(g, &v, ptr_ty); - } - rr = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - result = api_op_reg(rr, ptr_ty); - g->target->binop(g->target, BO_IADD, result, base, - api_op_imm(byte_offset, ptr_ty)); - if (free_base) - api_free_reg(g, base.v.reg, RC_INT); - api_release(g, &v); - if (want_ptr) { - result.type = rty; - api_push(g, api_make_sv(result, rty)); - } else { - api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, rty), rty)); - } -} - -/* ============================================================ - * Load / addr / store - * ============================================================ */ - -void cfree_cg_load(CfreeCg *g, CfreeCgMemAccess access) { - ApiSValue v; - CfreeCgTypeId ty; - Operand dst; - if (!g) - return; - if (access.flags & CFREE_CG_MEM_VOLATILE) - api_local_const_memory_boundary(g); - v = api_pop(g); - if (!api_is_lvalue_sv(&v)) { - api_push(g, v); - return; - } - ty = api_mem_access_type(g, access, api_sv_type(&v), "load"); - if (v.bitfield_lvalue) { - CfreeCgTypeId load_ty = ty; - Reg rr; - api_require_scalar_mem_type(g, "load", load_ty); - rr = api_alloc_reg_or_spill(g, RC_INT, load_ty); - dst = api_op_reg(rr, load_ty); - g->target->bitfield_load(g->target, dst, v.op, v.delayed.bitfield); - api_release(g, &v); - api_push(g, api_make_sv(dst, load_ty)); - return; - } - if (cg_type_is_aggregate(g->c, api_sv_type(&v))) { - u32 access_size; - u32 lvalue_size; - if 
(!cg_type_is_aggregate(g->c, ty)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: load scalar access from aggregate lvalue " - "requires selecting a field"); - } - access_size = api_mem_type_size(g, ty, "load"); - lvalue_size = api_mem_type_size(g, api_sv_type(&v), "load"); - if (access_size != lvalue_size) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: load aggregate type/size mismatch: access size " - "%u, lvalue size %u", - (unsigned)access_size, (unsigned)lvalue_size); - } - api_push(g, v); - return; - } - api_require_scalar_mem_type(g, "load", ty); - if (api_is_wide16_scalar_type(g->c, ty)) { - v.type = ty; - v.op.type = ty; - api_push(g, v); - return; - } - if (v.source_local != CFREE_CG_LOCAL_NONE && - api_local_const_load(g, v.source_local, access, &dst)) { - api_release(g, &v); - api_push(g, api_make_sv(dst, dst.type)); - return; - } - api_ensure_reg(g, &v); - if (v.source_local != CFREE_CG_LOCAL_NONE && v.op.kind == OPK_REG) { - dst = v.op; - dst.type = ty; - v.op = dst; - v.type = ty; - v.lvalue = 0; - v.res = RES_FIXED_REG; - api_push(g, v); - return; - } - dst = api_force_reg(g, &v, ty); - dst.type = ty; - api_push(g, api_make_sv(dst, ty)); -} - -void cfree_cg_indirect(CfreeCg *g) { - ApiSValue ptr; - CfreeCgTypeId pty; - CfreeCgTypeId pointee; - Operand ptr_op; - if (!g) - return; - ptr = api_pop(g); - pty = api_sv_type(&ptr); - pointee = cg_type_pointee(g->c, pty); - if (!pointee || cg_type_is_void(g->c, pointee)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: indirect operand is not a pointer to object"); - return; - } - ptr_op = api_force_reg(g, &ptr, pty); - api_push(g, api_make_lv(api_op_indirect(ptr_op.v.reg, 0, pointee), pointee)); -} - -void cfree_cg_addr(CfreeCg *g) { - ApiSValue v; - CfreeCgTypeId pty; - Operand dst; - if (!g) - return; - v = api_pop(g); - if (v.bitfield_lvalue) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: cannot take address of bit-field"); - return; - } - pty = cg_type_ptr_to(g->c, api_sv_type(&v)); - if 
(v.source_local != CFREE_CG_LOCAL_NONE) - api_local_const_address_taken(g, v.source_local); - dst = api_lvalue_addr(g, &v, pty); - api_release(g, &v); - api_push(g, api_make_sv(dst, pty)); -} - -void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) { - ApiSValue lv, rv; - CGTarget *T; - CfreeCgTypeId ty; - Operand src; - int scalar_aggregate_store = 0; - if (!g) - return; - if (access.flags & CFREE_CG_MEM_VOLATILE) - api_local_const_memory_boundary(g); - T = g->target; - rv = api_pop(g); - lv = api_pop(g); - if (!api_is_lvalue_sv(&lv)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: store destination is not an lvalue"); - return; - } - ty = api_mem_access_type(g, access, api_sv_type(&lv), "store"); - if (lv.bitfield_lvalue) { - api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); - if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (api_sv_op_is_reg_or_imm(&rv)) { - src = rv.op; - } else { - src = api_force_reg(g, &rv, api_sv_type(&rv)); - } - T->bitfield_store(T, lv.op, src, lv.delayed.bitfield); - api_release(g, &lv); - api_release(g, &rv); - return; - } - if (cg_type_is_aggregate(g->c, api_sv_type(&lv)) && - !cg_type_is_aggregate(g->c, api_sv_type(&rv)) && - !cg_type_is_aggregate(g->c, ty)) { - u32 access_size = api_mem_type_size(g, ty, "store"); - u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); - u32 value_size = api_mem_type_size(g, api_sv_type(&rv), "store"); - if (access_size != dst_size || value_size != dst_size) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: store scalar/aggregate size mismatch: access " - "size %u, destination size %u, value size %u", - (unsigned)access_size, (unsigned)dst_size, - (unsigned)value_size); - } - scalar_aggregate_store = 1; - } - if (!scalar_aggregate_store && - (cg_type_is_aggregate(g->c, ty) || - cg_type_is_aggregate(g->c, api_sv_type(&lv)) || - cg_type_is_aggregate(g->c, api_sv_type(&rv)))) { 
- CfreeCgTypeId ptr_ty; - Operand dst_addr, src_addr; - AggregateAccess agg; - u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); - u32 src_size = api_mem_type_size(g, api_sv_type(&rv), "store"); - u32 access_size = - cg_type_is_aggregate(g->c, ty) ? api_mem_type_size(g, ty, "store") - : dst_size; - if (!api_is_lvalue_sv(&rv)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: aggregate store source is not an lvalue"); - } - if (!cg_type_is_aggregate(g->c, api_sv_type(&lv)) || - !cg_type_is_aggregate(g->c, api_sv_type(&rv)) || - access_size != dst_size || access_size != src_size) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: store aggregate type/size mismatch: access " - "size %u, destination size %u, value size %u", - (unsigned)access_size, (unsigned)dst_size, - (unsigned)src_size); - } - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - ptr_ty = cg_type_ptr_to(g->c, api_sv_type(&lv)); - dst_addr = api_lvalue_addr(g, &lv, ptr_ty); - src_addr = api_lvalue_addr(g, &rv, ptr_ty); - memset(&agg, 0, sizeof agg); - agg.size = access_size; - agg.align = access.align ? 
access.align - : abi_cg_alignof(g->c->abi, api_sv_type(&lv)); - T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - api_release(g, &lv); - api_release(g, &rv); - return; - } - api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); - if (api_is_wide16_scalar_type(g->c, ty)) { - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (api_is_lvalue_sv(&rv)) { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); - Operand src_addr = api_lvalue_addr(g, &rv, ptr_ty); - AggregateAccess agg; - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = access.align ? access.align : 16; - T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - } else if (rv.op.kind == OPK_IMM) { - u8 bytes[16]; - u64 lo = (u64)rv.op.v.imm; - u64 hi = rv.op.v.imm < 0 ? ~(u64)0 : 0; - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 lo_idx = g->c->target.big_endian ? 15u - i : i; - u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; - bytes[lo_idx] = (u8)(lo >> (i * 8u)); - bytes[hi_idx] = (u8)(hi >> (i * 8u)); - } - if (lv.op.kind == OPK_LOCAL) { - api_store_f128_bytes(g, lv.op.v.frame_slot, ty, bytes); - } else { - FrameSlot slot = api_f128_temp_slot(g, ty); - ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty); - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); - Operand src_addr; - AggregateAccess agg; - api_store_f128_bytes(g, slot, ty, bytes); - src_addr = api_lvalue_addr(g, &tmp, ptr_ty); - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = access.align ? 
access.align : 16; - T->copy_bytes(T, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - } - } else { - src = api_force_reg(g, &rv, ty); - T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); - } - api_release(g, &lv); - api_release(g, &rv); - return; - } - api_ensure_reg(g, &lv); - api_ensure_reg(g, &rv); - if (api_sv_op_is_reg_or_imm(&rv)) { - src = rv.op; - } else { - src = api_force_reg(g, &rv, api_sv_type(&rv)); - } - if (lv.source_local != CFREE_CG_LOCAL_NONE) { - if (src.kind == OPK_IMM) { - api_local_const_store(g, lv.source_local, access, src.v.imm); - } else { - api_local_const_clear(api_local_from_handle(g, lv.source_local)); - } - } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (lv.source_local != CFREE_CG_LOCAL_NONE && lv.op.kind == OPK_REG) { - Operand dst = lv.op; - dst.type = ty; - if (src.kind == OPK_IMM) { - T->load_imm(T, dst, src.v.imm); - } else if (src.kind == OPK_REG) { - if (src.v.reg != dst.v.reg || src.cls != dst.cls) - T->copy(T, dst, src); - } else { - src = api_force_reg(g, &rv, ty); - if (src.v.reg != dst.v.reg || src.cls != dst.cls) - T->copy(T, dst, src); - } - } else { - T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); - } - api_release(g, &lv); - api_release(g, &rv); -} - -/* ============================================================ - * Stack manipulation - * ============================================================ */ - -void cfree_cg_dup(CfreeCg *g) { - ApiSValue v, dup; - ApiSValue *top; - CfreeCgTypeId ty; - Reg r; - Operand dst; - if (!g || g->sp == 0) - return; - top = &g->stack[g->sp - 1]; - api_ensure_reg(g, top); - v = *top; - if (v.res != RES_REG) { - api_push(g, v); - return; - } - top->pinned = 1; - ty = api_owned_reg_type(g, &v); - r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty); - dst = api_op_reg(r, ty); 
- g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); - g->stack[g->sp - 1].pinned = 0; - dup = v; - api_set_owned_reg(&dup, r); - dup.res = RES_REG; - dup.pinned = 0; - dup.spill_slot = FRAME_SLOT_NONE; - api_push(g, dup); -} - -void cfree_cg_swap(CfreeCg *g) { - ApiSValue tmp; - if (!g || g->sp < 2) - return; - tmp = g->stack[g->sp - 1]; - g->stack[g->sp - 1] = g->stack[g->sp - 2]; - g->stack[g->sp - 2] = tmp; -} - -void cfree_cg_drop(CfreeCg *g) { - ApiSValue v; - if (!g) - return; - v = api_pop(g); - api_release(g, &v); -} - -int cfree_cg_top_const_int(CfreeCg *g, int64_t *out_value) { - ApiSValue *v; - CfreeCgTypeId ty; - u32 width; - if (!g || !out_value || !g->sp) - return 0; - v = &g->stack[g->sp - 1u]; - if (v->kind != SV_OPERAND || v->op.kind != OPK_IMM) - return 0; - ty = api_sv_type(v); - if (!api_foldable_int_like_type(g->c, ty, &width)) - return 0; - *out_value = api_fold_result(g->c, ty, (u64)v->op.v.imm, width); - return 1; -} - -void cfree_cg_rot3(CfreeCg *g) { - ApiSValue a, b, c; - if (!g || g->sp < 3) - return; - a = g->stack[g->sp - 3]; - b = g->stack[g->sp - 2]; - c = g->stack[g->sp - 1]; - g->stack[g->sp - 3] = b; - g->stack[g->sp - 2] = c; - g->stack[g->sp - 1] = a; -} - -/* ============================================================ - * Arithmetic / compare / convert - * ============================================================ */ - -static const char *api_i128_binop_helper(BinOp op); -static int api_i128_cmp_is_unsigned(CmpOp op); -static void api_cg_cmp(CfreeCg *g, CmpOp cop); -static void api_f128_call_unary(CfreeCg *g, const char *name, - CfreeCgTypeId ret, CfreeCgTypeId param); - -static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) { - ApiSValue b, a; - CGTarget *T; - CfreeCgTypeId ty; - Operand ra, rb; - Reg rr; - Operand dst; - ApiSValue folded_sv; - i64 folded; - if (!g) - return; - T = g->target; - b = api_pop(g); - a = api_pop(g); - ty = a.type ? 
a.type : b.type; - - if (api_is_i128_type(g->c, ty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - const char *name = api_i128_binop_helper(iop); - if (!name) - compiler_panic(g->c, g->cur_loc, "CfreeCg: i128 binop unsupported"); - args[0] = a; - args[1] = b; - ps[0] = i128; - ps[1] = (iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S) ? i32 : i128; - api_runtime_call_values(g, name, i128, ps, 2, args); - return; - } - - if (!flags && api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && - api_try_fold_int_binop(g, iop, ty, a.op.v.imm, b.op.v.imm, &folded)) { - api_release(g, &a); - api_release(g, &b); - api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); - return; - } - - if (api_can_delay_int_arith(g, ty, flags) && - api_try_fold_arith_chain(g, iop, ty, &a, &b, &folded_sv)) { - api_release(g, &a); - api_release(g, &b); - api_push(g, folded_sv); - return; - } - - if (api_type_class(ty) == RC_FP) { - ra = api_force_reg(g, &a, ty); - rb = api_force_reg(g, &b, ty); - } else { - ra = api_force_reg_unless_imm(g, &a, ty); - rb = api_force_reg_unless_imm(g, &b, ty); - } - - if (api_can_delay_int_arith(g, ty, flags) && - api_try_collapse_binop_identity(g, iop, ty, &a, &b, &folded_sv)) { - api_release(g, &a); - api_release(g, &b); - api_push(g, folded_sv); - return; - } - - if (api_can_delay_int_arith(g, ty, flags) && - (ra.kind == OPK_REG || rb.kind == OPK_REG) && - (ra.kind == OPK_REG || ra.kind == OPK_IMM) && - (rb.kind == OPK_REG || rb.kind == OPK_IMM)) { - int a_owned = api_sv_owns_operand_reg(&a, &ra); - int b_owned = api_sv_owns_operand_reg(&b, &rb); - api_push(g, api_make_arith_binop(iop, ra, rb, ty, a_owned, b_owned)); - if (a_owned) - a.res = RES_INHERENT; - if (b_owned) - b.res = RES_INHERENT; - api_release(g, &a); - api_release(g, &b); - return; - } - - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); - 
T->binop(T, iop, dst, ra, rb); - api_release(g, &a); - api_release(g, &b); - api_push(g, api_make_sv(dst, ty)); -} - -static void api_cg_unop(CfreeCg *g, UnOp iop, u32 flags) { - ApiSValue a; - CGTarget *T; - CfreeCgTypeId ty; - Operand ra; - Reg rr; - Operand dst; - ApiSValue folded_sv; - i64 folded; - if (!g) - return; - T = g->target; - a = api_pop(g); - ty = a.type ? a.type : a.op.type; - - if (api_is_i128_type(g->c, ty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - if (iop == UO_NEG || iop == UO_BNOT) { - const char *name = (iop == UO_NEG) ? "__negti2" : "__cfree_notti3"; - api_push(g, a); - api_f128_call_unary(g, name, i128, i128); - return; - } - if (iop == UO_NOT) { - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - args[0] = a; - args[1] = api_make_sv(api_op_imm(0, i128), i128); - api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, CMP_EQ); - return; - } - } - - if (!flags && api_sv_op_is(&a, OPK_IMM) && - api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) { - api_release(g, &a); - api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); - return; - } - - if (api_can_delay_int_arith(g, ty, flags) && - api_try_fold_unary_chain(&a, iop, ty, &folded_sv)) { - api_release(g, &a); - api_push(g, folded_sv); - return; - } - - ra = api_force_reg_unless_imm(g, &a, ty); - if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_REG) { - int a_owned = api_sv_owns_operand_reg(&a, &ra); - api_push(g, api_make_arith_unop(iop, ra, ty, a_owned)); - if (a_owned) - a.res = RES_INHERENT; - api_release(g, &a); - return; - } - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); - T->unop(T, iop, dst, ra); - api_release(g, &a); - api_push(g, api_make_sv(dst, ty)); -} - -static void api_cg_cmp(CfreeCg *g, CmpOp cop) { - ApiSValue b, a; - CGTarget *T; - CfreeCgTypeId opty; - CfreeCgTypeId i32; - Operand 
ra, rb; - Reg rr; - Operand dst; - i64 folded; - if (!g) - return; - T = g->target; - b = api_pop(g); - a = api_pop(g); - opty = a.type ? a.type : b.type; - i32 = builtin_id(CFREE_CG_BUILTIN_I32); - - if (api_is_i128_type(g->c, opty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - CmpOp icmp = CMP_EQ; - const char *name = api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2" - : "__cfree_cmpti2"; - switch (cop) { - case CMP_EQ: icmp = CMP_EQ; break; - case CMP_NE: icmp = CMP_NE; break; - case CMP_LT_S: - case CMP_LT_U: icmp = CMP_LT_S; break; - case CMP_LE_S: - case CMP_LE_U: icmp = CMP_LE_S; break; - case CMP_GT_S: - case CMP_GT_U: icmp = CMP_GT_S; break; - case CMP_GE_S: - case CMP_GE_U: icmp = CMP_GE_S; break; - default: icmp = CMP_EQ; break; - } - args[0] = a; - args[1] = b; - api_runtime_call_values(g, name, i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, icmp); - return; - } - - if (api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && - api_try_fold_int_cmp(g, cop, opty, a.op.v.imm, b.op.v.imm, &folded)) { - api_release(g, &a); - api_release(g, &b); - api_push(g, api_make_sv(api_op_imm(folded, i32), i32)); - return; - } - - ra = api_force_reg_unless_imm(g, &a, opty); - rb = api_force_reg_unless_imm(g, &b, opty); - if (api_type_class(opty) != RC_FP) { - api_push(g, api_make_cmp(cop, ra, rb, i32, - api_sv_owns_operand_reg(&a, &ra), - api_sv_owns_operand_reg(&b, &rb))); - return; - } - rr = api_alloc_reg_or_spill(g, RC_INT, i32); - dst = api_op_reg(rr, i32); - T->cmp(T, cop, dst, ra, rb); - api_release(g, &a); - api_release(g, &b); - api_push(g, api_make_sv(dst, i32)); -} - -static void api_cg_convert_kind(CfreeCg *g, CfreeCgTypeId dst_type, - ConvKind ck) { - ApiSValue v; - CGTarget *T; - CfreeCgTypeId sty; - CfreeCgTypeId dty; - Operand src; - Reg rr; - Operand dst; - if (!g) - return; - T = g->target; - dty = resolve_type(g->c, dst_type); - if (!dty) - return; - v = 
api_pop(g); - dty = api_unalias_type(g->c, dty); - sty = api_unalias_type(g->c, v.type ? v.type : v.op.type); - if (!sty) { - api_release(g, &v); - return; - } - if (sty == dty) { - v.type = dty; - v.op.type = dty; - api_push(g, v); - return; - } - if (api_is_i128_type(g->c, sty) && api_type_is_bool(g->c, dty) && - ck != CV_BITCAST) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - ApiSValue r; - args[0] = v; - args[1] = api_make_sv(api_op_imm(0, i128), i128); - api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, CMP_NE); - r = api_pop(g); - r.type = dty; - r.op.type = dty; - api_push(g, r); - return; - } - if (api_is_i128_type(g->c, dty) && !api_is_i128_type(g->c, sty) && - ck != CV_BITCAST) { - u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); - CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); - FrameSlot slot = api_f128_temp_slot(g, dty); - Operand dst_lv = api_op_local(slot, dty); - if (api_sv_op_is(&v, OPK_IMM)) { - u8 bytes[16]; - u64 lo = (u64)v.op.v.imm; - u64 hi = 0; - if (ck == CV_SEXT && sz <= 8) { - u32 bits = sz * 8u; - u64 mask = bits >= 64u ? ~(u64)0 : ((1ull << bits) - 1ull); - u64 sign = 1ull << (bits - 1u); - u64 u = lo & mask; - if (u & sign) - u |= ~mask; - lo = u; - hi = (u & (1ull << 63)) ? ~(u64)0 : 0; - } - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 lo_idx = g->c->target.big_endian ? 15u - i : i; - u32 hi_idx = g->c->target.big_endian ? 
7u - i : 8u + i; - bytes[lo_idx] = (u8)(lo >> (i * 8u)); - bytes[hi_idx] = (u8)(hi >> (i * 8u)); - } - api_store_f128_bytes(g, slot, dty, bytes); - api_release(g, &v); - api_push(g, api_make_lv(dst_lv, dty)); - return; - } - { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); - CfreeCgTypeId src_ty = sty; - Operand src = api_force_reg(g, &v, sty); - Operand low = src; - Operand base; - Reg low_tmp = REG_NONE; - Reg ar; - MemAccess ma; - memset(&ma, 0, sizeof ma); - ma.type = i64_ty; - ma.size = 8; - ma.align = 8; - if (sz < 8) { - low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty); - low = api_op_reg(low_tmp, i64_ty); - T->convert(T, ck == CV_SEXT ? CV_SEXT : CV_ZEXT, low, src); - src_ty = i64_ty; - } else { - low.type = i64_ty; - } - ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - base = api_op_reg(ar, ptr_ty); - T->addr_of(T, base, dst_lv); - T->store(T, api_op_indirect(ar, 0, i64_ty), low, ma); - if (ck == CV_SEXT) { - Reg hr = api_alloc_reg_or_spill(g, RC_INT, i64_ty); - Operand high = api_op_reg(hr, i64_ty); - T->binop(T, BO_SHR_S, high, low, api_op_imm(63, i64_ty)); - T->store(T, api_op_indirect(ar, 8, i64_ty), high, ma); - api_free_reg(g, hr, RC_INT); - } else { - T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma); - } - if (low_tmp != REG_NONE) - api_free_reg(g, low_tmp, RC_INT); - (void)src_ty; - api_free_reg(g, ar, RC_INT); - api_release(g, &v); - api_push(g, api_make_lv(dst_lv, dty)); - } - return; - } - if (api_is_i128_type(g->c, sty) && !api_is_i128_type(g->c, dty) && - ck == CV_TRUNC && abi_cg_sizeof(g->c->abi, dty) <= 8) { - Reg rr = api_alloc_reg_or_spill(g, RC_INT, dty); - Operand dst = api_op_reg(rr, dty); - if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || - v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { - ApiSValue lv = v; - lv.lvalue = 1; - T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty)); - } else if (v.op.kind == OPK_IMM) { - T->load_imm(T, dst, v.op.v.imm); - } else { - compiler_panic(g->c, 
g->cur_loc, - "CfreeCg: unsupported i128 truncation source"); - } - api_release(g, &v); - api_push(g, api_make_sv(dst, dty)); - return; - } - if (ck == CV_BITCAST && - abi_cg_sizeof(g->c->abi, sty) == abi_cg_sizeof(g->c->abi, dst_type) && - api_type_class(sty) == api_type_class(dty)) { - v.type = dty; - v.op.type = dty; - api_push(g, v); - return; - } - if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && - abi_cg_sizeof(g->c->abi, dty) == 16 && - (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { - FrameSlot slot = api_f128_temp_slot(g, dty); - Operand dst_lv = api_op_local(slot, dty); - if (api_is_lvalue_sv(&v) || - v.op.kind == OPK_LOCAL || v.op.kind == OPK_INDIRECT || - v.op.kind == OPK_GLOBAL) { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); - ApiSValue src_lv = v; - Operand dst_addr; - Operand src_addr; - AggregateAccess agg; - src_lv.lvalue = 1; - dst_addr = api_lvalue_addr(g, &(ApiSValue){.op = dst_lv, - .type = dty, - .kind = SV_OPERAND, - .lvalue = 1}, - ptr_ty); - src_addr = api_lvalue_addr(g, &src_lv, cg_type_ptr_to(g->c, sty)); - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = 16; - g->target->copy_bytes(g->target, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - } else if (v.op.kind == OPK_REG) { - g->target->store(g->target, dst_lv, v.op, - api_mem_for_lvalue(g, &dst_lv, sty)); - } else if (v.op.kind == OPK_IMM) { - u8 bytes[16]; - u64 lo = (u64)v.op.v.imm; - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 idx = g->c->target.big_endian ? 
15u - i : i; - bytes[idx] = (u8)(lo >> (i * 8u)); - } - api_store_f128_bytes(g, slot, dty, bytes); - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: unsupported 16-byte bitcast source"); - } - api_release(g, &v); - api_push(g, api_make_lv(dst_lv, dty)); - return; - } - - src = api_force_reg(g, &v, sty); - rr = api_alloc_reg_or_spill(g, api_type_class(dty), dty); - dst = api_op_reg(rr, dty); - T->convert(T, ck, dst, src); - api_release(g, &v); - api_push(g, api_make_sv(dst, dty)); -} - -void cfree_cg_int_binop(CfreeCg *g, CfreeCgIntBinOp op, uint32_t flags) { - api_cg_binop(g, api_map_int_binop(op), flags); -} - -void cfree_cg_int_unop(CfreeCg *g, CfreeCgIntUnOp op, uint32_t flags) { - api_cg_unop(g, api_map_int_unop(op), flags); -} - -void cfree_cg_int_cmp(CfreeCg *g, CfreeCgIntCmpOp op) { - api_cg_cmp(g, api_map_int_cmp(op)); -} - -static const char *api_i128_binop_helper(BinOp op) { - switch (op) { - case BO_IADD: return "__cfree_addti3"; - case BO_ISUB: return "__cfree_subti3"; - case BO_IMUL: return "__multi3"; - case BO_SDIV: return "__divti3"; - case BO_UDIV: return "__udivti3"; - case BO_SREM: return "__modti3"; - case BO_UREM: return "__umodti3"; - case BO_AND: return "__cfree_andti3"; - case BO_OR: return "__cfree_orti3"; - case BO_XOR: return "__cfree_xorti3"; - case BO_SHL: return "__ashlti3"; - case BO_SHR_U: return "__lshrti3"; - case BO_SHR_S: return "__ashrti3"; - case BO_FADD: - case BO_FSUB: - case BO_FMUL: - case BO_FDIV: - default: - return NULL; - } -} - -static int api_i128_cmp_is_unsigned(CmpOp op) { - return op == CMP_LT_U || op == CMP_LE_U || op == CMP_GT_U || op == CMP_GE_U; -} - -static const char *api_f128_binop_helper(CfreeCgFpBinOp op) { - switch (op) { - case CFREE_CG_FP_ADD: return "__addtf3"; - case CFREE_CG_FP_SUB: return "__subtf3"; - case CFREE_CG_FP_MUL: return "__multf3"; - case CFREE_CG_FP_DIV: return "__divtf3"; - case CFREE_CG_FP_REM: return NULL; - } - return NULL; -} - -static int api_f128_stack_top(CfreeCg *g, u32 
depth) { - if (!g || g->sp <= depth) - return 0; - return api_is_f128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); -} - -static void api_f128_call_unary(CfreeCg *g, const char *name, - CfreeCgTypeId ret, CfreeCgTypeId param) { - ApiSValue args[1]; - CfreeCgTypeId ps[1]; - args[0] = api_pop(g); - ps[0] = param; - api_runtime_call_values(g, name, ret, ps, 1, args); -} - -void cfree_cg_fp_binop(CfreeCg *g, CfreeCgFpBinOp op, uint32_t flags) { - (void)flags; - if (op == CFREE_CG_FP_REM) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); - return; - } - if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { - CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - const char *name = api_f128_binop_helper(op); - if (!name) - compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); - args[1] = api_pop(g); - args[0] = api_pop(g); - ps[0] = f128; - ps[1] = f128; - api_runtime_call_values(g, name, f128, ps, 2, args); - return; - } - api_cg_binop(g, api_map_fp_binop(op), 0); -} - -void cfree_cg_fp_unop(CfreeCg *g, CfreeCgFpUnOp op, uint32_t flags) { - (void)flags; - (void)op; - if (api_f128_stack_top(g, 0)) { - CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - args[1] = api_pop(g); - args[0] = api_make_f128_const(g, 0.0, f128); - ps[0] = f128; - ps[1] = f128; - api_runtime_call_values(g, "__subtf3", f128, ps, 2, args); - return; - } - api_cg_unop(g, UO_NEG, 0); -} - -void cfree_cg_fp_cmp(CfreeCg *g, CfreeCgFpCmpOp op) { - if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { - CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - const char *name = "__eqtf2"; - CmpOp cmp = CMP_EQ; - switch (op) { - case CFREE_CG_FP_OEQ: - case CFREE_CG_FP_UEQ: - name = "__eqtf2"; - cmp = CMP_EQ; - break; - case CFREE_CG_FP_ONE: - 
case CFREE_CG_FP_UNE: - name = "__netf2"; - cmp = CMP_NE; - break; - case CFREE_CG_FP_OLT: - case CFREE_CG_FP_ULT: - name = "__lttf2"; - cmp = CMP_LT_S; - break; - case CFREE_CG_FP_OLE: - case CFREE_CG_FP_ULE: - name = "__letf2"; - cmp = CMP_LE_S; - break; - case CFREE_CG_FP_OGT: - case CFREE_CG_FP_UGT: - name = "__gttf2"; - cmp = CMP_GT_S; - break; - case CFREE_CG_FP_OGE: - case CFREE_CG_FP_UGE: - name = "__getf2"; - cmp = CMP_GE_S; - break; - } - args[1] = api_pop(g); - args[0] = api_pop(g); - ps[0] = f128; - ps[1] = f128; - api_runtime_call_values(g, name, i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, cmp); - return; - } - api_cg_cmp(g, api_map_fp_cmp(op)); -} - -void cfree_cg_sext(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_SEXT); -} - -void cfree_cg_zext(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_ZEXT); -} - -void cfree_cg_trunc(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_TRUNC); -} - -void cfree_cg_ptr_to_int(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_BITCAST); -} - -void cfree_cg_int_to_ptr(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_BITCAST); -} - -void cfree_cg_bitcast(CfreeCg *g, CfreeCgTypeId dst) { - api_cg_convert_kind(g, dst, CV_BITCAST); -} - -void cfree_cg_fpext(CfreeCg *g, CfreeCgTypeId dst) { - CfreeCgTypeId dty = resolve_type(g->c, dst); - if (api_is_f128_type(g->c, dty)) { - ApiSValue v = api_pop(g); - CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); - const char *name = sty == builtin_id(CFREE_CG_BUILTIN_F32) - ? 
"__extendsftf2" - : "__extenddftf2"; - api_push(g, v); - api_f128_call_unary(g, name, dty, sty); - return; - } - api_cg_convert_kind(g, dst, CV_FEXT); -} - -void cfree_cg_fptrunc(CfreeCg *g, CfreeCgTypeId dst) { - CfreeCgTypeId dty = resolve_type(g->c, dst); - if (api_f128_stack_top(g, 0)) { - ApiSValue v = api_pop(g); - CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); - const char *name = dty == builtin_id(CFREE_CG_BUILTIN_F32) - ? "__trunctfsf2" - : "__trunctfdf2"; - api_push(g, v); - api_f128_call_unary(g, name, dty, f128); - return; - } - api_cg_convert_kind(g, dst, CV_FTRUNC); -} - -void cfree_cg_sint_to_float(CfreeCg *g, CfreeCgTypeId dst, - CfreeCgRounding rounding) { - (void)rounding; - if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { - ApiSValue v = api_pop(g); - CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); - u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); - CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) - : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) - : builtin_id(CFREE_CG_BUILTIN_I32)); - const char *name = sz > 8 ? "__floattitf" - : (sz > 4 ? "__floatditf" : "__floatsitf"); - api_push(g, v); - api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); - return; - } - api_cg_convert_kind(g, dst, CV_ITOF_S); -} - -void cfree_cg_uint_to_float(CfreeCg *g, CfreeCgTypeId dst, - CfreeCgRounding rounding) { - (void)rounding; - if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { - ApiSValue v = api_pop(g); - CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); - u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); - CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) - : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) - : builtin_id(CFREE_CG_BUILTIN_I32)); - const char *name = sz > 8 ? "__floatuntitf" - : (sz > 4 ? 
"__floatunditf" : "__floatunsitf"); - api_push(g, v); - api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); - return; - } - api_cg_convert_kind(g, dst, CV_ITOF_U); -} - -void cfree_cg_float_to_sint(CfreeCg *g, CfreeCgTypeId dst, - CfreeCgRounding rounding) { - (void)rounding; - if (api_f128_stack_top(g, 0)) { - CfreeCgTypeId dty = resolve_type(g->c, dst); - u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); - CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) - : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) - : builtin_id(CFREE_CG_BUILTIN_I32)); - const char *name = sz > 8 ? "__fixtfti" - : (sz > 4 ? "__fixtfdi" : "__fixtfsi"); - api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); - if (rty != dty) - api_cg_convert_kind(g, dty, CV_TRUNC); - return; - } - api_cg_convert_kind(g, dst, CV_FTOI_S); -} - -void cfree_cg_float_to_uint(CfreeCg *g, CfreeCgTypeId dst, - CfreeCgRounding rounding) { - (void)rounding; - if (api_f128_stack_top(g, 0)) { - CfreeCgTypeId dty = resolve_type(g->c, dst); - u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); - CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) - : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) - : builtin_id(CFREE_CG_BUILTIN_I32)); - const char *name = sz > 8 ? "__fixunstfti" - : (sz > 4 ? "__fixunstfdi" : "__fixunstfsi"); - api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); - if (rty != dty) - api_cg_convert_kind(g, dty, CV_TRUNC); - return; - } - api_cg_convert_kind(g, dst, CV_FTOI_U); -} - -/* ============================================================ - * Intrinsics (stub) - * ============================================================ */ - -static IntrinKind api_map_intrinsic(CfreeCg *g, CfreeCgIntrinsic intrin, - CfreeCgTypeId result_type) { - u32 size = result_type ? 
abi_cg_sizeof(g->c->abi, result_type) : 0; - switch (intrin) { - case CFREE_CG_INTRIN_TRAP: - return INTRIN_TRAP; - case CFREE_CG_INTRIN_CLZ: - return INTRIN_CLZ; - case CFREE_CG_INTRIN_CTZ: - return INTRIN_CTZ; - case CFREE_CG_INTRIN_POPCOUNT: - return INTRIN_POPCOUNT; - case CFREE_CG_INTRIN_BSWAP: - if (size <= 2) - return INTRIN_BSWAP16; - if (size <= 4) - return INTRIN_BSWAP32; - return INTRIN_BSWAP64; - case CFREE_CG_INTRIN_SETJMP: - return INTRIN_SETJMP; - case CFREE_CG_INTRIN_LONGJMP: - return INTRIN_LONGJMP; - case CFREE_CG_INTRIN_SADD_OVERFLOW: - return INTRIN_SADD_OVERFLOW; - case CFREE_CG_INTRIN_UADD_OVERFLOW: - return INTRIN_UADD_OVERFLOW; - case CFREE_CG_INTRIN_SSUB_OVERFLOW: - return INTRIN_SSUB_OVERFLOW; - case CFREE_CG_INTRIN_USUB_OVERFLOW: - return INTRIN_USUB_OVERFLOW; - case CFREE_CG_INTRIN_SMUL_OVERFLOW: - return INTRIN_SMUL_OVERFLOW; - case CFREE_CG_INTRIN_UMUL_OVERFLOW: - return INTRIN_UMUL_OVERFLOW; - case CFREE_CG_INTRIN_PREFETCH: - return INTRIN_PREFETCH; - case CFREE_CG_INTRIN_EXPECT: - return INTRIN_EXPECT; - case CFREE_CG_INTRIN_ASSUME_ALIGNED: - return INTRIN_ASSUME_ALIGNED; - case CFREE_CG_INTRIN_FMA: - case CFREE_CG_INTRIN_SYSCALL: - case CFREE_CG_INTRIN_IRQ_SAVE: - case CFREE_CG_INTRIN_IRQ_RESTORE: - case CFREE_CG_INTRIN_IRQ_DISABLE: - case CFREE_CG_INTRIN_IRQ_ENABLE: - case CFREE_CG_INTRIN_DMB: - case CFREE_CG_INTRIN_DSB: - case CFREE_CG_INTRIN_ISB: - case CFREE_CG_INTRIN_DCACHE_CLEAN: - case CFREE_CG_INTRIN_DCACHE_INVALIDATE: - case CFREE_CG_INTRIN_DCACHE_CLEAN_INVALIDATE: - case CFREE_CG_INTRIN_ICACHE_INVALIDATE: - case CFREE_CG_INTRIN_CPU_NOP: - case CFREE_CG_INTRIN_CPU_YIELD: - case CFREE_CG_INTRIN_WFI: - case CFREE_CG_INTRIN_WFE: - case CFREE_CG_INTRIN_SEV: - case CFREE_CG_INTRIN_CORO_SWITCH: - return INTRIN_NONE; - } - return INTRIN_NONE; -} - -static int api_intrinsic_is_void(CfreeCgIntrinsic intrin) { - return intrin == CFREE_CG_INTRIN_TRAP || intrin == CFREE_CG_INTRIN_LONGJMP || - intrin == CFREE_CG_INTRIN_PREFETCH; -} - 
-static int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin) { - return intrin == CFREE_CG_INTRIN_SADD_OVERFLOW || - intrin == CFREE_CG_INTRIN_UADD_OVERFLOW || - intrin == CFREE_CG_INTRIN_SSUB_OVERFLOW || - intrin == CFREE_CG_INTRIN_USUB_OVERFLOW || - intrin == CFREE_CG_INTRIN_SMUL_OVERFLOW || - intrin == CFREE_CG_INTRIN_UMUL_OVERFLOW; -} - -void cfree_cg_intrinsic(CfreeCg *g, CfreeCgIntrinsic intrin, uint32_t nargs, - CfreeCgTypeId result_type) { - CGTarget *T; - CfreeCgTypeId rty; - CfreeCgTypeId int_ty; - IntrinKind kind; - ApiSValue *svs; - Operand *args; - Operand dsts[2]; - u32 ndst = 0; - Heap *h; - if (!g) - return; - T = g->target; - h = g->c->ctx->heap; - rty = resolve_type(g->c, result_type); - int_ty = builtin_id(CFREE_CG_BUILTIN_I32); - kind = api_map_intrinsic(g, intrin, result_type); - if (kind == INTRIN_NONE) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported intrinsic"); - return; - } - - svs = NULL; - args = NULL; - if (nargs) { - svs = (ApiSValue *)h->alloc(h, sizeof(*svs) * nargs, _Alignof(ApiSValue)); - args = (Operand *)h->alloc(h, sizeof(*args) * nargs, _Alignof(Operand)); - memset(args, 0, sizeof(*args) * nargs); - for (u32 i = 0; i < nargs; ++i) { - u32 idx = nargs - 1u - i; - CfreeCgTypeId aty; - svs[idx] = api_pop(g); - aty = api_sv_type(&svs[idx]); - if (api_sv_op_is(&svs[idx], OPK_IMM) && - (intrin == CFREE_CG_INTRIN_EXPECT || - intrin == CFREE_CG_INTRIN_ASSUME_ALIGNED || - intrin == CFREE_CG_INTRIN_PREFETCH)) { - args[idx] = svs[idx].op; - } else { - args[idx] = api_force_reg(g, &svs[idx], aty); - } - } - } - - if (api_intrinsic_is_overflow(intrin)) { - CfreeCgTypeId vty = rty ? rty : (nargs ? 
api_sv_type(&svs[0]) : int_ty); - CfreeCgTypeId bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); - Reg rr = api_alloc_reg_or_spill(g, api_type_class(vty), vty); - Reg ok = api_alloc_reg_or_spill(g, RC_INT, bool_ty); - dsts[0] = api_op_reg(rr, vty); - dsts[1] = api_op_reg(ok, bool_ty); - ndst = 2; - } else if (!api_intrinsic_is_void(intrin) && !cg_type_is_void(g->c, rty)) { - Reg rr = api_alloc_reg_or_spill(g, api_type_class(rty), rty); - dsts[0] = api_op_reg(rr, rty); - ndst = 1; - } - - T->intrinsic(T, kind, ndst ? dsts : NULL, ndst, args, nargs); - - for (u32 i = 0; i < nargs; ++i) - api_release(g, &svs[i]); - if (svs) - h->free(h, svs, sizeof(*svs) * nargs); - if (args) - h->free(h, args, sizeof(*args) * nargs); - - if (api_intrinsic_is_overflow(intrin)) { - api_push(g, api_make_sv(dsts[0], dsts[0].type)); - api_push(g, api_make_sv(dsts[1], dsts[1].type)); - } else if (ndst == 1) { - api_push(g, api_make_sv(dsts[0], rty)); - } -} - -/* ============================================================ - * Atomics (stub) - * ============================================================ */ - -static CfreeCgTypeId api_atomic_pointee(CfreeCg *g, CfreeCgTypeId pty, - const char *who) { - CfreeCgTypeId pointee = cg_type_pointee(g->c, pty); - if (!pointee) { - compiler_panic(g->c, g->cur_loc, "%s: operand is not a pointer", who); - return builtin_id(CFREE_CG_BUILTIN_I32); - } - return pointee; -} - -static MemAccess api_mem_for_atomic(CfreeCg *g, CfreeCgTypeId val_ty) { - MemAccess ma; - api_require_scalar_mem_type(g, "atomic memory access", val_ty); - if (api_mem_type_size(g, val_ty, "atomic memory access") > 8u) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: atomic memory access size exceeds 8 bytes"); - } - memset(&ma, 0, sizeof ma); - ma.type = val_ty; - ma.size = val_ty ? abi_cg_sizeof(g->c->abi, val_ty) : 0; - ma.align = val_ty ? 
abi_cg_alignof(g->c->abi, val_ty) : 0; - ma.flags = MF_ATOMIC; - ma.alias.kind = (u8)ALIAS_UNKNOWN; - return ma; -} - -int cfree_cg_atomic_is_legal(CfreeCompiler *c, CfreeCgMemAccess access, - CfreeCgMemOrder order) { - CfreeCgTypeId ty = resolve_type(c, access.type); - (void)order; - if (!ty) - return 0; - if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) - return 0; - return abi_cg_sizeof(c->abi, access.type) <= 8; -} - -int cfree_cg_atomic_is_lock_free(CfreeCompiler *c, CfreeCgMemAccess access) { - CfreeCgTypeId ty = resolve_type(c, access.type); - if (!ty) - return 0; - if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) - return 0; - return abi_cg_sizeof(c->abi, access.type) <= (u32)c->target.ptr_size; -} - -void cfree_cg_atomic_load(CfreeCg *g, CfreeCgMemAccess access, - CfreeCgMemOrder order) { - ApiSValue ptr; - CfreeCgTypeId pty, val_ty; - Operand addr, dst; - Reg rr; - if (!g) - return; - api_local_const_memory_boundary(g); - ptr = api_pop(g); - pty = api_sv_type(&ptr); - val_ty = resolve_type(g->c, access.type); - if (!val_ty) - val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_load"); - api_require_pointer_value(g, "atomic_load pointer", pty); - addr = api_force_reg(g, &ptr, pty); - rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); - dst = api_op_reg(rr, val_ty); - g->target->atomic_load(g->target, dst, addr, api_mem_for_atomic(g, val_ty), - api_map_mem_order(order)); - api_release(g, &ptr); - api_push(g, api_make_sv(dst, val_ty)); -} - -void cfree_cg_atomic_store(CfreeCg *g, CfreeCgMemAccess access, - CfreeCgMemOrder order) { - ApiSValue val, ptr; - CfreeCgTypeId pty, val_ty; - Operand addr, src; - if (!g) - return; - api_local_const_memory_boundary(g); - val = api_pop(g); - ptr = api_pop(g); - pty = api_sv_type(&ptr); - val_ty = resolve_type(g->c, access.type); - if (!val_ty) - val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_store"); - api_require_pointer_value(g, "atomic_store pointer", pty); - 
api_validate_memory_value(g, "atomic_store", val_ty, api_sv_type(&val)); - addr = api_force_reg(g, &ptr, pty); - src = api_sv_op_is_reg_or_imm(&val) - ? val.op - : api_force_reg(g, &val, val_ty); - g->target->atomic_store(g->target, addr, src, api_mem_for_atomic(g, val_ty), - api_map_mem_order(order)); - api_release(g, &val); - api_release(g, &ptr); -} - -void cfree_cg_atomic_rmw(CfreeCg *g, CfreeCgMemAccess access, - CfreeCgAtomicOp op, CfreeCgMemOrder order) { - ApiSValue val, ptr; - CfreeCgTypeId pty, val_ty; - Operand addr, vop, dst; - Reg rr; - if (!g) - return; - api_local_const_memory_boundary(g); - val = api_pop(g); - ptr = api_pop(g); - pty = api_sv_type(&ptr); - val_ty = resolve_type(g->c, access.type); - if (!val_ty) - val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_rmw"); - api_require_pointer_value(g, "atomic_rmw pointer", pty); - api_validate_memory_value(g, "atomic_rmw", val_ty, api_sv_type(&val)); - addr = api_force_reg(g, &ptr, pty); - vop = api_sv_op_is_reg_or_imm(&val) ? 
val.op : api_force_reg(g, &val, val_ty); - rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); - dst = api_op_reg(rr, val_ty); - g->target->atomic_rmw(g->target, api_map_atomic_op(op), dst, addr, vop, - api_mem_for_atomic(g, val_ty), - api_map_mem_order(order)); - api_release(g, &val); - api_release(g, &ptr); - api_push(g, api_make_sv(dst, val_ty)); -} - -static int api_take_dead_owned_reg(ApiSValue *sv, u8 cls, Reg avoid, Reg *out) { - Reg r; - if (sv->res != RES_REG || sv->pinned) - return 0; - if (api_class_of_sv(sv) != cls) - return 0; - r = api_reg_of_sv(sv); - if (r == (Reg)REG_NONE || r == avoid) - return 0; - sv->res = RES_INHERENT; - *out = r; - return 1; -} - -static Reg api_alloc_dead_input_or_spill(CfreeCg *g, ApiSValue *a, - ApiSValue *b, ApiSValue *c, u8 cls, - CfreeCgTypeId ty, Reg avoid) { - Reg r; - if (api_take_dead_owned_reg(a, cls, avoid, &r)) - return r; - if (api_take_dead_owned_reg(b, cls, avoid, &r)) - return r; - if (api_take_dead_owned_reg(c, cls, avoid, &r)) - return r; - return api_alloc_reg_or_spill(g, cls, ty); -} - -void cfree_cg_atomic_cmpxchg(CfreeCg *g, CfreeCgMemAccess access, - CfreeCgMemOrder success, CfreeCgMemOrder failure, - int weak) { - ApiSValue desired, expected, ptr; - CfreeCgTypeId pty, val_ty, bool_ty; - Operand addr, exp_op, des_op, prior, ok; - Reg pr, kr; - if (!g) - return; - api_local_const_memory_boundary(g); - (void)weak; - desired = api_pop(g); - expected = api_pop(g); - ptr = api_pop(g); - pty = api_sv_type(&ptr); - val_ty = resolve_type(g->c, access.type); - if (!val_ty) - val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_cmpxchg"); - api_require_pointer_value(g, "atomic_cmpxchg pointer", pty); - api_validate_memory_value(g, "atomic_cmpxchg expected", val_ty, - api_sv_type(&expected)); - api_validate_memory_value(g, "atomic_cmpxchg desired", val_ty, - api_sv_type(&desired)); - addr = api_force_reg(g, &ptr, pty); - exp_op = api_sv_op_is_reg_or_imm(&expected) - ? 
expected.op - : api_force_reg(g, &expected, val_ty); - des_op = api_sv_op_is_reg_or_imm(&desired) - ? desired.op - : api_force_reg(g, &desired, val_ty); - bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); - pr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, - api_type_class(val_ty), val_ty, - (Reg)REG_NONE); - kr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, RC_INT, - bool_ty, pr); - prior = api_op_reg(pr, val_ty); - ok = api_op_reg(kr, bool_ty); - g->target->atomic_cas(g->target, prior, ok, addr, exp_op, des_op, - api_mem_for_atomic(g, val_ty), - api_map_mem_order(success), api_map_mem_order(failure)); - api_release(g, &desired); - api_release(g, &expected); - api_release(g, &ptr); - api_push(g, api_make_sv(prior, val_ty)); - api_push(g, api_make_sv(ok, bool_ty)); -} - -void cfree_cg_atomic_fence(CfreeCg *g, CfreeCgMemOrder order) { - if (!g) - return; - api_local_const_memory_boundary(g); - g->target->fence(g->target, api_map_mem_order(order)); -} - -/* ============================================================ - * Inline asm (stub) - * ============================================================ */ - -static const char *api_sym_cstr(CfreeCg *g, CfreeSym sym) { - size_t len; - const char *s; - if (!sym) - return ""; - s = pool_str(g->c->global, (Sym)sym, &len); - (void)len; - return s ? 
s : ""; -} - -static int api_asm_parse_match_index(const char *s) { - int n; - if (!s || s[0] < '0' || s[0] > '9') - return -1; - n = 0; - for (const char *p = s; *p >= '0' && *p <= '9'; ++p) { - n = n * 10 + (*p - '0'); - } - return n; -} - -static const char *api_asm_constraint_body(const char *s) { - if (!s) - return ""; - if (s[0] == '=' && s[1] == '&') - return s + 2; - if (s[0] == '=' || s[0] == '+' || s[0] == '&') - return s + 1; - return s; -} - -static int api_asm_is_early_clobber(const char *s) { - if (!s) - return 0; - return (s[0] == '=' && s[1] == '&') || s[0] == '&'; -} - -static void api_asm_spill_sv(CfreeCg *g, ApiSValue *sv, Reg phys, - RegClass cls) { - FrameSlot slot = api_take_spill_slot(g, cls); - Operand victim_reg = api_op_reg(phys, api_owned_reg_type(g, sv)); - g->target->spill_reg(g->target, victim_reg, slot, api_mem_for_spill(g, sv)); - api_free_reg(g, phys, cls); - sv->spill_slot = slot; - sv->res = RES_SPILLED; - api_set_owned_reg(sv, (Reg)REG_NONE); -} - -void cfree_cg_inline_asm(CfreeCg *g, CfreeCgInlineAsm asm_block) { - static const char *const match_strs[10] = {"0", "1", "2", "3", "4", - "5", "6", "7", "8", "9"}; - CGTarget *T; - Heap *h; - CfreeCgTypeId fallback_ty; - AsmConstraint *outs; - AsmConstraint *ins; - Sym *clobs; - ApiSValue *in_svs; - Operand *in_ops; - Operand *out_ops; - u8 *out_reg_owned; - const char *tmpl_str; - Sym sym_memory; - int has_memory_clobber; - uint32_t ninout; - uint32_t total_inputs; - CfreeSym tmpl = asm_block.tmpl; - const CfreeCgAsmOperand *outputs = asm_block.outputs; - uint32_t noutputs = asm_block.noutputs; - const CfreeCgAsmOperand *inputs = asm_block.inputs; - uint32_t ninputs = asm_block.ninputs; - const CfreeSym *clobbers = asm_block.clobbers; - uint32_t nclobbers = asm_block.nclobbers; - (void)asm_block.flags; - (void)asm_block.clobber_abi_sets; - if (!g) - return; - api_local_const_memory_boundary(g); - T = g->target; - h = g->c->ctx->heap; - fallback_ty = builtin_id(CFREE_CG_BUILTIN_I64); 
- tmpl_str = api_sym_cstr(g, tmpl); - ninout = 0; - - outs = NULL; - ins = NULL; - clobs = NULL; - in_svs = NULL; - in_ops = NULL; - out_ops = NULL; - out_reg_owned = NULL; - - if (noutputs) { - outs = (AsmConstraint *)h->alloc(h, sizeof(*outs) * noutputs, - _Alignof(AsmConstraint)); - memset(outs, 0, sizeof(*outs) * noutputs); - for (u32 i = 0; i < noutputs; ++i) { - outs[i].str = api_sym_cstr(g, outputs[i].constraint); - outs[i].name = (Sym)outputs[i].name; - outs[i].type = resolve_type(g->c, outputs[i].type); - outs[i].dir = (u8)api_map_asm_dir(outputs[i].dir); - if (!outs[i].type) - outs[i].type = fallback_ty; - if (outs[i].dir == ASM_INOUT) { - if (i >= 10) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm inout output index exceeds matching " - "constraint range"); - } - ninout++; - } - } - out_ops = - (Operand *)h->alloc(h, sizeof(*out_ops) * noutputs, _Alignof(Operand)); - memset(out_ops, 0, sizeof(*out_ops) * noutputs); - out_reg_owned = (u8 *)h->alloc(h, noutputs, 1); - memset(out_reg_owned, 0, noutputs); - } - - total_inputs = ninputs + ninout; - if (total_inputs) { - uint32_t inout_index; - ins = (AsmConstraint *)h->alloc(h, sizeof(*ins) * total_inputs, - _Alignof(AsmConstraint)); - memset(ins, 0, sizeof(*ins) * total_inputs); - in_svs = (ApiSValue *)h->alloc(h, sizeof(*in_svs) * total_inputs, - _Alignof(ApiSValue)); - in_ops = (Operand *)h->alloc(h, sizeof(*in_ops) * total_inputs, - _Alignof(Operand)); - memset(in_ops, 0, sizeof(*in_ops) * total_inputs); - for (u32 i = 0; i < ninputs; ++i) { - ins[i].str = api_sym_cstr(g, inputs[i].constraint); - ins[i].name = (Sym)inputs[i].name; - ins[i].type = resolve_type(g->c, inputs[i].type); - ins[i].dir = (u8)api_map_asm_dir(inputs[i].dir); - if (!ins[i].type) - ins[i].type = fallback_ty; - } - inout_index = ninputs; - for (u32 i = 0; i < noutputs; ++i) { - if (outs[i].dir != ASM_INOUT) - continue; - ins[inout_index].str = match_strs[i]; - ins[inout_index].type = outs[i].type ? 
outs[i].type : fallback_ty; - ins[inout_index].dir = ASM_IN; - inout_index++; - } - for (u32 i = 0; i < total_inputs; ++i) { - u32 idx = total_inputs - 1u - i; - in_svs[idx] = api_pop(g); - api_ensure_reg(g, &in_svs[idx]); - } - } - - if (nclobbers) { - clobs = (Sym *)h->alloc(h, sizeof(*clobs) * nclobbers, _Alignof(Sym)); - for (u32 i = 0; i < nclobbers; ++i) clobs[i] = (Sym)clobbers[i]; - } - - for (u32 i = 0; i < noutputs; ++i) { - const char *body = api_asm_constraint_body(outs[i].str); - if (api_asm_is_early_clobber(outs[i].str)) - continue; - if (body[0] == 'r') { - CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; - Reg r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); - out_ops[i] = api_op_reg(r, oty); - out_reg_owned[i] = 1; - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: unsupported asm output constraint"); - } - } - - for (u32 i = 0; i < total_inputs; ++i) { - const char *s = ins[i].str ? ins[i].str : ""; - int matched = api_asm_parse_match_index(s); - CfreeCgTypeId ity = api_sv_type(&in_svs[i]); - if (matched >= 0) { - Operand bound; - if ((u32)matched >= noutputs) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm matching constraint out of range"); - continue; - } - if (api_asm_is_early_clobber(outs[matched].str)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm matching input uses early-clobber output"); - continue; - } - bound = out_ops[matched]; - if (api_sv_op_is(&in_svs[i], OPK_REG) && - in_svs[i].op.v.reg == bound.v.reg) { - } else if (api_sv_op_is(&in_svs[i], OPK_IMM)) { - T->load_imm(T, bound, in_svs[i].op.v.imm); - } else { - Operand src = api_force_reg(g, &in_svs[i], ity); - T->copy(T, bound, src); - } - in_ops[i] = bound; - } else if (s[0] == 'r') { - in_ops[i] = api_force_reg(g, &in_svs[i], ity); - } else if (s[0] == 'i') { - if (!api_sv_op_is(&in_svs[i], OPK_IMM)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm 'i' constraint requires an immediate"); - } - in_ops[i] = in_svs[i].op; - } else if 
(s[0] == 'm') { - if (api_sv_op_is(&in_svs[i], OPK_INDIRECT)) { - in_ops[i] = in_svs[i].op; - } else if (api_is_lvalue_sv(&in_svs[i])) { - CfreeCgTypeId pty = - cg_type_ptr_to(g->c, ity ? ity : builtin_id(CFREE_CG_BUILTIN_VOID)); - Operand dst = api_lvalue_addr(g, &in_svs[i], pty); - in_svs[i].op = api_op_indirect(dst.v.reg, 0, ity); - in_svs[i].res = RES_REG; - in_ops[i] = in_svs[i].op; - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm 'm' constraint requires an lvalue"); - } - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: unsupported asm input constraint"); - } - } - - for (u32 i = 0; i < noutputs; ++i) { - const char *body; - CfreeCgTypeId oty; - Reg r; - if (!api_asm_is_early_clobber(outs[i].str)) - continue; - body = api_asm_constraint_body(outs[i].str); - if (body[0] != 'r') { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: unsupported early-clobber asm output"); - continue; - } - oty = outs[i].type ? outs[i].type : fallback_ty; - r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); - for (u32 k = 0; k < total_inputs; ++k) { - if ((in_ops[k].kind == OPK_REG && in_ops[k].v.reg == r) || - (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm early-clobber register collision"); - } - } - out_ops[i] = api_op_reg(r, oty); - out_reg_owned[i] = 1; - } - - sym_memory = pool_intern_cstr(g->c->global, "memory"); - has_memory_clobber = 0; - for (u32 i = 0; i < nclobbers; ++i) { - if (clobs[i] == sym_memory) - has_memory_clobber = 1; - } - if (has_memory_clobber) { - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue *sv = &g->stack[i]; - Reg phys; - RegClass cls; - if (sv->res != RES_REG) - continue; - phys = api_reg_of_sv(sv); - cls = (RegClass)api_class_of_sv(sv); - api_asm_spill_sv(g, sv, phys, cls); - } - } else if (T->resolve_reg_name) { - for (u32 i = 0; i < nclobbers; ++i) { - Reg phys; - RegClass cls; - if (T->resolve_reg_name(T, clobs[i], &phys, &cls) != 0) - continue; - for (u32 
k = 0; k < noutputs; ++k) { - if (out_ops[k].kind == OPK_REG && out_ops[k].cls == cls && - (Reg)out_ops[k].v.reg == phys) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm clobber overlaps output"); - } - } - for (u32 k = 0; k < total_inputs; ++k) { - if (in_ops[k].kind == OPK_REG && in_ops[k].cls == cls && - (Reg)in_ops[k].v.reg == phys) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm clobber overlaps input"); - } - } - for (u32 k = 0; k < g->sp; ++k) { - ApiSValue *sv = &g->stack[k]; - if (sv->res != RES_REG) - continue; - if (api_class_of_sv(sv) != (u8)cls) - continue; - if ((Reg)api_reg_of_sv(sv) != phys) - continue; - api_asm_spill_sv(g, sv, phys, cls); - } - } - } - - T->asm_block(T, tmpl_str, outs, noutputs, out_ops, ins, total_inputs, in_ops, - clobs, nclobbers); - - for (u32 i = 0; i < total_inputs; ++i) api_release(g, &in_svs[i]); - for (u32 i = 0; i < noutputs; ++i) { - CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; - ApiSValue sv = api_make_sv(out_ops[i], oty); - if (!out_reg_owned[i] && sv.res == RES_REG) - sv.res = RES_INHERENT; - api_push(g, sv); - } - - if (outs) - h->free(h, outs, sizeof(*outs) * noutputs); - if (ins) - h->free(h, ins, sizeof(*ins) * total_inputs); - if (clobs) - h->free(h, clobs, sizeof(*clobs) * nclobbers); - if (in_svs) - h->free(h, in_svs, sizeof(*in_svs) * total_inputs); - if (in_ops) - h->free(h, in_ops, sizeof(*in_ops) * total_inputs); - if (out_ops) - h->free(h, out_ops, sizeof(*out_ops) * noutputs); - if (out_reg_owned) - h->free(h, out_reg_owned, noutputs); -} - -void cfree_cg_file_scope_asm(CfreeCg *g, const char *asm_source, - size_t asm_source_len) { - AsmLexer *lex; - if (!g || !asm_source) - return; - api_local_const_memory_boundary(g); - lex = asm_lex_open_mem(g->c, "<file-scope asm>", asm_source, asm_source_len); - if (!lex) - compiler_panic(g->c, api_no_loc(), "CfreeCg: file-scope asm out of memory"); - asm_parse(g->c, lex, g->mc); - asm_lex_close(lex); -} - -/* 
============================================================ - * Labels / branches - * ============================================================ */ - -CfreeCgLabel cfree_cg_label_new(CfreeCg *g) { - if (!g) - return CFREE_CG_LABEL_NONE; - return (CfreeCgLabel)g->target->label_new(g->target); -} - -void cfree_cg_label_place(CfreeCg *g, CfreeCgLabel label) { - if (!g) - return; - api_local_const_control_boundary(g); - g->target->label_place(g->target, (Label)label); -} - -void cfree_cg_jump(CfreeCg *g, CfreeCgLabel label) { - if (!g) - return; - api_local_const_control_boundary(g); - g->target->jump(g->target, (Label)label); -} - -static void api_branch_if(CfreeCg *g, ApiSValue *v, int branch_when_true, - Label label) { - CGTarget *T; - CfreeCgTypeId ty; - if (!g) - return; - api_local_const_control_boundary(g); - T = g->target; - ty = v->type ? v->type : builtin_id(CFREE_CG_BUILTIN_I32); - if (v->op.kind == OPK_IMM && v->kind == SV_OPERAND) { - if ((v->op.v.imm != 0) == !!branch_when_true) - T->jump(T, label); - api_release(g, v); - return; - } - if (v->kind == SV_CMP) { - CmpOp op = - branch_when_true ? v->delayed.cmp.op : api_invert_cmp(v->delayed.cmp.op); - T->cmp_branch(T, op, v->delayed.cmp.a, v->delayed.cmp.b, label); - api_release(g, v); - return; - } - if (api_is_i128_type(g->c, ty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - ApiSValue cmp; - args[0] = *v; - args[1] = api_make_sv(api_op_imm(0, i128), i128); - api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); - cmp = api_pop(g); - api_branch_if(g, &cmp, branch_when_true, label); - return; - } - { - Operand a = api_force_reg(g, v, ty); - Operand zero = api_op_imm(0, ty); - T->cmp_branch(T, branch_when_true ? 
CMP_NE : CMP_EQ, a, zero, label); - api_release(g, v); - } -} - -void cfree_cg_branch_true(CfreeCg *g, CfreeCgLabel label) { - ApiSValue v; - if (!g) - return; - v = api_pop(g); - api_branch_if(g, &v, 1, (Label)label); -} - -void cfree_cg_branch_false(CfreeCg *g, CfreeCgLabel label) { - ApiSValue v; - if (!g) - return; - v = api_pop(g); - api_branch_if(g, &v, 0, (Label)label); -} - -void cfree_cg_switch(CfreeCg *g, CfreeCgSwitch sw) { - ApiSValue selector; - CfreeCgTypeId ty; - Operand sel; - if (!g) - return; - if (g->sp == 0) - return; - api_local_const_control_boundary(g); - selector = api_pop(g); - ty = resolve_type(g->c, sw.selector_type); - if (!ty) - ty = api_sv_type(&selector); - sel = api_force_reg_unless_imm(g, &selector, ty); - for (u32 i = 0; i < sw.ncases; ++i) { - Operand imm = api_op_imm((i64)sw.cases[i].value, ty); - g->target->cmp_branch(g->target, CMP_EQ, sel, imm, - (Label)sw.cases[i].label); - } - if (sw.default_label != CFREE_CG_LABEL_NONE) { - g->target->jump(g->target, (Label)sw.default_label); - } - api_release(g, &selector); -} - -void cfree_cg_push_label_addr(CfreeCg *g, CfreeCgLabel label, - CfreeCgTypeId ptr_type) { - CfreeCgTypeId ty; - if (!g) - return; - ty = resolve_type(g->c, ptr_type); - if (!ty) - return; - api_push(g, api_make_sv(api_op_imm((i64)label, ty), ty)); -} - -void cfree_cg_computed_goto(CfreeCg *g, const CfreeCgLabel *valid_targets, - uint32_t ntargets) { - ApiSValue target; - CfreeCgTypeId target_ty; - Operand target_op; - if (!g) - return; - api_local_const_control_boundary(g); - target = api_pop(g); - target_ty = api_sv_type(&target); - target_op = api_force_reg(g, &target, target_ty); - for (uint32_t i = 0; i < ntargets; ++i) { - Operand imm = api_op_imm((i64)valid_targets[i], target_ty); - g->target->cmp_branch(g->target, CMP_EQ, target_op, imm, - (Label)valid_targets[i]); - } - api_release(g, &target); - g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); -} - -void cfree_cg_unreachable(CfreeCg 
*g) { - if (!g) - return; - api_local_const_control_boundary(g); - g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); -} - -/* ============================================================ - * Scopes / structured control flow - * ============================================================ */ - -static CfreeCgScope api_scope_handle(u32 idx, u32 generation) { - return (CfreeCgScope)((generation << 8) | ((idx + 1u) & 0xffu)); -} - -static ApiCgScope *api_scope_from_handle(CfreeCg *g, CfreeCgScope scope, - int require_top, const char *who) { - u32 slot; - u32 generation; - ApiCgScope *s; - if (!g || scope == 0) - return NULL; - slot = ((u32)scope & 0xffu); - generation = ((u32)scope >> 8); - if (slot == 0 || slot > API_CG_MAX_SCOPES) { - compiler_panic(g->c, g->cur_loc, "%s: invalid scope handle", who); - return NULL; - } - slot--; - if (slot >= g->nscopes) { - compiler_panic(g->c, g->cur_loc, "%s: stale scope handle", who); - return NULL; - } - if (require_top && slot + 1u != g->nscopes) { - compiler_panic(g->c, g->cur_loc, "%s: non-LIFO scope end", who); - return NULL; - } - s = &g->scopes[slot]; - if (!s->active || s->generation != generation) { - compiler_panic(g->c, g->cur_loc, "%s: stale scope handle", who); - return NULL; - } - return s; -} - -static int api_scope_has_result(const ApiCgScope *s) { - return s->result_type != CFREE_CG_TYPE_NONE; -} - -static void api_scope_store_result(CfreeCg *g, ApiCgScope *s, - ApiSValue *result) { - Operand dst; - Operand src; - if (!api_scope_has_result(s)) - return; - dst = api_op_local(s->result_slot, s->result_type); - src = api_sv_op_is_reg_or_imm(result) - ? 
result->op - : api_force_reg(g, result, s->result_type); - g->target->store(g->target, dst, src, - api_mem_for_lvalue(g, &dst, s->result_type)); - api_release(g, result); -} - -static void api_scope_push_result(CfreeCg *g, ApiCgScope *s) { - Operand dst; - Operand src; - Reg r; - if (!api_scope_has_result(s)) - return; - r = api_alloc_reg_or_spill(g, api_type_class(s->result_type), s->result_type); - dst = api_op_reg(r, s->result_type); - src = api_op_local(s->result_slot, s->result_type); - g->target->load(g->target, dst, src, - api_mem_for_lvalue(g, &src, s->result_type)); - api_push(g, api_make_sv(dst, s->result_type)); -} - -CfreeCgScope cfree_cg_scope_begin(CfreeCg *g, CfreeCgTypeId result_type) { - Label break_lbl, cont_lbl; - CGScopeDesc d; - ApiCgScope *s; - CGScope target_scope; - u32 idx; - if (!g) - return 0; - break_lbl = g->target->label_new(g->target); - cont_lbl = g->target->label_new(g->target); - api_local_const_control_boundary(g); - g->target->label_place(g->target, cont_lbl); - - if (g->nscopes >= API_CG_MAX_SCOPES) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: too many nested scopes"); - return 0; - } - idx = g->nscopes; - s = &g->scopes[idx]; - s->break_lbl = break_lbl; - s->continue_lbl = cont_lbl; - s->result_type = resolve_type(g->c, result_type); - s->generation = ++g->scope_generation; - if (s->generation == 0) - s->generation = ++g->scope_generation; - s->active = 1; - g->nscopes++; - - memset(&d, 0, sizeof d); - d.kind = (u8)SCOPE_LOOP; - d.break_label = break_lbl; - d.continue_label = cont_lbl; - d.result_type = s->result_type; - target_scope = g->target->scope_begin(g->target, &d); - s->target_scope = target_scope; - s->result_slot = FRAME_SLOT_NONE; - if (api_scope_has_result(s)) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = s->result_type; - fsd.size = abi_cg_sizeof(g->c->abi, result_type); - fsd.align = abi_cg_alignof(g->c->abi, result_type); - fsd.kind = FS_LOCAL; - s->result_slot = 
g->target->frame_slot(g->target, &fsd); - } - - return api_scope_handle(idx, s->generation); -} - -void cfree_cg_scope_end(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s = api_scope_from_handle(g, scope, 1, "CfreeCg: scope_end"); - if (!s) - return; - if (api_scope_has_result(s)) { - ApiSValue result = api_pop(g); - api_scope_store_result(g, s, &result); - } - api_local_const_control_boundary(g); - g->target->label_place(g->target, s->break_lbl); - g->target->scope_end(g->target, s->target_scope); - api_scope_push_result(g, s); - s->active = 0; - g->nscopes--; -} - -void cfree_cg_break(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s = api_scope_from_handle(g, scope, 0, "CfreeCg: break"); - if (!s) - return; - if (api_scope_has_result(s)) { - ApiSValue result = api_pop(g); - api_scope_store_result(g, s, &result); - } - api_local_const_control_boundary(g); - g->target->jump(g->target, s->break_lbl); -} - -void cfree_cg_break_true(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s; - ApiSValue cond; - if (!g || scope == 0) - return; - s = api_scope_from_handle(g, scope, 0, "CfreeCg: break_true"); - if (!s) - return; - cond = api_pop(g); - - if (api_scope_has_result(s)) { - ApiSValue result = api_pop(g); - if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { - if (cond.op.v.imm != 0) { - api_scope_store_result(g, s, &result); - api_local_const_control_boundary(g); - g->target->jump(g->target, s->break_lbl); - } else { - api_release(g, &result); - } - api_release(g, &cond); - } else { - Label skip = g->target->label_new(g->target); - api_branch_if(g, &cond, 0, skip); - api_scope_store_result(g, s, &result); - api_local_const_control_boundary(g); - g->target->jump(g->target, s->break_lbl); - api_local_const_control_boundary(g); - g->target->label_place(g->target, skip); - } - } else { - api_branch_if(g, &cond, 1, s->break_lbl); - } -} - -void cfree_cg_break_false(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s; - ApiSValue cond; - if (!g || scope == 0) - 
return; - s = api_scope_from_handle(g, scope, 0, "CfreeCg: break_false"); - if (!s) - return; - cond = api_pop(g); - - if (api_scope_has_result(s)) { - ApiSValue result = api_pop(g); - if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { - if (cond.op.v.imm == 0) { - api_scope_store_result(g, s, &result); - api_local_const_control_boundary(g); - g->target->jump(g->target, s->break_lbl); - } else { - api_release(g, &result); - } - api_release(g, &cond); - } else { - Label skip = g->target->label_new(g->target); - api_branch_if(g, &cond, 1, skip); - api_scope_store_result(g, s, &result); - api_local_const_control_boundary(g); - g->target->jump(g->target, s->break_lbl); - api_local_const_control_boundary(g); - g->target->label_place(g->target, skip); - } - } else { - api_branch_if(g, &cond, 0, s->break_lbl); - } -} - -void cfree_cg_continue(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue"); - if (!s) - return; - api_local_const_control_boundary(g); - g->target->jump(g->target, s->continue_lbl); -} - -void cfree_cg_continue_true(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s; - ApiSValue v; - if (!g || scope == 0) - return; - s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue_true"); - if (!s) - return; - v = api_pop(g); - api_branch_if(g, &v, 1, s->continue_lbl); -} - -void cfree_cg_continue_false(CfreeCg *g, CfreeCgScope scope) { - ApiCgScope *s; - ApiSValue v; - if (!g || scope == 0) - return; - s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue_false"); - if (!s) - return; - v = api_pop(g); - api_branch_if(g, &v, 0, s->continue_lbl); -} - -/* ============================================================ - * Dynamic stack allocation / variadics (stubs) - * ============================================================ */ - -void cfree_cg_alloca(CfreeCg *g, uint32_t align, - CfreeCgTypeId result_ptr_type) { - ApiSValue sz; - CGTarget *T; - CfreeCgTypeId pty; - Operand sz_op; - Reg rr; - 
Operand dst; - if (!g) - return; - T = g->target; - sz = api_pop(g); - pty = resolve_type(g->c, result_ptr_type); - if (!pty) - pty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); - sz_op = - api_sv_op_is(&sz, OPK_IMM) ? sz.op : api_force_reg(g, &sz, api_sv_type(&sz)); - rr = api_alloc_reg_or_spill(g, RC_INT, pty); - dst = api_op_reg(rr, pty); - T->alloca_(T, dst, sz_op, align ? align : 16); - api_release(g, &sz); - api_push(g, api_make_sv(dst, pty)); -} - -void cfree_cg_vararg_start(CfreeCg *g) { - ApiSValue ap; - CGTarget *T; - Operand ap_op; - if (!g) - return; - T = g->target; - ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); - T->va_start_(T, ap_op); - api_release(g, &ap); -} - -void cfree_cg_vararg_next(CfreeCg *g, CfreeCgTypeId type) { - ApiSValue ap; - CGTarget *T; - CfreeCgTypeId ty; - Operand ap_op; - Reg rr; - Operand dst; - if (!g) - return; - T = g->target; - ty = resolve_type(g->c, type); - if (!ty) - return; - ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); - T->va_arg_(T, dst, ap_op, ty); - api_release(g, &ap); - api_push(g, api_make_sv(dst, ty)); -} - -void cfree_cg_vararg_end(CfreeCg *g) { - ApiSValue ap; - CGTarget *T; - Operand ap_op; - if (!g) - return; - T = g->target; - ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); - T->va_end_(T, ap_op); - api_release(g, &ap); -} - -void cfree_cg_vararg_copy(CfreeCg *g) { - ApiSValue src, dst; - CGTarget *T; - Operand src_op, dst_op; - if (!g) - return; - T = g->target; - src = api_pop(g); - dst = api_pop(g); - src_op = api_force_reg(g, &src, api_sv_type(&src)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); - T->va_copy_(T, dst_op, src_op); - api_release(g, &src); - api_release(g, &dst); -} - -/* ============================================================ - * Memory operations (stubs) - * 
============================================================ */ - -void cfree_cg_memcpy(CfreeCg *g, uint64_t size, CfreeCgMemAccess dst_access, - CfreeCgMemAccess src_access) { - ApiSValue src, dst; - CGTarget *T; - AggregateAccess agg; - Operand dst_op, src_op; - if (!g) - return; - api_local_const_memory_boundary(g); - (void)src_access; - if (size > UINT32_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memcpy size exceeds CGTarget"); - return; - } - T = g->target; - src = api_pop(g); - dst = api_pop(g); - api_require_pointer_value(g, "memcpy destination", api_sv_type(&dst)); - api_require_pointer_value(g, "memcpy source", api_sv_type(&src)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); - src_op = api_force_reg(g, &src, api_sv_type(&src)); - memset(&agg, 0, sizeof agg); - agg.size = (u32)size; - agg.align = dst_access.align ? dst_access.align : (u32)size; - T->copy_bytes(T, dst_op, src_op, agg); - api_release(g, &dst); - api_release(g, &src); -} - -void cfree_cg_memmove(CfreeCg *g, uint64_t size, CfreeCgMemAccess dst_access, - CfreeCgMemAccess src_access) { - ApiSValue src, dst; - Operand args[3]; - if (!g) - return; - api_local_const_memory_boundary(g); - (void)dst_access; - (void)src_access; - if (size > INT64_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memmove size exceeds CGTarget"); - return; - } - src = api_pop(g); - dst = api_pop(g); - api_require_pointer_value(g, "memmove destination", api_sv_type(&dst)); - api_require_pointer_value(g, "memmove source", api_sv_type(&src)); - args[0] = api_force_reg(g, &dst, api_sv_type(&dst)); - args[1] = api_force_reg(g, &src, api_sv_type(&src)); - args[2] = api_op_imm((i64)size, builtin_id(CFREE_CG_BUILTIN_I64)); - g->target->intrinsic(g->target, INTRIN_MEMMOVE, NULL, 0, args, 3); - api_release(g, &dst); - api_release(g, &src); -} - -void cfree_cg_memset(CfreeCg *g, uint8_t val, uint64_t size, - CfreeCgMemAccess dst_access) { - ApiSValue dst; - CGTarget *T; - AggregateAccess agg; - Operand dst_op, 
byte_val; - if (!g) - return; - api_local_const_memory_boundary(g); - if (size > UINT32_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memset size exceeds CGTarget"); - return; - } - T = g->target; - dst = api_pop(g); - api_require_pointer_value(g, "memset destination", api_sv_type(&dst)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); - byte_val = api_op_imm((i64)val, CFREE_CG_TYPE_NONE); - memset(&agg, 0, sizeof agg); - agg.size = (u32)size; - agg.align = dst_access.align ? dst_access.align : (u32)size; - T->set_bytes(T, dst_op, byte_val, agg); - api_release(g, &dst); -} - -void cfree_cg_index(CfreeCg *g, uint64_t offset) { - ApiSValue idx, base; - CGTarget *T; - CfreeCgTypeId base_ty, base_ptr_ty, elem_ty, idx_ty; - const CgType *base_info; - u32 elemsz; - int free_base_op = 0; - Operand base_op, idx_op, result; - Reg rr; - if (!g) - return; - if (offset > INT64_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: index offset too large"); - return; - } - T = g->target; - idx = api_pop(g); - base = api_pop(g); - api_ensure_reg(g, &base); - base_ty = api_sv_type(&base); - base_info = cg_type_get(g->c, base_ty); - if (base_info && base_info->kind == CFREE_CG_TYPE_PTR) { - elem_ty = base_info->ptr.pointee; - base_ptr_ty = base_ty; - } else if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY && - api_is_lvalue_sv(&base)) { - elem_ty = base_info->array.elem; - base_ptr_ty = cg_type_ptr_to(g->c, elem_ty); - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: index base is not a pointer or array lvalue"); - return; - } - elemsz = (u32)abi_cg_sizeof(g->c->abi, elem_ty); - idx_ty = idx.type ? 
idx.type : idx.op.type; - if (!idx_ty) - idx_ty = builtin_id(CFREE_CG_BUILTIN_I32); - if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY) { - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - base_op = api_op_reg(rr, base_ptr_ty); - T->addr_of(T, base_op, base.op); - api_release(g, &base); - free_base_op = 1; - } else { - base_op = api_force_reg(g, &base, base_ptr_ty); - } - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - result = api_op_reg(rr, base_ptr_ty); - if (idx_op.kind == OPK_IMM) { - i64 total_offset = idx_op.v.imm * (i64)elemsz + (i64)offset; - T->binop(T, BO_IADD, result, base_op, - api_op_imm(total_offset, base_ptr_ty)); - } else { - Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); - Operand scaled = api_op_reg(sr, idx_ty); - T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty)); - if (offset > 0) { - T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); - } - T->binop(T, BO_IADD, result, base_op, scaled); - api_free_reg(g, sr, RC_INT); - } - if (free_base_op) - api_free_reg(g, base_op.v.reg, RC_INT); - if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) - api_release(g, &base); - api_release(g, &idx); - api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty)); -} - -void cfree_cg_field(CfreeCg *g, uint32_t field_index) { - ApiSValue base; - CGTarget *T; - CfreeCgTypeId rec_ty; - CfreeCgTypeId field_ty; - CfreeCgTypeId rec_ptr_ty; - const CgType *rec_info; - const ABIRecordLayout *layout; - u32 field_offset; - Operand result; - Reg rr; - if (!g) - return; - T = g->target; - base = api_pop(g); - api_ensure_reg(g, &base); - rec_ty = api_sv_type(&base); - if (!api_is_lvalue_sv(&base)) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: field base is not an lvalue"); - return; - } - layout = abi_cg_record_layout(g->c->abi, rec_ty); - if (!layout || field_index >= layout->nfields) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: 
invalid field index"); - return; - } - rec_info = cg_type_get(g->c, rec_ty); - if (!rec_info || rec_info->kind != CFREE_CG_TYPE_RECORD || - field_index >= rec_info->record.nfields) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid record base"); - return; - } - field_ty = rec_info->record.fields[field_index].type; - rec_ptr_ty = cg_type_ptr_to(g->c, rec_ty); - field_offset = layout->fields[field_index].offset; - if (layout->fields[field_index].bit_width != 0 || - (rec_info->record.fields[field_index].flags & - CFREE_CG_FIELD_BITFIELD) != 0) { - Operand base_addr; - ApiSValue sv; - BitFieldAccess bf; - if (layout->fields[field_index].bit_width == 0) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: zero-width bit-field access"); - api_release(g, &base); - return; - } - base_addr = api_lvalue_addr(g, &base, rec_ptr_ty); - memset(&bf, 0, sizeof bf); - bf.field_type = field_ty; - bf.storage = api_mem_for_lvalue(g, &base_addr, field_ty); - bf.storage.size = layout->fields[field_index].storage_size; - bf.storage_offset = layout->fields[field_index].offset; - bf.bit_offset = layout->fields[field_index].bit_offset; - bf.bit_width = layout->fields[field_index].bit_width; - bf.signed_ = rec_info->record.fields[field_index].bit_signed != 0; - sv = api_make_lv(base_addr, field_ty); - sv.bitfield_lvalue = 1; - sv.delayed.bitfield = bf; - api_release(g, &base); - api_push(g, sv); - return; - } - if (base.op.kind == OPK_GLOBAL) { - result = - api_op_global(base.op.v.global.sym, - base.op.v.global.addend + (i64)field_offset, field_ty); - api_push(g, api_make_lv(result, field_ty)); - } else if (base.op.kind == OPK_INDIRECT && field_offset <= (u32)INT32_MAX && - base.op.v.ind.ofs <= INT32_MAX - (i32)field_offset) { - result = api_op_indirect(base.op.v.ind.base, - base.op.v.ind.ofs + (i32)field_offset, field_ty); - api_push(g, api_make_lv(result, field_ty)); - } else { - Operand base_addr; - rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - base_addr = api_op_reg(rr, 
rec_ptr_ty); - T->addr_of(T, base_addr, base.op); - api_release(g, &base); - if (field_offset == 0) { - result = base_addr; - } else { - Reg fr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - result = api_op_reg(fr, rec_ptr_ty); - T->binop(T, BO_IADD, result, base_addr, - api_op_imm((i64)field_offset, rec_ptr_ty)); - api_free_reg(g, base_addr.v.reg, RC_INT); - } - api_push(g, - api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); - } -} - -/* ============================================================ - * Calls / return - * ============================================================ */ - -/* Shared scaffolding for cfree_cg_call / cfree_cg_call_symbol. The two - * public entry points differ only in how the callee is obtained and in - * their pre-call stack-depth check; everything else (arg packaging, return - * storage allocation, post-call release, result push) is identical. These - * helpers carry the common shape and are the natural targets for any future - * change that wants to vary call-shape policy (e.g. an ABI-driven storage - * decision). */ - -static CGABIValue *api_alloc_call_args(CfreeCg *g, u32 nargs) { - CGABIValue *avs = NULL; - if (nargs) { - avs = arena_array(g->c->tu, CGABIValue, nargs); - memset(avs, 0, sizeof(CGABIValue) * nargs); - } - g->avs_in_flight = avs; - g->avs_in_flight_n = nargs; - return avs; -} - -static void api_pack_call_arg(CfreeCg *g, CGABIValue *av, CfreeCgTypeId fty, - const ABIFuncInfo *abi, u32 idx) { - ApiSValue arg = api_pop(g); - int is_vararg = (idx >= abi->nparams); - CfreeCgTypeId aty = is_vararg - ? (arg.type ? arg.type : api_sv_type(&arg)) - : cg_type_func_param_id(g->c, fty, idx); - if (!aty) - aty = arg.type; - - av->type = aty; - av->abi = is_vararg ? 
NULL : &abi->params[idx]; - - if (api_is_wide16_scalar_type(g->c, aty)) { - ApiSValue lv = api_wide16_materialize_lvalue(g, &arg, aty); - av->storage = lv.op; - av->storage.type = aty; - av->size = 16; - } else if (cg_type_is_aggregate(g->c, aty)) { - api_ensure_reg(g, &arg); - Operand st = arg.op; - st.type = aty; - av->storage = st; - av->size = abi_cg_sizeof(g->c->abi, aty); - } else { - api_ensure_reg(g, &arg); - av->storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) - ? api_force_reg(g, &arg, aty) - : arg.op; - } -} - -static void api_alloc_call_ret_storage(CfreeCg *g, CGTarget *T, - CfreeCgTypeId ret_ty, Operand *out) { - if (api_arg_storage_must_be_addr(g->c, ret_ty)) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ret_ty; - fsd.size = abi_cg_sizeof(g->c->abi, ret_ty); - fsd.align = abi_cg_alignof(g->c->abi, ret_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_ADDR_TAKEN; - FrameSlot slot = T->frame_slot(T, &fsd); - *out = api_op_local(slot, ret_ty); - } else { - Reg r = api_alloc_reg_or_spill(g, api_type_class(ret_ty), ret_ty); - *out = api_op_reg(r, ret_ty); - } -} - -static void api_release_call_args(CfreeCg *g, CGABIValue *avs, u32 nargs) { - for (u32 i = 0; i < nargs; ++i) { - api_release_arg_storage(g, &avs[i].storage); - } - g->avs_in_flight = NULL; - g->avs_in_flight_n = 0; -} - -static void api_push_call_result(CfreeCg *g, Operand ret_storage, - CfreeCgTypeId ret_ty) { - if (ret_storage.kind == OPK_LOCAL || ret_storage.kind == OPK_GLOBAL || - ret_storage.kind == OPK_INDIRECT) { - api_push(g, api_make_lv(ret_storage, ret_ty)); - } else { - api_push(g, api_make_sv(ret_storage, ret_ty)); - } -} - -void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, - CfreeCgCallAttrs attrs) { - CGTarget *T; - CfreeCgTypeId fty; - const ABIFuncInfo *abi; - CfreeCgTypeId ret_ty; - int has_result; - CGABIValue *avs; - CGCallDesc desc; - ApiSValue callee; - int tail; - if (!g) - return; - api_local_const_memory_boundary(g); - 
tail = - attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; - T = g->target; - fty = resolve_type(g->c, fn_type); - if (!fty) - return; - abi = abi_cg_func_info(g->c->abi, fty); - ret_ty = cg_type_func_ret_id(g->c, fty); - has_result = !tail && !cg_type_is_void(g->c, ret_ty); - - if (g->sp < (u32)nargs + 1u) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: call stack underflow"); - return; - } - - avs = api_alloc_call_args(g, nargs); - for (u32 i = 0; i < nargs; ++i) { - u32 idx = nargs - 1u - i; - api_pack_call_arg(g, &avs[idx], fty, abi, idx); - } - - callee = api_pop(g); - api_ensure_reg(g, &callee); - Operand callee_op = (callee.op.kind == OPK_GLOBAL) - ? callee.op - : api_force_reg(g, &callee, fty); - - memset(&desc, 0, sizeof desc); - desc.fn_type = fty; - desc.abi = abi; - desc.callee = callee_op; - desc.args = avs; - desc.nargs = nargs; - desc.flags = tail ? CG_CALL_TAIL : CG_CALL_NONE; - desc.ret.type = ret_ty; - desc.ret.abi = &abi->ret; - - if (has_result) { - api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); - } else { - desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); - } - - if (tail) - api_regalloc_finish(g); - T->call(T, &desc); - - api_release_call_args(g, avs, nargs); - - if (callee.op.kind != OPK_GLOBAL) { - api_free_reg(g, callee_op.v.reg, RC_INT); - } - - if (has_result) { - api_push_call_result(g, desc.ret.storage, ret_ty); - } -} - -static void api_cg_tail_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type) - __attribute__((unused)); -static void api_cg_tail_call(CfreeCg *g, uint32_t nargs, - CfreeCgTypeId fn_type) { - CGTarget *T; - CfreeCgTypeId fty; - const ABIFuncInfo *abi; - CGABIValue *avs; - CGCallDesc desc; - ApiSValue callee; - if (!g) - return; - api_local_const_memory_boundary(g); - T = g->target; - fty = resolve_type(g->c, fn_type); - if (!fty) - return; - abi = abi_cg_func_info(g->c->abi, fty); - avs = NULL; - if (nargs) { - avs = arena_array(g->c->tu, CGABIValue, nargs); - 
memset(avs, 0, sizeof(CGABIValue) * nargs); - } - for (u32 i = 0; i < nargs; ++i) { - u32 idx = nargs - 1u - i; - ApiSValue arg = api_pop(g); - api_ensure_reg(g, &arg); - CfreeCgTypeId aty = cg_type_func_param_id(g->c, fty, idx); - if (!aty) - aty = arg.type; - avs[idx].type = aty; - avs[idx].abi = idx < abi->nparams ? &abi->params[idx] : NULL; - avs[idx].storage = - (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) - ? api_force_reg(g, &arg, aty) - : arg.op; - } - callee = api_pop(g); - api_ensure_reg(g, &callee); - Operand callee_op = (callee.op.kind == OPK_GLOBAL) - ? callee.op - : api_force_reg(g, &callee, fty); - memset(&desc, 0, sizeof desc); - desc.fn_type = fty; - desc.abi = abi; - desc.callee = callee_op; - desc.args = avs; - desc.nargs = nargs; - desc.flags = CG_CALL_TAIL; - desc.ret.type = cg_type_func_ret_id(g->c, fty); - desc.ret.abi = &abi->ret; - desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); - api_regalloc_finish(g); - T->call(T, &desc); - for (u32 i = 0; i < nargs; ++i) { - api_release_arg_storage(g, &avs[i].storage); - } - if (callee.op.kind != OPK_GLOBAL) { - api_free_reg(g, callee_op.v.reg, RC_INT); - } -} - -static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, - CfreeCgCallAttrs attrs) { - CGTarget *T; - CfreeCgTypeId fty; - const ABIFuncInfo *abi; - CfreeCgTypeId ret_ty; - int has_result; - CGABIValue *avs; - CGCallDesc desc; - Operand callee_op; - if (!g) - return; - api_local_const_memory_boundary(g); - int tail = - attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; - T = g->target; - fty = api_sym_type(g, sym); - if (!fty) - return; - abi = abi_cg_func_info(g->c->abi, fty); - ret_ty = cg_type_func_ret_id(g->c, fty); - has_result = !tail && !cg_type_is_void(g->c, ret_ty); - if (g->sp < nargs) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: call stack underflow"); - return; - } - avs = api_alloc_call_args(g, nargs); - for (u32 i = 0; i < nargs; ++i) { - u32 idx = nargs - 
1u - i; - api_pack_call_arg(g, &avs[idx], fty, abi, idx); - } - callee_op = api_op_global((ObjSymId)sym, 0, cg_type_ptr_to(g->c, fty)); - memset(&desc, 0, sizeof desc); - desc.fn_type = fty; - desc.abi = abi; - desc.callee = callee_op; - desc.args = avs; - desc.nargs = nargs; - desc.flags = tail ? CG_CALL_TAIL : CG_CALL_NONE; - desc.ret.type = ret_ty; - desc.ret.abi = &abi->ret; - if (has_result) { - api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); - } else { - desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); - } - if (tail) - api_regalloc_finish(g); - T->call(T, &desc); - api_release_call_args(g, avs, nargs); - if (has_result) { - api_push_call_result(g, desc.ret.storage, ret_ty); - } -} - -void cfree_cg_call_symbol(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, - CfreeCgCallAttrs attrs) { - api_call_symbol_common(g, sym, nargs, attrs); -} - -void cfree_cg_ret(CfreeCg *g) { - ApiSValue v; - CGTarget *T; - CfreeCgTypeId rty; - CGABIValue av; - Operand ret_op; - if (!g) - return; - T = g->target; - rty = g->fn_ret_type; - if (cg_type_is_void(g->c, rty)) { - T->ret(T, NULL); - return; - } - v = api_pop(g); - memset(&av, 0, sizeof av); - av.type = rty; - av.abi = &g->fn_abi->ret; - int is_aggregate = cg_type_is_aggregate(g->c, rty); - if (is_aggregate) { - av.storage = v.op; - av.storage.type = rty; - av.size = abi_cg_sizeof(g->c->abi, rty); - T->ret(T, &av); - return; - } - if (api_is_wide16_scalar_type(g->c, rty)) { - ApiSValue lv = api_wide16_materialize_lvalue(g, &v, rty); - av.storage = lv.op; - av.storage.type = rty; - av.size = 16; - T->ret(T, &av); - return; - } - if (api_sv_op_is(&v, OPK_IMM)) { - ret_op = v.op; - ret_op.type = rty; - av.storage = ret_op; - T->ret(T, &av); - api_release(g, &v); - return; - } - ret_op = api_force_reg(g, &v, rty); - av.storage = ret_op; - T->ret(T, &av); - api_release(g, &v); -} - -void cfree_cg_ret_void(CfreeCg *g) { - if (!g) - return; - g->target->ret(g->target, NULL); -} - -/* 
============================================================ - * Data definitions (stubs) - * ============================================================ */ - -void cfree_cg_data_begin(CfreeCg *g, CfreeCgSym cg_sym, - CfreeCgDataDefAttrs attrs) { - Compiler *c; - ObjBuilder *ob; - ObjSymId sym; - CfreeCgTypeId ty; - u32 align; - SecKind sec_kind; - u16 sec_flags; - Sym sec_name_sym; - ObjSecId sec; - CfreeCgDecl decl_attrs; - if (!g) - return; - c = g->c; - ob = g->obj; - sym = (ObjSymId)cg_sym; - ty = api_sym_type(g, cg_sym); - if (!ty) - return; - decl_attrs = api_sym_attrs(g, cg_sym); - align = - attrs.align ? attrs.align : (u32)abi_cg_alignof(c->abi, decl_attrs.type); - if (!attrs.section && decl_attrs.as.object.section) { - attrs.section = decl_attrs.as.object.section; - } - if ((decl_attrs.as.object.flags & CFREE_CG_OBJ_TLS) && - (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL)) { - sec_kind = SEC_BSS; - sec_flags = SF_ALLOC | SF_WRITE | SF_TLS; - sec_name_sym = attrs.section ? (Sym)attrs.section : obj_secname_tbss(c); - } else if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { - sec_kind = SEC_BSS; - sec_flags = SF_ALLOC | SF_WRITE; - sec_name_sym = attrs.section ? 
(Sym)attrs.section - : pool_intern_cstr(c->global, ".bss"); - } else if (attrs.section) { - sec_name_sym = (Sym)attrs.section; - if (attrs.flags & CFREE_CG_DATADEF_READONLY) { - sec_kind = SEC_RODATA; - sec_flags = SF_ALLOC; - } else { - sec_kind = SEC_OTHER; - sec_flags = SF_ALLOC | SF_WRITE; - } - } else if ((attrs.flags & CFREE_CG_DATADEF_READONLY) || - (decl_attrs.as.object.flags & CFREE_CG_OBJ_READONLY)) { - sec_kind = SEC_RODATA; - sec_flags = SF_ALLOC; - sec_name_sym = pool_intern_cstr(c->global, ".rodata"); - } else if (decl_attrs.as.object.flags & CFREE_CG_OBJ_TLS) { - sec_kind = SEC_DATA; - sec_flags = SF_ALLOC | SF_WRITE | SF_TLS; - sec_name_sym = obj_secname_tdata(c); - } else { - sec_kind = SEC_DATA; - sec_flags = SF_ALLOC | SF_WRITE; - sec_name_sym = pool_intern_cstr(c->global, ".data"); - } - if (attrs.flags & CFREE_CG_DATADEF_RETAIN) - sec_flags |= SF_RETAIN; - if (attrs.flags & CFREE_CG_DATADEF_MERGE) - sec_flags |= SF_MERGE; - if (attrs.flags & CFREE_CG_DATADEF_STRINGS) - sec_flags |= SF_STRINGS; - if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { - sec = obj_section_ex(ob, sec_name_sym, sec_kind, SSEM_NOBITS, sec_flags, - align, 0, OBJ_SEC_NONE, 0); - } else if (attrs.entsize) { - sec = obj_section_ex(ob, sec_name_sym, sec_kind, SSEM_PROGBITS, sec_flags, - align, attrs.entsize, OBJ_SEC_NONE, 0); - } else { - sec = obj_section(ob, sec_name_sym, sec_kind, sec_flags, align); - } - g->data_sec = sec; - g->data_sym = sym; - g->data_base = obj_align_to(ob, sec, align); - g->data_size = 0; - if (sym != OBJ_SYM_NONE) { - obj_symbol_define(ob, sym, sec, (u64)g->data_base, - (u64)abi_cg_sizeof(c->abi, decl_attrs.type)); - } -} - -void cfree_cg_data_common(CfreeCg *g, CfreeCgSym cg_sym, uint64_t size, - uint32_t align) { - ObjSym *osym; - ObjSymId sym; - CfreeCgDecl decl_attrs; - if (!g || cg_sym == CFREE_CG_SYM_NONE) - return; - sym = (ObjSymId)cg_sym; - osym = (ObjSym *)obj_symbol_get(g->obj, sym); - if (!osym) - return; - decl_attrs = api_sym_attrs(g, 
cg_sym); - osym->bind = api_map_bind(decl_attrs.sym.bind); - osym->vis = api_map_vis(decl_attrs.sym.visibility); - osym->kind = SK_COMMON; - osym->section_id = OBJ_SEC_NONE; - osym->value = 0; - osym->size = size; - osym->common_align = align; -} - -void cfree_cg_data_align(CfreeCg *g, uint32_t align) { - if (!g || g->data_sec == OBJ_SEC_NONE || !align) - return; - g->data_size = obj_align_to(g->obj, g->data_sec, align) - g->data_base; -} - -void cfree_cg_data_pad(CfreeCg *g, uint64_t size, uint8_t value) { - u8 pad[64]; - if (!g || !size) - return; - memset(pad, value, sizeof(pad)); - while (size >= sizeof(pad)) { - obj_write(g->obj, g->data_sec, pad, sizeof(pad)); - size -= sizeof(pad); - g->data_size += sizeof(pad); - } - if (size) { - obj_write(g->obj, g->data_sec, pad, (size_t)size); - g->data_size += size; - } -} - -void cfree_cg_data_int(CfreeCg *g, uint64_t value, CfreeCgTypeId type) { - CfreeCgTypeId ty; - u32 size; - u8 bytes[8]; - if (!g) - return; - ty = resolve_type(g->c, type); - if (!ty) - return; - size = (u32)abi_cg_sizeof(g->c->abi, type); - if (size > sizeof(bytes)) - return; - for (u32 i = 0; i < size; ++i) { - u32 shift = g->c->target.big_endian ? 
(size - 1u - i) * 8u : i * 8u; - bytes[i] = (u8)(value >> shift); - } - cfree_cg_data_bytes(g, bytes, size); -} - -void cfree_cg_data_float(CfreeCg *g, double value, CfreeCgTypeId type) { - CfreeCgTypeId ty; - union { - float f; - double d; - u8 b[8]; - } u; - if (!g) - return; - ty = resolve_type(g->c, type); - if (!ty) - return; - if (api_is_f128_type(g->c, ty)) { - u8 bytes[16]; - api_encode_binary128_from_double(g, value, bytes); - cfree_cg_data_bytes(g, bytes, sizeof bytes); - return; - } - if (ty == builtin_id(CFREE_CG_BUILTIN_F32)) { - u.f = (float)value; - if (g->c->target.big_endian) { - u8 t = u.b[0]; - u.b[0] = u.b[3]; - u.b[3] = t; - t = u.b[1]; - u.b[1] = u.b[2]; - u.b[2] = t; - } - cfree_cg_data_bytes(g, u.b, 4); - } else if (ty == builtin_id(CFREE_CG_BUILTIN_F64)) { - u.d = value; - if (g->c->target.big_endian) { - for (u32 i = 0; i < 4; ++i) { - u8 t = u.b[i]; - u.b[i] = u.b[7u - i]; - u.b[7u - i] = t; - } - } - cfree_cg_data_bytes(g, u.b, 8); - } -} - -void cfree_cg_data_bytes(CfreeCg *g, const uint8_t *data, size_t len) { - if (!g || !len) - return; - obj_write(g->obj, g->data_sec, data, len); - g->data_size += len; -} - -void cfree_cg_data_zero(CfreeCg *g, uint64_t size) { - const Section *sec; - if (!g || !size) - return; - sec = obj_section_get(g->obj, g->data_sec); - if (sec && (sec->kind == SEC_BSS || sec->sem == SSEM_NOBITS)) { - obj_reserve_bss(g->obj, g->data_sec, - g->data_base + (u32)(g->data_size + size), 0); - g->data_size += size; - return; - } - { - u8 pad[64]; - memset(pad, 0, sizeof pad); - u64 remaining = size; - while (remaining >= sizeof pad) { - obj_write(g->obj, g->data_sec, pad, sizeof pad); - remaining -= sizeof pad; - } - if (remaining) - obj_write(g->obj, g->data_sec, pad, (size_t)remaining); - } - g->data_size += size; -} - -static void api_cg_data_reloc(CfreeCg *g, CfreeCgSym target, int64_t addend, - uint32_t width, int pcrel) { - ObjBuilder *ob; - RelocKind rk; - u8 pad[8]; - if (!g || !width || width > sizeof(pad)) - 
return; - ob = g->obj; - rk = api_data_reloc_kind(pcrel, width); - if (rk == R_NONE) - return; - memset(pad, 0, sizeof pad); - obj_write(ob, g->data_sec, pad, width); - obj_reloc(ob, g->data_sec, g->data_base + (u32)g->data_size, rk, - (ObjSymId)target, addend); - g->data_size += width; -} - -void cfree_cg_data_addr(CfreeCg *g, CfreeCgSym target, int64_t addend, - uint32_t width, uint32_t address_space) { - (void)address_space; - api_cg_data_reloc(g, target, addend, width, 0); -} - -void cfree_cg_data_label_addr(CfreeCg *g, CfreeCgLabel target, int64_t addend, - uint32_t width, uint32_t address_space) { - u8 pad[8]; - (void)address_space; - if (!g || !width || width > sizeof(pad)) - return; - memset(pad, 0, sizeof(pad)); - for (u32 i = 0; i < width; ++i) { - u32 shift = g->c->target.big_endian ? (width - 1u - i) * 8u : i * 8u; - pad[i] = (u8)(((uint64_t)target + (uint64_t)addend) >> shift); - } - obj_write(g->obj, g->data_sec, pad, width); - g->data_size += width; -} - -void cfree_cg_data_pcrel(CfreeCg *g, CfreeCgSym target, int64_t addend, - uint32_t width) { - api_cg_data_reloc(g, target, addend, width, 1); -} - -void cfree_cg_data_symdiff(CfreeCg *g, CfreeCgSym lhs, CfreeCgSym rhs, - int64_t addend, uint32_t width) { - u8 pad[8]; - RelocKind add_kind; - RelocKind sub_kind; - if (!g || width > sizeof(pad)) - return; - switch (width) { - case 1: - add_kind = R_RV_ADD8; - sub_kind = R_RV_SUB8; - break; - case 2: - add_kind = R_RV_ADD16; - sub_kind = R_RV_SUB16; - break; - case 4: - add_kind = R_RV_ADD32; - sub_kind = R_RV_SUB32; - break; - case 8: - add_kind = R_RV_ADD64; - sub_kind = R_RV_SUB64; - break; - default: - return; - } - memset(pad, 0, sizeof(pad)); - obj_write(g->obj, g->data_sec, pad, width); - obj_reloc(g->obj, g->data_sec, g->data_base + (u32)g->data_size, add_kind, - (ObjSymId)lhs, addend); - obj_reloc(g->obj, g->data_sec, g->data_base + (u32)g->data_size, sub_kind, - (ObjSymId)rhs, 0); - g->data_size += width; -} - -void cfree_cg_data_end(CfreeCg 
*g) { - if (!g) - return; - if (g->data_sym != OBJ_SYM_NONE) { - obj_symbol_define(g->obj, g->data_sym, g->data_sec, g->data_base, - g->data_size); - } - g->data_sec = OBJ_SEC_NONE; - g->data_sym = OBJ_SYM_NONE; - g->data_base = 0; - g->data_size = 0; -} diff --git a/src/api/cg_api.h b/src/api/cg_api.h @@ -1,23 +0,0 @@ -#ifndef CFREE_API_CG_API_H -#define CFREE_API_CG_API_H - -#include <cfree/cg.h> - -#include "api/cg_type.h" -#include "core/core.h" - -typedef struct CGTarget CGTarget; -typedef struct MCEmitter MCEmitter; -typedef uint32_t ObjSymId; - -enum { - CG_API_TYPE_SEG_SHIFT = 6, - CG_API_TYPE_SEG_SIZE = 1u << CG_API_TYPE_SEG_SHIFT, - CG_API_TYPE_SEG_MASK = CG_API_TYPE_SEG_SIZE - 1u, - CG_API_TYPE_BUILTIN_SEG = 1u, - CG_API_TYPE_USER_SEG_BIAS = 2u, -}; - -void cg_api_fini(Compiler*); - -#endif diff --git a/src/api/cg_type.h b/src/api/cg_type.h @@ -1,77 +0,0 @@ -#ifndef CFREE_API_CG_TYPE_H -#define CFREE_API_CG_TYPE_H - -#include <cfree/cg.h> - -#include "core/core.h" - -typedef struct CgTypeField { - CfreeSym name; - CfreeCgTypeId type; - u64 offset; - u32 align_override; - u32 flags; - u16 bit_width; - u16 bit_offset; - u32 bit_storage_size; - int bit_signed; -} CgTypeField; - -typedef struct CgType { - CfreeCgTypeKind kind; - u64 size; - u32 align; - u32 pad; - union { - struct { - u32 width; - } integer; - struct { - u32 width; - } fp; - struct { - CfreeCgTypeId pointee; - u32 address_space; - } ptr; - struct { - CfreeCgTypeId elem; - u64 count; - } array; - struct { - CfreeCgTypeId ret; - CfreeCgFuncParam* params; - u32 nparams; - CfreeCgCallConv call_conv; - int abi_variadic; - CfreeCgAbiAttrs ret_attrs; - } func; - struct { - CfreeSym tag; - CgTypeField* fields; - u32 nfields; - int is_union; - u32 align_override; - u32 flags; - } record; - struct { - CfreeSym tag; - CfreeCgTypeId base; - CfreeCgEnumValue* values; - u32 nvalues; - } enum_; - struct { - CfreeSym name; - CfreeCgTypeId base; - } alias; - }; -} CgType; - -const CgType* 
cg_type_get(Compiler*, CfreeCgTypeId); -uint64_t cg_type_size(Compiler*, CfreeCgTypeId); -uint32_t cg_type_align(Compiler*, CfreeCgTypeId); -int cg_type_is_int(Compiler*, CfreeCgTypeId); -int cg_type_is_float(Compiler*, CfreeCgTypeId); -int cg_type_is_ptr(Compiler*, CfreeCgTypeId); -int cg_type_is_record(Compiler*, CfreeCgTypeId); - -#endif diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -0,0 +1,896 @@ +#include "cg/internal.h" + +void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { + ApiSValue b, a; + CGTarget* T; + CfreeCgTypeId ty; + Operand ra, rb; + Reg rr; + Operand dst; + ApiSValue folded_sv; + i64 folded; + if (!g) return; + T = g->target; + b = api_pop(g); + a = api_pop(g); + ty = a.type ? a.type : b.type; + + if (api_is_i128_type(g->c, ty)) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + const char* name = api_i128_binop_helper(iop); + if (!name) + compiler_panic(g->c, g->cur_loc, "CfreeCg: i128 binop unsupported"); + args[0] = a; + args[1] = b; + ps[0] = i128; + ps[1] = (iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S) ? 
i32 : i128; + api_runtime_call_values(g, name, i128, ps, 2, args); + return; + } + + if (!flags && api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && + api_try_fold_int_binop(g, iop, ty, a.op.v.imm, b.op.v.imm, &folded)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); + return; + } + + if (api_can_delay_int_arith(g, ty, flags) && + api_try_fold_arith_chain(g, iop, ty, &a, &b, &folded_sv)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, folded_sv); + return; + } + + if (api_type_class(ty) == RC_FP) { + ra = api_force_reg(g, &a, ty); + rb = api_force_reg(g, &b, ty); + } else { + ra = api_force_reg_unless_imm(g, &a, ty); + rb = api_force_reg_unless_imm(g, &b, ty); + } + + if (api_can_delay_int_arith(g, ty, flags) && + api_try_collapse_binop_identity(g, iop, ty, &a, &b, &folded_sv)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, folded_sv); + return; + } + + if (api_can_delay_int_arith(g, ty, flags) && + (ra.kind == OPK_REG || rb.kind == OPK_REG) && + (ra.kind == OPK_REG || ra.kind == OPK_IMM) && + (rb.kind == OPK_REG || rb.kind == OPK_IMM)) { + int a_owned = api_sv_owns_operand_reg(&a, &ra); + int b_owned = api_sv_owns_operand_reg(&b, &rb); + api_push(g, api_make_arith_binop(iop, ra, rb, ty, a_owned, b_owned)); + if (a_owned) a.res = RES_INHERENT; + if (b_owned) b.res = RES_INHERENT; + api_release(g, &a); + api_release(g, &b); + return; + } + + rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); + dst = api_op_reg(rr, ty); + T->binop(T, iop, dst, ra, rb); + api_release(g, &a); + api_release(g, &b); + api_push(g, api_make_sv(dst, ty)); +} + +void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { + ApiSValue a; + CGTarget* T; + CfreeCgTypeId ty; + Operand ra; + Reg rr; + Operand dst; + ApiSValue folded_sv; + i64 folded; + if (!g) return; + T = g->target; + a = api_pop(g); + ty = a.type ? 
a.type : a.op.type; + + if (api_is_i128_type(g->c, ty)) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + if (iop == UO_NEG || iop == UO_BNOT) { + const char* name = (iop == UO_NEG) ? "__negti2" : "__cfree_notti3"; + api_push(g, a); + api_f128_call_unary(g, name, i128, i128); + return; + } + if (iop == UO_NOT) { + CfreeCgTypeId ps[2] = {i128, i128}; + ApiSValue args[2]; + args[0] = a; + args[1] = api_make_sv(api_op_imm(0, i128), i128); + api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, CMP_EQ); + return; + } + } + + if (!flags && api_sv_op_is(&a, OPK_IMM) && + api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) { + api_release(g, &a); + api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); + return; + } + + if (api_can_delay_int_arith(g, ty, flags) && + api_try_fold_unary_chain(&a, iop, ty, &folded_sv)) { + api_release(g, &a); + api_push(g, folded_sv); + return; + } + + ra = api_force_reg_unless_imm(g, &a, ty); + if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_REG) { + int a_owned = api_sv_owns_operand_reg(&a, &ra); + api_push(g, api_make_arith_unop(iop, ra, ty, a_owned)); + if (a_owned) a.res = RES_INHERENT; + api_release(g, &a); + return; + } + rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); + dst = api_op_reg(rr, ty); + T->unop(T, iop, dst, ra); + api_release(g, &a); + api_push(g, api_make_sv(dst, ty)); +} + +void api_cg_cmp(CfreeCg* g, CmpOp cop) { + ApiSValue b, a; + CGTarget* T; + CfreeCgTypeId opty; + CfreeCgTypeId i32; + Operand ra, rb; + Reg rr; + Operand dst; + i64 folded; + if (!g) return; + T = g->target; + b = api_pop(g); + a = api_pop(g); + opty = a.type ? 
a.type : b.type; + i32 = builtin_id(CFREE_CG_BUILTIN_I32); + + if (api_is_i128_type(g->c, opty)) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId ps[2] = {i128, i128}; + ApiSValue args[2]; + CmpOp icmp = CMP_EQ; + const char* name = + api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2" : "__cfree_cmpti2"; + switch (cop) { + case CMP_EQ: + icmp = CMP_EQ; + break; + case CMP_NE: + icmp = CMP_NE; + break; + case CMP_LT_S: + case CMP_LT_U: + icmp = CMP_LT_S; + break; + case CMP_LE_S: + case CMP_LE_U: + icmp = CMP_LE_S; + break; + case CMP_GT_S: + case CMP_GT_U: + icmp = CMP_GT_S; + break; + case CMP_GE_S: + case CMP_GE_U: + icmp = CMP_GE_S; + break; + default: + icmp = CMP_EQ; + break; + } + args[0] = a; + args[1] = b; + api_runtime_call_values(g, name, i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, icmp); + return; + } + + if (api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && + api_try_fold_int_cmp(g, cop, opty, a.op.v.imm, b.op.v.imm, &folded)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, api_make_sv(api_op_imm(folded, i32), i32)); + return; + } + + ra = api_force_reg_unless_imm(g, &a, opty); + rb = api_force_reg_unless_imm(g, &b, opty); + if (api_type_class(opty) != RC_FP) { + api_push(g, api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_reg(&a, &ra), + api_sv_owns_operand_reg(&b, &rb))); + return; + } + rr = api_alloc_reg_or_spill(g, RC_INT, i32); + dst = api_op_reg(rr, i32); + T->cmp(T, cop, dst, ra, rb); + api_release(g, &a); + api_release(g, &b); + api_push(g, api_make_sv(dst, i32)); +} + +void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { + ApiSValue v; + CGTarget* T; + CfreeCgTypeId sty; + CfreeCgTypeId dty; + Operand src; + Reg rr; + Operand dst; + if (!g) return; + T = g->target; + dty = resolve_type(g->c, dst_type); + if (!dty) return; + v = api_pop(g); + dty = api_unalias_type(g->c, dty); + sty = api_unalias_type(g->c, v.type ? 
v.type : v.op.type); + if (!sty) { + api_release(g, &v); + return; + } + if (sty == dty) { + v.type = dty; + v.op.type = dty; + api_push(g, v); + return; + } + if (api_is_i128_type(g->c, sty) && api_type_is_bool(g->c, dty) && + ck != CV_BITCAST) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + CfreeCgTypeId ps[2] = {i128, i128}; + ApiSValue args[2]; + ApiSValue r; + args[0] = v; + args[1] = api_make_sv(api_op_imm(0, i128), i128); + api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, CMP_NE); + r = api_pop(g); + r.type = dty; + r.op.type = dty; + api_push(g, r); + return; + } + if (api_is_i128_type(g->c, dty) && !api_is_i128_type(g->c, sty) && + ck != CV_BITCAST) { + u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); + CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); + FrameSlot slot = api_f128_temp_slot(g, dty); + Operand dst_lv = api_op_local(slot, dty); + if (api_sv_op_is(&v, OPK_IMM)) { + u8 bytes[16]; + u64 lo = (u64)v.op.v.imm; + u64 hi = 0; + if (ck == CV_SEXT && sz <= 8) { + u32 bits = sz * 8u; + u64 mask = bits >= 64u ? ~(u64)0 : ((1ull << bits) - 1ull); + u64 sign = 1ull << (bits - 1u); + u64 u = lo & mask; + if (u & sign) u |= ~mask; + lo = u; + hi = (u & (1ull << 63)) ? ~(u64)0 : 0; + } + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 lo_idx = g->c->target.big_endian ? 15u - i : i; + u32 hi_idx = g->c->target.big_endian ? 
7u - i : 8u + i; + bytes[lo_idx] = (u8)(lo >> (i * 8u)); + bytes[hi_idx] = (u8)(hi >> (i * 8u)); + } + api_store_f128_bytes(g, slot, dty, bytes); + api_release(g, &v); + api_push(g, api_make_lv(dst_lv, dty)); + return; + } + { + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); + CfreeCgTypeId src_ty = sty; + Operand src = api_force_reg(g, &v, sty); + Operand low = src; + Operand base; + Reg low_tmp = REG_NONE; + Reg ar; + MemAccess ma; + memset(&ma, 0, sizeof ma); + ma.type = i64_ty; + ma.size = 8; + ma.align = 8; + if (sz < 8) { + low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty); + low = api_op_reg(low_tmp, i64_ty); + T->convert(T, ck == CV_SEXT ? CV_SEXT : CV_ZEXT, low, src); + src_ty = i64_ty; + } else { + low.type = i64_ty; + } + ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); + base = api_op_reg(ar, ptr_ty); + T->addr_of(T, base, dst_lv); + T->store(T, api_op_indirect(ar, 0, i64_ty), low, ma); + if (ck == CV_SEXT) { + Reg hr = api_alloc_reg_or_spill(g, RC_INT, i64_ty); + Operand high = api_op_reg(hr, i64_ty); + T->binop(T, BO_SHR_S, high, low, api_op_imm(63, i64_ty)); + T->store(T, api_op_indirect(ar, 8, i64_ty), high, ma); + api_free_reg(g, hr, RC_INT); + } else { + T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma); + } + if (low_tmp != REG_NONE) api_free_reg(g, low_tmp, RC_INT); + (void)src_ty; + api_free_reg(g, ar, RC_INT); + api_release(g, &v); + api_push(g, api_make_lv(dst_lv, dty)); + } + return; + } + if (api_is_i128_type(g->c, sty) && !api_is_i128_type(g->c, dty) && + ck == CV_TRUNC && abi_cg_sizeof(g->c->abi, dty) <= 8) { + Reg rr = api_alloc_reg_or_spill(g, RC_INT, dty); + Operand dst = api_op_reg(rr, dty); + if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || + v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { + ApiSValue lv = v; + lv.lvalue = 1; + T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty)); + } else if (v.op.kind == OPK_IMM) { + T->load_imm(T, dst, v.op.v.imm); + } else { + compiler_panic(g->c, 
g->cur_loc, + "CfreeCg: unsupported i128 truncation source"); + } + api_release(g, &v); + api_push(g, api_make_sv(dst, dty)); + return; + } + if (ck == CV_BITCAST && + abi_cg_sizeof(g->c->abi, sty) == abi_cg_sizeof(g->c->abi, dst_type) && + api_type_class(sty) == api_type_class(dty)) { + v.type = dty; + v.op.type = dty; + api_push(g, v); + return; + } + if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && + abi_cg_sizeof(g->c->abi, dty) == 16 && + (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { + FrameSlot slot = api_f128_temp_slot(g, dty); + Operand dst_lv = api_op_local(slot, dty); + if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || + v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); + ApiSValue src_lv = v; + Operand dst_addr; + Operand src_addr; + AggregateAccess agg; + src_lv.lvalue = 1; + dst_addr = api_lvalue_addr( + g, + &(ApiSValue){ + .op = dst_lv, .type = dty, .kind = SV_OPERAND, .lvalue = 1}, + ptr_ty); + src_addr = api_lvalue_addr(g, &src_lv, cg_type_ptr_to(g->c, sty)); + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = 16; + g->target->copy_bytes(g->target, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + } else if (v.op.kind == OPK_REG) { + g->target->store(g->target, dst_lv, v.op, + api_mem_for_lvalue(g, &dst_lv, sty)); + } else if (v.op.kind == OPK_IMM) { + u8 bytes[16]; + u64 lo = (u64)v.op.v.imm; + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 idx = g->c->target.big_endian ? 
15u - i : i; + bytes[idx] = (u8)(lo >> (i * 8u)); + } + api_store_f128_bytes(g, slot, dty, bytes); + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: unsupported 16-byte bitcast source"); + } + api_release(g, &v); + api_push(g, api_make_lv(dst_lv, dty)); + return; + } + + src = api_force_reg(g, &v, sty); + rr = api_alloc_reg_or_spill(g, api_type_class(dty), dty); + dst = api_op_reg(rr, dty); + T->convert(T, ck, dst, src); + api_release(g, &v); + api_push(g, api_make_sv(dst, dty)); +} + +void cfree_cg_int_binop(CfreeCg* g, CfreeCgIntBinOp op, uint32_t flags) { + api_cg_binop(g, api_map_int_binop(op), flags); +} + +void cfree_cg_int_unop(CfreeCg* g, CfreeCgIntUnOp op, uint32_t flags) { + api_cg_unop(g, api_map_int_unop(op), flags); +} + +void cfree_cg_int_cmp(CfreeCg* g, CfreeCgIntCmpOp op) { + api_cg_cmp(g, api_map_int_cmp(op)); +} + +const char* api_i128_binop_helper(BinOp op) { + switch (op) { + case BO_IADD: + return "__cfree_addti3"; + case BO_ISUB: + return "__cfree_subti3"; + case BO_IMUL: + return "__multi3"; + case BO_SDIV: + return "__divti3"; + case BO_UDIV: + return "__udivti3"; + case BO_SREM: + return "__modti3"; + case BO_UREM: + return "__umodti3"; + case BO_AND: + return "__cfree_andti3"; + case BO_OR: + return "__cfree_orti3"; + case BO_XOR: + return "__cfree_xorti3"; + case BO_SHL: + return "__ashlti3"; + case BO_SHR_U: + return "__lshrti3"; + case BO_SHR_S: + return "__ashrti3"; + case BO_FADD: + case BO_FSUB: + case BO_FMUL: + case BO_FDIV: + default: + return NULL; + } +} + +int api_i128_cmp_is_unsigned(CmpOp op) { + return op == CMP_LT_U || op == CMP_LE_U || op == CMP_GT_U || op == CMP_GE_U; +} + +const char* api_f128_binop_helper(CfreeCgFpBinOp op) { + switch (op) { + case CFREE_CG_FP_ADD: + return "__addtf3"; + case CFREE_CG_FP_SUB: + return "__subtf3"; + case CFREE_CG_FP_MUL: + return "__multf3"; + case CFREE_CG_FP_DIV: + return "__divtf3"; + case CFREE_CG_FP_REM: + return NULL; + } + return NULL; +} + +int api_f128_stack_top(CfreeCg* 
g, u32 depth) { + if (!g || g->sp <= depth) return 0; + return api_is_f128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); +} + +void api_f128_call_unary(CfreeCg* g, const char* name, CfreeCgTypeId ret, + CfreeCgTypeId param) { + ApiSValue args[1]; + CfreeCgTypeId ps[1]; + args[0] = api_pop(g); + ps[0] = param; + api_runtime_call_values(g, name, ret, ps, 1, args); +} + +void cfree_cg_fp_binop(CfreeCg* g, CfreeCgFpBinOp op, uint32_t flags) { + (void)flags; + if (op == CFREE_CG_FP_REM) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); + return; + } + if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + const char* name = api_f128_binop_helper(op); + if (!name) + compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); + args[1] = api_pop(g); + args[0] = api_pop(g); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, name, f128, ps, 2, args); + return; + } + api_cg_binop(g, api_map_fp_binop(op), 0); +} + +void cfree_cg_fp_unop(CfreeCg* g, CfreeCgFpUnOp op, uint32_t flags) { + (void)flags; + (void)op; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + args[1] = api_pop(g); + args[0] = api_make_f128_const(g, 0.0, f128); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, "__subtf3", f128, ps, 2, args); + return; + } + api_cg_unop(g, UO_NEG, 0); +} + +void cfree_cg_fp_cmp(CfreeCg* g, CfreeCgFpCmpOp op) { + if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + const char* name = "__eqtf2"; + CmpOp cmp = CMP_EQ; + switch (op) { + case CFREE_CG_FP_OEQ: + case CFREE_CG_FP_UEQ: + name = "__eqtf2"; + cmp = CMP_EQ; + break; + case CFREE_CG_FP_ONE: + 
case CFREE_CG_FP_UNE: + name = "__netf2"; + cmp = CMP_NE; + break; + case CFREE_CG_FP_OLT: + case CFREE_CG_FP_ULT: + name = "__lttf2"; + cmp = CMP_LT_S; + break; + case CFREE_CG_FP_OLE: + case CFREE_CG_FP_ULE: + name = "__letf2"; + cmp = CMP_LE_S; + break; + case CFREE_CG_FP_OGT: + case CFREE_CG_FP_UGT: + name = "__gttf2"; + cmp = CMP_GT_S; + break; + case CFREE_CG_FP_OGE: + case CFREE_CG_FP_UGE: + name = "__getf2"; + cmp = CMP_GE_S; + break; + } + args[1] = api_pop(g); + args[0] = api_pop(g); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, name, i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, cmp); + return; + } + api_cg_cmp(g, api_map_fp_cmp(op)); +} + +void cfree_cg_sext(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_SEXT); +} + +void cfree_cg_zext(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_ZEXT); +} + +void cfree_cg_trunc(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_TRUNC); +} + +void cfree_cg_ptr_to_int(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_BITCAST); +} + +void cfree_cg_int_to_ptr(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_BITCAST); +} + +void cfree_cg_bitcast(CfreeCg* g, CfreeCgTypeId dst) { + api_cg_convert_kind(g, dst, CV_BITCAST); +} + +void cfree_cg_fpext(CfreeCg* g, CfreeCgTypeId dst) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + if (api_is_f128_type(g->c, dty)) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + const char* name = sty == builtin_id(CFREE_CG_BUILTIN_F32) + ? 
"__extendsftf2" + : "__extenddftf2"; + api_push(g, v); + api_f128_call_unary(g, name, dty, sty); + return; + } + api_cg_convert_kind(g, dst, CV_FEXT); +} + +void cfree_cg_fptrunc(CfreeCg* g, CfreeCgTypeId dst) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + if (api_f128_stack_top(g, 0)) { + ApiSValue v = api_pop(g); + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + const char* name = dty == builtin_id(CFREE_CG_BUILTIN_F32) ? "__trunctfsf2" + : "__trunctfdf2"; + api_push(g, v); + api_f128_call_unary(g, name, dty, f128); + return; + } + api_cg_convert_kind(g, dst, CV_FTRUNC); +} + +void cfree_cg_sint_to_float(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding) { + (void)rounding; + if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); + CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char* name = + sz > 8 ? "__floattitf" : (sz > 4 ? "__floatditf" : "__floatsitf"); + api_push(g, v); + api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); + return; + } + api_cg_convert_kind(g, dst, CV_ITOF_S); +} + +void cfree_cg_uint_to_float(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding) { + (void)rounding; + if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); + CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char* name = + sz > 8 ? "__floatuntitf" : (sz > 4 ? 
"__floatunditf" : "__floatunsitf"); + api_push(g, v); + api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); + return; + } + api_cg_convert_kind(g, dst, CV_ITOF_U); +} + +void cfree_cg_float_to_sint(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding) { + (void)rounding; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); + CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char* name = + sz > 8 ? "__fixtfti" : (sz > 4 ? "__fixtfdi" : "__fixtfsi"); + api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); + if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); + return; + } + api_cg_convert_kind(g, dst, CV_FTOI_S); +} + +void cfree_cg_float_to_uint(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding) { + (void)rounding; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); + CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char* name = + sz > 8 ? "__fixunstfti" : (sz > 4 ? "__fixunstfdi" : "__fixunstfsi"); + api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); + if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); + return; + } + api_cg_convert_kind(g, dst, CV_FTOI_U); +} + +/* ============================================================ + * Intrinsics (stub) + * ============================================================ */ + +IntrinKind api_map_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, + CfreeCgTypeId result_type) { + u32 size = result_type ? 
abi_cg_sizeof(g->c->abi, result_type) : 0; + switch (intrin) { + case CFREE_CG_INTRIN_TRAP: + return INTRIN_TRAP; + case CFREE_CG_INTRIN_CLZ: + return INTRIN_CLZ; + case CFREE_CG_INTRIN_CTZ: + return INTRIN_CTZ; + case CFREE_CG_INTRIN_POPCOUNT: + return INTRIN_POPCOUNT; + case CFREE_CG_INTRIN_BSWAP: + if (size <= 2) return INTRIN_BSWAP16; + if (size <= 4) return INTRIN_BSWAP32; + return INTRIN_BSWAP64; + case CFREE_CG_INTRIN_SETJMP: + return INTRIN_SETJMP; + case CFREE_CG_INTRIN_LONGJMP: + return INTRIN_LONGJMP; + case CFREE_CG_INTRIN_SADD_OVERFLOW: + return INTRIN_SADD_OVERFLOW; + case CFREE_CG_INTRIN_UADD_OVERFLOW: + return INTRIN_UADD_OVERFLOW; + case CFREE_CG_INTRIN_SSUB_OVERFLOW: + return INTRIN_SSUB_OVERFLOW; + case CFREE_CG_INTRIN_USUB_OVERFLOW: + return INTRIN_USUB_OVERFLOW; + case CFREE_CG_INTRIN_SMUL_OVERFLOW: + return INTRIN_SMUL_OVERFLOW; + case CFREE_CG_INTRIN_UMUL_OVERFLOW: + return INTRIN_UMUL_OVERFLOW; + case CFREE_CG_INTRIN_PREFETCH: + return INTRIN_PREFETCH; + case CFREE_CG_INTRIN_EXPECT: + return INTRIN_EXPECT; + case CFREE_CG_INTRIN_ASSUME_ALIGNED: + return INTRIN_ASSUME_ALIGNED; + case CFREE_CG_INTRIN_FMA: + case CFREE_CG_INTRIN_SYSCALL: + case CFREE_CG_INTRIN_IRQ_SAVE: + case CFREE_CG_INTRIN_IRQ_RESTORE: + case CFREE_CG_INTRIN_IRQ_DISABLE: + case CFREE_CG_INTRIN_IRQ_ENABLE: + case CFREE_CG_INTRIN_DMB: + case CFREE_CG_INTRIN_DSB: + case CFREE_CG_INTRIN_ISB: + case CFREE_CG_INTRIN_DCACHE_CLEAN: + case CFREE_CG_INTRIN_DCACHE_INVALIDATE: + case CFREE_CG_INTRIN_DCACHE_CLEAN_INVALIDATE: + case CFREE_CG_INTRIN_ICACHE_INVALIDATE: + case CFREE_CG_INTRIN_CPU_NOP: + case CFREE_CG_INTRIN_CPU_YIELD: + case CFREE_CG_INTRIN_WFI: + case CFREE_CG_INTRIN_WFE: + case CFREE_CG_INTRIN_SEV: + case CFREE_CG_INTRIN_CORO_SWITCH: + return INTRIN_NONE; + } + return INTRIN_NONE; +} + +int api_intrinsic_is_void(CfreeCgIntrinsic intrin) { + return intrin == CFREE_CG_INTRIN_TRAP || intrin == CFREE_CG_INTRIN_LONGJMP || + intrin == CFREE_CG_INTRIN_PREFETCH; +} + +int 
api_intrinsic_is_overflow(CfreeCgIntrinsic intrin) { + return intrin == CFREE_CG_INTRIN_SADD_OVERFLOW || + intrin == CFREE_CG_INTRIN_UADD_OVERFLOW || + intrin == CFREE_CG_INTRIN_SSUB_OVERFLOW || + intrin == CFREE_CG_INTRIN_USUB_OVERFLOW || + intrin == CFREE_CG_INTRIN_SMUL_OVERFLOW || + intrin == CFREE_CG_INTRIN_UMUL_OVERFLOW; +} + +void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, + CfreeCgTypeId result_type) { + CGTarget* T; + CfreeCgTypeId rty; + CfreeCgTypeId int_ty; + IntrinKind kind; + ApiSValue* svs; + Operand* args; + Operand dsts[2]; + u32 ndst = 0; + Heap* h; + if (!g) return; + T = g->target; + h = g->c->ctx->heap; + rty = resolve_type(g->c, result_type); + int_ty = builtin_id(CFREE_CG_BUILTIN_I32); + kind = api_map_intrinsic(g, intrin, result_type); + if (kind == INTRIN_NONE) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported intrinsic"); + return; + } + + svs = NULL; + args = NULL; + if (nargs) { + svs = (ApiSValue*)h->alloc(h, sizeof(*svs) * nargs, _Alignof(ApiSValue)); + args = (Operand*)h->alloc(h, sizeof(*args) * nargs, _Alignof(Operand)); + memset(args, 0, sizeof(*args) * nargs); + for (u32 i = 0; i < nargs; ++i) { + u32 idx = nargs - 1u - i; + CfreeCgTypeId aty; + svs[idx] = api_pop(g); + aty = api_sv_type(&svs[idx]); + if (api_sv_op_is(&svs[idx], OPK_IMM) && + (intrin == CFREE_CG_INTRIN_EXPECT || + intrin == CFREE_CG_INTRIN_ASSUME_ALIGNED || + intrin == CFREE_CG_INTRIN_PREFETCH)) { + args[idx] = svs[idx].op; + } else { + args[idx] = api_force_reg(g, &svs[idx], aty); + } + } + } + + if (api_intrinsic_is_overflow(intrin)) { + CfreeCgTypeId vty = rty ? rty : (nargs ? 
api_sv_type(&svs[0]) : int_ty); + CfreeCgTypeId bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); + Reg rr = api_alloc_reg_or_spill(g, api_type_class(vty), vty); + Reg ok = api_alloc_reg_or_spill(g, RC_INT, bool_ty); + dsts[0] = api_op_reg(rr, vty); + dsts[1] = api_op_reg(ok, bool_ty); + ndst = 2; + } else if (!api_intrinsic_is_void(intrin) && !cg_type_is_void(g->c, rty)) { + Reg rr = api_alloc_reg_or_spill(g, api_type_class(rty), rty); + dsts[0] = api_op_reg(rr, rty); + ndst = 1; + } + + T->intrinsic(T, kind, ndst ? dsts : NULL, ndst, args, nargs); + + for (u32 i = 0; i < nargs; ++i) api_release(g, &svs[i]); + if (svs) h->free(h, svs, sizeof(*svs) * nargs); + if (args) h->free(h, args, sizeof(*args) * nargs); + + if (api_intrinsic_is_overflow(intrin)) { + api_push(g, api_make_sv(dsts[0], dsts[0].type)); + api_push(g, api_make_sv(dsts[1], dsts[1].type)); + } else if (ndst == 1) { + api_push(g, api_make_sv(dsts[0], rty)); + } +} + +/* ============================================================ + * Atomics (stub) + * ============================================================ */ + +CfreeCgTypeId api_atomic_pointee(CfreeCg* g, CfreeCgTypeId pty, + const char* who) { + CfreeCgTypeId pointee = cg_type_pointee(g->c, pty); + if (!pointee) { + compiler_panic(g->c, g->cur_loc, "%s: operand is not a pointer", who); + return builtin_id(CFREE_CG_BUILTIN_I32); + } + return pointee; +} diff --git a/src/cg/asm.c b/src/cg/asm.c @@ -0,0 +1,321 @@ +#include "cg/internal.h" + +const char* api_sym_cstr(CfreeCg* g, CfreeSym sym) { + size_t len; + const char* s; + if (!sym) return ""; + s = pool_str(g->c->global, (Sym)sym, &len); + (void)len; + return s ? 
s : ""; +} + +int api_asm_parse_match_index(const char* s) { + int n; + if (!s || s[0] < '0' || s[0] > '9') return -1; + n = 0; + for (const char* p = s; *p >= '0' && *p <= '9'; ++p) { + n = n * 10 + (*p - '0'); + } + return n; +} + +const char* api_asm_constraint_body(const char* s) { + if (!s) return ""; + if (s[0] == '=' && s[1] == '&') return s + 2; + if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; + return s; +} + +int api_asm_is_early_clobber(const char* s) { + if (!s) return 0; + return (s[0] == '=' && s[1] == '&') || s[0] == '&'; +} + +void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys, RegClass cls) { + FrameSlot slot = api_take_spill_slot(g, cls); + Operand victim_reg = api_op_reg(phys, api_owned_reg_type(g, sv)); + g->target->spill_reg(g->target, victim_reg, slot, api_mem_for_spill(g, sv)); + api_free_reg(g, phys, cls); + sv->spill_slot = slot; + sv->res = RES_SPILLED; + api_set_owned_reg(sv, (Reg)REG_NONE); +} + +void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { + static const char* const match_strs[10] = {"0", "1", "2", "3", "4", + "5", "6", "7", "8", "9"}; + CGTarget* T; + Heap* h; + CfreeCgTypeId fallback_ty; + AsmConstraint* outs; + AsmConstraint* ins; + Sym* clobs; + ApiSValue* in_svs; + Operand* in_ops; + Operand* out_ops; + u8* out_reg_owned; + const char* tmpl_str; + Sym sym_memory; + int has_memory_clobber; + uint32_t ninout; + uint32_t total_inputs; + CfreeSym tmpl = asm_block.tmpl; + const CfreeCgAsmOperand* outputs = asm_block.outputs; + uint32_t noutputs = asm_block.noutputs; + const CfreeCgAsmOperand* inputs = asm_block.inputs; + uint32_t ninputs = asm_block.ninputs; + const CfreeSym* clobbers = asm_block.clobbers; + uint32_t nclobbers = asm_block.nclobbers; + (void)asm_block.flags; + (void)asm_block.clobber_abi_sets; + if (!g) return; + api_local_const_memory_boundary(g); + T = g->target; + h = g->c->ctx->heap; + fallback_ty = builtin_id(CFREE_CG_BUILTIN_I64); + tmpl_str = api_sym_cstr(g, tmpl); + 
ninout = 0; + + outs = NULL; + ins = NULL; + clobs = NULL; + in_svs = NULL; + in_ops = NULL; + out_ops = NULL; + out_reg_owned = NULL; + + if (noutputs) { + outs = (AsmConstraint*)h->alloc(h, sizeof(*outs) * noutputs, + _Alignof(AsmConstraint)); + memset(outs, 0, sizeof(*outs) * noutputs); + for (u32 i = 0; i < noutputs; ++i) { + outs[i].str = api_sym_cstr(g, outputs[i].constraint); + outs[i].name = (Sym)outputs[i].name; + outs[i].type = resolve_type(g->c, outputs[i].type); + outs[i].dir = (u8)api_map_asm_dir(outputs[i].dir); + if (!outs[i].type) outs[i].type = fallback_ty; + if (outs[i].dir == ASM_INOUT) { + if (i >= 10) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm inout output index exceeds matching " + "constraint range"); + } + ninout++; + } + } + out_ops = + (Operand*)h->alloc(h, sizeof(*out_ops) * noutputs, _Alignof(Operand)); + memset(out_ops, 0, sizeof(*out_ops) * noutputs); + out_reg_owned = (u8*)h->alloc(h, noutputs, 1); + memset(out_reg_owned, 0, noutputs); + } + + total_inputs = ninputs + ninout; + if (total_inputs) { + uint32_t inout_index; + ins = (AsmConstraint*)h->alloc(h, sizeof(*ins) * total_inputs, + _Alignof(AsmConstraint)); + memset(ins, 0, sizeof(*ins) * total_inputs); + in_svs = (ApiSValue*)h->alloc(h, sizeof(*in_svs) * total_inputs, + _Alignof(ApiSValue)); + in_ops = (Operand*)h->alloc(h, sizeof(*in_ops) * total_inputs, + _Alignof(Operand)); + memset(in_ops, 0, sizeof(*in_ops) * total_inputs); + for (u32 i = 0; i < ninputs; ++i) { + ins[i].str = api_sym_cstr(g, inputs[i].constraint); + ins[i].name = (Sym)inputs[i].name; + ins[i].type = resolve_type(g->c, inputs[i].type); + ins[i].dir = (u8)api_map_asm_dir(inputs[i].dir); + if (!ins[i].type) ins[i].type = fallback_ty; + } + inout_index = ninputs; + for (u32 i = 0; i < noutputs; ++i) { + if (outs[i].dir != ASM_INOUT) continue; + ins[inout_index].str = match_strs[i]; + ins[inout_index].type = outs[i].type ? 
outs[i].type : fallback_ty; + ins[inout_index].dir = ASM_IN; + inout_index++; + } + for (u32 i = 0; i < total_inputs; ++i) { + u32 idx = total_inputs - 1u - i; + in_svs[idx] = api_pop(g); + api_ensure_reg(g, &in_svs[idx]); + } + } + + if (nclobbers) { + clobs = (Sym*)h->alloc(h, sizeof(*clobs) * nclobbers, _Alignof(Sym)); + for (u32 i = 0; i < nclobbers; ++i) clobs[i] = (Sym)clobbers[i]; + } + + for (u32 i = 0; i < noutputs; ++i) { + const char* body = api_asm_constraint_body(outs[i].str); + if (api_asm_is_early_clobber(outs[i].str)) continue; + if (body[0] == 'r') { + CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; + Reg r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); + out_ops[i] = api_op_reg(r, oty); + out_reg_owned[i] = 1; + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: unsupported asm output constraint"); + } + } + + for (u32 i = 0; i < total_inputs; ++i) { + const char* s = ins[i].str ? ins[i].str : ""; + int matched = api_asm_parse_match_index(s); + CfreeCgTypeId ity = api_sv_type(&in_svs[i]); + if (matched >= 0) { + Operand bound; + if ((u32)matched >= noutputs) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm matching constraint out of range"); + continue; + } + if (api_asm_is_early_clobber(outs[matched].str)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm matching input uses early-clobber output"); + continue; + } + bound = out_ops[matched]; + if (api_sv_op_is(&in_svs[i], OPK_REG) && + in_svs[i].op.v.reg == bound.v.reg) { + } else if (api_sv_op_is(&in_svs[i], OPK_IMM)) { + T->load_imm(T, bound, in_svs[i].op.v.imm); + } else { + Operand src = api_force_reg(g, &in_svs[i], ity); + T->copy(T, bound, src); + } + in_ops[i] = bound; + } else if (s[0] == 'r') { + in_ops[i] = api_force_reg(g, &in_svs[i], ity); + } else if (s[0] == 'i') { + if (!api_sv_op_is(&in_svs[i], OPK_IMM)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm 'i' constraint requires an immediate"); + } + in_ops[i] = in_svs[i].op; + } else if (s[0] 
== 'm') { + if (api_sv_op_is(&in_svs[i], OPK_INDIRECT)) { + in_ops[i] = in_svs[i].op; + } else if (api_is_lvalue_sv(&in_svs[i])) { + CfreeCgTypeId pty = + cg_type_ptr_to(g->c, ity ? ity : builtin_id(CFREE_CG_BUILTIN_VOID)); + Operand dst = api_lvalue_addr(g, &in_svs[i], pty); + in_svs[i].op = api_op_indirect(dst.v.reg, 0, ity); + in_svs[i].res = RES_REG; + in_ops[i] = in_svs[i].op; + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm 'm' constraint requires an lvalue"); + } + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: unsupported asm input constraint"); + } + } + + for (u32 i = 0; i < noutputs; ++i) { + const char* body; + CfreeCgTypeId oty; + Reg r; + if (!api_asm_is_early_clobber(outs[i].str)) continue; + body = api_asm_constraint_body(outs[i].str); + if (body[0] != 'r') { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: unsupported early-clobber asm output"); + continue; + } + oty = outs[i].type ? outs[i].type : fallback_ty; + r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); + for (u32 k = 0; k < total_inputs; ++k) { + if ((in_ops[k].kind == OPK_REG && in_ops[k].v.reg == r) || + (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm early-clobber register collision"); + } + } + out_ops[i] = api_op_reg(r, oty); + out_reg_owned[i] = 1; + } + + sym_memory = pool_intern_cstr(g->c->global, "memory"); + has_memory_clobber = 0; + for (u32 i = 0; i < nclobbers; ++i) { + if (clobs[i] == sym_memory) has_memory_clobber = 1; + } + if (has_memory_clobber) { + for (u32 i = 0; i < g->sp; ++i) { + ApiSValue* sv = &g->stack[i]; + Reg phys; + RegClass cls; + if (sv->res != RES_REG) continue; + phys = api_reg_of_sv(sv); + cls = (RegClass)api_class_of_sv(sv); + api_asm_spill_sv(g, sv, phys, cls); + } + } else if (T->resolve_reg_name) { + for (u32 i = 0; i < nclobbers; ++i) { + Reg phys; + RegClass cls; + if (T->resolve_reg_name(T, clobs[i], &phys, &cls) != 0) continue; + for (u32 k = 0; k < 
noutputs; ++k) { + if (out_ops[k].kind == OPK_REG && out_ops[k].cls == cls && + (Reg)out_ops[k].v.reg == phys) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm clobber overlaps output"); + } + } + for (u32 k = 0; k < total_inputs; ++k) { + if (in_ops[k].kind == OPK_REG && in_ops[k].cls == cls && + (Reg)in_ops[k].v.reg == phys) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: asm clobber overlaps input"); + } + } + for (u32 k = 0; k < g->sp; ++k) { + ApiSValue* sv = &g->stack[k]; + if (sv->res != RES_REG) continue; + if (api_class_of_sv(sv) != (u8)cls) continue; + if ((Reg)api_reg_of_sv(sv) != phys) continue; + api_asm_spill_sv(g, sv, phys, cls); + } + } + } + + T->asm_block(T, tmpl_str, outs, noutputs, out_ops, ins, total_inputs, in_ops, + clobs, nclobbers); + + for (u32 i = 0; i < total_inputs; ++i) api_release(g, &in_svs[i]); + for (u32 i = 0; i < noutputs; ++i) { + CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; + ApiSValue sv = api_make_sv(out_ops[i], oty); + if (!out_reg_owned[i] && sv.res == RES_REG) sv.res = RES_INHERENT; + api_push(g, sv); + } + + if (outs) h->free(h, outs, sizeof(*outs) * noutputs); + if (ins) h->free(h, ins, sizeof(*ins) * total_inputs); + if (clobs) h->free(h, clobs, sizeof(*clobs) * nclobbers); + if (in_svs) h->free(h, in_svs, sizeof(*in_svs) * total_inputs); + if (in_ops) h->free(h, in_ops, sizeof(*in_ops) * total_inputs); + if (out_ops) h->free(h, out_ops, sizeof(*out_ops) * noutputs); + if (out_reg_owned) h->free(h, out_reg_owned, noutputs); +} + +void cfree_cg_file_scope_asm(CfreeCg* g, const char* asm_source, + size_t asm_source_len) { + AsmLexer* lex; + if (!g || !asm_source) return; + api_local_const_memory_boundary(g); + lex = asm_lex_open_mem(g->c, "<file-scope asm>", asm_source, asm_source_len); + if (!lex) + compiler_panic(g->c, api_no_loc(), "CfreeCg: file-scope asm out of memory"); + asm_parse(g->c, lex, g->mc); + asm_lex_close(lex); +} + +/* ============================================================ + 
* Labels / branches + * ============================================================ */ diff --git a/src/cg/atomic.c b/src/cg/atomic.c @@ -0,0 +1,181 @@ +#include "cg/internal.h" + +MemAccess api_mem_for_atomic(CfreeCg* g, CfreeCgTypeId val_ty) { + MemAccess ma; + api_require_scalar_mem_type(g, "atomic memory access", val_ty); + if (api_mem_type_size(g, val_ty, "atomic memory access") > 8u) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: atomic memory access size exceeds 8 bytes"); + } + memset(&ma, 0, sizeof ma); + ma.type = val_ty; + ma.size = val_ty ? abi_cg_sizeof(g->c->abi, val_ty) : 0; + ma.align = val_ty ? abi_cg_alignof(g->c->abi, val_ty) : 0; + ma.flags = MF_ATOMIC; + ma.alias.kind = (u8)ALIAS_UNKNOWN; + return ma; +} + +int cfree_cg_atomic_is_legal(CfreeCompiler* c, CfreeCgMemAccess access, + CfreeCgMemOrder order) { + CfreeCgTypeId ty = resolve_type(c, access.type); + (void)order; + if (!ty) return 0; + if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0; + return abi_cg_sizeof(c->abi, access.type) <= 8; +} + +int cfree_cg_atomic_is_lock_free(CfreeCompiler* c, CfreeCgMemAccess access) { + CfreeCgTypeId ty = resolve_type(c, access.type); + if (!ty) return 0; + if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0; + return abi_cg_sizeof(c->abi, access.type) <= (u32)c->target.ptr_size; +} + +void cfree_cg_atomic_load(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder order) { + ApiSValue ptr; + CfreeCgTypeId pty, val_ty; + Operand addr, dst; + Reg rr; + if (!g) return; + api_local_const_memory_boundary(g); + ptr = api_pop(g); + pty = api_sv_type(&ptr); + val_ty = resolve_type(g->c, access.type); + if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_load"); + api_require_pointer_value(g, "atomic_load pointer", pty); + addr = api_force_reg(g, &ptr, pty); + rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); + dst = api_op_reg(rr, val_ty); + g->target->atomic_load(g->target, dst, addr, 
api_mem_for_atomic(g, val_ty), + api_map_mem_order(order)); + api_release(g, &ptr); + api_push(g, api_make_sv(dst, val_ty)); +} + +void cfree_cg_atomic_store(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder order) { + ApiSValue val, ptr; + CfreeCgTypeId pty, val_ty; + Operand addr, src; + if (!g) return; + api_local_const_memory_boundary(g); + val = api_pop(g); + ptr = api_pop(g); + pty = api_sv_type(&ptr); + val_ty = resolve_type(g->c, access.type); + if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_store"); + api_require_pointer_value(g, "atomic_store pointer", pty); + api_validate_memory_value(g, "atomic_store", val_ty, api_sv_type(&val)); + addr = api_force_reg(g, &ptr, pty); + src = api_sv_op_is_reg_or_imm(&val) ? val.op : api_force_reg(g, &val, val_ty); + g->target->atomic_store(g->target, addr, src, api_mem_for_atomic(g, val_ty), + api_map_mem_order(order)); + api_release(g, &val); + api_release(g, &ptr); +} + +void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgAtomicOp op, CfreeCgMemOrder order) { + ApiSValue val, ptr; + CfreeCgTypeId pty, val_ty; + Operand addr, vop, dst; + Reg rr; + if (!g) return; + api_local_const_memory_boundary(g); + val = api_pop(g); + ptr = api_pop(g); + pty = api_sv_type(&ptr); + val_ty = resolve_type(g->c, access.type); + if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_rmw"); + api_require_pointer_value(g, "atomic_rmw pointer", pty); + api_validate_memory_value(g, "atomic_rmw", val_ty, api_sv_type(&val)); + addr = api_force_reg(g, &ptr, pty); + vop = api_sv_op_is_reg_or_imm(&val) ? 
val.op : api_force_reg(g, &val, val_ty); + rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); + dst = api_op_reg(rr, val_ty); + g->target->atomic_rmw(g->target, api_map_atomic_op(op), dst, addr, vop, + api_mem_for_atomic(g, val_ty), + api_map_mem_order(order)); + api_release(g, &val); + api_release(g, &ptr); + api_push(g, api_make_sv(dst, val_ty)); +} + +int api_take_dead_owned_reg(ApiSValue* sv, u8 cls, Reg avoid, Reg* out) { + Reg r; + if (sv->res != RES_REG || sv->pinned) return 0; + if (api_class_of_sv(sv) != cls) return 0; + r = api_reg_of_sv(sv); + if (r == (Reg)REG_NONE || r == avoid) return 0; + sv->res = RES_INHERENT; + *out = r; + return 1; +} + +Reg api_alloc_dead_input_or_spill(CfreeCg* g, ApiSValue* a, ApiSValue* b, + ApiSValue* c, u8 cls, CfreeCgTypeId ty, + Reg avoid) { + Reg r; + if (api_take_dead_owned_reg(a, cls, avoid, &r)) return r; + if (api_take_dead_owned_reg(b, cls, avoid, &r)) return r; + if (api_take_dead_owned_reg(c, cls, avoid, &r)) return r; + return api_alloc_reg_or_spill(g, cls, ty); +} + +void cfree_cg_atomic_cmpxchg(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder success, CfreeCgMemOrder failure, + int weak) { + ApiSValue desired, expected, ptr; + CfreeCgTypeId pty, val_ty, bool_ty; + Operand addr, exp_op, des_op, prior, ok; + Reg pr, kr; + if (!g) return; + api_local_const_memory_boundary(g); + (void)weak; + desired = api_pop(g); + expected = api_pop(g); + ptr = api_pop(g); + pty = api_sv_type(&ptr); + val_ty = resolve_type(g->c, access.type); + if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_cmpxchg"); + api_require_pointer_value(g, "atomic_cmpxchg pointer", pty); + api_validate_memory_value(g, "atomic_cmpxchg expected", val_ty, + api_sv_type(&expected)); + api_validate_memory_value(g, "atomic_cmpxchg desired", val_ty, + api_sv_type(&desired)); + addr = api_force_reg(g, &ptr, pty); + exp_op = api_sv_op_is_reg_or_imm(&expected) + ? 
expected.op + : api_force_reg(g, &expected, val_ty); + des_op = api_sv_op_is_reg_or_imm(&desired) + ? desired.op + : api_force_reg(g, &desired, val_ty); + bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); + pr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, + api_type_class(val_ty), val_ty, + (Reg)REG_NONE); + kr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, RC_INT, + bool_ty, pr); + prior = api_op_reg(pr, val_ty); + ok = api_op_reg(kr, bool_ty); + g->target->atomic_cas(g->target, prior, ok, addr, exp_op, des_op, + api_mem_for_atomic(g, val_ty), + api_map_mem_order(success), api_map_mem_order(failure)); + api_release(g, &desired); + api_release(g, &expected); + api_release(g, &ptr); + api_push(g, api_make_sv(prior, val_ty)); + api_push(g, api_make_sv(ok, bool_ty)); +} + +void cfree_cg_atomic_fence(CfreeCg* g, CfreeCgMemOrder order) { + if (!g) return; + api_local_const_memory_boundary(g); + g->target->fence(g->target, api_map_mem_order(order)); +} + +/* ============================================================ + * Inline asm (stub) + * ============================================================ */ diff --git a/src/cg/call.c b/src/cg/call.c @@ -0,0 +1,317 @@ +#include "cg/internal.h" + +CGABIValue* api_alloc_call_args(CfreeCg* g, u32 nargs) { + CGABIValue* avs = NULL; + if (nargs) { + avs = arena_array(g->c->tu, CGABIValue, nargs); + memset(avs, 0, sizeof(CGABIValue) * nargs); + } + g->avs_in_flight = avs; + g->avs_in_flight_n = nargs; + return avs; +} + +void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty, + const ABIFuncInfo* abi, u32 idx) { + ApiSValue arg = api_pop(g); + int is_vararg = (idx >= abi->nparams); + CfreeCgTypeId aty = is_vararg ? (arg.type ? arg.type : api_sv_type(&arg)) + : cg_type_func_param_id(g->c, fty, idx); + if (!aty) aty = arg.type; + + av->type = aty; + av->abi = is_vararg ? 
NULL : &abi->params[idx]; + + if (api_is_wide16_scalar_type(g->c, aty)) { + ApiSValue lv = api_wide16_materialize_lvalue(g, &arg, aty); + av->storage = lv.op; + av->storage.type = aty; + av->size = 16; + } else if (cg_type_is_aggregate(g->c, aty)) { + api_ensure_reg(g, &arg); + Operand st = arg.op; + st.type = aty; + av->storage = st; + av->size = abi_cg_sizeof(g->c->abi, aty); + } else { + api_ensure_reg(g, &arg); + av->storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) + ? api_force_reg(g, &arg, aty) + : arg.op; + } +} + +void api_alloc_call_ret_storage(CfreeCg* g, CGTarget* T, CfreeCgTypeId ret_ty, + Operand* out) { + if (api_arg_storage_must_be_addr(g->c, ret_ty)) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ret_ty; + fsd.size = abi_cg_sizeof(g->c->abi, ret_ty); + fsd.align = abi_cg_alignof(g->c->abi, ret_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_ADDR_TAKEN; + FrameSlot slot = T->frame_slot(T, &fsd); + *out = api_op_local(slot, ret_ty); + } else { + Reg r = api_alloc_reg_or_spill(g, api_type_class(ret_ty), ret_ty); + *out = api_op_reg(r, ret_ty); + } +} + +void api_release_call_args(CfreeCg* g, CGABIValue* avs, u32 nargs) { + for (u32 i = 0; i < nargs; ++i) { + api_release_arg_storage(g, &avs[i].storage); + } + g->avs_in_flight = NULL; + g->avs_in_flight_n = 0; +} + +void api_push_call_result(CfreeCg* g, Operand ret_storage, + CfreeCgTypeId ret_ty) { + if (ret_storage.kind == OPK_LOCAL || ret_storage.kind == OPK_GLOBAL || + ret_storage.kind == OPK_INDIRECT) { + api_push(g, api_make_lv(ret_storage, ret_ty)); + } else { + api_push(g, api_make_sv(ret_storage, ret_ty)); + } +} + +void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, + CfreeCgCallAttrs attrs) { + CGTarget* T; + CfreeCgTypeId fty; + const ABIFuncInfo* abi; + CfreeCgTypeId ret_ty; + int has_result; + CGABIValue* avs; + CGCallDesc desc; + ApiSValue callee; + int tail; + if (!g) return; + api_local_const_memory_boundary(g); + tail = + attrs.tail == 
CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; + T = g->target; + fty = resolve_type(g->c, fn_type); + if (!fty) return; + abi = abi_cg_func_info(g->c->abi, fty); + ret_ty = cg_type_func_ret_id(g->c, fty); + has_result = !tail && !cg_type_is_void(g->c, ret_ty); + + if (g->sp < (u32)nargs + 1u) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: call stack underflow"); + return; + } + + avs = api_alloc_call_args(g, nargs); + for (u32 i = 0; i < nargs; ++i) { + u32 idx = nargs - 1u - i; + api_pack_call_arg(g, &avs[idx], fty, abi, idx); + } + + callee = api_pop(g); + api_ensure_reg(g, &callee); + Operand callee_op = (callee.op.kind == OPK_GLOBAL) + ? callee.op + : api_force_reg(g, &callee, fty); + + memset(&desc, 0, sizeof desc); + desc.fn_type = fty; + desc.abi = abi; + desc.callee = callee_op; + desc.args = avs; + desc.nargs = nargs; + desc.flags = tail ? CG_CALL_TAIL : CG_CALL_NONE; + desc.ret.type = ret_ty; + desc.ret.abi = &abi->ret; + + if (has_result) { + api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); + } else { + desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); + } + + if (tail) api_regalloc_finish(g); + T->call(T, &desc); + + api_release_call_args(g, avs, nargs); + + if (callee.op.kind != OPK_GLOBAL) { + api_free_reg(g, callee_op.v.reg, RC_INT); + } + + if (has_result) { + api_push_call_result(g, desc.ret.storage, ret_ty); + } +} + +void api_cg_tail_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type) + __attribute__((unused)); +void api_cg_tail_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type) { + CGTarget* T; + CfreeCgTypeId fty; + const ABIFuncInfo* abi; + CGABIValue* avs; + CGCallDesc desc; + ApiSValue callee; + if (!g) return; + api_local_const_memory_boundary(g); + T = g->target; + fty = resolve_type(g->c, fn_type); + if (!fty) return; + abi = abi_cg_func_info(g->c->abi, fty); + avs = NULL; + if (nargs) { + avs = arena_array(g->c->tu, CGABIValue, nargs); + memset(avs, 0, sizeof(CGABIValue) * nargs); + } 
+ for (u32 i = 0; i < nargs; ++i) { + u32 idx = nargs - 1u - i; + ApiSValue arg = api_pop(g); + api_ensure_reg(g, &arg); + CfreeCgTypeId aty = cg_type_func_param_id(g->c, fty, idx); + if (!aty) aty = arg.type; + avs[idx].type = aty; + avs[idx].abi = idx < abi->nparams ? &abi->params[idx] : NULL; + avs[idx].storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) + ? api_force_reg(g, &arg, aty) + : arg.op; + } + callee = api_pop(g); + api_ensure_reg(g, &callee); + Operand callee_op = (callee.op.kind == OPK_GLOBAL) + ? callee.op + : api_force_reg(g, &callee, fty); + memset(&desc, 0, sizeof desc); + desc.fn_type = fty; + desc.abi = abi; + desc.callee = callee_op; + desc.args = avs; + desc.nargs = nargs; + desc.flags = CG_CALL_TAIL; + desc.ret.type = cg_type_func_ret_id(g->c, fty); + desc.ret.abi = &abi->ret; + desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); + api_regalloc_finish(g); + T->call(T, &desc); + for (u32 i = 0; i < nargs; ++i) { + api_release_arg_storage(g, &avs[i].storage); + } + if (callee.op.kind != OPK_GLOBAL) { + api_free_reg(g, callee_op.v.reg, RC_INT); + } +} + +void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs) { + CGTarget* T; + CfreeCgTypeId fty; + const ABIFuncInfo* abi; + CfreeCgTypeId ret_ty; + int has_result; + CGABIValue* avs; + CGCallDesc desc; + Operand callee_op; + if (!g) return; + api_local_const_memory_boundary(g); + int tail = + attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; + T = g->target; + fty = api_sym_type(g, sym); + if (!fty) return; + abi = abi_cg_func_info(g->c->abi, fty); + ret_ty = cg_type_func_ret_id(g->c, fty); + has_result = !tail && !cg_type_is_void(g->c, ret_ty); + if (g->sp < nargs) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: call stack underflow"); + return; + } + avs = api_alloc_call_args(g, nargs); + for (u32 i = 0; i < nargs; ++i) { + u32 idx = nargs - 1u - i; + api_pack_call_arg(g, &avs[idx], fty, abi, idx); + } 
+ callee_op = api_op_global((ObjSymId)sym, 0, cg_type_ptr_to(g->c, fty)); + memset(&desc, 0, sizeof desc); + desc.fn_type = fty; + desc.abi = abi; + desc.callee = callee_op; + desc.args = avs; + desc.nargs = nargs; + desc.flags = tail ? CG_CALL_TAIL : CG_CALL_NONE; + desc.ret.type = ret_ty; + desc.ret.abi = &abi->ret; + if (has_result) { + api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); + } else { + desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); + } + if (tail) api_regalloc_finish(g); + T->call(T, &desc); + api_release_call_args(g, avs, nargs); + if (has_result) { + api_push_call_result(g, desc.ret.storage, ret_ty); + } +} + +void cfree_cg_call_symbol(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs) { + api_call_symbol_common(g, sym, nargs, attrs); +} + +void cfree_cg_ret(CfreeCg* g) { + ApiSValue v; + CGTarget* T; + CfreeCgTypeId rty; + CGABIValue av; + Operand ret_op; + if (!g) return; + T = g->target; + rty = g->fn_ret_type; + if (cg_type_is_void(g->c, rty)) { + T->ret(T, NULL); + return; + } + v = api_pop(g); + memset(&av, 0, sizeof av); + av.type = rty; + av.abi = &g->fn_abi->ret; + int is_aggregate = cg_type_is_aggregate(g->c, rty); + if (is_aggregate) { + av.storage = v.op; + av.storage.type = rty; + av.size = abi_cg_sizeof(g->c->abi, rty); + T->ret(T, &av); + return; + } + if (api_is_wide16_scalar_type(g->c, rty)) { + ApiSValue lv = api_wide16_materialize_lvalue(g, &v, rty); + av.storage = lv.op; + av.storage.type = rty; + av.size = 16; + T->ret(T, &av); + return; + } + if (api_sv_op_is(&v, OPK_IMM)) { + ret_op = v.op; + ret_op.type = rty; + av.storage = ret_op; + T->ret(T, &av); + api_release(g, &v); + return; + } + ret_op = api_force_reg(g, &v, rty); + av.storage = ret_op; + T->ret(T, &av); + api_release(g, &v); +} + +void cfree_cg_ret_void(CfreeCg* g) { + if (!g) return; + g->target->ret(g->target, NULL); +} + +/* ============================================================ + * Data definitions 
(stubs) + * ============================================================ */ diff --git a/src/cg/control.c b/src/cg/control.c @@ -0,0 +1,698 @@ +#include "cg/internal.h" + +CfreeCgLabel cfree_cg_label_new(CfreeCg* g) { + if (!g) return CFREE_CG_LABEL_NONE; + return (CfreeCgLabel)g->target->label_new(g->target); +} + +void cfree_cg_label_place(CfreeCg* g, CfreeCgLabel label) { + if (!g) return; + api_local_const_control_boundary(g); + g->target->label_place(g->target, (Label)label); +} + +void cfree_cg_jump(CfreeCg* g, CfreeCgLabel label) { + if (!g) return; + api_local_const_control_boundary(g); + g->target->jump(g->target, (Label)label); +} + +void api_branch_if(CfreeCg* g, ApiSValue* v, int branch_when_true, + Label label) { + CGTarget* T; + CfreeCgTypeId ty; + if (!g) return; + api_local_const_control_boundary(g); + T = g->target; + ty = v->type ? v->type : builtin_id(CFREE_CG_BUILTIN_I32); + if (v->op.kind == OPK_IMM && v->kind == SV_OPERAND) { + if ((v->op.v.imm != 0) == !!branch_when_true) T->jump(T, label); + api_release(g, v); + return; + } + if (v->kind == SV_CMP) { + CmpOp op = branch_when_true ? v->delayed.cmp.op + : api_invert_cmp(v->delayed.cmp.op); + T->cmp_branch(T, op, v->delayed.cmp.a, v->delayed.cmp.b, label); + api_release(g, v); + return; + } + if (api_is_i128_type(g->c, ty)) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + CfreeCgTypeId ps[2] = {i128, i128}; + ApiSValue args[2]; + ApiSValue cmp; + args[0] = *v; + args[1] = api_make_sv(api_op_imm(0, i128), i128); + api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); + cmp = api_pop(g); + api_branch_if(g, &cmp, branch_when_true, label); + return; + } + { + Operand a = api_force_reg(g, v, ty); + Operand zero = api_op_imm(0, ty); + T->cmp_branch(T, branch_when_true ? 
CMP_NE : CMP_EQ, a, zero, label); + api_release(g, v); + } +} + +void cfree_cg_branch_true(CfreeCg* g, CfreeCgLabel label) { + ApiSValue v; + if (!g) return; + v = api_pop(g); + api_branch_if(g, &v, 1, (Label)label); +} + +void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label) { + ApiSValue v; + if (!g) return; + v = api_pop(g); + api_branch_if(g, &v, 0, (Label)label); +} + +void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { + ApiSValue selector; + CfreeCgTypeId ty; + Operand sel; + if (!g) return; + if (g->sp == 0) return; + api_local_const_control_boundary(g); + selector = api_pop(g); + ty = resolve_type(g->c, sw.selector_type); + if (!ty) ty = api_sv_type(&selector); + sel = api_force_reg_unless_imm(g, &selector, ty); + for (u32 i = 0; i < sw.ncases; ++i) { + Operand imm = api_op_imm((i64)sw.cases[i].value, ty); + g->target->cmp_branch(g->target, CMP_EQ, sel, imm, + (Label)sw.cases[i].label); + } + if (sw.default_label != CFREE_CG_LABEL_NONE) { + g->target->jump(g->target, (Label)sw.default_label); + } + api_release(g, &selector); +} + +void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label, + CfreeCgTypeId ptr_type) { + CfreeCgTypeId ty; + if (!g) return; + ty = resolve_type(g->c, ptr_type); + if (!ty) return; + api_push(g, api_make_sv(api_op_imm((i64)label, ty), ty)); +} + +void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, + uint32_t ntargets) { + ApiSValue target; + CfreeCgTypeId target_ty; + Operand target_op; + if (!g) return; + api_local_const_control_boundary(g); + target = api_pop(g); + target_ty = api_sv_type(&target); + target_op = api_force_reg(g, &target, target_ty); + for (uint32_t i = 0; i < ntargets; ++i) { + Operand imm = api_op_imm((i64)valid_targets[i], target_ty); + g->target->cmp_branch(g->target, CMP_EQ, target_op, imm, + (Label)valid_targets[i]); + } + api_release(g, &target); + g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); +} + +void cfree_cg_unreachable(CfreeCg* g) { + if (!g) 
return; + api_local_const_control_boundary(g); + g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); +} + +/* ============================================================ + * Scopes / structured control flow + * ============================================================ */ + +CfreeCgScope api_scope_handle(u32 idx, u32 generation) { + return (CfreeCgScope)((generation << 8) | ((idx + 1u) & 0xffu)); +} + +ApiCgScope* api_scope_from_handle(CfreeCg* g, CfreeCgScope scope, + int require_top, const char* who) { + u32 slot; + u32 generation; + ApiCgScope* s; + if (!g || scope == 0) return NULL; + slot = ((u32)scope & 0xffu); + generation = ((u32)scope >> 8); + if (slot == 0 || slot > API_CG_MAX_SCOPES) { + compiler_panic(g->c, g->cur_loc, "%s: invalid scope handle", who); + return NULL; + } + slot--; + if (slot >= g->nscopes) { + compiler_panic(g->c, g->cur_loc, "%s: stale scope handle", who); + return NULL; + } + if (require_top && slot + 1u != g->nscopes) { + compiler_panic(g->c, g->cur_loc, "%s: non-LIFO scope end", who); + return NULL; + } + s = &g->scopes[slot]; + if (!s->active || s->generation != generation) { + compiler_panic(g->c, g->cur_loc, "%s: stale scope handle", who); + return NULL; + } + return s; +} + +int api_scope_has_result(const ApiCgScope* s) { + return s->result_type != CFREE_CG_TYPE_NONE; +} + +void api_scope_store_result(CfreeCg* g, ApiCgScope* s, ApiSValue* result) { + Operand dst; + Operand src; + if (!api_scope_has_result(s)) return; + dst = api_op_local(s->result_slot, s->result_type); + src = api_sv_op_is_reg_or_imm(result) + ? 
result->op + : api_force_reg(g, result, s->result_type); + g->target->store(g->target, dst, src, + api_mem_for_lvalue(g, &dst, s->result_type)); + api_release(g, result); +} + +void api_scope_push_result(CfreeCg* g, ApiCgScope* s) { + Operand dst; + Operand src; + Reg r; + if (!api_scope_has_result(s)) return; + r = api_alloc_reg_or_spill(g, api_type_class(s->result_type), s->result_type); + dst = api_op_reg(r, s->result_type); + src = api_op_local(s->result_slot, s->result_type); + g->target->load(g->target, dst, src, + api_mem_for_lvalue(g, &src, s->result_type)); + api_push(g, api_make_sv(dst, s->result_type)); +} + +CfreeCgScope cfree_cg_scope_begin(CfreeCg* g, CfreeCgTypeId result_type) { + Label break_lbl, cont_lbl; + CGScopeDesc d; + ApiCgScope* s; + CGScope target_scope; + u32 idx; + if (!g) return 0; + break_lbl = g->target->label_new(g->target); + cont_lbl = g->target->label_new(g->target); + api_local_const_control_boundary(g); + g->target->label_place(g->target, cont_lbl); + + if (g->nscopes >= API_CG_MAX_SCOPES) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: too many nested scopes"); + return 0; + } + idx = g->nscopes; + s = &g->scopes[idx]; + s->break_lbl = break_lbl; + s->continue_lbl = cont_lbl; + s->result_type = resolve_type(g->c, result_type); + s->generation = ++g->scope_generation; + if (s->generation == 0) s->generation = ++g->scope_generation; + s->active = 1; + g->nscopes++; + + memset(&d, 0, sizeof d); + d.kind = (u8)SCOPE_LOOP; + d.break_label = break_lbl; + d.continue_label = cont_lbl; + d.result_type = s->result_type; + target_scope = g->target->scope_begin(g->target, &d); + s->target_scope = target_scope; + s->result_slot = FRAME_SLOT_NONE; + if (api_scope_has_result(s)) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = s->result_type; + fsd.size = abi_cg_sizeof(g->c->abi, result_type); + fsd.align = abi_cg_alignof(g->c->abi, result_type); + fsd.kind = FS_LOCAL; + s->result_slot = g->target->frame_slot(g->target, &fsd); 
+ } + + return api_scope_handle(idx, s->generation); +} + +void cfree_cg_scope_end(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s = api_scope_from_handle(g, scope, 1, "CfreeCg: scope_end"); + if (!s) return; + if (api_scope_has_result(s)) { + ApiSValue result = api_pop(g); + api_scope_store_result(g, s, &result); + } + api_local_const_control_boundary(g); + g->target->label_place(g->target, s->break_lbl); + g->target->scope_end(g->target, s->target_scope); + api_scope_push_result(g, s); + s->active = 0; + g->nscopes--; +} + +void cfree_cg_break(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s = api_scope_from_handle(g, scope, 0, "CfreeCg: break"); + if (!s) return; + if (api_scope_has_result(s)) { + ApiSValue result = api_pop(g); + api_scope_store_result(g, s, &result); + } + api_local_const_control_boundary(g); + g->target->jump(g->target, s->break_lbl); +} + +void cfree_cg_break_true(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s; + ApiSValue cond; + if (!g || scope == 0) return; + s = api_scope_from_handle(g, scope, 0, "CfreeCg: break_true"); + if (!s) return; + cond = api_pop(g); + + if (api_scope_has_result(s)) { + ApiSValue result = api_pop(g); + if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { + if (cond.op.v.imm != 0) { + api_scope_store_result(g, s, &result); + api_local_const_control_boundary(g); + g->target->jump(g->target, s->break_lbl); + } else { + api_release(g, &result); + } + api_release(g, &cond); + } else { + Label skip = g->target->label_new(g->target); + api_branch_if(g, &cond, 0, skip); + api_scope_store_result(g, s, &result); + api_local_const_control_boundary(g); + g->target->jump(g->target, s->break_lbl); + api_local_const_control_boundary(g); + g->target->label_place(g->target, skip); + } + } else { + api_branch_if(g, &cond, 1, s->break_lbl); + } +} + +void cfree_cg_break_false(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s; + ApiSValue cond; + if (!g || scope == 0) return; + s = api_scope_from_handle(g, scope, 0, 
"CfreeCg: break_false"); + if (!s) return; + cond = api_pop(g); + + if (api_scope_has_result(s)) { + ApiSValue result = api_pop(g); + if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { + if (cond.op.v.imm == 0) { + api_scope_store_result(g, s, &result); + api_local_const_control_boundary(g); + g->target->jump(g->target, s->break_lbl); + } else { + api_release(g, &result); + } + api_release(g, &cond); + } else { + Label skip = g->target->label_new(g->target); + api_branch_if(g, &cond, 1, skip); + api_scope_store_result(g, s, &result); + api_local_const_control_boundary(g); + g->target->jump(g->target, s->break_lbl); + api_local_const_control_boundary(g); + g->target->label_place(g->target, skip); + } + } else { + api_branch_if(g, &cond, 0, s->break_lbl); + } +} + +void cfree_cg_continue(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue"); + if (!s) return; + api_local_const_control_boundary(g); + g->target->jump(g->target, s->continue_lbl); +} + +void cfree_cg_continue_true(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s; + ApiSValue v; + if (!g || scope == 0) return; + s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue_true"); + if (!s) return; + v = api_pop(g); + api_branch_if(g, &v, 1, s->continue_lbl); +} + +void cfree_cg_continue_false(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s; + ApiSValue v; + if (!g || scope == 0) return; + s = api_scope_from_handle(g, scope, 0, "CfreeCg: continue_false"); + if (!s) return; + v = api_pop(g); + api_branch_if(g, &v, 0, s->continue_lbl); +} + +/* ============================================================ + * Dynamic stack allocation / variadics (stubs) + * ============================================================ */ + +void cfree_cg_alloca(CfreeCg* g, uint32_t align, + CfreeCgTypeId result_ptr_type) { + ApiSValue sz; + CGTarget* T; + CfreeCgTypeId pty; + Operand sz_op; + Reg rr; + Operand dst; + if (!g) return; + T = g->target; + sz = 
api_pop(g); + pty = resolve_type(g->c, result_ptr_type); + if (!pty) pty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); + sz_op = api_sv_op_is(&sz, OPK_IMM) ? sz.op + : api_force_reg(g, &sz, api_sv_type(&sz)); + rr = api_alloc_reg_or_spill(g, RC_INT, pty); + dst = api_op_reg(rr, pty); + T->alloca_(T, dst, sz_op, align ? align : 16); + api_release(g, &sz); + api_push(g, api_make_sv(dst, pty)); +} + +void cfree_cg_vararg_start(CfreeCg* g) { + ApiSValue ap; + CGTarget* T; + Operand ap_op; + if (!g) return; + T = g->target; + ap = api_pop(g); + ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); + T->va_start_(T, ap_op); + api_release(g, &ap); +} + +void cfree_cg_vararg_next(CfreeCg* g, CfreeCgTypeId type) { + ApiSValue ap; + CGTarget* T; + CfreeCgTypeId ty; + Operand ap_op; + Reg rr; + Operand dst; + if (!g) return; + T = g->target; + ty = resolve_type(g->c, type); + if (!ty) return; + ap = api_pop(g); + ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); + rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); + dst = api_op_reg(rr, ty); + T->va_arg_(T, dst, ap_op, ty); + api_release(g, &ap); + api_push(g, api_make_sv(dst, ty)); +} + +void cfree_cg_vararg_end(CfreeCg* g) { + ApiSValue ap; + CGTarget* T; + Operand ap_op; + if (!g) return; + T = g->target; + ap = api_pop(g); + ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); + T->va_end_(T, ap_op); + api_release(g, &ap); +} + +void cfree_cg_vararg_copy(CfreeCg* g) { + ApiSValue src, dst; + CGTarget* T; + Operand src_op, dst_op; + if (!g) return; + T = g->target; + src = api_pop(g); + dst = api_pop(g); + src_op = api_force_reg(g, &src, api_sv_type(&src)); + dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); + T->va_copy_(T, dst_op, src_op); + api_release(g, &src); + api_release(g, &dst); +} + +/* ============================================================ + * Memory operations (stubs) + * ============================================================ */ + +void cfree_cg_memcpy(CfreeCg* g, uint64_t size, 
CfreeCgMemAccess dst_access, + CfreeCgMemAccess src_access) { + ApiSValue src, dst; + CGTarget* T; + AggregateAccess agg; + Operand dst_op, src_op; + if (!g) return; + api_local_const_memory_boundary(g); + (void)src_access; + if (size > UINT32_MAX) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: memcpy size exceeds CGTarget"); + return; + } + T = g->target; + src = api_pop(g); + dst = api_pop(g); + api_require_pointer_value(g, "memcpy destination", api_sv_type(&dst)); + api_require_pointer_value(g, "memcpy source", api_sv_type(&src)); + dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); + src_op = api_force_reg(g, &src, api_sv_type(&src)); + memset(&agg, 0, sizeof agg); + agg.size = (u32)size; + agg.align = dst_access.align ? dst_access.align : (u32)size; + T->copy_bytes(T, dst_op, src_op, agg); + api_release(g, &dst); + api_release(g, &src); +} + +void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, + CfreeCgMemAccess src_access) { + ApiSValue src, dst; + Operand args[3]; + if (!g) return; + api_local_const_memory_boundary(g); + (void)dst_access; + (void)src_access; + if (size > INT64_MAX) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: memmove size exceeds CGTarget"); + return; + } + src = api_pop(g); + dst = api_pop(g); + api_require_pointer_value(g, "memmove destination", api_sv_type(&dst)); + api_require_pointer_value(g, "memmove source", api_sv_type(&src)); + args[0] = api_force_reg(g, &dst, api_sv_type(&dst)); + args[1] = api_force_reg(g, &src, api_sv_type(&src)); + args[2] = api_op_imm((i64)size, builtin_id(CFREE_CG_BUILTIN_I64)); + g->target->intrinsic(g->target, INTRIN_MEMMOVE, NULL, 0, args, 3); + api_release(g, &dst); + api_release(g, &src); +} + +void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, + CfreeCgMemAccess dst_access) { + ApiSValue dst; + CGTarget* T; + AggregateAccess agg; + Operand dst_op, byte_val; + if (!g) return; + api_local_const_memory_boundary(g); + if (size > UINT32_MAX) { + compiler_panic(g->c, 
g->cur_loc, "CfreeCg: memset size exceeds CGTarget"); + return; + } + T = g->target; + dst = api_pop(g); + api_require_pointer_value(g, "memset destination", api_sv_type(&dst)); + dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); + byte_val = api_op_imm((i64)val, CFREE_CG_TYPE_NONE); + memset(&agg, 0, sizeof agg); + agg.size = (u32)size; + agg.align = dst_access.align ? dst_access.align : (u32)size; + T->set_bytes(T, dst_op, byte_val, agg); + api_release(g, &dst); +} + +void cfree_cg_index(CfreeCg* g, uint64_t offset) { + ApiSValue idx, base; + CGTarget* T; + CfreeCgTypeId base_ty, base_ptr_ty, elem_ty, idx_ty; + const CgType* base_info; + u32 elemsz; + int free_base_op = 0; + Operand base_op, idx_op, result; + Reg rr; + if (!g) return; + if (offset > INT64_MAX) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: index offset too large"); + return; + } + T = g->target; + idx = api_pop(g); + base = api_pop(g); + api_ensure_reg(g, &base); + base_ty = api_sv_type(&base); + base_info = cg_type_get(g->c, base_ty); + if (base_info && base_info->kind == CFREE_CG_TYPE_PTR) { + elem_ty = base_info->ptr.pointee; + base_ptr_ty = base_ty; + } else if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY && + api_is_lvalue_sv(&base)) { + elem_ty = base_info->array.elem; + base_ptr_ty = cg_type_ptr_to(g->c, elem_ty); + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: index base is not a pointer or array lvalue"); + return; + } + elemsz = (u32)abi_cg_sizeof(g->c->abi, elem_ty); + idx_ty = idx.type ? 
idx.type : idx.op.type; + if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I32); + if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY) { + rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); + base_op = api_op_reg(rr, base_ptr_ty); + T->addr_of(T, base_op, base.op); + api_release(g, &base); + free_base_op = 1; + } else { + base_op = api_force_reg(g, &base, base_ptr_ty); + } + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); + result = api_op_reg(rr, base_ptr_ty); + if (idx_op.kind == OPK_IMM) { + i64 total_offset = idx_op.v.imm * (i64)elemsz + (i64)offset; + T->binop(T, BO_IADD, result, base_op, + api_op_imm(total_offset, base_ptr_ty)); + } else { + Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); + Operand scaled = api_op_reg(sr, idx_ty); + T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty)); + if (offset > 0) { + T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); + } + T->binop(T, BO_IADD, result, base_op, scaled); + api_free_reg(g, sr, RC_INT); + } + if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT); + if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) + api_release(g, &base); + api_release(g, &idx); + api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty)); +} + +void cfree_cg_field(CfreeCg* g, uint32_t field_index) { + ApiSValue base; + CGTarget* T; + CfreeCgTypeId rec_ty; + CfreeCgTypeId field_ty; + CfreeCgTypeId rec_ptr_ty; + const CgType* rec_info; + const ABIRecordLayout* layout; + u32 field_offset; + Operand result; + Reg rr; + if (!g) return; + T = g->target; + base = api_pop(g); + api_ensure_reg(g, &base); + rec_ty = api_sv_type(&base); + if (!api_is_lvalue_sv(&base)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: field base is not an lvalue"); + return; + } + layout = abi_cg_record_layout(g->c->abi, rec_ty); + if (!layout || field_index >= layout->nfields) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid 
field index"); + return; + } + rec_info = cg_type_get(g->c, rec_ty); + if (!rec_info || rec_info->kind != CFREE_CG_TYPE_RECORD || + field_index >= rec_info->record.nfields) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: invalid record base"); + return; + } + field_ty = rec_info->record.fields[field_index].type; + rec_ptr_ty = cg_type_ptr_to(g->c, rec_ty); + field_offset = layout->fields[field_index].offset; + if (layout->fields[field_index].bit_width != 0 || + (rec_info->record.fields[field_index].flags & CFREE_CG_FIELD_BITFIELD) != + 0) { + Operand base_addr; + ApiSValue sv; + BitFieldAccess bf; + if (layout->fields[field_index].bit_width == 0) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: zero-width bit-field access"); + api_release(g, &base); + return; + } + base_addr = api_lvalue_addr(g, &base, rec_ptr_ty); + memset(&bf, 0, sizeof bf); + bf.field_type = field_ty; + bf.storage = api_mem_for_lvalue(g, &base_addr, field_ty); + bf.storage.size = layout->fields[field_index].storage_size; + bf.storage_offset = layout->fields[field_index].offset; + bf.bit_offset = layout->fields[field_index].bit_offset; + bf.bit_width = layout->fields[field_index].bit_width; + bf.signed_ = rec_info->record.fields[field_index].bit_signed != 0; + sv = api_make_lv(base_addr, field_ty); + sv.bitfield_lvalue = 1; + sv.delayed.bitfield = bf; + api_release(g, &base); + api_push(g, sv); + return; + } + if (base.op.kind == OPK_GLOBAL) { + result = + api_op_global(base.op.v.global.sym, + base.op.v.global.addend + (i64)field_offset, field_ty); + api_push(g, api_make_lv(result, field_ty)); + } else if (base.op.kind == OPK_INDIRECT && field_offset <= (u32)INT32_MAX && + base.op.v.ind.ofs <= INT32_MAX - (i32)field_offset) { + result = api_op_indirect(base.op.v.ind.base, + base.op.v.ind.ofs + (i32)field_offset, field_ty); + api_push(g, api_make_lv(result, field_ty)); + } else { + Operand base_addr; + rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); + base_addr = api_op_reg(rr, rec_ptr_ty); + 
T->addr_of(T, base_addr, base.op); + api_release(g, &base); + if (field_offset == 0) { + result = base_addr; + } else { + Reg fr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); + result = api_op_reg(fr, rec_ptr_ty); + T->binop(T, BO_IADD, result, base_addr, + api_op_imm((i64)field_offset, rec_ptr_ty)); + api_free_reg(g, base_addr.v.reg, RC_INT); + } + api_push(g, + api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); + } +} + +/* ============================================================ + * Calls / return + * ============================================================ */ + +/* Shared scaffolding for cfree_cg_call / cfree_cg_call_symbol. The two + * public entry points differ only in how the callee is obtained and in + * their pre-call stack-depth check; everything else (arg packaging, return + * storage allocation, post-call release, result push) is identical. These + * helpers carry the common shape and are the natural targets for any future + * change that wants to vary call-shape policy (e.g. an ABI-driven storage + * decision). */ diff --git a/src/cg/data.c b/src/cg/data.c @@ -0,0 +1,292 @@ +#include "cg/internal.h" + +void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, + CfreeCgDataDefAttrs attrs) { + Compiler* c; + ObjBuilder* ob; + ObjSymId sym; + CfreeCgTypeId ty; + u32 align; + SecKind sec_kind; + u16 sec_flags; + Sym sec_name_sym; + ObjSecId sec; + CfreeCgDecl decl_attrs; + if (!g) return; + c = g->c; + ob = g->obj; + sym = (ObjSymId)cg_sym; + ty = api_sym_type(g, cg_sym); + if (!ty) return; + decl_attrs = api_sym_attrs(g, cg_sym); + align = + attrs.align ? attrs.align : (u32)abi_cg_alignof(c->abi, decl_attrs.type); + if (!attrs.section && decl_attrs.as.object.section) { + attrs.section = decl_attrs.as.object.section; + } + if ((decl_attrs.as.object.flags & CFREE_CG_OBJ_TLS) && + (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL)) { + sec_kind = SEC_BSS; + sec_flags = SF_ALLOC | SF_WRITE | SF_TLS; + sec_name_sym = attrs.section ? 
(Sym)attrs.section : obj_secname_tbss(c); + } else if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { + sec_kind = SEC_BSS; + sec_flags = SF_ALLOC | SF_WRITE; + sec_name_sym = attrs.section ? (Sym)attrs.section + : pool_intern_cstr(c->global, ".bss"); + } else if (attrs.section) { + sec_name_sym = (Sym)attrs.section; + if (attrs.flags & CFREE_CG_DATADEF_READONLY) { + sec_kind = SEC_RODATA; + sec_flags = SF_ALLOC; + } else { + sec_kind = SEC_OTHER; + sec_flags = SF_ALLOC | SF_WRITE; + } + } else if ((attrs.flags & CFREE_CG_DATADEF_READONLY) || + (decl_attrs.as.object.flags & CFREE_CG_OBJ_READONLY)) { + sec_kind = SEC_RODATA; + sec_flags = SF_ALLOC; + sec_name_sym = pool_intern_cstr(c->global, ".rodata"); + } else if (decl_attrs.as.object.flags & CFREE_CG_OBJ_TLS) { + sec_kind = SEC_DATA; + sec_flags = SF_ALLOC | SF_WRITE | SF_TLS; + sec_name_sym = obj_secname_tdata(c); + } else { + sec_kind = SEC_DATA; + sec_flags = SF_ALLOC | SF_WRITE; + sec_name_sym = pool_intern_cstr(c->global, ".data"); + } + if (attrs.flags & CFREE_CG_DATADEF_RETAIN) sec_flags |= SF_RETAIN; + if (attrs.flags & CFREE_CG_DATADEF_MERGE) sec_flags |= SF_MERGE; + if (attrs.flags & CFREE_CG_DATADEF_STRINGS) sec_flags |= SF_STRINGS; + if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { + sec = obj_section_ex(ob, sec_name_sym, sec_kind, SSEM_NOBITS, sec_flags, + align, 0, OBJ_SEC_NONE, 0); + } else if (attrs.entsize) { + sec = obj_section_ex(ob, sec_name_sym, sec_kind, SSEM_PROGBITS, sec_flags, + align, attrs.entsize, OBJ_SEC_NONE, 0); + } else { + sec = obj_section(ob, sec_name_sym, sec_kind, sec_flags, align); + } + g->data_sec = sec; + g->data_sym = sym; + g->data_base = obj_align_to(ob, sec, align); + g->data_size = 0; + if (sym != OBJ_SYM_NONE) { + obj_symbol_define(ob, sym, sec, (u64)g->data_base, + (u64)abi_cg_sizeof(c->abi, decl_attrs.type)); + } +} + +void cfree_cg_data_common(CfreeCg* g, CfreeCgSym cg_sym, uint64_t size, + uint32_t align) { + ObjSym* osym; + ObjSymId sym; + CfreeCgDecl decl_attrs; + 
if (!g || cg_sym == CFREE_CG_SYM_NONE) return; + sym = (ObjSymId)cg_sym; + osym = (ObjSym*)obj_symbol_get(g->obj, sym); + if (!osym) return; + decl_attrs = api_sym_attrs(g, cg_sym); + osym->bind = api_map_bind(decl_attrs.sym.bind); + osym->vis = api_map_vis(decl_attrs.sym.visibility); + osym->kind = SK_COMMON; + osym->section_id = OBJ_SEC_NONE; + osym->value = 0; + osym->size = size; + osym->common_align = align; +} + +void cfree_cg_data_align(CfreeCg* g, uint32_t align) { + if (!g || g->data_sec == OBJ_SEC_NONE || !align) return; + g->data_size = obj_align_to(g->obj, g->data_sec, align) - g->data_base; +} + +void cfree_cg_data_pad(CfreeCg* g, uint64_t size, uint8_t value) { + u8 pad[64]; + if (!g || !size) return; + memset(pad, value, sizeof(pad)); + while (size >= sizeof(pad)) { + obj_write(g->obj, g->data_sec, pad, sizeof(pad)); + size -= sizeof(pad); + g->data_size += sizeof(pad); + } + if (size) { + obj_write(g->obj, g->data_sec, pad, (size_t)size); + g->data_size += size; + } +} + +void cfree_cg_data_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type) { + CfreeCgTypeId ty; + u32 size; + u8 bytes[8]; + if (!g) return; + ty = resolve_type(g->c, type); + if (!ty) return; + size = (u32)abi_cg_sizeof(g->c->abi, type); + if (size > sizeof(bytes)) return; + for (u32 i = 0; i < size; ++i) { + u32 shift = g->c->target.big_endian ? 
(size - 1u - i) * 8u : i * 8u; + bytes[i] = (u8)(value >> shift); + } + cfree_cg_data_bytes(g, bytes, size); +} + +void cfree_cg_data_float(CfreeCg* g, double value, CfreeCgTypeId type) { + CfreeCgTypeId ty; + union { + float f; + double d; + u8 b[8]; + } u; + if (!g) return; + ty = resolve_type(g->c, type); + if (!ty) return; + if (api_is_f128_type(g->c, ty)) { + u8 bytes[16]; + api_encode_binary128_from_double(g, value, bytes); + cfree_cg_data_bytes(g, bytes, sizeof bytes); + return; + } + if (ty == builtin_id(CFREE_CG_BUILTIN_F32)) { + u.f = (float)value; + if (g->c->target.big_endian) { + u8 t = u.b[0]; + u.b[0] = u.b[3]; + u.b[3] = t; + t = u.b[1]; + u.b[1] = u.b[2]; + u.b[2] = t; + } + cfree_cg_data_bytes(g, u.b, 4); + } else if (ty == builtin_id(CFREE_CG_BUILTIN_F64)) { + u.d = value; + if (g->c->target.big_endian) { + for (u32 i = 0; i < 4; ++i) { + u8 t = u.b[i]; + u.b[i] = u.b[7u - i]; + u.b[7u - i] = t; + } + } + cfree_cg_data_bytes(g, u.b, 8); + } +} + +void cfree_cg_data_bytes(CfreeCg* g, const uint8_t* data, size_t len) { + if (!g || !len) return; + obj_write(g->obj, g->data_sec, data, len); + g->data_size += len; +} + +void cfree_cg_data_zero(CfreeCg* g, uint64_t size) { + const Section* sec; + if (!g || !size) return; + sec = obj_section_get(g->obj, g->data_sec); + if (sec && (sec->kind == SEC_BSS || sec->sem == SSEM_NOBITS)) { + obj_reserve_bss(g->obj, g->data_sec, + g->data_base + (u32)(g->data_size + size), 0); + g->data_size += size; + return; + } + { + u8 pad[64]; + memset(pad, 0, sizeof pad); + u64 remaining = size; + while (remaining >= sizeof pad) { + obj_write(g->obj, g->data_sec, pad, sizeof pad); + remaining -= sizeof pad; + } + if (remaining) obj_write(g->obj, g->data_sec, pad, (size_t)remaining); + } + g->data_size += size; +} + +void api_cg_data_reloc(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width, int pcrel) { + ObjBuilder* ob; + RelocKind rk; + u8 pad[8]; + if (!g || !width || width > sizeof(pad)) return; + ob = 
g->obj; + rk = api_data_reloc_kind(pcrel, width); + if (rk == R_NONE) return; + memset(pad, 0, sizeof pad); + obj_write(ob, g->data_sec, pad, width); + obj_reloc(ob, g->data_sec, g->data_base + (u32)g->data_size, rk, + (ObjSymId)target, addend); + g->data_size += width; +} + +void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width, uint32_t address_space) { + (void)address_space; + api_cg_data_reloc(g, target, addend, width, 0); +} + +void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, + uint32_t width, uint32_t address_space) { + u8 pad[8]; + (void)address_space; + if (!g || !width || width > sizeof(pad)) return; + memset(pad, 0, sizeof(pad)); + for (u32 i = 0; i < width; ++i) { + u32 shift = g->c->target.big_endian ? (width - 1u - i) * 8u : i * 8u; + pad[i] = (u8)(((uint64_t)target + (uint64_t)addend) >> shift); + } + obj_write(g->obj, g->data_sec, pad, width); + g->data_size += width; +} + +void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width) { + api_cg_data_reloc(g, target, addend, width, 1); +} + +void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs, + int64_t addend, uint32_t width) { + u8 pad[8]; + RelocKind add_kind; + RelocKind sub_kind; + if (!g || width > sizeof(pad)) return; + switch (width) { + case 1: + add_kind = R_RV_ADD8; + sub_kind = R_RV_SUB8; + break; + case 2: + add_kind = R_RV_ADD16; + sub_kind = R_RV_SUB16; + break; + case 4: + add_kind = R_RV_ADD32; + sub_kind = R_RV_SUB32; + break; + case 8: + add_kind = R_RV_ADD64; + sub_kind = R_RV_SUB64; + break; + default: + return; + } + memset(pad, 0, sizeof(pad)); + obj_write(g->obj, g->data_sec, pad, width); + obj_reloc(g->obj, g->data_sec, g->data_base + (u32)g->data_size, add_kind, + (ObjSymId)lhs, addend); + obj_reloc(g->obj, g->data_sec, g->data_base + (u32)g->data_size, sub_kind, + (ObjSymId)rhs, 0); + g->data_size += width; +} + +void cfree_cg_data_end(CfreeCg* g) { + if (!g) 
return; + if (g->data_sym != OBJ_SYM_NONE) { + obj_symbol_define(g->obj, g->data_sym, g->data_sec, g->data_base, + g->data_size); + } + g->data_sec = OBJ_SEC_NONE; + g->data_sym = OBJ_SYM_NONE; + g->data_base = 0; + g->data_size = 0; +} diff --git a/src/cg/debug.c b/src/cg/debug.c @@ -0,0 +1,84 @@ +#include "cg/internal.h" + +DebugTypeId api_debug_type(CfreeCg* g, CfreeCgTypeId id) { + const CgType* ty; + if (!g || !g->debug) return DEBUG_TYPE_NONE; + ty = cg_type_get(g->c, id); + if (!ty) return DEBUG_TYPE_NONE; + switch (ty->kind) { + case CFREE_CG_TYPE_VOID: + return debug_type_void(g->debug); + case CFREE_CG_TYPE_BOOL: + return debug_type_base(g->debug, pool_intern_cstr(g->c->global, "_Bool"), + DEBUG_BE_BOOL, 1); + case CFREE_CG_TYPE_INT: { + const char* name = "long long"; + if (ty->integer.width <= 8) + name = "char"; + else if (ty->integer.width <= 16) + name = "short"; + else if (ty->integer.width <= 32) + name = "int"; + return debug_type_base(g->debug, pool_intern_cstr(g->c->global, name), + DEBUG_BE_SIGNED, + (u32)((ty->integer.width + 7u) / 8u)); + } + case CFREE_CG_TYPE_FLOAT: { + const char* name = ty->fp.width <= 32 ? "float" : "double"; + return debug_type_base(g->debug, pool_intern_cstr(g->c->global, name), + DEBUG_BE_FLOAT, (u32)((ty->fp.width + 7u) / 8u)); + } + case CFREE_CG_TYPE_PTR: { + DebugTypeId pointee = api_debug_type(g, ty->ptr.pointee); + if (pointee == DEBUG_TYPE_NONE) pointee = debug_type_void(g->debug); + return debug_type_ptr(g->debug, pointee); + } + case CFREE_CG_TYPE_ARRAY: { + DebugTypeId elem = api_debug_type(g, ty->array.elem); + u32 count = ty->array.count > UINT32_MAX ? 
0u : (u32)ty->array.count; + if (elem == DEBUG_TYPE_NONE) elem = debug_type_void(g->debug); + return debug_type_array(g->debug, elem, count); + } + case CFREE_CG_TYPE_FUNC: { + Heap* h = (Heap*)g->c->ctx->heap; + DebugTypeId ret = api_debug_type(g, ty->func.ret); + DebugTypeId* params = NULL; + DebugTypeId fn; + if (ret == DEBUG_TYPE_NONE) ret = debug_type_void(g->debug); + if (ty->func.nparams) { + params = (DebugTypeId*)h->alloc(h, sizeof(*params) * ty->func.nparams, + _Alignof(DebugTypeId)); + if (!params) return DEBUG_TYPE_NONE; + for (u32 i = 0; i < ty->func.nparams; ++i) { + params[i] = api_debug_type(g, ty->func.params[i].type); + if (params[i] == DEBUG_TYPE_NONE) + params[i] = debug_type_void(g->debug); + } + } + fn = debug_type_func(g->debug, ret, params, ty->func.nparams, + ty->func.abi_variadic); + if (params) h->free(h, params, sizeof(*params) * ty->func.nparams); + return fn; + } + case CFREE_CG_TYPE_RECORD: { + DebugTypeBuilder* b = debug_type_record_begin( + g->debug, (Sym)ty->record.tag, ty->record.is_union, (u32)ty->size, + ty->align); + if (!b) return DEBUG_TYPE_NONE; + return debug_type_record_end(b); + } + case CFREE_CG_TYPE_ENUM: + return debug_type_base(g->debug, pool_intern_cstr(g->c->global, "int"), + DEBUG_BE_SIGNED, ty->size ? 
(u32)ty->size : 4u); + case CFREE_CG_TYPE_ALIAS: { + DebugTypeId base = api_debug_type(g, ty->alias.base); + if (base == DEBUG_TYPE_NONE) base = debug_type_void(g->debug); + return debug_type_typedef(g->debug, (Sym)ty->alias.name, base); + } + case CFREE_CG_TYPE_VARARG_STATE: + return debug_type_void(g->debug); + } + return DEBUG_TYPE_NONE; +} + +/* ---- value stack helpers ---- */ diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -0,0 +1,490 @@ +#ifndef CFREE_CG_INTERNAL_H +#define CFREE_CG_INTERNAL_H + +#include <cfree/cg.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "abi/abi.h" +#include "arch/arch.h" +#include "arch/regalloc.h" +#include "asm/asm.h" +#include "asm/asm_lex.h" +#include "cg/type.h" +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/segvec.h" +#include "debug/debug.h" +#include "obj/obj.h" +#include "opt/opt.h" + +typedef struct CGTarget CGTarget; +typedef struct MCEmitter MCEmitter; +typedef uint32_t ObjSymId; + +typedef enum SResidency { + RES_INHERENT, + RES_REG, + RES_SPILLED, + RES_FIXED_REG, +} SResidency; + +typedef enum ApiSValueKind { + SV_OPERAND, + SV_CMP, + SV_ARITH, +} ApiSValueKind; + +typedef enum ApiDelayedArithKind { + API_DELAYED_UNOP, + API_DELAYED_BINOP, +} ApiDelayedArithKind; + +typedef struct ApiDelayedCmp { + Operand a; + Operand b; + CmpOp op; + u8 a_owned; + u8 b_owned; + u8 pad[2]; +} ApiDelayedCmp; + +typedef struct ApiDelayedArith { + Operand a; + Operand b; + BinOp bin_op; + UnOp un_op; + u8 kind; + u8 a_owned; + u8 b_owned; + u8 pad; +} ApiDelayedArith; + +typedef struct ApiSValue { + Operand op; + union { + ApiDelayedCmp cmp; + ApiDelayedArith arith; + BitFieldAccess bitfield; + } delayed; + CfreeCgTypeId type; + u8 kind; + u8 res; + u8 pinned; + u8 lvalue; + u8 bitfield_lvalue; + FrameSlot spill_slot; + CfreeCgLocal source_local; +} ApiSValue; + +#define API_CG_STACK_INITIAL 16u + +typedef struct ApiCgScope { + Label 
break_lbl; + Label continue_lbl; + CGScope target_scope; + CfreeCgTypeId result_type; + FrameSlot result_slot; + u32 generation; + u8 active; + u8 pad[3]; +} ApiCgScope; + +#define API_CG_MAX_SCOPES 64 + +typedef enum ApiSourceLocalKind { + API_SOURCE_LOCAL_AUTO, + API_SOURCE_LOCAL_PARAM, +} ApiSourceLocalKind; + +typedef struct ApiSourceLocal { + CfreeCgTypeId type; + CfreeSym name; + CfreeCgLocalAttrs attrs; + SrcLoc loc; + CGLocalDesc desc; + CGLocalStorage storage; + i64 const_value; + u32 param_index; + u8 kind; + u8 const_valid; + u8 pad[2]; +} ApiSourceLocal; + +struct CfreeCg { + Compiler* c; + ObjBuilder* obj; + CGTarget* target; + MCEmitter* mc; + Debug* debug; + CGSimpleRegAlloc regalloc; + + ApiSValue* stack; + u32 sp; + u32 cap; + + ApiSourceLocal* locals; + u32 nlocals; + u32 locals_cap; + + struct { + FrameSlot* free; + u32 n; + u32 cap; + } slot_pools[3]; + + CGABIValue* avs_in_flight; + u32 avs_in_flight_n; + + CfreeCgTypeId fn_ret_type; + const ABIFuncInfo* fn_abi; + SrcLoc cur_loc; + + CGFuncDesc fn_desc; + CGParamDesc fn_params[64]; + + CfreeCgTypeId* sym_types; + CfreeCgDecl* sym_attrs; + u32 sym_cap; + + ApiCgScope scopes[API_CG_MAX_SCOPES]; + u32 nscopes; + u32 scope_generation; + + u32 rodata_counter; + + ObjSecId data_sec; + ObjSymId data_sym; + u32 data_base; + u64 data_size; +}; + +void cg_api_fini(Compiler*); + +void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags); +void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags); +void api_cg_cmp(CfreeCg* g, CmpOp cop); +void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck); +void cfree_cg_int_binop(CfreeCg* g, CfreeCgIntBinOp op, uint32_t flags); +void cfree_cg_int_unop(CfreeCg* g, CfreeCgIntUnOp op, uint32_t flags); +void cfree_cg_int_cmp(CfreeCg* g, CfreeCgIntCmpOp op); +const char* api_i128_binop_helper(BinOp op); +int api_i128_cmp_is_unsigned(CmpOp op); +const char* api_f128_binop_helper(CfreeCgFpBinOp op); +int api_f128_stack_top(CfreeCg* g, u32 depth); +void 
api_f128_call_unary(CfreeCg* g, const char* name, CfreeCgTypeId ret, + CfreeCgTypeId param); +void cfree_cg_fp_binop(CfreeCg* g, CfreeCgFpBinOp op, uint32_t flags); +void cfree_cg_fp_unop(CfreeCg* g, CfreeCgFpUnOp op, uint32_t flags); +void cfree_cg_fp_cmp(CfreeCg* g, CfreeCgFpCmpOp op); +void cfree_cg_sext(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_zext(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_trunc(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_ptr_to_int(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_int_to_ptr(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_bitcast(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_fpext(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_fptrunc(CfreeCg* g, CfreeCgTypeId dst); +void cfree_cg_sint_to_float(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding); +void cfree_cg_uint_to_float(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding); +void cfree_cg_float_to_sint(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding); +void cfree_cg_float_to_uint(CfreeCg* g, CfreeCgTypeId dst, + CfreeCgRounding rounding); +IntrinKind api_map_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, + CfreeCgTypeId result_type); +int api_intrinsic_is_void(CfreeCgIntrinsic intrin); +int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin); +void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, + CfreeCgTypeId result_type); +CfreeCgTypeId api_atomic_pointee(CfreeCg* g, CfreeCgTypeId pty, + const char* who); +const char* api_sym_cstr(CfreeCg* g, CfreeSym sym); +int api_asm_parse_match_index(const char* s); +const char* api_asm_constraint_body(const char* s); +int api_asm_is_early_clobber(const char* s); +void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys, RegClass cls); +void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block); +void cfree_cg_file_scope_asm(CfreeCg* g, const char* asm_source, + size_t asm_source_len); +MemAccess api_mem_for_atomic(CfreeCg* g, CfreeCgTypeId val_ty); +int 
cfree_cg_atomic_is_legal(CfreeCompiler* c, CfreeCgMemAccess access, + CfreeCgMemOrder order); +int cfree_cg_atomic_is_lock_free(CfreeCompiler* c, CfreeCgMemAccess access); +void cfree_cg_atomic_load(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder order); +void cfree_cg_atomic_store(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder order); +void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgAtomicOp op, CfreeCgMemOrder order); +int api_take_dead_owned_reg(ApiSValue* sv, u8 cls, Reg avoid, Reg* out); +Reg api_alloc_dead_input_or_spill(CfreeCg* g, ApiSValue* a, ApiSValue* b, + ApiSValue* c, u8 cls, CfreeCgTypeId ty, + Reg avoid); +void cfree_cg_atomic_cmpxchg(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgMemOrder success, CfreeCgMemOrder failure, + int weak); +void cfree_cg_atomic_fence(CfreeCg* g, CfreeCgMemOrder order); +CGABIValue* api_alloc_call_args(CfreeCg* g, u32 nargs); +void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty, + const ABIFuncInfo* abi, u32 idx); +void api_alloc_call_ret_storage(CfreeCg* g, CGTarget* T, CfreeCgTypeId ret_ty, + Operand* out); +void api_release_call_args(CfreeCg* g, CGABIValue* avs, u32 nargs); +void api_push_call_result(CfreeCg* g, Operand ret_storage, + CfreeCgTypeId ret_ty); +void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, + CfreeCgCallAttrs attrs); +void api_cg_tail_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type); +void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs); +void cfree_cg_call_symbol(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs); +void cfree_cg_ret(CfreeCg* g); +void cfree_cg_ret_void(CfreeCg* g); +CfreeCgLabel cfree_cg_label_new(CfreeCg* g); +void cfree_cg_label_place(CfreeCg* g, CfreeCgLabel label); +void cfree_cg_jump(CfreeCg* g, CfreeCgLabel label); +void api_branch_if(CfreeCg* g, ApiSValue* v, int branch_when_true, Label label); +void cfree_cg_branch_true(CfreeCg* g, 
CfreeCgLabel label); +void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label); +void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw); +void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label, + CfreeCgTypeId ptr_type); +void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, + uint32_t ntargets); +void cfree_cg_unreachable(CfreeCg* g); +CfreeCgScope api_scope_handle(u32 idx, u32 generation); +ApiCgScope* api_scope_from_handle(CfreeCg* g, CfreeCgScope scope, + int require_top, const char* who); +int api_scope_has_result(const ApiCgScope* s); +void api_scope_store_result(CfreeCg* g, ApiCgScope* s, ApiSValue* result); +void api_scope_push_result(CfreeCg* g, ApiCgScope* s); +CfreeCgScope cfree_cg_scope_begin(CfreeCg* g, CfreeCgTypeId result_type); +void cfree_cg_scope_end(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_break(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_break_true(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_break_false(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_continue(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_continue_true(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_continue_false(CfreeCg* g, CfreeCgScope scope); +void cfree_cg_alloca(CfreeCg* g, uint32_t align, CfreeCgTypeId result_ptr_type); +void cfree_cg_vararg_start(CfreeCg* g); +void cfree_cg_vararg_next(CfreeCg* g, CfreeCgTypeId type); +void cfree_cg_vararg_end(CfreeCg* g); +void cfree_cg_vararg_copy(CfreeCg* g); +void cfree_cg_memcpy(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, + CfreeCgMemAccess src_access); +void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, + CfreeCgMemAccess src_access); +void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, + CfreeCgMemAccess dst_access); +void cfree_cg_index(CfreeCg* g, uint64_t offset); +void cfree_cg_field(CfreeCg* g, uint32_t field_index); +void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, + CfreeCgDataDefAttrs attrs); +void cfree_cg_data_common(CfreeCg* 
g, CfreeCgSym cg_sym, uint64_t size, + uint32_t align); +void cfree_cg_data_align(CfreeCg* g, uint32_t align); +void cfree_cg_data_pad(CfreeCg* g, uint64_t size, uint8_t value); +void cfree_cg_data_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type); +void cfree_cg_data_float(CfreeCg* g, double value, CfreeCgTypeId type); +void cfree_cg_data_bytes(CfreeCg* g, const uint8_t* data, size_t len); +void cfree_cg_data_zero(CfreeCg* g, uint64_t size); +void api_cg_data_reloc(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width, int pcrel); +void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width, uint32_t address_space); +void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, + uint32_t width, uint32_t address_space); +void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend, + uint32_t width); +void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs, + int64_t addend, uint32_t width); +void cfree_cg_data_end(CfreeCg* g); +DebugTypeId api_debug_type(CfreeCg* g, CfreeCgTypeId id); +int api_source_flags_addr_taken(u32 flags); +int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, + CfreeCgLocalAttrs attrs); +CfreeCgLocal api_local_handle(u32 index); +int api_grow_locals(CfreeCg* g, u32 want); +ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local); +CGLocalStorage api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d); +CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, + CfreeCgLocalAttrs attrs); +CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type, + CfreeCgLocalAttrs attrs); +void cfree_cg_push_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type); +void cfree_cg_push_float(CfreeCg* g, double value, CfreeCgTypeId type); +void cfree_cg_push_null(CfreeCg* g, CfreeCgTypeId ptr_type); +CfreeCgSym cfree_cg_const_data(CfreeCg* g, const uint8_t* data, size_t len, + uint32_t align, CfreeCgTypeId pointee_type); +void 
api_push_frame_lvalue(CfreeCg* g, FrameSlot slot, CfreeCgTypeId type); +void api_push_source_frame_lvalue(CfreeCg* g, CfreeCgLocal local, + FrameSlot slot, CfreeCgTypeId type); +void api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg, + CfreeCgTypeId type); +void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal local); +void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local); +void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend); +void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend); +void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset, + CfreeCgTypeId result_type); +void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access); +void cfree_cg_indirect(CfreeCg* g); +void cfree_cg_addr(CfreeCg* g); +void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access); +void cfree_cg_dup(CfreeCg* g); +void cfree_cg_swap(CfreeCg* g); +void cfree_cg_drop(CfreeCg* g); +int cfree_cg_top_const_int(CfreeCg* g, int64_t* out_value); +void cfree_cg_rot3(CfreeCg* g); +CfreeStatus cfree_cg_new(CfreeCompiler* c, CfreeObjBuilder* out, + const CfreeCodeOptions* opts, CfreeCg** cg_out); +void cfree_cg_free(CfreeCg* g); +void cfree_cg_set_loc(CfreeCg* g, CfreeSrcLoc loc); +CfreeCgSym cfree_cg_decl(CfreeCg* g, CfreeCgDecl decl); +CfreeCgSym cfree_cg_alias(CfreeCg* g, CfreeCgAlias alias); +void cfree_cg_func_begin(CfreeCg* g, CfreeCgSym cg_sym); +void cfree_cg_func_end(CfreeCg* g); +SymBind api_map_bind(CfreeSymBind b); +SymVis api_map_vis(CfreeCgVisibility v); +SymKind api_decl_sym_kind(CfreeCgDecl decl); +void api_remember_sym(CfreeCg* g, ObjSymId sym, CfreeCgTypeId ty, + CfreeCgDecl decl); +CfreeCgTypeId api_sym_type(CfreeCg* g, CfreeCgSym sym); +CfreeCgDecl api_sym_attrs(CfreeCg* g, CfreeCgSym sym); +int api_sym_is_tls(CfreeCg* g, CfreeCgSym sym); +RelocKind api_data_reloc_kind(int pcrel, uint32_t width); +SrcLoc api_no_loc(void); +u8 api_type_class(CfreeCgTypeId ty); +int api_is_f128_type(Compiler* c, CfreeCgTypeId ty); +int 
api_is_i128_type(Compiler* c, CfreeCgTypeId ty); +int api_is_wide16_scalar_type(Compiler* c, CfreeCgTypeId ty); +int api_arg_storage_must_be_addr(Compiler* c, CfreeCgTypeId ty); +Operand api_op_imm(i64 v, CfreeCgTypeId ty); +Operand api_op_reg(Reg r, CfreeCgTypeId ty); +Operand api_op_local(FrameSlot s, CfreeCgTypeId ty); +Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty); +Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty); +u8 api_residency_for(const Operand* o); +ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty); +ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty); +ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, + int a_owned, int b_owned); +ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, + int a_owned); +ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, + int a_owned, int b_owned); +ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, + int owned); +CfreeCgTypeId api_sv_type(const ApiSValue* sv); +int api_operand_can_address(const Operand* o); +int api_sv_op_is(const ApiSValue* sv, OpKind kind); +int api_sv_op_is_reg_or_imm(const ApiSValue* sv); +int api_is_lvalue_sv(const ApiSValue* sv); +void api_stack_grow(CfreeCg* g, u32 want); +void api_push(CfreeCg* g, ApiSValue v); +ApiSValue api_pop(CfreeCg* g); +u8 api_class_of_sv(const ApiSValue* sv); +Reg api_reg_of_sv(const ApiSValue* sv); +void api_set_owned_reg(ApiSValue* sv, Reg r); +CfreeCgTypeId api_owned_reg_type(CfreeCg* g, const ApiSValue* sv); +void api_take_spill_slot_alloc(CfreeCg* g, u8 cls, FrameSlot* out); +FrameSlot api_take_spill_slot(CfreeCg* g, u8 cls); +void api_return_spill_slot(CfreeCg* g, FrameSlot s, u8 cls); +ApiSValue* api_pick_victim(CfreeCg* g, u8 cls); +void api_regalloc_begin(CfreeCg* g); +void api_regalloc_finish(CfreeCg* g); +Reg api_alloc_reg(CfreeCg* g, u8 cls); +void api_free_reg(CfreeCg* g, Reg r, u8 cls); +int api_spill_avs_victim(CfreeCg* g, u8 cls); 
+MemAccess api_mem_for_lvalue(CfreeCg* g, const Operand* lv, CfreeCgTypeId ty); +MemAccess api_mem_from_access(CfreeCg* g, const Operand* lv, + CfreeCgMemAccess access); +CfreeCgTypeId api_mem_access_type(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgTypeId fallback, const char* who); +u32 api_mem_type_size(CfreeCg* g, CfreeCgTypeId ty, const char* who); +void api_require_scalar_mem_type(CfreeCg* g, const char* who, CfreeCgTypeId ty); +void api_require_pointer_value(CfreeCg* g, const char* who, CfreeCgTypeId ty); +void api_validate_memory_value(CfreeCg* g, const char* who, + CfreeCgTypeId access_ty, CfreeCgTypeId value_ty); +MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv); +void api_release_operand_reg(CfreeCg* g, Operand op); +int api_sv_owns_operand_reg(const ApiSValue* sv, const Operand* op); +void api_release_cmp(CfreeCg* g, ApiSValue* sv); +void api_release_arith(CfreeCg* g, ApiSValue* sv); +void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst); +void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst); +int api_arith_rhs_reusable(const ApiSValue* sv); +int api_materialize_cmp_victim(CfreeCg* g, u8 cls); +int api_materialize_arith_victim(CfreeCg* g, u8 cls); +Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty); +void api_ensure_reg(CfreeCg* g, ApiSValue* sv); +Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); +Operand api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); +void api_release(CfreeCg* g, ApiSValue* sv); +void api_release_arg_storage(CfreeCg* g, Operand* storage); +BinOp api_map_int_binop(CfreeCgIntBinOp op); +BinOp api_map_fp_binop(CfreeCgFpBinOp op); +UnOp api_map_int_unop(CfreeCgIntUnOp op); +CmpOp api_map_int_cmp(CfreeCgIntCmpOp op); +CmpOp api_map_fp_cmp(CfreeCgFpCmpOp op); +CmpOp api_invert_cmp(CmpOp op); +AtomicOp api_map_atomic_op(CfreeCgAtomicOp op); +MemOrder api_map_mem_order(CfreeCgMemOrder order); +AsmDir api_map_asm_dir(uint8_t dir); +u32 
api_int_like_width(Compiler* c, CfreeCgTypeId id); +int api_type_is_bool(Compiler* c, CfreeCgTypeId id); +u64 api_width_mask(u32 width); +u64 api_mask_width(u64 v, u32 width); +i64 api_sign_extend_width(u64 v, u32 width); +int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); +int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); +i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width); +int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out); +int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, + i64* out); +int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out); +void api_local_const_clear(ApiSourceLocal* rec); +void api_local_const_clear_all(CfreeCg* g); +void api_local_const_memory_boundary(CfreeCg* g); +void api_local_const_control_boundary(CfreeCg* g); +void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local); +Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty); +int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, + CfreeCgMemAccess access); +void api_local_const_store(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, i64 value); +int api_local_const_load(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, Operand* out); +int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags); +int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm); +int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out); +int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out); +int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, + ApiSValue* out); +FrameSlot api_f128_temp_slot(CfreeCg* g, CfreeCgTypeId ty); +u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes); +void api_store_f128_bytes(CfreeCg* g, 
FrameSlot slot, CfreeCgTypeId ty, + const u8 bytes[16]); +void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]); +ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty); +ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, + CfreeCgTypeId ty); +CfreeCgSym api_runtime_helper(CfreeCg* g, const char* name, CfreeCgTypeId ret, + const CfreeCgTypeId* params, u32 nparams); +void api_runtime_call_values(CfreeCg* g, const char* name, CfreeCgTypeId ret, + const CfreeCgTypeId* params, u32 nparams, + ApiSValue* args); + +#endif diff --git a/src/cg/local.c b/src/cg/local.c @@ -0,0 +1,171 @@ +#include "cg/internal.h" + +int api_source_flags_addr_taken(u32 flags) { + return (flags & CFREE_CG_LOCAL_ADDRESS_TAKEN) != 0; +} + +int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, + CfreeCgLocalAttrs attrs) { + if (api_source_flags_addr_taken(attrs.flags)) return 1; + if (api_is_wide16_scalar_type(g->c, ty)) return 1; + return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) || + cg_type_is_ptr(g->c, ty)); +} + +CfreeCgLocal api_local_handle(u32 index) { + u32 raw = index + 1u; + if (!raw) return CFREE_CG_LOCAL_NONE; + return raw; +} + +int api_grow_locals(CfreeCg* g, u32 want) { + Heap* h = g->c->ctx->heap; + ApiSourceLocal* nb; + u32 cap; + if (g->locals_cap >= want) return 1; + cap = g->locals_cap ? 
g->locals_cap : 16u; + while (cap < want) cap *= 2u; + nb = + (ApiSourceLocal*)h->alloc(h, sizeof(*nb) * cap, _Alignof(ApiSourceLocal)); + if (!nb) return 0; + memset(nb, 0, sizeof(*nb) * cap); + if (g->locals) { + memcpy(nb, g->locals, sizeof(*nb) * g->nlocals); + h->free(h, g->locals, sizeof(*g->locals) * g->locals_cap); + } + g->locals = nb; + g->locals_cap = cap; + return 1; +} + +ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local) { + u32 index; + if (local == CFREE_CG_LOCAL_NONE) return NULL; + index = local - 1u; + if (index >= g->nlocals) { + return NULL; + } + return &g->locals[index]; +} + +CGLocalStorage api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d) { + FrameSlotDesc fsd; + CGLocalStorage st; + memset(&fsd, 0, sizeof fsd); + fsd.type = d->type; + fsd.name = d->name; + fsd.loc = d->loc; + fsd.size = d->size; + fsd.align = d->align; + fsd.kind = FS_LOCAL; + if (d->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; + st.kind = CG_LOCAL_STORAGE_FRAME; + st.v.frame_slot = g->target->frame_slot(g->target, &fsd); + return st; +} + +CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, + CfreeCgLocalAttrs attrs) { + CfreeCgTypeId ty; + CGLocalDesc desc; + CGLocalStorage storage; + ApiSourceLocal* rec; + CfreeCgLocal handle; + if (!g) return CFREE_CG_LOCAL_NONE; + ty = resolve_type(g->c, type); + if (!ty) return CFREE_CG_LOCAL_NONE; + handle = api_local_handle(g->nlocals); + if (handle == CFREE_CG_LOCAL_NONE || !api_grow_locals(g, g->nlocals + 1u)) + return CFREE_CG_LOCAL_NONE; + memset(&desc, 0, sizeof desc); + desc.type = ty; + desc.name = (Sym)attrs.name; + desc.loc = g->cur_loc; + desc.size = abi_cg_sizeof(g->c->abi, type); + desc.align = attrs.align ? 
attrs.align : abi_cg_alignof(g->c->abi, type); + if (api_source_flags_addr_taken(attrs.flags)) + desc.flags |= CG_LOCAL_ADDR_TAKEN; + if (api_local_requires_memory(g, ty, attrs)) + desc.flags |= CG_LOCAL_MEMORY_REQUIRED; + if (g->target->local) + storage = g->target->local(g->target, &desc); + else + storage = api_frame_local_storage(g, &desc); + if (storage.kind == CG_LOCAL_STORAGE_REG) { + cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), + storage.v.reg); + } + rec = &g->locals[g->nlocals++]; + memset(rec, 0, sizeof *rec); + rec->type = ty; + rec->name = attrs.name; + rec->attrs = attrs; + rec->loc = g->cur_loc; + rec->desc = desc; + rec->storage = storage; + rec->param_index = 0; + rec->kind = API_SOURCE_LOCAL_AUTO; + return handle; +} + +CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type, + CfreeCgLocalAttrs attrs) { + CfreeCgTypeId ty; + CGParamDesc pd; + ApiSourceLocal* rec; + CfreeCgLocal handle; + CGLocalStorage storage; + u32 size; + u32 align; + if (!g) return CFREE_CG_LOCAL_NONE; + ty = resolve_type(g->c, type); + if (!ty) return CFREE_CG_LOCAL_NONE; + if (index != g->nlocals) return CFREE_CG_LOCAL_NONE; + handle = api_local_handle(g->nlocals); + if (handle == CFREE_CG_LOCAL_NONE || !api_grow_locals(g, g->nlocals + 1u)) + return CFREE_CG_LOCAL_NONE; + + size = abi_cg_sizeof(g->c->abi, type); + align = attrs.align ? 
attrs.align : abi_cg_alignof(g->c->abi, type); + + memset(&pd, 0, sizeof pd); + pd.index = index; + pd.name = (Sym)attrs.name; + pd.type = ty; + pd.size = size; + pd.align = align; + if (api_source_flags_addr_taken(attrs.flags)) pd.flags |= CG_LOCAL_ADDR_TAKEN; + if (api_local_requires_memory(g, ty, attrs)) + pd.flags |= CG_LOCAL_MEMORY_REQUIRED; + if (g->fn_abi && index < g->fn_abi->nparams) { + pd.abi = &g->fn_abi->params[index]; + } + pd.loc = g->cur_loc; + storage = g->target->param(g->target, &pd); + if (storage.kind == CG_LOCAL_STORAGE_REG) { + cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), + storage.v.reg); + } + + rec = &g->locals[g->nlocals++]; + memset(rec, 0, sizeof *rec); + rec->type = ty; + rec->name = attrs.name; + rec->attrs = attrs; + rec->loc = g->cur_loc; + memset(&rec->desc, 0, sizeof rec->desc); + rec->desc.type = ty; + rec->desc.name = (Sym)attrs.name; + rec->desc.loc = g->cur_loc; + rec->desc.size = size; + rec->desc.align = align; + rec->desc.flags = pd.flags; + rec->storage = storage; + rec->param_index = index; + rec->kind = API_SOURCE_LOCAL_PARAM; + return handle; +} + +/* ============================================================ + * Push operations + * ============================================================ */ diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -0,0 +1,597 @@ +#include "cg/internal.h" + +void cfree_cg_push_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type) { + CfreeCgTypeId ty; + if (!g) return; + ty = resolve_type(g->c, type); + if (!ty) return; + api_push(g, api_make_sv(api_op_imm((i64)value, ty), ty)); +} + +void cfree_cg_push_float(CfreeCg* g, double value, CfreeCgTypeId type) { + CfreeCgTypeId ty; + CGTarget* T; + ConstBytes cb; + union { + double d; + float f; + uint8_t b[8]; + } u; + Reg r; + Operand dst; + if (!g) return; + ty = resolve_type(g->c, type); + if (!ty) return; + if (api_is_f128_type(g->c, ty)) { + api_push(g, api_make_f128_const(g, value, ty)); + return; + } + T = 
g->target;
+  /* Zero the descriptor and the byte buffer before filling them in, as is
+   * done for every other descriptor in this file: cb is passed by value to
+   * load_const, and for F32 only the first four bytes of u are written. */
+  memset(&cb, 0, sizeof cb);
+  memset(&u, 0, sizeof u);
+  cb.type = ty;
+  cb.size = (u32)abi_cg_sizeof(g->c->abi, type);
+  cb.align = (u32)abi_cg_alignof(g->c->abi, type);
+  if (ty == builtin_id(CFREE_CG_BUILTIN_F32))
+    u.f = (float)value;
+  else
+    u.d = value;
+  cb.bytes = u.b;
+  r = api_alloc_reg_or_spill(g, api_type_class(ty), ty);
+  dst = api_op_reg(r, ty);
+  T->load_const(T, dst, cb);
+  api_push(g, api_make_sv(dst, ty));
+}
+
+/* Pushes an immediate zero of the resolved pointer type. */
+void cfree_cg_push_null(CfreeCg* g, CfreeCgTypeId ptr_type) {
+  CfreeCgTypeId ty;
+  if (!g) return;
+  ty = resolve_type(g->c, ptr_type);
+  if (!ty) return;
+  api_push(g, api_make_sv(api_op_imm(0, ty), ty));
+}
+
+/* Writes `len` bytes from `data` into the ".rodata" section and returns a
+ * new local symbol named ".Lcfree_ro.<counter>" that covers them.
+ *
+ * The section and the data are aligned to `align`, or to the ABI alignment
+ * of `pointee_type` when `align` is 0. The symbol is remembered with the
+ * resolved pointee type and read-only object attributes. Returns
+ * CFREE_CG_SYM_NONE when `g` is NULL or the type does not resolve. */
+CfreeCgSym cfree_cg_const_data(CfreeCg* g, const uint8_t* data, size_t len,
+                               uint32_t align, CfreeCgTypeId pointee_type) {
+  Compiler* c;
+  ObjBuilder* ob;
+  CfreeCgTypeId pty;
+  Sym sec_name;
+  ObjSecId sec;
+  u32 base;
+  u32 eff_align;
+  char name_buf[32];
+  Sym anon_name;
+  ObjSymId sym;
+  CfreeCgDecl attrs;
+  if (!g) return CFREE_CG_SYM_NONE;
+  c = g->c;
+  ob = g->obj;
+  pty = resolve_type(c, pointee_type);
+  if (!pty) return CFREE_CG_SYM_NONE;
+  sec_name = pool_intern_cstr(c->global, ".rodata");
+  /* Effective alignment, computed once and used for both the section and
+   * the data. */
+  eff_align = align ? align : (u32)abi_cg_alignof(c->abi, pointee_type);
+  sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, eff_align);
+  base = obj_align_to(ob, sec, eff_align);
+  obj_write(ob, sec, data, len);
+  snprintf(name_buf, sizeof(name_buf), ".Lcfree_ro.%u", g->rodata_counter++);
+  anon_name = pool_intern_cstr(c->global, name_buf);
+  sym = obj_symbol(ob, anon_name, SB_LOCAL, SK_OBJ, sec, base, (u64)len);
+  memset(&attrs, 0, sizeof(attrs));
+  attrs.kind = CFREE_CG_DECL_OBJECT;
+  attrs.sym.bind = CFREE_SB_LOCAL;
+  attrs.sym.visibility = CFREE_CG_VIS_DEFAULT;
+  attrs.as.object.flags = CFREE_CG_OBJ_READONLY;
+  api_remember_sym(g, sym, pty, attrs);
+  return (CfreeCgSym)sym;
+}
+
+/* Pushes an lvalue that refers to frame slot `slot` with type `type`. */
+void api_push_frame_lvalue(CfreeCg* g, FrameSlot slot, CfreeCgTypeId type) {
+  if (!g) return;
+  api_push(g, api_make_lv(api_op_local(slot, type), type));
+}
+
+/* Like api_push_frame_lvalue, but also tags the value with the source local
+ * it came from. */
+void api_push_source_frame_lvalue(CfreeCg* g, CfreeCgLocal local,
+                                  FrameSlot slot, CfreeCgTypeId type) {
+  ApiSValue sv;
+  if (!g) return;
+  sv = api_make_lv(api_op_local(slot, type), type);
+  sv.source_local = local;
+  api_push(g, sv);
+}
+
+/* Pushes an lvalue for a register-resident local: the operand is `reg`, the
+ * residency is RES_FIXED_REG, and the value is tagged with `local`. */
+void api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg,
+                                CfreeCgTypeId type) {
+  ApiSValue sv;
+  if (!g) return;
+  sv = api_make_lv(api_op_reg(reg, type), type);
+  sv.res = RES_FIXED_REG;
+  sv.source_local = local;
+  api_push(g, sv);
+}
+
+/* Pushes the lvalue of `local`. Register-resident locals and frame-resident
+ * automatic locals are tagged with their source local; other frame-resident
+ * records (parameters) are pushed untagged. Unknown handles are ignored. */
+void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal local) {
+  ApiSourceLocal* rec;
+  if (!g) return;
+  rec = api_local_from_handle(g, local);
+  if (!rec) return;
+  if (rec->storage.kind == CG_LOCAL_STORAGE_REG) {
+    api_push_source_reg_lvalue(g, local, rec->storage.v.reg, rec->type);
+  } else if (rec->kind == API_SOURCE_LOCAL_AUTO) {
+    api_push_source_frame_lvalue(g, local, rec->storage.v.frame_slot,
+                                 rec->type);
+  } else {
+    api_push_frame_lvalue(g, rec->storage.v.frame_slot, rec->type);
+  }
+}
+
+/* Pushes the address of `local`: push its lvalue, then take the address. */
+void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local) {
+  cfree_cg_push_local(g, local);
+  cfree_cg_addr(g);
+}
+
+/* Pushes the address of `sym` plus `addend` as a pointer value. The pointee
+ * type is the remembered symbol type, or the VOID builtin when none is
+ * known. TLS symbols are materialized into a register through the target's
+ * tls_addr_of hook; other symbols are pushed as a global operand. */
+void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend) {
+  CfreeCgTypeId ty;
+  CfreeCgTypeId ptr_ty;
+  if (!g) return;
+  ty = api_sym_type(g, sym);
+  if (!ty) ty = builtin_id(CFREE_CG_BUILTIN_VOID);
+  ptr_ty = cg_type_ptr_to(g->c, ty);
+  if (api_sym_is_tls(g, sym)) {
+    Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
+    Operand dst = api_op_reg(r, ptr_ty);
+    g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend);
+    api_push(g, api_make_sv(dst, ptr_ty));
+  } else {
+    api_push(g,
+             api_make_sv(api_op_global((ObjSymId)sym, addend, ptr_ty), ptr_ty));
+  }
+}
+
+/* Pushes an lvalue for `sym` plus `addend`. Does nothing when the symbol has
+ * no remembered type. TLS symbols become an indirect lvalue through a
+ * register that holds the TLS address. */
+void cfree_cg_push_symbol_lvalue(CfreeCg* g, CfreeCgSym sym, int64_t addend) {
+  CfreeCgTypeId ty;
+  if (!g) return;
+  ty = api_sym_type(g, sym);
+  if (!ty) return;
+  if (api_sym_is_tls(g, sym)) {
+    CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
+    Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
+    Operand dst = api_op_reg(r, ptr_ty);
+    g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend);
+    api_push(g, api_make_lv(api_op_indirect(r, 0, ty), ty));
+  } else {
+    api_push(g, api_make_lv(api_op_global((ObjSymId)sym, addend, ty), ty));
+  }
+}
+
+/* Pops a base (lvalue or pointer value) and pushes it displaced by
+ * `byte_offset`. The result is a pointer value when `result_type` is a
+ * pointer type, otherwise an lvalue of `result_type`.
+ *
+ * Global bases fold the offset into the addend. Indirect bases fold it into
+ * the displacement when the sum fits in 32 bits and an lvalue is wanted.
+ * All other cases emit an integer add into a new register. */
+void cfree_cg_addr_offset(CfreeCg* g, int64_t byte_offset,
+                          CfreeCgTypeId result_type) {
+  ApiSValue v;
+  CfreeCgTypeId rty;
+  CfreeCgTypeId ptr_ty;
+  Operand base;
+  Operand result;
+  Reg rr;
+  int want_ptr;
+  int base_is_lvalue;
+  int free_base = 0;
+  if (!g) return;
+  rty = resolve_type(g->c, result_type);
+  if (!rty) return;
+  v = api_pop(g);
+  want_ptr = cg_type_is_ptr(g->c, rty);
+  base_is_lvalue = api_is_lvalue_sv(&v);
+  if (v.source_local != CFREE_CG_LOCAL_NONE)
+    api_local_const_clear(api_local_from_handle(g, v.source_local));
+  api_ensure_reg(g, &v);
+  if (v.op.kind == OPK_GLOBAL) {
+    result = api_op_global(v.op.v.global.sym,
+                           v.op.v.global.addend + byte_offset, rty);
+    api_push(g, want_ptr ? api_make_sv(result, rty) : api_make_lv(result, rty));
+    return;
+  }
+  if (!want_ptr && v.op.kind == OPK_INDIRECT) {
+    i64 ofs = (i64)v.op.v.ind.ofs + byte_offset;
+    if (ofs >= INT32_MIN && ofs <= INT32_MAX) {
+      result = api_op_indirect(v.op.v.ind.base, (i32)ofs, rty);
+      api_push(g, api_make_lv(result, rty));
+      return;
+    }
+  }
+  ptr_ty = want_ptr ? rty : cg_type_ptr_to(g->c, rty);
+  if (!base_is_lvalue && cg_type_is_ptr(g->c, api_sv_type(&v)))
+    ptr_ty = api_sv_type(&v);
+  if (base_is_lvalue) {
+    base = api_lvalue_addr(g, &v, ptr_ty);
+    free_base = 1; /* address register came from api_lvalue_addr */
+  } else {
+    base = api_force_reg(g, &v, ptr_ty);
+  }
+  rr = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
+  result = api_op_reg(rr, ptr_ty);
+  g->target->binop(g->target, BO_IADD, result, base,
+                   api_op_imm(byte_offset, ptr_ty));
+  if (free_base) api_free_reg(g, base.v.reg, RC_INT);
+  api_release(g, &v);
+  if (want_ptr) {
+    result.type = rty;
+    api_push(g, api_make_sv(result, rty));
+  } else {
+    api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, rty), rty));
+  }
+}
+
+/* ============================================================
+ * Load / addr / store
+ * ============================================================ */
+
+/* Pops an lvalue and pushes the value it designates.
+ *
+ * - A non-lvalue is pushed back unchanged.
+ * - A bit-field lvalue is read through the target's bitfield_load hook.
+ * - An aggregate lvalue stays an lvalue after its size is checked against
+ *   the access type.
+ * - A wide16 scalar stays an lvalue, retyped to the access type.
+ * - A tracked constant of a source local is pushed without emitting a load.
+ * - A register-resident source local is pushed as a fixed-register rvalue.
+ * - Anything else is forced into a register.
+ *
+ * A volatile access first acts as a memory boundary for local constant
+ * tracking. */
+void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access) {
+  ApiSValue v;
+  CfreeCgTypeId ty;
+  Operand dst;
+  if (!g) return;
+  if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
+  v = api_pop(g);
+  if (!api_is_lvalue_sv(&v)) {
+    api_push(g, v);
+    return;
+  }
+  ty = api_mem_access_type(g, access, api_sv_type(&v), "load");
+  if (v.bitfield_lvalue) {
+    CfreeCgTypeId load_ty = ty;
+    Reg rr;
+    api_require_scalar_mem_type(g, "load", load_ty);
+    rr = api_alloc_reg_or_spill(g, RC_INT, load_ty);
+    dst = api_op_reg(rr, load_ty);
+    g->target->bitfield_load(g->target, dst, v.op, v.delayed.bitfield);
+    api_release(g, &v);
+    api_push(g, api_make_sv(dst, load_ty));
+    return;
+  }
+  if (cg_type_is_aggregate(g->c, api_sv_type(&v))) {
+    u32 access_size;
+    u32 lvalue_size;
+    if (!cg_type_is_aggregate(g->c, ty)) {
+      compiler_panic(g->c, g->cur_loc,
+                     "CfreeCg: load scalar access from aggregate lvalue "
+                     "requires selecting a field");
+    }
+    access_size = api_mem_type_size(g, ty, "load");
+    lvalue_size = api_mem_type_size(g, api_sv_type(&v), "load");
+    if (access_size != lvalue_size) {
+      compiler_panic(g->c, g->cur_loc,
+                     "CfreeCg: load aggregate type/size mismatch: access size "
+                     "%u, lvalue size %u",
+                     (unsigned)access_size, (unsigned)lvalue_size);
+    }
+    api_push(g, v);
+    return;
+  }
+  api_require_scalar_mem_type(g, "load", ty);
+  if (api_is_wide16_scalar_type(g->c, ty)) {
+    v.type = ty;
+    v.op.type = ty;
+    api_push(g, v);
+    return;
+  }
+  if (v.source_local != CFREE_CG_LOCAL_NONE &&
+      api_local_const_load(g, v.source_local, access, &dst)) {
+    api_release(g, &v);
+    api_push(g, api_make_sv(dst, dst.type));
+    return;
+  }
+  api_ensure_reg(g, &v);
+  if (v.source_local != CFREE_CG_LOCAL_NONE && v.op.kind == OPK_REG) {
+    dst = v.op;
+    dst.type = ty;
+    v.op = dst;
+    v.type = ty;
+    v.lvalue = 0;
+    v.res = RES_FIXED_REG;
+    api_push(g, v);
+    return;
+  }
+  dst = api_force_reg(g, &v, ty);
+  dst.type = ty;
+  api_push(g, api_make_sv(dst, ty));
+}
+
+/* Pops a pointer value and pushes an lvalue for the object it points to.
+ * Panics when the popped type has no pointee or the pointee is void. */
+void cfree_cg_indirect(CfreeCg* g) {
+  ApiSValue ptr;
+  CfreeCgTypeId pty;
+  CfreeCgTypeId pointee;
+  Operand ptr_op;
+  if (!g) return;
+  ptr = api_pop(g);
+  pty = api_sv_type(&ptr);
+  pointee = cg_type_pointee(g->c, pty);
+  if (!pointee || cg_type_is_void(g->c, pointee)) {
+    compiler_panic(g->c, g->cur_loc,
+                   "CfreeCg: indirect operand is not a pointer to object");
+    return;
+  }
+  ptr_op = api_force_reg(g, &ptr, pty);
+  api_push(g, api_make_lv(api_op_indirect(ptr_op.v.reg, 0, pointee), pointee));
+}
+
+/* Pops an lvalue and pushes its address as a pointer value. Bit-field
+ * lvalues are rejected with a panic; a source local is marked as having its
+ * address taken. */
+void cfree_cg_addr(CfreeCg* g) {
+  ApiSValue v;
+  CfreeCgTypeId pty;
+  Operand dst;
+  if (!g) return;
+  v = api_pop(g);
+  if (v.bitfield_lvalue) {
+    compiler_panic(g->c, g->cur_loc,
+                   "CfreeCg: cannot take address of bit-field");
+    return;
+  }
+  pty = cg_type_ptr_to(g->c, api_sv_type(&v));
+  if
(v.source_local != CFREE_CG_LOCAL_NONE) + api_local_const_address_taken(g, v.source_local); + dst = api_lvalue_addr(g, &v, pty); + api_release(g, &v); + api_push(g, api_make_sv(dst, pty)); +} + +void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) { + ApiSValue lv, rv; + CGTarget* T; + CfreeCgTypeId ty; + Operand src; + int scalar_aggregate_store = 0; + if (!g) return; + if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g); + T = g->target; + rv = api_pop(g); + lv = api_pop(g); + if (!api_is_lvalue_sv(&lv)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: store destination is not an lvalue"); + return; + } + ty = api_mem_access_type(g, access, api_sv_type(&lv), "store"); + if (lv.bitfield_lvalue) { + api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); + if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_memory_boundary(g); + } + if (api_sv_op_is_reg_or_imm(&rv)) { + src = rv.op; + } else { + src = api_force_reg(g, &rv, api_sv_type(&rv)); + } + T->bitfield_store(T, lv.op, src, lv.delayed.bitfield); + api_release(g, &lv); + api_release(g, &rv); + return; + } + if (cg_type_is_aggregate(g->c, api_sv_type(&lv)) && + !cg_type_is_aggregate(g->c, api_sv_type(&rv)) && + !cg_type_is_aggregate(g->c, ty)) { + u32 access_size = api_mem_type_size(g, ty, "store"); + u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); + u32 value_size = api_mem_type_size(g, api_sv_type(&rv), "store"); + if (access_size != dst_size || value_size != dst_size) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: store scalar/aggregate size mismatch: access " + "size %u, destination size %u, value size %u", + (unsigned)access_size, (unsigned)dst_size, + (unsigned)value_size); + } + scalar_aggregate_store = 1; + } + if (!scalar_aggregate_store && + (cg_type_is_aggregate(g->c, ty) || + cg_type_is_aggregate(g->c, api_sv_type(&lv)) || + cg_type_is_aggregate(g->c, api_sv_type(&rv)))) { + 
CfreeCgTypeId ptr_ty; + Operand dst_addr, src_addr; + AggregateAccess agg; + u32 dst_size = api_mem_type_size(g, api_sv_type(&lv), "store"); + u32 src_size = api_mem_type_size(g, api_sv_type(&rv), "store"); + u32 access_size = cg_type_is_aggregate(g->c, ty) + ? api_mem_type_size(g, ty, "store") + : dst_size; + if (!api_is_lvalue_sv(&rv)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: aggregate store source is not an lvalue"); + } + if (!cg_type_is_aggregate(g->c, api_sv_type(&lv)) || + !cg_type_is_aggregate(g->c, api_sv_type(&rv)) || + access_size != dst_size || access_size != src_size) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: store aggregate type/size mismatch: access " + "size %u, destination size %u, value size %u", + (unsigned)access_size, (unsigned)dst_size, + (unsigned)src_size); + } + if (lv.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, lv.source_local)); + } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_memory_boundary(g); + } + ptr_ty = cg_type_ptr_to(g->c, api_sv_type(&lv)); + dst_addr = api_lvalue_addr(g, &lv, ptr_ty); + src_addr = api_lvalue_addr(g, &rv, ptr_ty); + memset(&agg, 0, sizeof agg); + agg.size = access_size; + agg.align = access.align ? 
access.align + : abi_cg_alignof(g->c->abi, api_sv_type(&lv)); + T->copy_bytes(T, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + api_release(g, &lv); + api_release(g, &rv); + return; + } + api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); + if (api_is_wide16_scalar_type(g->c, ty)) { + if (lv.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, lv.source_local)); + } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_memory_boundary(g); + } + if (api_is_lvalue_sv(&rv)) { + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); + Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); + Operand src_addr = api_lvalue_addr(g, &rv, ptr_ty); + AggregateAccess agg; + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = access.align ? access.align : 16; + T->copy_bytes(T, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + } else if (rv.op.kind == OPK_IMM) { + u8 bytes[16]; + u64 lo = (u64)rv.op.v.imm; + u64 hi = rv.op.v.imm < 0 ? ~(u64)0 : 0; + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 lo_idx = g->c->target.big_endian ? 15u - i : i; + u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; + bytes[lo_idx] = (u8)(lo >> (i * 8u)); + bytes[hi_idx] = (u8)(hi >> (i * 8u)); + } + if (lv.op.kind == OPK_LOCAL) { + api_store_f128_bytes(g, lv.op.v.frame_slot, ty, bytes); + } else { + FrameSlot slot = api_f128_temp_slot(g, ty); + ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty); + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); + Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); + Operand src_addr; + AggregateAccess agg; + api_store_f128_bytes(g, slot, ty, bytes); + src_addr = api_lvalue_addr(g, &tmp, ptr_ty); + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = access.align ? 
access.align : 16; + T->copy_bytes(T, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + } + } else { + src = api_force_reg(g, &rv, ty); + T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); + } + api_release(g, &lv); + api_release(g, &rv); + return; + } + api_ensure_reg(g, &lv); + api_ensure_reg(g, &rv); + if (api_sv_op_is_reg_or_imm(&rv)) { + src = rv.op; + } else { + src = api_force_reg(g, &rv, api_sv_type(&rv)); + } + if (lv.source_local != CFREE_CG_LOCAL_NONE) { + if (src.kind == OPK_IMM) { + api_local_const_store(g, lv.source_local, access, src.v.imm); + } else { + api_local_const_clear(api_local_from_handle(g, lv.source_local)); + } + } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_memory_boundary(g); + } + if (lv.source_local != CFREE_CG_LOCAL_NONE && lv.op.kind == OPK_REG) { + Operand dst = lv.op; + dst.type = ty; + if (src.kind == OPK_IMM) { + T->load_imm(T, dst, src.v.imm); + } else if (src.kind == OPK_REG) { + if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src); + } else { + src = api_force_reg(g, &rv, ty); + if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src); + } + } else { + T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); + } + api_release(g, &lv); + api_release(g, &rv); +} + +/* ============================================================ + * Stack manipulation + * ============================================================ */ + +void cfree_cg_dup(CfreeCg* g) { + ApiSValue v, dup; + ApiSValue* top; + CfreeCgTypeId ty; + Reg r; + Operand dst; + if (!g || g->sp == 0) return; + top = &g->stack[g->sp - 1]; + api_ensure_reg(g, top); + v = *top; + if (v.res != RES_REG) { + api_push(g, v); + return; + } + top->pinned = 1; + ty = api_owned_reg_type(g, &v); + r = api_alloc_reg_or_spill(g, api_class_of_sv(&v), ty); + dst = api_op_reg(r, ty); + 
g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); + g->stack[g->sp - 1].pinned = 0; + dup = v; + api_set_owned_reg(&dup, r); + dup.res = RES_REG; + dup.pinned = 0; + dup.spill_slot = FRAME_SLOT_NONE; + api_push(g, dup); +} + +void cfree_cg_swap(CfreeCg* g) { + ApiSValue tmp; + if (!g || g->sp < 2) return; + tmp = g->stack[g->sp - 1]; + g->stack[g->sp - 1] = g->stack[g->sp - 2]; + g->stack[g->sp - 2] = tmp; +} + +void cfree_cg_drop(CfreeCg* g) { + ApiSValue v; + if (!g) return; + v = api_pop(g); + api_release(g, &v); +} + +int cfree_cg_top_const_int(CfreeCg* g, int64_t* out_value) { + ApiSValue* v; + CfreeCgTypeId ty; + u32 width; + if (!g || !out_value || !g->sp) return 0; + v = &g->stack[g->sp - 1u]; + if (v->kind != SV_OPERAND || v->op.kind != OPK_IMM) return 0; + ty = api_sv_type(v); + if (!api_foldable_int_like_type(g->c, ty, &width)) return 0; + *out_value = api_fold_result(g->c, ty, (u64)v->op.v.imm, width); + return 1; +} + +void cfree_cg_rot3(CfreeCg* g) { + ApiSValue a, b, c; + if (!g || g->sp < 3) return; + a = g->stack[g->sp - 3]; + b = g->stack[g->sp - 2]; + c = g->stack[g->sp - 1]; + g->stack[g->sp - 3] = b; + g->stack[g->sp - 2] = c; + g->stack[g->sp - 1] = a; +} + +/* ============================================================ + * Arithmetic / compare / convert + * ============================================================ */ + +const char* api_i128_binop_helper(BinOp op); +int api_i128_cmp_is_unsigned(CmpOp op); +void api_cg_cmp(CfreeCg* g, CmpOp cop); +void api_f128_call_unary(CfreeCg* g, const char* name, CfreeCgTypeId ret, + CfreeCgTypeId param); diff --git a/src/cg/session.c b/src/cg/session.c @@ -0,0 +1,217 @@ +#include "cg/internal.h" + +CfreeStatus cfree_cg_new(CfreeCompiler* c, CfreeObjBuilder* out, + const CfreeCodeOptions* opts, CfreeCg** cg_out) { + Heap* h; + CfreeCg* g; + MCEmitter* mc; + CGTarget* target; + Debug* debug = NULL; + int opt_level = opts ? 
opts->opt_level : 0; + if (!cg_out) return CFREE_INVALID; + *cg_out = NULL; + if (!c || !out) return CFREE_INVALID; + if (opt_level < 0 || opt_level > 2) { + compiler_panic((Compiler*)c, api_no_loc(), + "CfreeCg: unsupported opt_level %d", opt_level); + } + h = (Heap*)c->ctx->heap; + mc = mc_new((Compiler*)c, (ObjBuilder*)out); + if (!mc) return CFREE_NOMEM; + if (opts && opts->debug_info) { + debug = debug_new((Compiler*)c, (ObjBuilder*)out); + if (!debug) { + mc_free(mc); + return CFREE_NOMEM; + } + mc->debug = debug; + } + target = cgtarget_new((Compiler*)c, (ObjBuilder*)out, mc); + if (!target) { + if (debug) debug_free(debug); + mc_free(mc); + return CFREE_UNSUPPORTED; + } + target->debug = debug; + if (opt_level > 0) { + target = opt_cgtarget_new((Compiler*)c, target, opt_level); + if (target) target->debug = debug; + } + g = (CfreeCg*)h->alloc(h, sizeof(CfreeCg), _Alignof(CfreeCg)); + if (!g) { + if (debug) debug_free(debug); + cgtarget_free(target); + mc_free(mc); + return CFREE_NOMEM; + } + memset(g, 0, sizeof *g); + g->c = (Compiler*)c; + g->obj = (ObjBuilder*)out; + g->target = target; + g->mc = mc; + g->debug = debug; + *cg_out = g; + return CFREE_OK; +} + +void cfree_cg_free(CfreeCg* g) { + Heap* h; + if (!g) return; + cgtarget_finalize(g->target); + if (g->debug) { + debug_emit(g->debug); + debug_free(g->debug); + } + cgtarget_free(g->target); + mc_free(g->mc); + h = g->c->ctx->heap; + if (g->stack) h->free(h, g->stack, sizeof(ApiSValue) * g->cap); + if (g->locals) { + h->free(h, g->locals, sizeof(*g->locals) * g->locals_cap); + } + if (g->sym_types) { + h->free(h, g->sym_types, sizeof(*g->sym_types) * g->sym_cap); + } + if (g->sym_attrs) { + h->free(h, g->sym_attrs, sizeof(*g->sym_attrs) * g->sym_cap); + } + for (u32 c = 0; c < 3; ++c) { + if (g->slot_pools[c].free) { + h->free(h, g->slot_pools[c].free, + sizeof(FrameSlot) * g->slot_pools[c].cap); + } + } + h->free(h, g, sizeof *g); +} + +/* 
============================================================ + * Source location + * ============================================================ */ + +void cfree_cg_set_loc(CfreeCg* g, CfreeSrcLoc loc) { + if (!g) return; + g->cur_loc = *(SrcLoc*)&loc; + if (g->debug) debug_set_pending_loc(g->debug, *(SrcLoc*)&loc); + if (g->target->set_loc) g->target->set_loc(g->target, *(SrcLoc*)&loc); +} + +/* ============================================================ + * Function lifecycle + * ============================================================ */ + +CfreeCgSym cfree_cg_decl(CfreeCg* g, CfreeCgDecl decl) { + Compiler* c; + ObjBuilder* ob; + ObjSymId sym; + CfreeCgTypeId ty; + if (!g || !decl.linkage_name) return CFREE_CG_SYM_NONE; + c = g->c; + ob = g->obj; + ty = resolve_type(c, decl.type); + if (!ty) return CFREE_CG_SYM_NONE; + sym = obj_symbol_find(ob, (Sym)decl.linkage_name); + if (sym == OBJ_SYM_NONE) { + sym = obj_symbol_ex(ob, (Sym)decl.linkage_name, api_map_bind(decl.sym.bind), + api_map_vis(decl.sym.visibility), + api_decl_sym_kind(decl), OBJ_SEC_NONE, 0, 0, 0); + } + if (decl.sym.flags) { + obj_symbol_set_flags(ob, sym, (u16)decl.sym.flags); + } + api_remember_sym(g, sym, ty, decl); + return (CfreeCgSym)sym; +} + +CfreeCgSym cfree_cg_alias(CfreeCg* g, CfreeCgAlias alias) { + ObjBuilder* ob; + ObjSymId sym; + const ObjSym* ts; + CfreeCgDecl decl_attrs; + if (!g || !alias.linkage_name || alias.target == CFREE_CG_SYM_NONE) { + return CFREE_CG_SYM_NONE; + } + ob = g->obj; + sym = obj_symbol_find(ob, (Sym)alias.linkage_name); + ts = obj_symbol_get(ob, (ObjSymId)alias.target); + if (!ts) return CFREE_CG_SYM_NONE; + if (sym == OBJ_SYM_NONE) { + sym = + obj_symbol_ex(ob, (Sym)alias.linkage_name, api_map_bind(alias.sym.bind), + api_map_vis(alias.sym.visibility), (SymKind)ts->kind, + ts->section_id, ts->value, ts->size, ts->common_align); + } else if (ts->section_id != OBJ_SEC_NONE) { + obj_symbol_define(ob, sym, ts->section_id, ts->value, ts->size); + } + if 
(alias.sym.flags) obj_symbol_set_flags(ob, sym, (u16)alias.sym.flags); + decl_attrs = api_sym_attrs(g, alias.target); + decl_attrs.sym = alias.sym; + api_remember_sym(g, sym, api_sym_type(g, alias.target), decl_attrs); + return (CfreeCgSym)sym; +} + +void cfree_cg_func_begin(CfreeCg* g, CfreeCgSym cg_sym) { + Compiler* c; + ObjBuilder* ob; + CGTarget* T; + ObjSymId sym; + ObjSecId text_sec; + CfreeCgTypeId fty; + const ABIFuncInfo* abi; + CfreeCgDecl attrs; + if (!g) return; + c = g->c; + ob = g->obj; + T = g->target; + sym = (ObjSymId)cg_sym; + fty = api_sym_type(g, cg_sym); + if (!fty) return; + attrs = api_sym_attrs(g, cg_sym); + abi = abi_cg_func_info(c->abi, fty); + + text_sec = obj_section(ob, pool_intern_cstr(c->global, ".text"), SEC_TEXT, + SF_EXEC | SF_ALLOC, 4); + + if (sym != OBJ_SYM_NONE) { + obj_symbol_define(ob, sym, text_sec, 0, 0); + } + + memset(&g->fn_desc, 0, sizeof g->fn_desc); + g->fn_desc.sym = sym; + g->fn_desc.text_section_id = text_sec; + g->fn_desc.group_id = OBJ_GROUP_NONE; + g->fn_desc.fn_type = fty; + g->fn_desc.abi = abi; + g->fn_desc.loc = g->cur_loc; + if (attrs.as.func.flags & CFREE_CG_FUNC_NORETURN) { + g->fn_desc.flags |= CGFD_NORETURN; + } + + g->fn_ret_type = cg_type_func_ret_id(c, fty); + g->fn_abi = abi; + g->nlocals = 0; + g->sp = 0; + for (u32 i = 0; i < 3; ++i) g->slot_pools[i].n = 0; + g->avs_in_flight = NULL; + g->avs_in_flight_n = 0; + + if (g->debug) { + DebugTypeId dt = api_debug_type(g, fty); + if (dt != DEBUG_TYPE_NONE) debug_func_begin(g->debug, sym, dt, g->cur_loc); + } + T->func_begin(T, &g->fn_desc); + api_regalloc_begin(g); +} + +void cfree_cg_func_end(CfreeCg* g) { + if (!g) return; + api_regalloc_finish(g); + g->target->func_end(g->target); + if (g->debug) debug_func_end(g->debug); + g->fn_abi = NULL; + g->fn_ret_type = CFREE_CG_TYPE_NONE; + g->nscopes = 0; + memset(g->scopes, 0, sizeof g->scopes); +} + +void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs); diff 
--git a/src/cg/symbol.c b/src/cg/symbol.c @@ -0,0 +1,120 @@ +#include "cg/internal.h" + +SymBind api_map_bind(CfreeSymBind b) { + switch (b) { + case CFREE_SB_LOCAL: + return SB_LOCAL; + case CFREE_SB_GLOBAL: + return SB_GLOBAL; + case CFREE_SB_WEAK: + return SB_WEAK; + } + return SB_LOCAL; +} + +SymVis api_map_vis(CfreeCgVisibility v) { + switch (v) { + case CFREE_CG_VIS_DEFAULT: + return SV_DEFAULT; + case CFREE_CG_VIS_HIDDEN: + return SV_HIDDEN; + case CFREE_CG_VIS_PROTECTED: + return SV_PROTECTED; + } + return SV_DEFAULT; +} + +SymKind api_decl_sym_kind(CfreeCgDecl decl) { + if (decl.kind == CFREE_CG_DECL_FUNC) { + if (decl.as.func.flags & CFREE_CG_FUNC_IFUNC) return SK_IFUNC; + return SK_FUNC; + } + if (decl.as.object.flags & CFREE_CG_OBJ_TLS) return SK_TLS; + return SK_OBJ; +} + +void api_remember_sym(CfreeCg* g, ObjSymId sym, CfreeCgTypeId ty, + CfreeCgDecl decl) { + Heap* h; + CfreeCgTypeId* nts; + CfreeCgDecl* nas; + u32 cap; + if (!g || sym == OBJ_SYM_NONE) return; + if (sym < g->sym_cap) { + g->sym_types[sym] = ty; + g->sym_attrs[sym] = decl; + return; + } + h = g->c->ctx->heap; + cap = g->sym_cap ? 
g->sym_cap : 16u; + while (cap <= sym) cap *= 2u; + nts = + (CfreeCgTypeId*)h->alloc(h, sizeof(*nts) * cap, _Alignof(CfreeCgTypeId)); + nas = (CfreeCgDecl*)h->alloc(h, sizeof(*nas) * cap, _Alignof(CfreeCgDecl)); + if (!nts || !nas) { + if (nts) h->free(h, nts, sizeof(*nts) * cap); + if (nas) h->free(h, nas, sizeof(*nas) * cap); + return; + } + memset(nts, 0, sizeof(*nts) * cap); + memset(nas, 0, sizeof(*nas) * cap); + if (g->sym_types) { + memcpy(nts, g->sym_types, sizeof(*nts) * g->sym_cap); + h->free(h, g->sym_types, sizeof(*g->sym_types) * g->sym_cap); + } + if (g->sym_attrs) { + memcpy(nas, g->sym_attrs, sizeof(*nas) * g->sym_cap); + h->free(h, g->sym_attrs, sizeof(*g->sym_attrs) * g->sym_cap); + } + g->sym_types = nts; + g->sym_attrs = nas; + g->sym_cap = cap; + g->sym_types[sym] = ty; + g->sym_attrs[sym] = decl; +} + +CfreeCgTypeId api_sym_type(CfreeCg* g, CfreeCgSym sym) { + if (!g || sym == CFREE_CG_SYM_NONE || sym >= g->sym_cap) { + return CFREE_CG_TYPE_NONE; + } + return g->sym_types[sym]; +} + +CfreeCgDecl api_sym_attrs(CfreeCg* g, CfreeCgSym sym) { + CfreeCgDecl decl; + memset(&decl, 0, sizeof(decl)); + decl.kind = CFREE_CG_DECL_OBJECT; + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + if (!g || sym == CFREE_CG_SYM_NONE || sym >= g->sym_cap) return decl; + return g->sym_attrs[sym]; +} + +int api_sym_is_tls(CfreeCg* g, CfreeCgSym sym) { + CfreeCgDecl decl = api_sym_attrs(g, sym); + return decl.kind == CFREE_CG_DECL_OBJECT && + (decl.as.object.flags & CFREE_CG_OBJ_TLS); +} + +RelocKind api_data_reloc_kind(int pcrel, uint32_t width) { + if (pcrel) { + if (width == 4) return R_PC32; + if (width == 8) return R_PC64; + } else { + if (width == 4) return R_ABS32; + if (width == 8) return R_ABS64; + } + return R_NONE; +} + +/* ============================================================ + * Public API: CfreeCg lifecycle + * ============================================================ */ + +SrcLoc api_no_loc(void) { + SrcLoc loc; 
+ loc.file_id = 0; + loc.line = 0; + loc.col = 0; + return loc; +} diff --git a/src/cg/type.c b/src/cg/type.c @@ -0,0 +1,965 @@ +#include "cg/internal.h" + +typedef enum CgApiTypeKind { + CG_API_TYPE_PTR, + CG_API_TYPE_ARRAY, + CG_API_TYPE_ALIAS, + CG_API_TYPE_RECORD, + CG_API_TYPE_ENUM, + CG_API_TYPE_FUNC, +} CgApiTypeKind; + +typedef struct CgApiType { + CgType cg; + CfreeCgTypeId base; + CfreeSym name; + u32 count; + u32 flags; + u32 address_space; + u64 array_count; + const CfreeCgField* fields; + const CfreeCgEnumValue* values; + const CfreeCgFuncParam* params; + CfreeCgAbiAttrs ret_attrs; + CfreeCgCallConv call_conv; + u8 kind; + u8 abi_variadic; + u8 pad[2]; +} CgApiType; + +SEGVEC_DEFINE(CgApiTypes, CgApiType, CG_API_TYPE_SEG_SHIFT); + +typedef struct CgApiState { + Heap* heap; + CgApiTypes types; + CgType builtins[CFREE_CG_BUILTIN_COUNT]; + u8 builtins_init; + u8 pad[3]; +} CgApiState; + +static CfreeCgTypeId type_id_from_tuple(u32 seg, u32 index) { + return (CfreeCgTypeId)((seg << CG_API_TYPE_SEG_SHIFT) | + (index & CG_API_TYPE_SEG_MASK)); +} + +CfreeCgTypeId builtin_id(CfreeCgBuiltinType t) { + return type_id_from_tuple(CG_API_TYPE_BUILTIN_SEG, (u32)t); +} + +static int decode_user_id(CfreeCgTypeId id, u32* index_out) { + u32 seg = id >> CG_API_TYPE_SEG_SHIFT; + u32 off = id & CG_API_TYPE_SEG_MASK; + if (seg < CG_API_TYPE_USER_SEG_BIAS) return 0; + *index_out = + ((seg - CG_API_TYPE_USER_SEG_BIAS) << CG_API_TYPE_SEG_SHIFT) | off; + return 1; +} + +static CfreeCgTypeId user_id_from_index(u32 index) { + u32 raw_seg = index >> CG_API_TYPE_SEG_SHIFT; + u32 off = index & CG_API_TYPE_SEG_MASK; + u32 seg_limit = UINT32_MAX >> CG_API_TYPE_SEG_SHIFT; + if (raw_seg > seg_limit - CG_API_TYPE_USER_SEG_BIAS) { + return CFREE_CG_TYPE_NONE; + } + return type_id_from_tuple(raw_seg + CG_API_TYPE_USER_SEG_BIAS, off); +} + +static CfreeCgTypeId type_id_for_user_index(u32 index) { + return user_id_from_index(index); +} + +static u64 cg_align_to(u64 n, u32 align) { + u64 a = 
align ? (u64)align : 1u; + return ((n + a - 1u) / a) * a; +} + +static void builtin_cg_type_init(Compiler* c, CgType* out, + CfreeCgBuiltinType t) { + memset(out, 0, sizeof(*out)); + switch (t) { + case CFREE_CG_BUILTIN_VOID: + out->kind = CFREE_CG_TYPE_VOID; + out->align = 1; + break; + case CFREE_CG_BUILTIN_BOOL: + out->kind = CFREE_CG_TYPE_BOOL; + out->size = 1; + out->align = 1; + out->integer.width = 8; + break; + case CFREE_CG_BUILTIN_I8: + out->kind = CFREE_CG_TYPE_INT; + out->size = 1; + out->align = 1; + out->integer.width = 8; + break; + case CFREE_CG_BUILTIN_I16: + out->kind = CFREE_CG_TYPE_INT; + out->size = 2; + out->align = 2; + out->integer.width = 16; + break; + case CFREE_CG_BUILTIN_I32: + out->kind = CFREE_CG_TYPE_INT; + out->size = 4; + out->align = 4; + out->integer.width = 32; + break; + case CFREE_CG_BUILTIN_I64: + out->kind = CFREE_CG_TYPE_INT; + out->size = 8; + out->align = 8; + out->integer.width = 64; + break; + case CFREE_CG_BUILTIN_I128: + out->kind = CFREE_CG_TYPE_INT; + out->size = 16; + out->align = 16; + out->integer.width = 128; + break; + case CFREE_CG_BUILTIN_F32: + out->kind = CFREE_CG_TYPE_FLOAT; + out->size = 4; + out->align = 4; + out->fp.width = 32; + break; + case CFREE_CG_BUILTIN_F64: + out->kind = CFREE_CG_TYPE_FLOAT; + out->size = 8; + out->align = 8; + out->fp.width = 64; + break; + case CFREE_CG_BUILTIN_F128: + out->kind = CFREE_CG_TYPE_FLOAT; + out->size = 16; + out->align = 16; + out->fp.width = 128; + break; + case CFREE_CG_BUILTIN_VARARG_STATE: { + ABITypeInfo info = abi_va_list_info(c->abi); + out->kind = CFREE_CG_TYPE_VARARG_STATE; + out->size = info.size; + out->align = info.align ? 
info.align : 1; + break; + } + case CFREE_CG_BUILTIN_COUNT: + break; + } +} + +static void cg_api_init_builtins(Compiler* c, CgApiState* s) { + if (s->builtins_init) return; + for (u32 i = 0; i < CFREE_CG_BUILTIN_COUNT; ++i) { + builtin_cg_type_init(c, &s->builtins[i], (CfreeCgBuiltinType)i); + } + s->builtins_init = 1; +} + +static CgApiState* cg_api_get(Compiler* c) { + Heap* h; + CgApiState* s; + if (!c) return NULL; + if (c->cg_api) return (CgApiState*)c->cg_api; + h = (Heap*)c->ctx->heap; + s = (CgApiState*)h->alloc(h, sizeof(*s), _Alignof(CgApiState)); + if (!s) return NULL; + memset(s, 0, sizeof(*s)); + s->heap = h; + CgApiTypes_init(&s->types, h); + c->cg_api = s; + c->cg_api_free = cg_api_fini; + cg_api_init_builtins(c, s); + return s; +} + +static CgApiType* api_type_from_id(Compiler* c, CfreeCgTypeId id); + +const CgType* cg_type_get(Compiler* c, CfreeCgTypeId id) { + u32 seg; + u32 off; + CgApiState* s; + CgApiType* e; + if (!c || id == CFREE_CG_TYPE_NONE) return NULL; + seg = id >> CG_API_TYPE_SEG_SHIFT; + off = id & CG_API_TYPE_SEG_MASK; + if (seg == CG_API_TYPE_BUILTIN_SEG) { + if (off >= CFREE_CG_BUILTIN_COUNT) return NULL; + s = cg_api_get(c); + if (!s) return NULL; + cg_api_init_builtins(c, s); + return &s->builtins[off]; + } + e = api_type_from_id(c, id); + return e ? &e->cg : NULL; +} + +uint64_t cg_type_size(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return ty ? ty->size : 0; +} + +uint32_t cg_type_align(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return ty ? 
ty->align : 0; +} + +int cg_type_is_int(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { + return cg_type_is_int(c, ty->alias.base); + } + return ty && + (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL || + ty->kind == CFREE_CG_TYPE_ENUM); +} + +int cg_type_is_float(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { + return cg_type_is_float(c, ty->alias.base); + } + return ty && ty->kind == CFREE_CG_TYPE_FLOAT; +} + +int cg_type_is_ptr(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { + return cg_type_is_ptr(c, ty->alias.base); + } + return ty && ty->kind == CFREE_CG_TYPE_PTR; +} + +int cg_type_is_record(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { + return cg_type_is_record(c, ty->alias.base); + } + return ty && ty->kind == CFREE_CG_TYPE_RECORD; +} + +int cg_type_is_void(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) + return cg_type_is_void(c, ty->alias.base); + return ty && ty->kind == CFREE_CG_TYPE_VOID; +} + +int cg_type_is_aggregate(Compiler* c, CfreeCgTypeId id) { + return cg_type_is_record(c, id); +} + +CfreeCgTypeId cg_type_ptr_to(Compiler* c, CfreeCgTypeId pointee) { + return cfree_cg_type_ptr(c, pointee, 0); +} + +CfreeCgTypeId cg_type_pointee(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) + return cg_type_pointee(c, ty->alias.base); + return ty && ty->kind == CFREE_CG_TYPE_PTR ? 
ty->ptr.pointee + : CFREE_CG_TYPE_NONE; +} + +CfreeCgTypeId cg_type_func_ret_id(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) + return cg_type_func_ret_id(c, ty->alias.base); + return ty && ty->kind == CFREE_CG_TYPE_FUNC ? ty->func.ret + : CFREE_CG_TYPE_NONE; +} + +CfreeCgTypeId cg_type_func_param_id(Compiler* c, CfreeCgTypeId id, u32 index) { + const CgType* ty = cg_type_get(c, id); + if (ty && ty->kind == CFREE_CG_TYPE_ALIAS) + return cg_type_func_param_id(c, ty->alias.base, index); + if (!ty || ty->kind != CFREE_CG_TYPE_FUNC || index >= ty->func.nparams) + return CFREE_CG_TYPE_NONE; + return ty->func.params[index].type; +} + +static CgApiType* type_alloc(Compiler* c, CfreeCgTypeId* id_out) { + CgApiState* s = cg_api_get(c); + CgApiType* e; + u32 index; + if (!s) return NULL; + e = CgApiTypes_push(&s->types, &index); + if (!e) return NULL; + *id_out = user_id_from_index(index); + if (*id_out == CFREE_CG_TYPE_NONE) return NULL; + return e; +} + +static CfreeCgTypeId find_ptr_type_id(Compiler* c, CfreeCgTypeId pointee, + u32 address_space) { + CgApiState* s; + u32 n; + if (!c || !c->cg_api) return CFREE_CG_TYPE_NONE; + s = (CgApiState*)c->cg_api; + n = CgApiTypes_count(&s->types); + for (u32 i = 0; i < n; ++i) { + CgApiType* e = CgApiTypes_at(&s->types, i); + if (e && e->kind == CG_API_TYPE_PTR && e->base == pointee && + e->address_space == address_space) + return type_id_for_user_index(i); + } + return CFREE_CG_TYPE_NONE; +} + +static CfreeCgTypeId find_array_type_id(Compiler* c, CfreeCgTypeId elem, + u64 count) { + CgApiState* s; + u32 n; + if (!c || !c->cg_api) return CFREE_CG_TYPE_NONE; + s = (CgApiState*)c->cg_api; + n = CgApiTypes_count(&s->types); + for (u32 i = 0; i < n; ++i) { + CgApiType* e = CgApiTypes_at(&s->types, i); + if (e && e->kind == CG_API_TYPE_ARRAY && e->base == elem && + e->array_count == count) + return type_id_for_user_index(i); + } + return CFREE_CG_TYPE_NONE; +} + 
+static int cg_params_eq(const CfreeCgFuncParam* a, const CfreeCgFuncParam* b, + u32 n) { + for (u32 i = 0; i < n; ++i) + if (a[i].type != b[i].type || + memcmp(&a[i].attrs, &b[i].attrs, sizeof(a[i].attrs)) != 0) { + return 0; + } + return 1; +} + +static CfreeCgTypeId find_func_type_id(Compiler* c, CfreeCgFuncSig sig) { + CgApiState* s; + u32 n; + if (!c || !c->cg_api) return CFREE_CG_TYPE_NONE; + s = (CgApiState*)c->cg_api; + n = CgApiTypes_count(&s->types); + for (u32 i = 0; i < n; ++i) { + CgApiType* e = CgApiTypes_at(&s->types, i); + if (!e || e->kind != CG_API_TYPE_FUNC) continue; + if (e->base != sig.ret || e->count != sig.nparams) continue; + if (e->abi_variadic != (sig.abi_variadic != 0)) continue; + if (e->call_conv != sig.call_conv) continue; + if (memcmp(&e->ret_attrs, &sig.ret_attrs, sizeof(e->ret_attrs)) != 0) { + continue; + } + if (sig.nparams && !cg_params_eq(e->params, sig.params, sig.nparams)) { + continue; + } + return type_id_for_user_index(i); + } + return CFREE_CG_TYPE_NONE; +} + +static CgApiType* api_type_from_id(Compiler* c, CfreeCgTypeId id) { + u32 index; + CgApiState* s; + CgApiType* e; + if (!c || id == CFREE_CG_TYPE_NONE) return NULL; + if ((id >> CG_API_TYPE_SEG_SHIFT) == CG_API_TYPE_BUILTIN_SEG) return NULL; + if (!decode_user_id(id, &index)) return NULL; + s = (CgApiState*)c->cg_api; + if (!s) return NULL; + e = CgApiTypes_at(&s->types, index); + return e; +} + +CfreeCgTypeId resolve_type(Compiler* c, CfreeCgTypeId id) { + return cg_type_get(c, id) ? id : CFREE_CG_TYPE_NONE; +} + +CfreeCgTypeId api_unalias_type(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + while (ty && ty->kind == CFREE_CG_TYPE_ALIAS) { + id = ty->alias.base; + ty = cg_type_get(c, id); + } + return ty ? 
id : CFREE_CG_TYPE_NONE; +} + +static CfreeCgFuncParam* copy_cg_params(Compiler* c, + const CfreeCgFuncParam* src, u32 n) { + CfreeCgFuncParam* dst; + if (!n) return NULL; + if (!src) return NULL; + dst = arena_array(&c->global->arena, CfreeCgFuncParam, n); + if (!dst) return NULL; + memcpy(dst, src, sizeof(*dst) * n); + return dst; +} + +static CgTypeField* copy_cg_fields(Compiler* c, const CfreeCgField* src, + u32 n) { + CgTypeField* dst; + if (!n) return NULL; + if (!src) return NULL; + dst = arena_array(&c->global->arena, CgTypeField, n); + if (!dst) return NULL; + memset(dst, 0, sizeof(*dst) * n); + for (u32 i = 0; i < n; ++i) { + dst[i].name = src[i].name; + dst[i].type = src[i].type; + dst[i].align_override = src[i].align_override; + dst[i].flags = src[i].flags; + dst[i].bit_width = src[i].bit_width; + dst[i].bit_signed = src[i].bit_signed != 0; + } + return dst; +} + +static int cg_type_layout_record(Compiler* c, CgType* cg) { + u32 max_align = 1; + u64 size = 0; + if (!c || !cg || cg->kind != CFREE_CG_TYPE_RECORD) return 0; + if (cg->record.nfields && !cg->record.fields) return 0; + if (cg->record.is_union) { + for (u32 i = 0; i < cg->record.nfields; ++i) { + CgTypeField* f = &cg->record.fields[i]; + u64 fsize = cg_type_size(c, f->type); + u32 falign = cg_type_align(c, f->type); + if (!falign) return 0; + if (f->align_override == 1u) { + falign = 1; + } else if (f->align_override > falign) { + falign = f->align_override; + } + if (falign > max_align) max_align = falign; + if ((f->flags & CFREE_CG_FIELD_BITFIELD) != 0) { + f->offset = 0; + f->bit_offset = 0; + f->bit_storage_size = (u32)fsize; + if (f->bit_width == 0) continue; + } + if (fsize > size) size = fsize; + f->offset = 0; + } + } else { + u64 off = 0; + int active_bitfield_unit = 0; + u64 unit_off = 0; + u32 unit_bits = 0; + u32 unit_size = 0; + u32 next_bit = 0; + for (u32 i = 0; i < cg->record.nfields; ++i) { + CgTypeField* f = &cg->record.fields[i]; + u64 fsize = cg_type_size(c, f->type); + u32 
falign = cg_type_align(c, f->type); + if (!falign) return 0; + if (f->align_override == 1u) { + falign = 1; + } else if (f->align_override > falign) { + falign = f->align_override; + } + if (falign > max_align) max_align = falign; + if ((f->flags & CFREE_CG_FIELD_BITFIELD) != 0) { + if (fsize > UINT32_MAX / 8u) return 0; + if (f->bit_width == 0) { + if (active_bitfield_unit) off = unit_off + unit_size; + off = cg_align_to(off, falign); + f->offset = off; + f->bit_offset = 0; + f->bit_storage_size = (u32)fsize; + active_bitfield_unit = 0; + next_bit = 0; + continue; + } + if (f->bit_width > fsize * 8u) return 0; + if (!active_bitfield_unit || unit_size != (u32)fsize || + next_bit + f->bit_width > unit_bits) { + if (active_bitfield_unit) off = unit_off + unit_size; + off = cg_align_to(off, falign); + unit_off = off; + unit_size = (u32)fsize; + unit_bits = unit_size * 8u; + next_bit = 0; + active_bitfield_unit = 1; + } + f->offset = unit_off; + f->bit_offset = (u16)next_bit; + f->bit_storage_size = unit_size; + next_bit += f->bit_width; + off = unit_off + unit_size; + continue; + } + active_bitfield_unit = 0; + off = cg_align_to(off, falign); + f->offset = off; + off += fsize; + } + size = off; + } + if (cg->record.align_override > max_align) { + max_align = cg->record.align_override; + } + cg->align = max_align; + cg->size = cg_align_to(size, max_align); + return 1; +} + +static int cg_type_set_ptr(Compiler* c, CgApiType* e, CfreeCgTypeId pointee, + u32 address_space) { + u32 ptr_size; + u32 ptr_align; + if (!cg_type_get(c, pointee)) return 0; + memset(&e->cg, 0, sizeof(e->cg)); + ptr_size = c->target.ptr_size ? c->target.ptr_size : 8; + ptr_align = c->target.ptr_align ? 
c->target.ptr_align : ptr_size; + e->cg.kind = CFREE_CG_TYPE_PTR; + e->cg.size = ptr_size; + e->cg.align = ptr_align; + e->cg.ptr.pointee = pointee; + e->cg.ptr.address_space = address_space; + return 1; +} + +static int cg_type_set_array(Compiler* c, CgApiType* e, CfreeCgTypeId elem, + u64 count) { + const CgType* ety = cg_type_get(c, elem); + if (!ety) return 0; + memset(&e->cg, 0, sizeof(e->cg)); + e->cg.kind = CFREE_CG_TYPE_ARRAY; + e->cg.size = ety->size * count; + e->cg.align = ety->align; + e->cg.array.elem = elem; + e->cg.array.count = count; + return 1; +} + +static int cg_type_set_alias(Compiler* c, CgApiType* e, CfreeSym name, + CfreeCgTypeId base) { + const CgType* bty = cg_type_get(c, base); + if (!bty) return 0; + memset(&e->cg, 0, sizeof(e->cg)); + e->cg.kind = CFREE_CG_TYPE_ALIAS; + e->cg.size = bty->size; + e->cg.align = bty->align; + e->cg.alias.name = name; + e->cg.alias.base = base; + return 1; +} + +static int cg_type_set_record(Compiler* c, CgApiType* e, CfreeSym tag, + const CfreeCgField* fields, u32 nfields, + int is_union, u32 align_override, u32 flags) { + CgTypeField* copied = copy_cg_fields(c, fields, nfields); + if (nfields && !copied) return 0; + memset(&e->cg, 0, sizeof(e->cg)); + e->cg.kind = CFREE_CG_TYPE_RECORD; + e->cg.record.tag = tag; + e->cg.record.fields = copied; + e->cg.record.nfields = nfields; + e->cg.record.is_union = is_union != 0; + e->cg.record.align_override = align_override; + e->cg.record.flags = flags; + return cg_type_layout_record(c, &e->cg); +} + +static int cg_type_set_enum(Compiler* c, CgApiType* e, CfreeSym tag, + CfreeCgTypeId base, CfreeCgEnumValue* values, + u32 nvalues) { + const CgType* bty; + if (base == CFREE_CG_TYPE_NONE) base = builtin_id(CFREE_CG_BUILTIN_I32); + bty = cg_type_get(c, base); + if (!bty || + !(bty->kind == CFREE_CG_TYPE_INT || bty->kind == CFREE_CG_TYPE_BOOL)) { + return 0; + } + memset(&e->cg, 0, sizeof(e->cg)); + e->cg.kind = CFREE_CG_TYPE_ENUM; + e->cg.size = bty->size; + 
e->cg.align = bty->align; + e->cg.enum_.tag = tag; + e->cg.enum_.base = base; + e->cg.enum_.values = values; + e->cg.enum_.nvalues = nvalues; + return 1; +} + +static int cg_type_set_func(Compiler* c, CgApiType* e, CfreeCgFuncSig sig, + CfreeCgFuncParam* params) { + if (!cg_type_get(c, sig.ret)) return 0; + for (u32 i = 0; i < sig.nparams; ++i) { + if (!cg_type_get(c, sig.params[i].type)) return 0; + } + memset(&e->cg, 0, sizeof(e->cg)); + e->cg.kind = CFREE_CG_TYPE_FUNC; + e->cg.size = 1; + e->cg.align = 1; + e->cg.func.ret = sig.ret; + e->cg.func.params = params; + e->cg.func.nparams = sig.nparams; + e->cg.func.call_conv = sig.call_conv; + e->cg.func.abi_variadic = sig.abi_variadic != 0; + e->cg.func.ret_attrs = sig.ret_attrs; + return 1; +} + +CfreeCgBuiltinTypes cfree_cg_builtin_types(CfreeCompiler* c) { + CfreeCgBuiltinTypes out; + (void)c; + memset(&out, 0, sizeof(out)); + for (u32 i = 0; i < CFREE_CG_BUILTIN_COUNT; ++i) { + out.id[i] = builtin_id((CfreeCgBuiltinType)i); + } + return out; +} + +CfreeCgTypeId cfree_cg_type_ptr(CfreeCompiler* c, CfreeCgTypeId pointee, + uint32_t address_space) { + CfreeCgTypeId id; + CgApiType* e; + if (!cg_type_get(c, pointee)) return CFREE_CG_TYPE_NONE; + id = find_ptr_type_id(c, pointee, address_space); + if (id != CFREE_CG_TYPE_NONE) return id; + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->base = pointee; + e->address_space = address_space; + e->kind = CG_API_TYPE_PTR; + if (!cg_type_set_ptr(c, e, pointee, address_space)) { + return CFREE_CG_TYPE_NONE; + } + return id; +} + +CfreeCgTypeId cfree_cg_type_array(CfreeCompiler* c, CfreeCgTypeId elem, + uint64_t count) { + CfreeCgTypeId id; + CgApiType* e; + if (!cg_type_get(c, elem) || count > UINT32_MAX) return CFREE_CG_TYPE_NONE; + id = find_array_type_id(c, elem, count); + if (id != CFREE_CG_TYPE_NONE) return id; + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->base = elem; + e->array_count = count; + e->kind = CG_API_TYPE_ARRAY; + if 
(!cg_type_set_array(c, e, elem, count)) { + return CFREE_CG_TYPE_NONE; + } + return id; +} + +CfreeCgTypeId cfree_cg_type_alias(CfreeCompiler* c, CfreeSym name, + CfreeCgTypeId base) { + CfreeCgTypeId id; + CgApiType* e; + if (!cg_type_get(c, base)) return CFREE_CG_TYPE_NONE; + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->base = base; + e->name = name; + e->kind = CG_API_TYPE_ALIAS; + return cg_type_set_alias(c, e, name, base) ? id : CFREE_CG_TYPE_NONE; +} + +CfreeCgTypeId cfree_cg_type_record(CfreeCompiler* c, CfreeSym tag, + const CfreeCgField* fields, + uint32_t nfields) { + CfreeCgRecordDesc desc; + memset(&desc, 0, sizeof desc); + desc.tag = tag; + desc.fields = fields; + desc.nfields = nfields; + return cfree_cg_type_record_ex(c, &desc); +} + +CfreeCgTypeId cfree_cg_type_record_ex(CfreeCompiler* c, + const CfreeCgRecordDesc* desc) { + CfreeCgTypeId id; + CgApiType* e; + CfreeCgField* copied = NULL; + if (!c || !desc || (desc->nfields && !desc->fields) || + desc->nfields > UINT16_MAX) { + return CFREE_CG_TYPE_NONE; + } + if (desc->nfields) { + copied = arena_array(&c->global->arena, CfreeCgField, desc->nfields); + if (!copied) return CFREE_CG_TYPE_NONE; + } + + for (u32 i = 0; i < desc->nfields; ++i) { + if (!cg_type_get(c, desc->fields[i].type)) return CFREE_CG_TYPE_NONE; + copied[i] = desc->fields[i]; + } + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->name = desc->tag; + e->count = desc->nfields; + e->fields = copied; + e->kind = CG_API_TYPE_RECORD; + if (!cg_type_set_record(c, e, desc->tag, desc->fields, desc->nfields, + desc->is_union, desc->align_override, 0)) { + return CFREE_CG_TYPE_NONE; + } + return id; +} + +CfreeCgTypeId cfree_cg_type_enum(CfreeCompiler* c, CfreeSym tag, + CfreeCgTypeId base, + const CfreeCgEnumValue* values, + uint32_t nvalues) { + CfreeCgEnumValue* copied = NULL; + CfreeCgTypeId id; + CgApiType* e; + if (!c || (nvalues && !values)) return CFREE_CG_TYPE_NONE; + if (base == 
CFREE_CG_TYPE_NONE) base = builtin_id(CFREE_CG_BUILTIN_I32); + if (!cg_type_is_int(c, base)) return CFREE_CG_TYPE_NONE; + if (nvalues) { + copied = arena_array(&c->global->arena, CfreeCgEnumValue, nvalues); + if (!copied) return CFREE_CG_TYPE_NONE; + memcpy(copied, values, sizeof(*copied) * nvalues); + } + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->base = base; + e->name = tag; + e->count = nvalues; + e->values = copied; + e->kind = CG_API_TYPE_ENUM; + if (!cg_type_set_enum(c, e, tag, base, copied, nvalues)) { + return CFREE_CG_TYPE_NONE; + } + return id; +} + +CfreeCgTypeId cfree_cg_type_func(CfreeCompiler* c, CfreeCgFuncSig sig) { + CfreeCgFuncParam* copied = NULL; + CfreeCgTypeId id; + CgApiType* e; + if (!c || !cg_type_get(c, sig.ret) || (sig.nparams && !sig.params) || + sig.nparams > UINT16_MAX) { + return CFREE_CG_TYPE_NONE; + } + id = find_func_type_id(c, sig); + if (id != CFREE_CG_TYPE_NONE) return id; + if (sig.nparams) { + copied = copy_cg_params(c, sig.params, sig.nparams); + if (!copied) return CFREE_CG_TYPE_NONE; + for (u32 i = 0; i < sig.nparams; ++i) { + if (!cg_type_get(c, sig.params[i].type)) return CFREE_CG_TYPE_NONE; + } + } + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + e->base = sig.ret; + e->count = sig.nparams; + e->params = copied; + e->ret_attrs = sig.ret_attrs; + e->call_conv = sig.call_conv; + e->abi_variadic = sig.abi_variadic != 0; + e->kind = CG_API_TYPE_FUNC; + if (!cg_type_set_func(c, e, sig, copied)) { + return CFREE_CG_TYPE_NONE; + } + return id; +} + +uint64_t cfree_cg_type_size(CfreeCompiler* c, CfreeCgTypeId id) { + return cg_type_size(c, id); +} + +uint32_t cfree_cg_type_align(CfreeCompiler* c, CfreeCgTypeId id) { + return cg_type_align(c, id); +} + +CfreeCgTypeKind cfree_cg_type_kind(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return ty ? 
ty->kind : CFREE_CG_TYPE_VOID; +} + +uint32_t cfree_cg_type_int_width(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) { + return ty->integer.width; + } + if (ty->kind == CFREE_CG_TYPE_ENUM) { + return (uint32_t)ty->size * 8u; + } + if (ty->kind == CFREE_CG_TYPE_ALIAS) { + return cfree_cg_type_int_width(c, ty->alias.base); + } + return 0; +} + +uint32_t cfree_cg_type_float_width(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_FLOAT) return ty->fp.width; + if (ty->kind == CFREE_CG_TYPE_ALIAS) { + return cfree_cg_type_float_width(c, ty->alias.base); + } + return 0; +} + +CfreeCgTypeId cfree_cg_type_ptr_pointee(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_PTR) ? ty->ptr.pointee + : CFREE_CG_TYPE_NONE; +} + +CfreeCgTypeId cfree_cg_type_array_elem(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_ARRAY) ? ty->array.elem + : CFREE_CG_TYPE_NONE; +} + +uint32_t cfree_cg_type_ptr_address_space(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_PTR) ? ty->ptr.address_space : 0u; +} + +uint64_t cfree_cg_type_array_count(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_ARRAY) ? ty->array.count : 0u; +} + +CfreeCgTypeId cfree_cg_type_func_ret(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.ret + : CFREE_CG_TYPE_NONE; +} + +uint32_t cfree_cg_type_func_nparams(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? 
ty->func.nparams : 0; +} + +CfreeCgAbiAttrs cfree_cg_type_func_ret_attrs(CfreeCompiler* c, + CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + CfreeCgAbiAttrs empty; + memset(&empty, 0, sizeof(empty)); + return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.ret_attrs : empty; +} + +CfreeCgFuncParam cfree_cg_type_func_param(CfreeCompiler* c, CfreeCgTypeId id, + uint32_t index) { + const CgType* ty = cg_type_get(c, id); + CfreeCgFuncParam empty; + memset(&empty, 0, sizeof(empty)); + if (!ty || ty->kind != CFREE_CG_TYPE_FUNC || index >= ty->func.nparams) { + return empty; + } + return ty->func.params[index]; +} + +CfreeCgCallConv cfree_cg_type_func_call_conv(CfreeCompiler* c, + CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_FUNC) ? ty->func.call_conv + : CFREE_CG_CC_TARGET_C; +} + +int cfree_cg_type_func_is_variadic(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return ty && ty->kind == CFREE_CG_TYPE_FUNC && ty->func.abi_variadic; +} + +uint32_t cfree_cg_type_record_nfields(CfreeCompiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + return (ty && ty->kind == CFREE_CG_TYPE_RECORD) ? 
ty->record.nfields : 0; +} + +CfreeStatus cfree_cg_type_record_field(CfreeCompiler* c, CfreeCgTypeId id, + uint32_t index, CfreeCgField* out, + uint64_t* offset_out) { + const CgType* ty = cg_type_get(c, id); + const CgTypeField* f; + if (!ty || ty->kind != CFREE_CG_TYPE_RECORD || index >= ty->record.nfields) { + return CFREE_NOT_FOUND; + } + f = &ty->record.fields[index]; + if (out) { + out->name = f->name; + out->type = f->type; + out->align_override = f->align_override; + out->flags = f->flags; + out->bit_width = f->bit_width; + out->bit_offset = f->bit_offset; + out->bit_storage_size = f->bit_storage_size; + out->bit_signed = f->bit_signed; + } + if (offset_out) *offset_out = f->offset; + return CFREE_OK; +} + +int cfree_cg_target_supports_call_conv(CfreeCompiler* c, CfreeCgCallConv cc) { + if (!c) return 0; + switch (cc) { + case CFREE_CG_CC_TARGET_C: + return 1; + case CFREE_CG_CC_SYSV: + return c->target.arch == CFREE_ARCH_X86_64 && + c->target.os != CFREE_OS_WINDOWS; + case CFREE_CG_CC_WIN64: + return c->target.arch == CFREE_ARCH_X86_64 && + c->target.os == CFREE_OS_WINDOWS; + case CFREE_CG_CC_AAPCS: + return c->target.arch == CFREE_ARCH_ARM_32 || + c->target.arch == CFREE_ARCH_ARM_64; + case CFREE_CG_CC_WASM: + return c->target.arch == CFREE_ARCH_WASM; + case CFREE_CG_CC_INTERRUPT: + return 0; + } + return 0; +} + +int cfree_cg_target_supports_symbol_feature(CfreeCompiler* c, + CfreeCgSymbolFeature feat) { + if (!c) return 0; + switch (feat) { + case CFREE_CG_SYMFEAT_WEAK: + case CFREE_CG_SYMFEAT_PROTECTED_VISIBILITY: + case CFREE_CG_SYMFEAT_COMDAT: + case CFREE_CG_SYMFEAT_COMMON: + return 1; + case CFREE_CG_SYMFEAT_TLS_LOCAL_EXEC: + case CFREE_CG_SYMFEAT_TLS_INITIAL_EXEC: + case CFREE_CG_SYMFEAT_TLS_LOCAL_DYNAMIC: + case CFREE_CG_SYMFEAT_TLS_GENERAL_DYNAMIC: + return c->target.obj == CFREE_OBJ_ELF || c->target.obj == CFREE_OBJ_MACHO; + case CFREE_CG_SYMFEAT_DLLIMPORT: + case CFREE_CG_SYMFEAT_DLLEXPORT: + case CFREE_CG_SYMFEAT_MERGE_SECTIONS: + case 
CFREE_CG_SYMFEAT_CONSTRUCTOR_PRIORITY: + return 0; + } + return 0; +} + +uint64_t cfree_cg_target_backend_features(CfreeCompiler* c) { + uint64_t out = 0; + if (!c) return 0; + if (c->target.arch == CFREE_ARCH_X86_64 || + c->target.arch == CFREE_ARCH_X86_32) { + out |= CFREE_CG_BACKEND_UNALIGNED_MEMORY; + out |= CFREE_CG_BACKEND_RED_ZONE; + out |= CFREE_CG_BACKEND_SIMD; + } else { + out |= CFREE_CG_BACKEND_STRICT_ALIGNMENT; + } + return out; +} + +void cg_api_fini(Compiler* c) { + CgApiState* s; + if (!c || !c->cg_api) return; + s = (CgApiState*)c->cg_api; + CgApiTypes_fini(&s->types); + s->heap->free(s->heap, s, sizeof(*s)); + c->cg_api = NULL; + c->cg_api_free = NULL; +} + +/* ============================================================ + * CfreeCg: public codegen API implementation + * + * Drives CGTarget directly with its own value stack. + * ============================================================ */ diff --git a/src/cg/type.h b/src/cg/type.h @@ -0,0 +1,95 @@ +#ifndef CFREE_CG_TYPE_H +#define CFREE_CG_TYPE_H + +#include <cfree/cg.h> + +#include "core/core.h" + +typedef struct CgTypeField { + CfreeSym name; + CfreeCgTypeId type; + u64 offset; + u32 align_override; + u32 flags; + u16 bit_width; + u16 bit_offset; + u32 bit_storage_size; + int bit_signed; +} CgTypeField; + +typedef struct CgType { + CfreeCgTypeKind kind; + u64 size; + u32 align; + u32 pad; + union { + struct { + u32 width; + } integer; + struct { + u32 width; + } fp; + struct { + CfreeCgTypeId pointee; + u32 address_space; + } ptr; + struct { + CfreeCgTypeId elem; + u64 count; + } array; + struct { + CfreeCgTypeId ret; + CfreeCgFuncParam* params; + u32 nparams; + CfreeCgCallConv call_conv; + int abi_variadic; + CfreeCgAbiAttrs ret_attrs; + } func; + struct { + CfreeSym tag; + CgTypeField* fields; + u32 nfields; + int is_union; + u32 align_override; + u32 flags; + } record; + struct { + CfreeSym tag; + CfreeCgTypeId base; + CfreeCgEnumValue* values; + u32 nvalues; + } enum_; + struct { + 
CfreeSym name; + CfreeCgTypeId base; + } alias; + }; +} CgType; + +const CgType* cg_type_get(Compiler*, CfreeCgTypeId); +uint64_t cg_type_size(Compiler*, CfreeCgTypeId); +uint32_t cg_type_align(Compiler*, CfreeCgTypeId); +int cg_type_is_int(Compiler*, CfreeCgTypeId); +int cg_type_is_float(Compiler*, CfreeCgTypeId); +int cg_type_is_ptr(Compiler*, CfreeCgTypeId); +int cg_type_is_record(Compiler*, CfreeCgTypeId); + +enum { + CG_API_TYPE_SEG_SHIFT = 6, + CG_API_TYPE_SEG_SIZE = 1u << CG_API_TYPE_SEG_SHIFT, + CG_API_TYPE_SEG_MASK = CG_API_TYPE_SEG_SIZE - 1u, + CG_API_TYPE_BUILTIN_SEG = 1u, + CG_API_TYPE_USER_SEG_BIAS = 2u, +}; + +CfreeCgTypeId builtin_id(CfreeCgBuiltinType); +CfreeCgTypeId resolve_type(Compiler*, CfreeCgTypeId); +CfreeCgTypeId api_unalias_type(Compiler*, CfreeCgTypeId); +int cg_type_is_void(Compiler*, CfreeCgTypeId); +int cg_type_is_aggregate(Compiler*, CfreeCgTypeId); +CfreeCgTypeId cg_type_ptr_to(Compiler*, CfreeCgTypeId); +CfreeCgTypeId cg_type_pointee(Compiler*, CfreeCgTypeId); +CfreeCgTypeId cg_type_func_ret_id(Compiler*, CfreeCgTypeId); +CfreeCgTypeId cg_type_func_param_id(Compiler*, CfreeCgTypeId, uint32_t); + +#endif diff --git a/src/cg/value.c b/src/cg/value.c @@ -0,0 +1,1425 @@ +#include "cg/internal.h" + +u8 api_type_class(CfreeCgTypeId ty) { + if (ty == builtin_id(CFREE_CG_BUILTIN_F32) || + ty == builtin_id(CFREE_CG_BUILTIN_F64) || + ty == builtin_id(CFREE_CG_BUILTIN_F128)) { + return RC_FP; + } + return RC_INT; +} + +int api_is_f128_type(Compiler* c, CfreeCgTypeId ty) { + const CgType* cg; + ty = api_unalias_type(c, ty); + cg = cg_type_get(c, ty); + return cg && cg->kind == CFREE_CG_TYPE_FLOAT && cg->fp.width == 128; +} + +int api_is_i128_type(Compiler* c, CfreeCgTypeId ty) { + const CgType* cg; + ty = api_unalias_type(c, ty); + cg = cg_type_get(c, ty); + return cg && cg->kind == CFREE_CG_TYPE_INT && cg->integer.width == 128; +} + +int api_is_wide16_scalar_type(Compiler* c, CfreeCgTypeId ty) { + return api_is_f128_type(c, ty) || 
api_is_i128_type(c, ty); +} + +/* Whether a CGABIValue.storage for `ty` must be an address operand (pointing + * to a memory image of the value) rather than a value operand. Today this is + * driven by the type shape — aggregates and wide16 scalars cannot fit in a + * single Operand. A future refactor will key this off ABIArgInfo so a + * trivial-DIRECT ABI (e.g. for a C-source backend) can keep aggregates as + * value operands. See doc/CBACKEND.md. */ +int api_arg_storage_must_be_addr(Compiler* c, CfreeCgTypeId ty) { + return cg_type_is_aggregate(c, ty) || api_is_wide16_scalar_type(c, ty); +} + +Operand api_op_imm(i64 v, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_IMM; + o.cls = api_type_class(ty); + o.type = ty; + o.v.imm = v; + return o; +} + +Operand api_op_reg(Reg r, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_REG; + o.cls = api_type_class(ty); + o.type = ty; + o.v.reg = r; + return o; +} + +Operand api_op_local(FrameSlot s, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_LOCAL; + o.cls = RC_INT; + o.type = ty; + o.v.frame_slot = s; + return o; +} + +Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_GLOBAL; + o.cls = RC_INT; + o.type = ty; + o.v.global.sym = sym; + o.v.global.addend = addend; + return o; +} + +Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_INDIRECT; + o.cls = RC_INT; + o.type = ty; + o.v.ind.base = base; + o.v.ind.ofs = ofs; + return o; +} + +u8 api_residency_for(const Operand* o) { + if (o->kind == OPK_REG || o->kind == OPK_INDIRECT) return RES_REG; + return RES_INHERENT; +} + +ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_OPERAND; + sv.op = op; + sv.type = ty; + sv.res = api_residency_for(&op); + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local 
= CFREE_CG_LOCAL_NONE; + return sv; +} + +ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty) { + ApiSValue sv = api_make_sv(op, ty); + sv.lvalue = 1; + return sv; +} + +ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, + int a_owned, int b_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_CMP; + sv.type = result_ty; + sv.delayed.cmp.op = op; + sv.delayed.cmp.a = a; + sv.delayed.cmp.b = b; + sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; + sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, + int a_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.delayed.arith.kind = API_DELAYED_UNOP; + sv.type = ty; + sv.delayed.arith.un_op = op; + sv.delayed.arith.a = a; + sv.delayed.arith.a_owned = a_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, + int a_owned, int b_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.delayed.arith.kind = API_DELAYED_BINOP; + sv.type = ty; + sv.delayed.arith.bin_op = op; + sv.delayed.arith.a = a; + sv.delayed.arith.b = b; + sv.delayed.arith.a_owned = a_owned ? 1u : 0u; + sv.delayed.arith.b_owned = b_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, + int owned) { + ApiSValue sv = api_make_sv(op, ty); + if (op.kind == OPK_REG && !owned) sv.res = RES_FIXED_REG; + return sv; +} + +CfreeCgTypeId api_sv_type(const ApiSValue* sv) { + return sv->type ? 
sv->type : sv->op.type; +} + +int api_operand_can_address(const Operand* o) { + return o->kind == OPK_LOCAL || o->kind == OPK_GLOBAL || + o->kind == OPK_INDIRECT; +} + +int api_sv_op_is(const ApiSValue* sv, OpKind kind) { + return sv->kind == SV_OPERAND && sv->op.kind == kind; +} + +int api_sv_op_is_reg_or_imm(const ApiSValue* sv) { + return sv->kind == SV_OPERAND && + (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG); +} + +int api_is_lvalue_sv(const ApiSValue* sv) { + return sv->lvalue && + (sv->bitfield_lvalue || api_operand_can_address(&sv->op) || + (sv->source_local != CFREE_CG_LOCAL_NONE && sv->op.kind == OPK_REG)); +} + +void api_stack_grow(CfreeCg* g, u32 want) { + Heap* h = g->c->ctx->heap; + u32 cap = g->cap; + ApiSValue* nb; + if (cap >= want) return; + while (cap < want) cap = cap ? cap * 2u : API_CG_STACK_INITIAL; + nb = (ApiSValue*)h->alloc(h, sizeof(ApiSValue) * cap, _Alignof(ApiSValue)); + if (g->stack) { + memcpy(nb, g->stack, sizeof(ApiSValue) * g->sp); + h->free(h, g->stack, sizeof(ApiSValue) * g->cap); + } + g->stack = nb; + g->cap = cap; +} + +void api_push(CfreeCg* g, ApiSValue v) { + api_stack_grow(g, g->sp + 1); + g->stack[g->sp++] = v; +} + +ApiSValue api_pop(CfreeCg* g) { + if (g->sp == 0) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: stack underflow"); + } + return g->stack[--g->sp]; +} + +/* ---- register class helpers ---- */ + +u8 api_class_of_sv(const ApiSValue* sv) { + if (sv->kind == SV_CMP || sv->kind == SV_ARITH) return RC_INT; + if (sv->op.kind == OPK_INDIRECT) return RC_INT; + if (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG) return sv->op.cls; + return api_type_class(api_sv_type(sv)); +} + +Reg api_reg_of_sv(const ApiSValue* sv) { + if (sv->kind == SV_ARITH || sv->kind == SV_CMP) return (Reg)REG_NONE; + if (sv->op.kind == OPK_REG) return sv->op.v.reg; + if (sv->op.kind == OPK_INDIRECT) return sv->op.v.ind.base; + return (Reg)REG_NONE; +} + +void api_set_owned_reg(ApiSValue* sv, Reg r) { + if (sv->op.kind == OPK_REG) + 
sv->op.v.reg = r; + else if (sv->op.kind == OPK_INDIRECT) + sv->op.v.ind.base = r; +} + +CfreeCgTypeId api_owned_reg_type(CfreeCg* g, const ApiSValue* sv) { + if (sv->op.kind == OPK_INDIRECT) { + CfreeCgTypeId base = + sv->type ? sv->type : builtin_id(CFREE_CG_BUILTIN_VOID); + return cg_type_ptr_to(g->c, base); + } + return api_sv_type(sv); +} + +/* ---- spill slot management ---- */ + +void api_take_spill_slot_alloc(CfreeCg* g, u8 cls, FrameSlot* out) { + CGTarget* T = g->target; + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.kind = FS_SPILL; + fsd.size = (cls == RC_FP) ? 16 : 8; + fsd.align = fsd.size; + *out = T->frame_slot(T, &fsd); +} + +FrameSlot api_take_spill_slot(CfreeCg* g, u8 cls) { + if (cls < 3 && g->slot_pools[cls].n > 0) { + return g->slot_pools[cls].free[--g->slot_pools[cls].n]; + } + FrameSlot s; + api_take_spill_slot_alloc(g, cls, &s); + return s; +} + +void api_return_spill_slot(CfreeCg* g, FrameSlot s, u8 cls) { + Heap* h; + if (s == FRAME_SLOT_NONE) return; + if (cls >= 3) return; + h = g->c->ctx->heap; + if (g->slot_pools[cls].n >= g->slot_pools[cls].cap) { + u32 new_cap = g->slot_pools[cls].cap ? 
g->slot_pools[cls].cap * 2 : 8; + FrameSlot* nb = (FrameSlot*)h->alloc(h, sizeof(FrameSlot) * new_cap, + _Alignof(FrameSlot)); + if (g->slot_pools[cls].free) { + memcpy(nb, g->slot_pools[cls].free, + sizeof(FrameSlot) * g->slot_pools[cls].n); + h->free(h, g->slot_pools[cls].free, + sizeof(FrameSlot) * g->slot_pools[cls].cap); + } + g->slot_pools[cls].free = nb; + g->slot_pools[cls].cap = new_cap; + } + g->slot_pools[cls].free[g->slot_pools[cls].n++] = s; +} + +/* ---- register allocation / spill ---- */ + +ApiSValue* api_pick_victim(CfreeCg* g, u8 cls) { + for (u32 i = 0; i < g->sp; ++i) { + ApiSValue* sv = &g->stack[i]; + if (sv->res != RES_REG || sv->pinned) continue; + if (api_class_of_sv(sv) != cls) continue; + return sv; + } + return NULL; +} + +MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv); +u8 api_type_class(CfreeCgTypeId ty); + +void api_regalloc_begin(CfreeCg* g) { + CGTarget* T = g->target; + if (T->virtual_regs) { + cg_simple_regalloc_init_virtual(&g->regalloc); + return; + } + cg_simple_regalloc_init(&g->regalloc); + for (u32 c = 0; c < 3u; ++c) { + const Reg* regs = NULL; + u32 nregs = 0; + if (T->get_allocable_regs) + T->get_allocable_regs(T, (RegClass)c, &regs, &nregs); + if (regs && nregs) + cg_simple_regalloc_set_ordered(&g->regalloc, (RegClass)c, regs, nregs); + } +} + +void api_regalloc_finish(CfreeCg* g) { + if (cg_simple_regalloc_is_virtual(&g->regalloc)) return; + if (!g->target->reserve_hard_regs) return; + for (u32 c = 0; c < 3u; ++c) { + Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; + u32 nused = cg_simple_regalloc_used_regs(&g->regalloc, (RegClass)c, used, + CG_SIMPLE_REGALLOC_MAX_REGS); + if (nused) + g->target->reserve_hard_regs(g->target, (RegClass)c, used, nused); + } +} + +Reg api_alloc_reg(CfreeCg* g, u8 cls) { + Reg r = cg_simple_regalloc_alloc(&g->regalloc, (RegClass)cls); + if (r == (Reg)REG_NONE && cg_simple_regalloc_is_virtual(&g->regalloc)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: virtual regalloc exhausted"); + } 
+ return r; +} + +void api_free_reg(CfreeCg* g, Reg r, u8 cls) { + int rc; + if (r == (Reg)REG_NONE) return; + rc = cg_simple_regalloc_free(&g->regalloc, (RegClass)cls, r); + if (rc == 1) return; + if (rc == -1) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - reg %u already free in class %u", + (unsigned)r, (unsigned)cls); + } + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - reg %u not in class %u pool", (unsigned)r, + (unsigned)cls); +} + +int api_spill_avs_victim(CfreeCg* g, u8 cls) { + CGTarget* T = g->target; + if (!g->avs_in_flight) return 0; + for (u32 i = 0; i < g->avs_in_flight_n; ++i) { + CGABIValue* av = &g->avs_in_flight[i]; + if (av->storage.kind != OPK_REG) continue; + if (av->storage.cls != cls) continue; + FrameSlot slot = api_take_spill_slot(g, cls); + ApiSValue tmp = api_make_sv(av->storage, av->type); + T->spill_reg(T, av->storage, slot, api_mem_for_spill(g, &tmp)); + api_free_reg(g, av->storage.v.reg, cls); + Operand local = api_op_local(slot, av->type); + local.cls = cls; + av->storage = local; + return 1; + } + return 0; +} + +MemAccess api_mem_for_lvalue(CfreeCg* g, const Operand* lv, CfreeCgTypeId ty) { + MemAccess m; + memset(&m, 0, sizeof m); + m.type = ty; + m.size = ty ? abi_cg_sizeof(g->c->abi, ty) : 0; + m.align = ty ? 
abi_cg_alignof(g->c->abi, ty) : 0; + m.flags = MF_NONE; + if (lv->kind == OPK_LOCAL) { + m.alias.kind = (u8)ALIAS_LOCAL; + m.alias.v.local_id = (i32)lv->v.frame_slot; + } else if (lv->kind == OPK_GLOBAL) { + m.alias.kind = (u8)ALIAS_GLOBAL; + } else { + m.alias.kind = (u8)ALIAS_UNKNOWN; + } + return m; +} + +MemAccess api_mem_from_access(CfreeCg* g, const Operand* lv, + CfreeCgMemAccess access) { + CfreeCgTypeId ty = resolve_type(g->c, access.type); + MemAccess m = api_mem_for_lvalue(g, lv, ty); + if (access.align) m.align = access.align; + m.addr_space = (u16)access.address_space; + if (access.flags & CFREE_CG_MEM_VOLATILE) m.flags |= MF_VOLATILE; + if (!access.align || (ty && access.align < abi_cg_alignof(g->c->abi, ty))) { + m.flags |= MF_UNALIGNED; + } + return m; +} + +CfreeCgTypeId api_mem_access_type(CfreeCg* g, CfreeCgMemAccess access, + CfreeCgTypeId fallback, const char* who) { + CfreeCgTypeId ty = resolve_type(g->c, access.type); + if (!ty) ty = resolve_type(g->c, fallback); + if (!ty) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: %s has no value type", who); + } + return ty; +} + +u32 api_mem_type_size(CfreeCg* g, CfreeCgTypeId ty, const char* who) { + ty = resolve_type(g->c, ty); + if (!ty) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: %s has invalid type", who); + } + if (cg_type_is_void(g->c, ty)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: %s uses void type", who); + } + return abi_cg_sizeof(g->c->abi, ty); +} + +void api_require_scalar_mem_type(CfreeCg* g, const char* who, + CfreeCgTypeId ty) { + if (cg_type_is_aggregate(g->c, ty)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: %s cannot use aggregate value type (size %u); " + "copy fields or use byte memory operations", + who, (unsigned)api_mem_type_size(g, ty, who)); + } + (void)api_mem_type_size(g, ty, who); +} + +void api_require_pointer_value(CfreeCg* g, const char* who, CfreeCgTypeId ty) { + if (!cg_type_pointee(g->c, ty)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: %s operand must 
be a pointer", + who); + } +} + +void api_validate_memory_value(CfreeCg* g, const char* who, + CfreeCgTypeId access_ty, + CfreeCgTypeId value_ty) { + u32 access_size; + u32 value_size; + access_ty = resolve_type(g->c, access_ty); + value_ty = resolve_type(g->c, value_ty); + api_require_scalar_mem_type(g, who, access_ty); + if (!value_ty) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: %s value has no type", who); + } + if (cg_type_is_aggregate(g->c, value_ty)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: %s value is aggregate (size %u); copy fields or " + "use byte memory operations", + who, (unsigned)api_mem_type_size(g, value_ty, who)); + } + access_size = api_mem_type_size(g, access_ty, who); + value_size = api_mem_type_size(g, value_ty, who); + if (access_size != value_size || + api_type_class(access_ty) != api_type_class(value_ty)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: %s value type/size mismatch: access size %u, " + "value size %u", + who, (unsigned)access_size, (unsigned)value_size); + } +} + +MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv) { + CfreeCgTypeId ty = api_owned_reg_type(g, sv); + MemAccess m; + memset(&m, 0, sizeof m); + m.type = ty; + m.size = ty ? abi_cg_sizeof(g->c->abi, ty) : 8; + m.align = ty ? 
abi_cg_alignof(g->c->abi, ty) : 8; + m.alias.kind = (u8)ALIAS_UNKNOWN; + return m; +} + +void api_release_operand_reg(CfreeCg* g, Operand op) { + if (op.kind == OPK_REG) api_free_reg(g, op.v.reg, op.cls); +} + +int api_sv_owns_operand_reg(const ApiSValue* sv, const Operand* op) { + return sv->res == RES_REG && op->kind == OPK_REG && sv->op.kind == OPK_REG && + sv->op.v.reg == op->v.reg && sv->op.cls == op->cls; +} + +void api_release_cmp(CfreeCg* g, ApiSValue* sv) { + if (sv->delayed.cmp.a_owned) api_release_operand_reg(g, sv->delayed.cmp.a); + if (sv->delayed.cmp.b_owned && + (sv->delayed.cmp.b.kind != OPK_REG || sv->delayed.cmp.a.kind != OPK_REG || + sv->delayed.cmp.b.v.reg != sv->delayed.cmp.a.v.reg || + sv->delayed.cmp.b.cls != sv->delayed.cmp.a.cls || + !sv->delayed.cmp.a_owned)) { + api_release_operand_reg(g, sv->delayed.cmp.b); + } + memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); + memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); + sv->delayed.cmp.a_owned = 0; + sv->delayed.cmp.b_owned = 0; + sv->kind = SV_OPERAND; +} + +void api_release_arith(CfreeCg* g, ApiSValue* sv) { + if (sv->delayed.arith.a_owned) + api_release_operand_reg(g, sv->delayed.arith.a); + if (sv->delayed.arith.b_owned && + (sv->delayed.arith.b.kind != OPK_REG || + sv->delayed.arith.a.kind != OPK_REG || + sv->delayed.arith.b.v.reg != sv->delayed.arith.a.v.reg || + sv->delayed.arith.b.cls != sv->delayed.arith.a.cls || + !sv->delayed.arith.a_owned)) { + api_release_operand_reg(g, sv->delayed.arith.b); + } + memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); + memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); + sv->delayed.arith.a_owned = 0; + sv->delayed.arith.b_owned = 0; + sv->kind = SV_OPERAND; +} + +void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst) { + g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a, + sv->delayed.cmp.b); + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && + 
(sv->delayed.cmp.a.v.reg != dst.v.reg || + sv->delayed.cmp.a.cls != dst.cls)) { + api_release_operand_reg(g, sv->delayed.cmp.a); + } + if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && + (sv->delayed.cmp.b.v.reg != dst.v.reg || + sv->delayed.cmp.b.cls != dst.cls)) { + api_release_operand_reg(g, sv->delayed.cmp.b); + } + memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); + memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); + sv->delayed.cmp.a_owned = 0; + sv->delayed.cmp.b_owned = 0; + sv->kind = SV_OPERAND; + sv->op = dst; + sv->type = dst.type; + sv->res = RES_REG; + sv->lvalue = 0; +} + +void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst) { + if (sv->delayed.arith.kind == API_DELAYED_UNOP) { + g->target->unop(g->target, sv->delayed.arith.un_op, dst, + sv->delayed.arith.a); + } else { + g->target->binop(g->target, sv->delayed.arith.bin_op, dst, + sv->delayed.arith.a, sv->delayed.arith.b); + } + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && + (sv->delayed.arith.a.v.reg != dst.v.reg || + sv->delayed.arith.a.cls != dst.cls)) { + api_release_operand_reg(g, sv->delayed.arith.a); + } + if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_REG && + (sv->delayed.arith.b.v.reg != dst.v.reg || + sv->delayed.arith.b.cls != dst.cls)) { + api_release_operand_reg(g, sv->delayed.arith.b); + } + memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); + memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); + sv->delayed.arith.a_owned = 0; + sv->delayed.arith.b_owned = 0; + sv->kind = SV_OPERAND; + sv->op = dst; + sv->type = dst.type; + sv->res = RES_REG; + sv->lvalue = 0; +} + +int api_arith_rhs_reusable(const ApiSValue* sv) { + if (sv->delayed.arith.kind == API_DELAYED_UNOP) return 0; + switch (sv->delayed.arith.bin_op) { + case BO_IADD: + case BO_IMUL: + case BO_AND: + case BO_OR: + case BO_XOR: + return 1; + default: + return 0; + } +} + +int api_materialize_cmp_victim(CfreeCg* g, 
u8 cls) { + if (cls != RC_INT) return 0; + for (u32 i = 0; i < g->sp; ++i) { + ApiSValue* sv = &g->stack[i]; + Operand dst; + if (sv->kind != SV_CMP || sv->pinned) continue; + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && + sv->delayed.cmp.a.cls == RC_INT) { + dst = api_op_reg(sv->delayed.cmp.a.v.reg, api_sv_type(sv)); + } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && + sv->delayed.cmp.b.cls == RC_INT) { + dst = api_op_reg(sv->delayed.cmp.b.v.reg, api_sv_type(sv)); + } else { + continue; + } + api_materialize_cmp_to(g, sv, dst); + return 1; + } + return 0; +} + +int api_materialize_arith_victim(CfreeCg* g, u8 cls) { + if (cls != RC_INT) return 0; + for (u32 i = 0; i < g->sp; ++i) { + ApiSValue* sv = &g->stack[i]; + Operand dst; + if (sv->kind != SV_ARITH || sv->pinned) continue; + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && + sv->delayed.arith.a.cls == RC_INT) { + dst = api_op_reg(sv->delayed.arith.a.v.reg, api_sv_type(sv)); + } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && + sv->delayed.arith.b.kind == OPK_REG && + sv->delayed.arith.b.cls == RC_INT) { + dst = api_op_reg(sv->delayed.arith.b.v.reg, api_sv_type(sv)); + } else { + continue; + } + api_materialize_arith_to(g, sv, dst); + return 1; + } + return 0; +} + +Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) { + CGTarget* T = g->target; + Reg r; + (void)ty; + r = api_alloc_reg(g, cls); + if (r != (Reg)REG_NONE) return r; + + ApiSValue* victim = api_pick_victim(g, cls); + if (!victim && api_materialize_cmp_victim(g, cls)) { + r = api_alloc_reg(g, cls); + if (r != (Reg)REG_NONE) return r; + victim = api_pick_victim(g, cls); + } + if (!victim && api_materialize_arith_victim(g, cls)) { + r = api_alloc_reg(g, cls); + if (r != (Reg)REG_NONE) return r; + victim = api_pick_victim(g, cls); + } + if (victim) { + FrameSlot slot = api_take_spill_slot(g, cls); + CfreeCgTypeId rty = api_owned_reg_type(g, 
victim); + Operand victim_reg = api_op_reg((Reg)api_reg_of_sv(victim), rty); + T->spill_reg(T, victim_reg, slot, api_mem_for_spill(g, victim)); + api_free_reg(g, victim_reg.v.reg, cls); + victim->spill_slot = slot; + victim->res = RES_SPILLED; + api_set_owned_reg(victim, (Reg)REG_NONE); + } else if (!api_spill_avs_victim(g, cls)) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - no spillable victim (class %u)", + (unsigned)cls); + } + + r = api_alloc_reg(g, cls); + if (r == (Reg)REG_NONE) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - class %u still empty after spill", + (unsigned)cls); + } + return r; +} + +void api_ensure_reg(CfreeCg* g, ApiSValue* sv) { + if (sv->kind == SV_CMP) { + CfreeCgTypeId ty = api_sv_type(sv); + Operand dst; + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && + sv->delayed.cmp.a.cls == RC_INT) { + dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty); + } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && + sv->delayed.cmp.b.cls == RC_INT) { + dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty); + } else { + Reg r = api_alloc_reg_or_spill( + g, RC_INT, ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); + dst = api_op_reg(r, ty); + } + api_materialize_cmp_to(g, sv, dst); + return; + } + if (sv->kind == SV_ARITH) { + CfreeCgTypeId ty = api_sv_type(sv); + Operand dst; + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && + sv->delayed.arith.a.cls == RC_INT) { + dst = api_op_reg(sv->delayed.arith.a.v.reg, ty); + } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && + sv->delayed.arith.b.kind == OPK_REG && + sv->delayed.arith.b.cls == RC_INT) { + dst = api_op_reg(sv->delayed.arith.b.v.reg, ty); + } else { + Reg r = api_alloc_reg_or_spill( + g, RC_INT, ty ? 
ty : builtin_id(CFREE_CG_BUILTIN_I32)); + dst = api_op_reg(r, ty); + } + api_materialize_arith_to(g, sv, dst); + return; + } + if (sv->res != RES_SPILLED) return; + CGTarget* T = g->target; + u8 cls = api_class_of_sv(sv); + CfreeCgTypeId ty = api_owned_reg_type(g, sv); + Reg r = api_alloc_reg_or_spill(g, cls, + ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); + T->reload_reg(T, api_op_reg(r, ty), sv->spill_slot, api_mem_for_spill(g, sv)); + api_return_spill_slot(g, sv->spill_slot, cls); + sv->spill_slot = FRAME_SLOT_NONE; + if (sv->op.kind == OPK_INDIRECT) { + sv->op.v.ind.base = r; + } else { + sv->op = api_op_reg(r, api_sv_type(sv)); + } + sv->res = RES_REG; +} + +Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { + CGTarget* T = g->target; + ty = api_unalias_type(g->c, ty); + api_ensure_reg(g, v); + if (v->op.kind == OPK_REG) { + if (ty) { + v->op.type = ty; + v->type = ty; + } + return v->op; + } + Reg r = api_alloc_reg_or_spill(g, api_type_class(ty), ty); + Operand dst = api_op_reg(r, ty); + if (v->op.kind == OPK_IMM) { + T->load_imm(T, dst, v->op.v.imm); + } else if (api_is_lvalue_sv(v)) { + T->load(T, dst, v->op, api_mem_for_lvalue(g, &v->op, ty)); + if (v->op.kind == OPK_INDIRECT) { + api_free_reg(g, v->op.v.ind.base, RC_INT); + } + } else if (v->op.kind == OPK_GLOBAL) { + T->addr_of(T, dst, v->op); + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: cannot force operand to register"); + } + v->op = dst; + v->res = RES_REG; + return dst; +} + +Operand api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { + if (api_sv_op_is(v, OPK_IMM)) return v->op; + return api_force_reg(g, v, ty); +} + +void api_release(CfreeCg* g, ApiSValue* sv) { + if (sv->kind == SV_CMP) { + api_release_cmp(g, sv); + } else if (sv->kind == SV_ARITH) { + api_release_arith(g, sv); + } else if (sv->res == RES_REG) { + api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); + } else if (sv->res == RES_SPILLED) { + api_return_spill_slot(g, 
sv->spill_slot, api_class_of_sv(sv)); + sv->spill_slot = FRAME_SLOT_NONE; + } + sv->res = RES_INHERENT; +} + +void api_release_arg_storage(CfreeCg* g, Operand* storage) { + if (storage->kind == OPK_REG) { + api_free_reg(g, storage->v.reg, storage->cls); + } else if (storage->kind == OPK_LOCAL && storage->cls < 3) { + CfreeCgTypeId ty = storage->type; + if (api_arg_storage_must_be_addr(g->c, ty)) return; + api_return_spill_slot(g, storage->v.frame_slot, storage->cls); + } else if (storage->kind == OPK_INDIRECT) { + api_free_reg(g, storage->v.ind.base, RC_INT); + } +} + +/* ---- BinOp / UnOp / CmpOp mapping ---- */ + +BinOp api_map_int_binop(CfreeCgIntBinOp op) { + switch (op) { + case CFREE_CG_INT_ADD: + return BO_IADD; + case CFREE_CG_INT_SUB: + return BO_ISUB; + case CFREE_CG_INT_MUL: + return BO_IMUL; + case CFREE_CG_INT_SDIV: + return BO_SDIV; + case CFREE_CG_INT_UDIV: + return BO_UDIV; + case CFREE_CG_INT_SREM: + return BO_SREM; + case CFREE_CG_INT_UREM: + return BO_UREM; + case CFREE_CG_INT_AND: + return BO_AND; + case CFREE_CG_INT_OR: + return BO_OR; + case CFREE_CG_INT_XOR: + return BO_XOR; + case CFREE_CG_INT_SHL: + return BO_SHL; + case CFREE_CG_INT_LSHR: + return BO_SHR_U; + case CFREE_CG_INT_ASHR: + return BO_SHR_S; + } + return BO_IADD; +} + +BinOp api_map_fp_binop(CfreeCgFpBinOp op) { + switch (op) { + case CFREE_CG_FP_ADD: + return BO_FADD; + case CFREE_CG_FP_SUB: + return BO_FSUB; + case CFREE_CG_FP_MUL: + return BO_FMUL; + case CFREE_CG_FP_DIV: + return BO_FDIV; + case CFREE_CG_FP_REM: + return BO_FDIV; + } + return BO_FADD; +} + +UnOp api_map_int_unop(CfreeCgIntUnOp op) { + switch (op) { + case CFREE_CG_INT_NEG: + return UO_NEG; + case CFREE_CG_INT_NOT: + return UO_NOT; + case CFREE_CG_INT_BNOT: + return UO_BNOT; + } + return UO_NEG; +} + +CmpOp api_map_int_cmp(CfreeCgIntCmpOp op) { + switch (op) { + case CFREE_CG_INT_EQ: + return CMP_EQ; + case CFREE_CG_INT_NE: + return CMP_NE; + case CFREE_CG_INT_LT_S: + return CMP_LT_S; + case CFREE_CG_INT_LE_S: 
+ return CMP_LE_S; + case CFREE_CG_INT_GT_S: + return CMP_GT_S; + case CFREE_CG_INT_GE_S: + return CMP_GE_S; + case CFREE_CG_INT_LT_U: + return CMP_LT_U; + case CFREE_CG_INT_LE_U: + return CMP_LE_U; + case CFREE_CG_INT_GT_U: + return CMP_GT_U; + case CFREE_CG_INT_GE_U: + return CMP_GE_U; + } + return CMP_EQ; +} + +CmpOp api_map_fp_cmp(CfreeCgFpCmpOp op) { + switch (op) { + case CFREE_CG_FP_OEQ: + case CFREE_CG_FP_UEQ: + return CMP_EQ; + case CFREE_CG_FP_ONE: + case CFREE_CG_FP_UNE: + return CMP_NE; + case CFREE_CG_FP_OLT: + case CFREE_CG_FP_ULT: + return CMP_LT_F; + case CFREE_CG_FP_OLE: + case CFREE_CG_FP_ULE: + return CMP_LE_F; + case CFREE_CG_FP_OGT: + case CFREE_CG_FP_UGT: + return CMP_GT_F; + case CFREE_CG_FP_OGE: + case CFREE_CG_FP_UGE: + return CMP_GE_F; + } + return CMP_EQ; +} + +CmpOp api_invert_cmp(CmpOp op) { + switch (op) { + case CMP_EQ: + return CMP_NE; + case CMP_NE: + return CMP_EQ; + case CMP_LT_S: + return CMP_GE_S; + case CMP_LE_S: + return CMP_GT_S; + case CMP_GT_S: + return CMP_LE_S; + case CMP_GE_S: + return CMP_LT_S; + case CMP_LT_U: + return CMP_GE_U; + case CMP_LE_U: + return CMP_GT_U; + case CMP_GT_U: + return CMP_LE_U; + case CMP_GE_U: + return CMP_LT_U; + case CMP_LT_F: + return CMP_GE_F; + case CMP_LE_F: + return CMP_GT_F; + case CMP_GT_F: + return CMP_LE_F; + case CMP_GE_F: + return CMP_LT_F; + } + return CMP_EQ; +} + +AtomicOp api_map_atomic_op(CfreeCgAtomicOp op) { + switch (op) { + case CFREE_CG_ATOMIC_XCHG: + return AO_XCHG; + case CFREE_CG_ATOMIC_ADD: + return AO_ADD; + case CFREE_CG_ATOMIC_SUB: + return AO_SUB; + case CFREE_CG_ATOMIC_AND: + return AO_AND; + case CFREE_CG_ATOMIC_OR: + return AO_OR; + case CFREE_CG_ATOMIC_XOR: + return AO_XOR; + case CFREE_CG_ATOMIC_NAND: + return AO_NAND; + } + return AO_XCHG; +} + +MemOrder api_map_mem_order(CfreeCgMemOrder order) { + switch (order) { + case CFREE_CG_MO_RELAXED: + return MO_RELAXED; + case CFREE_CG_MO_CONSUME: + return MO_CONSUME; + case CFREE_CG_MO_ACQUIRE: + return MO_ACQUIRE; 
+ case CFREE_CG_MO_RELEASE: + return MO_RELEASE; + case CFREE_CG_MO_ACQ_REL: + return MO_ACQ_REL; + case CFREE_CG_MO_SEQ_CST: + return MO_SEQ_CST; + } + return MO_RELAXED; +} + +AsmDir api_map_asm_dir(uint8_t dir) { + switch ((CfreeCgAsmDir)dir) { + case CFREE_CG_ASM_IN: + return ASM_IN; + case CFREE_CG_ASM_OUT: + return ASM_OUT; + case CFREE_CG_ASM_INOUT: + return ASM_INOUT; + } + return ASM_IN; +} + +/* ---- immediate integer folding ---- */ + +u32 api_int_like_width(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_ALIAS) + return api_int_like_width(c, ty->alias.base); + if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) + return ty->integer.width; + if (ty->kind == CFREE_CG_TYPE_ENUM) return (u32)(ty->size * 8u); + if (ty->kind == CFREE_CG_TYPE_PTR) return (u32)(ty->size * 8u); + return 0; +} + +int api_type_is_bool(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_ALIAS) + return api_type_is_bool(c, ty->alias.base); + return ty->kind == CFREE_CG_TYPE_BOOL; +} + +u64 api_width_mask(u32 width) { + if (width >= 64) return UINT64_MAX; + return (1ull << width) - 1ull; +} + +u64 api_mask_width(u64 v, u32 width) { return v & api_width_mask(width); } + +i64 api_sign_extend_width(u64 v, u32 width) { + v = api_mask_width(v, width); + if (width >= 64) return (i64)v; + u64 sign = 1ull << (width - 1u); + return (i64)((v ^ sign) - sign); +} + +int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { + u32 width = api_int_like_width(c, ty); + if (!width || width > 64) return 0; + *width_out = width; + return 1; +} + +int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { + if (!cg_type_is_int(c, ty)) return 0; + return api_foldable_int_like_type(c, ty, width_out); +} + +i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width) { + v = api_mask_width(v, 
width); + if (api_type_is_bool(c, ty)) v = v != 0; + return (i64)v; +} + +int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub, r; + if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + ub = api_mask_width((u64)b, width); + r = 0; + switch (op) { + case BO_IADD: + r = ua + ub; + break; + case BO_ISUB: + r = ua - ub; + break; + case BO_IMUL: + r = ua * ub; + break; + case BO_AND: + r = ua & ub; + break; + case BO_OR: + r = ua | ub; + break; + case BO_XOR: + r = ua ^ ub; + break; + case BO_SHL: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + r = ua << sh; + break; + } + case BO_SHR_U: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + r = ua >> sh; + break; + } + case BO_SHR_S: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + if (!sh) { + r = ua; + } else { + u64 sign = 1ull << (width - 1u); + r = ua >> sh; + if (ua & sign) r |= api_width_mask(width) << (width - sh); + } + break; + } + default: + return 0; + } + *out = api_fold_result(g->c, ty, r, width); + return 1; +} + +int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, + i64* out) { + u32 width; + u64 ua, r; + if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + switch (op) { + case UO_NEG: + r = 0u - ua; + break; + case UO_NOT: + r = ua == 0; + break; + case UO_BNOT: + r = ~ua; + break; + default: + return 0; + } + *out = api_fold_result(g->c, ty, r, width); + return 1; +} + +int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub; + i64 sa, sb; + int r; + if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + ub = api_mask_width((u64)b, width); + sa = api_sign_extend_width(ua, width); + sb = api_sign_extend_width(ub, width); + switch (op) { + case CMP_EQ: + r = ua == ub; + break; + case CMP_NE: + r = 
ua != ub; + break; + case CMP_LT_S: + r = sa < sb; + break; + case CMP_LE_S: + r = sa <= sb; + break; + case CMP_GT_S: + r = sa > sb; + break; + case CMP_GE_S: + r = sa >= sb; + break; + case CMP_LT_U: + r = ua < ub; + break; + case CMP_LE_U: + r = ua <= ub; + break; + case CMP_GT_U: + r = ua > ub; + break; + case CMP_GE_U: + r = ua >= ub; + break; + default: + return 0; + } + *out = r ? 1 : 0; + return 1; +} + +int api_source_flags_addr_taken(u32 flags); +ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local); + +void api_local_const_clear(ApiSourceLocal* rec) { + if (!rec) return; + rec->const_valid = 0; + rec->const_value = 0; +} + +void api_local_const_clear_all(CfreeCg* g) { + if (!g) return; + for (u32 i = 0; i < g->nlocals; ++i) api_local_const_clear(&g->locals[i]); +} + +void api_local_const_memory_boundary(CfreeCg* g) { + api_local_const_clear_all(g); +} + +void api_local_const_control_boundary(CfreeCg* g) { + api_local_const_clear_all(g); +} + +void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local) { + api_local_const_clear_all(g); + api_local_const_clear(api_local_from_handle(g, local)); +} + +Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty) { + CGTarget* T; + ApiSourceLocal* rec; + Reg r; + Operand dst; + api_local_const_address_taken(g, v->source_local); + api_ensure_reg(g, v); + if (!api_is_lvalue_sv(v)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: addr operand is not an lvalue"); + } + T = g->target; + r = api_alloc_reg_or_spill(g, RC_INT, pty); + dst = api_op_reg(r, pty); + rec = v->source_local != CFREE_CG_LOCAL_NONE + ? 
api_local_from_handle(g, v->source_local) + : NULL; + if (rec && rec->storage.kind == CG_LOCAL_STORAGE_REG && T->local_addr) + T->local_addr(T, dst, &rec->desc, rec->storage); + else + T->addr_of(T, dst, v->op); + return dst; +} + +int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, + CfreeCgMemAccess access) { + u32 width; + CfreeCgTypeId ty; + u64 access_size; + u64 local_size; + if (!g || !rec) return 0; + if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0; + if (api_source_flags_addr_taken(rec->attrs.flags)) return 0; + if (access.flags & CFREE_CG_MEM_VOLATILE) return 0; + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (ty != rec->type) return 0; + access_size = abi_cg_sizeof(g->c->abi, ty); + local_size = abi_cg_sizeof(g->c->abi, rec->type); + if (access_size != local_size) return 0; + return api_foldable_int_like_type(g->c, ty, &width); +} + +void api_local_const_store(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, i64 value) { + ApiSourceLocal* rec = api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + if (!api_local_const_can_track(g, rec, access)) { + api_local_const_clear(rec); + return; + } + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) { + api_local_const_clear(rec); + return; + } + rec->const_value = api_fold_result(g->c, ty, (u64)value, width); + rec->const_valid = 1; +} + +int api_local_const_load(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, Operand* out) { + ApiSourceLocal* rec = api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + if (!out || !api_local_const_can_track(g, rec, access)) return 0; + if (!rec->const_valid) return 0; + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) return 0; + *out = + api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); + return 1; +} + +int 
api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags) { + u32 width; + return g && !flags && api_foldable_int_type(g->c, ty, &width); +} + +int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) { + u32 width; + u64 v; + if (!api_foldable_int_type(g->c, ty, &width)) return 0; + v = api_mask_width((u64)imm, width); + switch (op) { + case BO_IADD: + case BO_ISUB: + case BO_OR: + case BO_XOR: + return v == 0; + case BO_AND: + return v == api_width_mask(width); + default: + return 0; + } +} + +int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, + ApiSValue* out) { + if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && + a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { + *out = api_make_sv_with_reg_ownership(a->op, ty, + api_sv_owns_operand_reg(a, &a->op)); + a->res = RES_INHERENT; + return 1; + } + if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && + b->op.kind != OPK_IMM && + (op == BO_IADD || op == BO_OR || op == BO_XOR || op == BO_AND) && + api_op_is_int_identity(g, op, ty, a->op.v.imm)) { + *out = api_make_sv_with_reg_ownership(b->op, ty, + api_sv_owns_operand_reg(b, &b->op)); + b->res = RES_INHERENT; + return 1; + } + return 0; +} + +int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out) { + i64 folded; + BinOp result_op; + if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || + a->delayed.arith.a.kind != OPK_REG || + a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND || + b->op.kind != OPK_IMM) { + return 0; + } + result_op = a->delayed.arith.bin_op; + switch (a->delayed.arith.bin_op) { + case BO_IADD: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if 
(!api_try_fold_int_binop(g, BO_ISUB, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else { + return 0; + } + break; + case BO_ISUB: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, + a->delayed.arith.b.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_ISUB; + } else { + return 0; + } + break; + case BO_XOR: + if (op != BO_XOR || + !api_try_fold_int_binop(g, BO_XOR, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_XOR; + break; + case BO_AND: + if (op != BO_AND || + !api_try_fold_int_binop(g, BO_AND, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_AND; + break; + case BO_OR: + if (op != BO_OR || + !api_try_fold_int_binop(g, BO_OR, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_OR; + break; + default: + return 0; + } + if (api_op_is_int_identity(g, result_op, ty, folded)) { + *out = api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, + a->delayed.arith.a_owned); + a->delayed.arith.a_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + return 1; + } + a->delayed.arith.bin_op = result_op; + a->delayed.arith.b.v.imm = folded; + *out = *a; + a->delayed.arith.a_owned = 0; + a->delayed.arith.b_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + memset(&a->delayed.arith.b, 0, sizeof a->delayed.arith.b); + return 1; +} + +int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, + ApiSValue* out) { + if (op != UO_BNOT || a->kind != SV_ARITH || + a->delayed.arith.kind != API_DELAYED_UNOP || + a->delayed.arith.un_op != UO_BNOT || a->delayed.arith.a.kind != OPK_REG) { + return 0; + } + *out = api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, + a->delayed.arith.a_owned); + 
a->delayed.arith.a_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + return 1; +} + +/* ---- C-symbol mangling ---- */ diff --git a/src/cg/wide.c b/src/cg/wide.c @@ -0,0 +1,192 @@ +#include "cg/internal.h" + +FrameSlot api_f128_temp_slot(CfreeCg* g, CfreeCgTypeId ty) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ty; + fsd.size = 16; + fsd.align = 16; + fsd.kind = FS_LOCAL; + fsd.flags = FSF_ADDR_TAKEN; + return g->target->frame_slot(g->target, &fsd); +} + +u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes) { + u64 v = 0; + for (u32 i = 0; i < 8; ++i) { + u32 shift = g->c->target.big_endian ? (7u - i) * 8u : i * 8u; + v |= (u64)bytes[i] << shift; + } + return v; +} + +void api_store_f128_bytes(CfreeCg* g, FrameSlot slot, CfreeCgTypeId ty, + const u8 bytes[16]) { + CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); + Reg ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); + Operand base = api_op_reg(ar, ptr_ty); + MemAccess ma; + memset(&ma, 0, sizeof ma); + ma.type = i64_ty; + ma.size = 8; + ma.align = 8; + g->target->addr_of(g->target, base, api_op_local(slot, ty)); + g->target->store(g->target, api_op_indirect(ar, 0, i64_ty), + api_op_imm((i64)api_u64_from_target_bytes(g, bytes), i64_ty), + ma); + g->target->store( + g->target, api_op_indirect(ar, 8, i64_ty), + api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), i64_ty), ma); + api_free_reg(g, ar, RC_INT); +} + +void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]) { + union { + double d; + u64 u; + } in; + u64 lo = 0; + u64 hi = 0; + u64 frac; + u32 sign; + u32 exp; + in.d = value; + sign = (u32)(in.u >> 63); + exp = (u32)((in.u >> 52) & 0x7ffu); + frac = in.u & 0x000fffffffffffffull; + if (sign) hi |= 1ull << 63; + if (exp == 0x7ffu) { + hi |= (u64)0x7fffu << 48; + if (frac) { + lo |= (frac & 0xfu) << 60; + hi |= frac >> 4; + hi |= 1ull << 47; + } + } else if (exp != 0 || frac != 
0) { + i32 e; + u64 sig; + if (exp == 0) { + e = -1022; + sig = frac; + while ((sig & (1ull << 52)) == 0) { + sig <<= 1; + --e; + } + frac = sig & 0x000fffffffffffffull; + } else { + e = (i32)exp - 1023; + } + hi |= (u64)(u32)(e + 16383) << 48; + lo |= (frac & 0xfu) << 60; + hi |= frac >> 4; + } + for (u32 i = 0; i < 16; ++i) { + if (g->c->target.big_endian) { + u64 lane = i < 8u ? hi : lo; + u32 shift = (7u - (i & 7u)) * 8u; + out[i] = (u8)(lane >> shift); + } else { + u64 lane = i < 8u ? lo : hi; + u32 shift = (i & 7u) * 8u; + out[i] = (u8)(lane >> shift); + } + } +} + +ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty) { + u8 bytes[16]; + FrameSlot slot; + api_encode_binary128_from_double(g, value, bytes); + slot = api_f128_temp_slot(g, ty); + api_store_f128_bytes(g, slot, ty, bytes); + return api_make_lv(api_op_local(slot, ty), ty); +} + +ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, + CfreeCgTypeId ty) { + if (v->op.kind == OPK_LOCAL || v->op.kind == OPK_INDIRECT) { + v->type = ty; + v->op.type = ty; + v->lvalue = 1; + return *v; + } + if (v->op.kind == OPK_GLOBAL) { + FrameSlot slot = api_f128_temp_slot(g, ty); + Operand dst_lv = api_op_local(slot, ty); + Operand dst_addr; + Operand src_addr; + AggregateAccess agg; + ApiSValue tmp = api_make_lv(dst_lv, ty); + ApiSValue src = api_make_lv(v->op, ty); + dst_addr = api_lvalue_addr(g, &tmp, cg_type_ptr_to(g->c, ty)); + src_addr = api_lvalue_addr(g, &src, cg_type_ptr_to(g->c, ty)); + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = 16; + g->target->copy_bytes(g->target, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + return api_make_lv(dst_lv, ty); + } + if (v->op.kind == OPK_REG) { + FrameSlot slot = api_f128_temp_slot(g, ty); + Operand dst = api_op_local(slot, ty); + g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty)); + return api_make_lv(dst, ty); + } + if (v->op.kind == 
OPK_IMM) { + u8 bytes[16]; + u64 lo = (u64)v->op.v.imm; + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 idx = g->c->target.big_endian ? 15u - i : i; + bytes[idx] = (u8)(lo >> (i * 8u)); + } + FrameSlot slot = api_f128_temp_slot(g, ty); + api_store_f128_bytes(g, slot, ty, bytes); + return api_make_lv(api_op_local(slot, ty), ty); + } + compiler_panic( + g->c, g->cur_loc, + "CfreeCg: 16-byte scalar value is not addressable (kind %u, op %u)", + (unsigned)v->kind, (unsigned)v->op.kind); + return *v; +} + +CfreeCgSym api_runtime_helper(CfreeCg* g, const char* name, CfreeCgTypeId ret, + const CfreeCgTypeId* params, u32 nparams) { + CfreeCgFuncParam ps[3]; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + if (nparams > 3) return CFREE_CG_SYM_NONE; + memset(ps, 0, sizeof ps); + for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i]; + memset(&sig, 0, sizeof sig); + sig.ret = ret; + sig.params = ps; + sig.nparams = nparams; + sig.call_conv = CFREE_CG_CC_TARGET_C; + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = pool_intern_cstr(g->c->global, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func((CfreeCompiler*)g->c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + return cfree_cg_decl(g, decl); +} + +void api_runtime_call_values(CfreeCg* g, const char* name, CfreeCgTypeId ret, + const CfreeCgTypeId* params, u32 nparams, + ApiSValue* args) { + CfreeCgCallAttrs attrs; + CfreeCgSym sym = api_runtime_helper(g, name, ret, params, nparams); + memset(&attrs, 0, sizeof attrs); + for (u32 i = 0; i < nparams; ++i) api_push(g, args[i]); + api_call_symbol_common(g, sym, nparams, attrs); +} + +/* ============================================================ + * Locals and params + * ============================================================ */ diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -14,15 +14,13 @@ * they're meaningless for opt's vreg space — calling them 
is a * wiring bug, so we panic loudly. */ -#include "opt/opt.h" - #include <string.h> -#include "arch/regalloc.h" #include "core/arena.h" #include "core/core.h" #include "core/metrics.h" #include "opt/ir.h" +#include "opt/opt_internal.h" /* ---- wrapper state ---- */ @@ -381,15 +379,13 @@ static void opt_frame_home_addr_taken_locals(Func* f) { IRLocal* used = &f->locals[j]; if (!used->address_taken || used->home_slot == FRAME_SLOT_NONE) continue; - if (used->storage.kind != CG_LOCAL_STORAGE_REG) - continue; + if (used->storage.kind != CG_LOCAL_STORAGE_REG) continue; if (inst_uses_local_reg(&in, used->storage.v.reg)) opt_make_local_load(f, &out[nout++], used, in.loc); } out[nout++] = in; IRLocal* defined = opt_addr_taken_reg_local_defined_by(f, &in); - if (defined) - opt_make_local_store(f, &out[nout++], defined, in.loc); + if (defined) opt_make_local_store(f, &out[nout++], defined, in.loc); } bl->insts = out; bl->ninsts = nout; @@ -496,8 +492,7 @@ static int w_is_caller_saved(CGTarget* t, RegClass cls, Reg r) { return 0; } -static u32 w_call_clobber_mask(CGTarget* t, const CGCallDesc* d, - RegClass cls) { +static u32 w_call_clobber_mask(CGTarget* t, const CGCallDesc* d, RegClass cls) { CGTarget* wr = impl_of(t)->target; if (wr->call_clobber_mask) return wr->call_clobber_mask(wr, d, cls); return 0; @@ -918,8 +913,7 @@ static void w_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (wr->emit_call_plan) wr->emit_call_plan(wr, p); } -static void w_load_call_arg(CGTarget* t, Operand dst, - const CGCallPlanMove* m) { +static void w_load_call_arg(CGTarget* t, Operand dst, const CGCallPlanMove* m) { CGTarget* wr = impl_of(t)->target; if (wr->load_call_arg) wr->load_call_arg(wr, dst, m); } @@ -1173,1051 +1167,6 @@ static void w_set_loc(CGTarget* t, SrcLoc loc) { o->pending_loc = loc; } -/* ============================================================ - * Replay: walk the recorded Func and emit to the wrapped target. 
- * ============================================================ */ - -typedef struct ReplayCtx { - Compiler* c; - Func* f; - CGTarget* tgt; - Reg* val_to_reg; - FrameSlot* slot_map; - Label* label_map; - CGScope* scope_map; - u8* val_alloced; - u8* block_label_placed; - u8 identity_regs; - CGSimpleRegAlloc regalloc; -} ReplayCtx; - -static Reg val_to_target_reg(ReplayCtx* r, Val v) { - Func* f = r->f; - if (v == VAL_NONE) return REG_NONE; - if (r->identity_regs) return (Reg)v; - if (v >= f->nvals) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(r->c, loc, "opt replay: Val %u out of range", v); - } - if (!r->val_alloced[v]) { - r->val_to_reg[v] = - cg_simple_regalloc_alloc(&r->regalloc, (RegClass)f->val_cls[v]); - if (r->val_to_reg[v] == (Reg)REG_NONE) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(r->c, loc, "opt replay: hard reg pool exhausted"); - } - r->val_alloced[v] = 1; - } - return r->val_to_reg[v]; -} - -static FrameSlot slot_to_target(ReplayCtx* r, FrameSlot vs) { - if (vs == FRAME_SLOT_NONE) return FRAME_SLOT_NONE; - if (vs >= r->f->nframe_slots + 1u) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(r->c, loc, "opt replay: vslot %u out of range", - (unsigned)vs); - } - return r->slot_map[vs]; -} - -static CGLocalStorage xlat_storage(ReplayCtx* r, CGLocalStorage st, - CfreeCgTypeId ty) { - (void)ty; - if (st.kind == CG_LOCAL_STORAGE_REG) { - Val v = (Val)st.v.reg; - if (r->identity_regs && r->f->opt_rewritten && v < r->f->nvals && - r->f->val_info) { - OptValInfo* vi = &r->f->val_info[v]; - if (vi->alloc_kind == OPT_ALLOC_HARD) { - st.v.reg = vi->hard_reg; - } else if (vi->alloc_kind == OPT_ALLOC_SPILL) { - st.kind = CG_LOCAL_STORAGE_FRAME; - st.v.frame_slot = slot_to_target(r, vi->spill_slot); - } else { - st.v.reg = val_to_target_reg(r, v); - } - } else { - st.v.reg = val_to_target_reg(r, v); - } - } else { - st.v.frame_slot = slot_to_target(r, st.v.frame_slot); - } - return st; -} - -static int replay_reg_storage_unused(ReplayCtx* r, CGLocalStorage st) { - if 
(!r || st.kind != CG_LOCAL_STORAGE_REG) return 0; - if (!(r->identity_regs && r->f->opt_rewritten && r->f->val_info)) return 0; - Val v = (Val)st.v.reg; - if (v == VAL_NONE || v >= r->f->nvals) return 0; - return r->f->val_info[v].alloc_kind == OPT_ALLOC_NONE || - r->f->val_info[v].use_freq == 0; -} - -static Operand xlat_op(ReplayCtx* r, Operand op) { - switch ((OpKind)op.kind) { - case OPK_IMM: - case OPK_GLOBAL: - return op; - case OPK_REG: - if (r->identity_regs && r->f->opt_rewritten) return op; - op.v.reg = val_to_target_reg(r, (Val)op.v.reg); - return op; - case OPK_LOCAL: - op.v.frame_slot = slot_to_target(r, op.v.frame_slot); - return op; - case OPK_INDIRECT: - if (!(r->identity_regs && r->f->opt_rewritten)) - op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base); - return op; - } - return op; -} - -static CGABIValue xlat_abivalue(ReplayCtx* r, const CGABIValue* in, - CGABIPart* parts_out) { - CGABIValue out = *in; - out.storage = xlat_op(r, in->storage); - if (in->nparts && parts_out) { - for (u32 i = 0; i < in->nparts; ++i) { - parts_out[i] = in->parts[i]; - parts_out[i].op = xlat_op(r, in->parts[i].op); - } - out.parts = parts_out; - } else { - out.parts = NULL; - } - return out; -} - -typedef struct ReplayParallelMove { - Operand dst; - Operand src; - MemAccess mem; - const CGCallPlanRet* ret; - u32 src_offset; - u32 dst_offset; - u32 stack_offset; - u8 dst_kind; - u8 src_kind; - u8 is_ret; - u8 done; -} ReplayParallelMove; - -static Operand phys_reg_operand(Reg r, RegClass cls, CfreeCgTypeId ty) { - Operand op; - memset(&op, 0, sizeof op); - op.kind = OPK_REG; - op.cls = (u8)cls; - op.type = ty; - op.v.reg = r; - return op; -} - -static int operand_reg_eq(const Operand* a, const Operand* b) { - return a && b && a->kind == OPK_REG && b->kind == OPK_REG && - a->cls == b->cls && a->v.reg == b->v.reg; -} - -static int operand_uses_reg_for_replay(const Operand* op, const Operand* r) { - if (!op || !r || r->kind != OPK_REG) return 0; - if (op->kind == 
OPK_REG) return operand_reg_eq(op, r); - if (op->kind == OPK_INDIRECT) - return r->cls == RC_INT && op->v.ind.base == r->v.reg; - return 0; -} - -static int replay_move_src_ready(const ReplayParallelMove* moves, u32 n, - u32 idx) { - const Operand* dst = &moves[idx].dst; - for (u32 i = 0; i < n; ++i) { - if (i == idx || moves[i].done) continue; - if (operand_uses_reg_for_replay(&moves[i].src, dst)) return 0; - } - return 1; -} - -static int replay_find_move_dst(const ReplayParallelMove* moves, u32 n, - const Operand* dst) { - for (u32 i = 0; i < n; ++i) { - if (!moves[i].done && operand_reg_eq(&moves[i].dst, dst)) return (int)i; - } - return -1; -} - -static Reg replay_scratch_reg(ReplayCtx* r, RegClass cls, Reg avoid) { - if ((u32)cls >= OPT_REG_CLASSES) return REG_NONE; - for (u32 i = 0; i < r->f->opt_scratch_reg_count[cls]; ++i) { - Reg sr = r->f->opt_scratch_regs[cls][i]; - if (sr != avoid) return sr; - } - return REG_NONE; -} - -static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) { - Operand dst = move->dst; - Operand src = move->src; - MemAccess mem = move->mem; - if (move->dst_kind == CG_CALL_PLAN_STACK || - move->dst_kind == CG_CALL_PLAN_TAIL_STACK) { - CGCallPlanMove m; - memset(&m, 0, sizeof m); - m.src = src; - m.src_kind = move->src_kind; - m.dst_kind = move->dst_kind; - m.cls = dst.cls; - m.src_offset = move->src_offset; - m.stack_offset = move->stack_offset; - m.mem = mem; - w->store_call_arg(w, &m); - } else if (dst.kind == OPK_REG) { - if (move->src_kind == CG_CALL_PLAN_SRC_ADDR || move->src_offset) { - CGCallPlanMove m; - memset(&m, 0, sizeof m); - m.src = src; - m.src_kind = move->src_kind; - m.dst_kind = CG_CALL_PLAN_REG; - m.cls = dst.cls; - m.dst_reg = dst.v.reg; - m.src_offset = move->src_offset; - m.mem = mem; - w->load_call_arg(w, dst, &m); - return; - } - if (src.kind == OPK_REG) { - if (!operand_reg_eq(&dst, &src)) w->copy(w, dst, src); - } else if (src.kind == OPK_IMM) { - w->load_imm(w, dst, src.v.imm); - } else if 
(src.kind == OPK_LOCAL || src.kind == OPK_INDIRECT) { - w->load(w, dst, src, mem); - } else if (src.kind == OPK_GLOBAL) { - w->addr_of(w, dst, src); - } - } else if (dst.kind == OPK_LOCAL || dst.kind == OPK_INDIRECT) { - if (move->is_ret && move->dst_offset) { - CGCallPlanRet ret = move->ret ? *move->ret : (CGCallPlanRet){0}; - ret.dst = dst; - ret.dst_offset = move->dst_offset; - ret.mem = mem; - w->store_call_ret(w, &ret, src); - return; - } - w->store(w, dst, src, mem); - } -} - -static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves, - u32 n) { - CGTarget* w = r->tgt; - u32 remaining = 0; - for (u32 i = 0; i < n; ++i) { - if (operand_reg_eq(&moves[i].dst, &moves[i].src)) { - moves[i].done = 1; - } else { - ++remaining; - } - } - - while (remaining) { - int progressed = 0; - for (u32 i = 0; i < n; ++i) { - if (moves[i].done || !replay_move_src_ready(moves, n, i)) continue; - replay_emit_move(w, &moves[i]); - moves[i].done = 1; - --remaining; - progressed = 1; - } - if (progressed) continue; - - for (u32 i = 0; i < n; ++i) { - if (moves[i].done || moves[i].src.kind == OPK_REG) continue; - Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, REG_NONE); - if (sr == (Reg)REG_NONE) continue; - Operand tmp = phys_reg_operand(sr, (RegClass)moves[i].dst.cls, - moves[i].dst.type); - ReplayParallelMove tmp_move = moves[i]; - tmp_move.dst = tmp; - tmp_move.dst_kind = CG_CALL_PLAN_REG; - replay_emit_move(w, &tmp_move); - moves[i].src = tmp; - moves[i].src_kind = CG_CALL_PLAN_SRC_VALUE; - moves[i].src_offset = 0; - progressed = 1; - break; - } - if (progressed) continue; - - u32 first = 0; - while (first < n && moves[first].done) ++first; - if (first == n) break; - Operand save = moves[first].src; - Reg sr = replay_scratch_reg(r, (RegClass)save.cls, REG_NONE); - if (sr == (Reg)REG_NONE) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(r->c, loc, - "opt replay: no scratch register for parallel call move"); - } - Operand tmp = phys_reg_operand(sr, 
(RegClass)save.cls, save.type); - w->copy(w, tmp, save); - - Operand hole = save; - for (;;) { - int idx = replay_find_move_dst(moves, n, &hole); - if (idx < 0 || (u32)idx == first) break; - replay_emit_move(w, &moves[idx]); - hole = moves[idx].src; - moves[idx].done = 1; - --remaining; - } - moves[first].src = tmp; - moves[first].src_kind = CG_CALL_PLAN_SRC_VALUE; - moves[first].src_offset = 0; - replay_emit_move(w, &moves[first]); - moves[first].done = 1; - --remaining; - } -} - -static int replay_plan_supported(CGTarget* w, const CGCallPlan* p) { - if (!p) return 0; - for (u32 i = 0; i < p->nargs; ++i) { - if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK || - p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && - !w->store_call_arg) - return 0; - if (p->args[i].dst_kind == CG_CALL_PLAN_REG && - (p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR || - p->args[i].src_offset) && - !w->load_call_arg) - return 0; - } - for (u32 i = 0; i < p->nrets; ++i) - if (p->rets[i].dst.kind != OPK_REG && p->rets[i].dst.kind != OPK_LOCAL && - p->rets[i].dst.kind != OPK_INDIRECT) - return 0; - for (u32 i = 0; i < p->nrets; ++i) - if (p->rets[i].dst_offset && - (p->rets[i].dst.kind == OPK_LOCAL || - p->rets[i].dst.kind == OPK_INDIRECT) && - !w->store_call_ret) - return 0; - return 1; -} - -static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) { - const CGCallPlan* src_plan = &aux->plan; - CGCallPlan plan = *src_plan; - plan.callee = xlat_op(r, src_plan->callee); - plan.args = src_plan->nargs - ? arena_array(r->f->arena, CGCallPlanMove, src_plan->nargs) - : NULL; - plan.rets = src_plan->nrets - ? arena_array(r->f->arena, CGCallPlanRet, src_plan->nrets) - : NULL; - - ReplayParallelMove* arg_moves = - src_plan->nargs ? 
arena_zarray(r->f->arena, ReplayParallelMove, - src_plan->nargs) - : NULL; - u32 nargs = 0; - for (u32 i = 0; i < src_plan->nargs; ++i) { - plan.args[i] = src_plan->args[i]; - plan.args[i].src = xlat_op(r, src_plan->args[i].src); - if ((src_plan->flags & CG_CALL_TAIL) && - plan.args[i].dst_kind == CG_CALL_PLAN_STACK) { - plan.args[i].dst_kind = CG_CALL_PLAN_TAIL_STACK; - } - Operand dst; - if (plan.args[i].dst_kind == CG_CALL_PLAN_REG) { - dst = phys_reg_operand(plan.args[i].dst_reg, - (RegClass)plan.args[i].cls, - plan.args[i].mem.type); - } else { - memset(&dst, 0, sizeof dst); - dst.kind = OPK_LOCAL; - dst.cls = plan.args[i].cls; - dst.type = plan.args[i].mem.type; - } - arg_moves[nargs].dst = dst; - arg_moves[nargs].src = plan.args[i].src; - arg_moves[nargs].mem = plan.args[i].mem; - arg_moves[nargs].src_offset = plan.args[i].src_offset; - arg_moves[nargs].stack_offset = plan.args[i].stack_offset; - arg_moves[nargs].dst_kind = plan.args[i].dst_kind; - arg_moves[nargs].src_kind = plan.args[i].src_kind; - ++nargs; - } - - Reg callee_scratch = REG_NONE; - if (plan.callee.kind == OPK_REG) { - for (u32 i = 0; i < nargs; ++i) { - if (arg_moves[i].dst_kind != CG_CALL_PLAN_REG || - !operand_reg_eq(&arg_moves[i].dst, &plan.callee)) - continue; - callee_scratch = replay_scratch_reg(r, RC_INT, REG_NONE); - if (callee_scratch == (Reg)REG_NONE) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(r->c, loc, - "opt replay: no scratch register for indirect call"); - } - Operand tmp = phys_reg_operand(callee_scratch, RC_INT, plan.callee.type); - r->tgt->copy(r->tgt, tmp, plan.callee); - plan.callee = tmp; - break; - } - } - - replay_parallel_moves(r, arg_moves, nargs); - r->tgt->emit_call_plan(r->tgt, &plan); - - if (plan.flags & CG_CALL_TAIL) return; - - ReplayParallelMove* ret_moves = - src_plan->nrets ? 
arena_zarray(r->f->arena, ReplayParallelMove, - src_plan->nrets) - : NULL; - u32 nrets = 0; - for (u32 i = 0; i < src_plan->nrets; ++i) { - plan.rets[i] = src_plan->rets[i]; - plan.rets[i].dst = xlat_op(r, src_plan->rets[i].dst); - Operand src = phys_reg_operand(plan.rets[i].src_reg, - (RegClass)plan.rets[i].cls, - plan.rets[i].mem.type); - ret_moves[nrets].dst = plan.rets[i].dst; - ret_moves[nrets].src = src; - ret_moves[nrets].mem = plan.rets[i].mem; - ret_moves[nrets].ret = &plan.rets[i]; - ret_moves[nrets].dst_offset = plan.rets[i].dst_offset; - ret_moves[nrets].dst_kind = CG_CALL_PLAN_REG; - ret_moves[nrets].src_kind = CG_CALL_PLAN_SRC_VALUE; - ret_moves[nrets].is_ret = 1; - ++nrets; - } - replay_parallel_moves(r, ret_moves, nrets); -} - -static Label ensure_label(ReplayCtx* r, u32 b) { - if (b >= r->f->nblocks) return LABEL_NONE; - if (r->label_map[b] == LABEL_NONE) { - r->label_map[b] = r->tgt->label_new(r->tgt); - } - return r->label_map[b]; -} - -static void ensure_label_placed(ReplayCtx* r, u32 b) { - if (r->block_label_placed[b]) return; - r->block_label_placed[b] = 1; - if (b == r->f->entry) return; - Label l = ensure_label(r, b); - r->tgt->label_place(r->tgt, l); -} - -static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { - CGTarget* w = r->tgt; - w->set_loc(w, in->loc); - - switch ((IROp)in->op) { - case IR_NOP: - case IR_CONST_I: - case IR_CONST_BYTES: - case IR_PARAM_DECL: - case IR_PHI: - case IR_CONDBR: - break; - case IR_ASM_BLOCK: { - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - Operand* in_ops_ = NULL; - Operand* out_ops_ = NULL; - if (aux->nin) { - in_ops_ = arena_array(r->f->arena, Operand, aux->nin); - for (u32 k = 0; k < aux->nin; ++k) { - in_ops_[k] = xlat_op(r, aux->in_ops[k]); - } - } - if (aux->nout) { - out_ops_ = arena_array(r->f->arena, Operand, aux->nout); - for (u32 k = 0; k < aux->nout; ++k) { - out_ops_[k] = xlat_op(r, aux->out_ops[k]); - } - } - w->asm_block(w, aux->tmpl, aux->outs, aux->nout, out_ops_, aux->ins, - aux->nin, 
in_ops_, aux->clobbers, aux->nclob); - break; - } - case IR_LOAD_IMM: { - Operand dst = xlat_op(r, in->opnds[0]); - w->load_imm(w, dst, in->extra.imm); - break; - } - case IR_LOAD_CONST: { - Operand dst = xlat_op(r, in->opnds[0]); - w->load_const(w, dst, in->extra.cbytes); - break; - } - case IR_COPY: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - w->copy(w, dst, src); - break; - } - case IR_LOAD: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand addr = xlat_op(r, in->opnds[1]); - w->load(w, dst, addr, in->extra.mem); - break; - } - case IR_STORE: { - Operand addr = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - w->store(w, addr, src, in->extra.mem); - break; - } - case IR_ADDR_OF: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand lv = xlat_op(r, in->opnds[1]); - w->addr_of(w, dst, lv); - break; - } - case IR_TLS_ADDR_OF: { - Operand dst = xlat_op(r, in->opnds[0]); - IRTlsAux* aux = (IRTlsAux*)in->extra.aux; - w->tls_addr_of(w, dst, aux->sym, aux->addend); - break; - } - case IR_AGG_COPY: { - Operand a = xlat_op(r, in->opnds[0]); - Operand bo = xlat_op(r, in->opnds[1]); - IRAggAux* aux = (IRAggAux*)in->extra.aux; - w->copy_bytes(w, a, bo, aux->access); - break; - } - case IR_AGG_SET: { - Operand a = xlat_op(r, in->opnds[0]); - Operand bo = xlat_op(r, in->opnds[1]); - IRAggAux* aux = (IRAggAux*)in->extra.aux; - w->set_bytes(w, a, bo, aux->access); - break; - } - case IR_BITFIELD_LOAD: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand rec_ = xlat_op(r, in->opnds[1]); - IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; - w->bitfield_load(w, dst, rec_, aux->access); - break; - } - case IR_BITFIELD_STORE: { - Operand rec_ = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; - w->bitfield_store(w, rec_, src, aux->access); - break; - } - case IR_BINOP: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand a = xlat_op(r, 
in->opnds[1]); - Operand bo = xlat_op(r, in->opnds[2]); - w->binop(w, (BinOp)in->extra.imm, dst, a, bo); - break; - } - case IR_UNOP: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand a = xlat_op(r, in->opnds[1]); - w->unop(w, (UnOp)in->extra.imm, dst, a); - break; - } - case IR_CMP: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand a = xlat_op(r, in->opnds[1]); - Operand bo = xlat_op(r, in->opnds[2]); - w->cmp(w, (CmpOp)in->extra.imm, dst, a, bo); - break; - } - case IR_CONVERT: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - w->convert(w, (ConvKind)in->extra.imm, dst, src); - break; - } - case IR_CALL: { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (aux && aux->use_plan_replay && w->emit_call_plan && - replay_plan_supported(w, &aux->plan)) { - replay_planned_call(r, aux); - break; - } - compiler_panic(r->c, in->loc, - "opt replay: call has no supported call plan"); - break; - } - case IR_BR: { - Block* bl = &r->f->blocks[b]; - if (bl->nsucc < 1) break; - Label l = ensure_label(r, bl->succ[0]); - w->jump(w, l); - break; - } - case IR_CMP_BRANCH: { - Operand a = xlat_op(r, in->opnds[0]); - Operand bo = xlat_op(r, in->opnds[1]); - Block* bl = &r->f->blocks[b]; - Label taken = ensure_label(r, bl->succ[0]); - w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken); - break; - } - case IR_RET: { - IRRetAux* aux = (IRRetAux*)in->extra.aux; - if (!aux || !aux->present) { - w->ret(w, NULL); - } else { - CGABIPart* parts = aux->val.nparts ? arena_array(r->f->arena, CGABIPart, - aux->val.nparts) - : NULL; - CGABIValue v = xlat_abivalue(r, &aux->val, parts); - w->ret(w, &v); - } - break; - } - case IR_SCOPE_BEGIN: { - IRScopeAux* aux = (IRScopeAux*)in->extra.aux; - CGScopeDesc d = aux->desc; - d.cond = xlat_op(r, d.cond); - if (aux->desc.kind == SCOPE_LOOP || aux->desc.kind == SCOPE_BLOCK) { - d.break_label = aux->loop_break_block - ? 
ensure_label(r, aux->loop_break_block) - : LABEL_NONE; - d.continue_label = aux->loop_continue_block - ? ensure_label(r, aux->loop_continue_block) - : LABEL_NONE; - } - CGScope cs = w->scope_begin(w, &d); - r->scope_map[aux->scope_id] = cs; - break; - } - case IR_SCOPE_ELSE: - w->scope_else(w, r->scope_map[(u32)in->extra.imm]); - break; - case IR_SCOPE_END: - w->scope_end(w, r->scope_map[(u32)in->extra.imm]); - break; - case IR_BREAK_TO: - w->break_to(w, r->scope_map[(u32)in->extra.imm]); - break; - case IR_CONTINUE_TO: - w->continue_to(w, r->scope_map[(u32)in->extra.imm]); - break; - case IR_ALLOCA: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand size = xlat_op(r, in->opnds[1]); - w->alloca_(w, dst, size, (u32)in->extra.imm); - break; - } - case IR_VA_START: { - Operand ap = xlat_op(r, in->opnds[0]); - w->va_start_(w, ap); - break; - } - case IR_VA_ARG: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand ap = xlat_op(r, in->opnds[1]); - CfreeCgTypeId ty = (CfreeCgTypeId)(uintptr_t)in->extra.aux; - w->va_arg_(w, dst, ap, ty); - break; - } - case IR_VA_END: { - Operand ap = xlat_op(r, in->opnds[0]); - w->va_end_(w, ap); - break; - } - case IR_VA_COPY: { - Operand a = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - w->va_copy_(w, a, src); - break; - } - case IR_ATOMIC_LOAD: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand addr = xlat_op(r, in->opnds[1]); - IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; - w->atomic_load(w, dst, addr, aux->mem, aux->mo); - break; - } - case IR_ATOMIC_STORE: { - Operand addr = xlat_op(r, in->opnds[0]); - Operand src = xlat_op(r, in->opnds[1]); - IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; - w->atomic_store(w, addr, src, aux->mem, aux->mo); - break; - } - case IR_ATOMIC_RMW: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand addr = xlat_op(r, in->opnds[1]); - Operand val = xlat_op(r, in->opnds[2]); - IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; - w->atomic_rmw(w, (AtomicOp)aux->op, dst, addr, 
val, aux->mem, aux->mo); - break; - } - case IR_ATOMIC_CAS: { - Operand prior = xlat_op(r, in->opnds[0]); - Operand ok = xlat_op(r, in->opnds[1]); - Operand addr = xlat_op(r, in->opnds[2]); - Operand expected = xlat_op(r, in->opnds[3]); - Operand desired = xlat_op(r, in->opnds[4]); - IRCasAux* aux = (IRCasAux*)in->extra.aux; - w->atomic_cas(w, prior, ok, addr, expected, desired, aux->mem, - aux->success, aux->failure); - break; - } - case IR_FENCE: - w->fence(w, (MemOrder)in->extra.imm); - break; - case IR_INTRINSIC: { - IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; - Operand* dsts = - aux->ndst ? arena_array(r->f->arena, Operand, aux->ndst) : NULL; - Operand* args = - aux->narg ? arena_array(r->f->arena, Operand, aux->narg) : NULL; - for (u32 k = 0; k < aux->ndst; ++k) dsts[k] = xlat_op(r, aux->dsts[k]); - for (u32 k = 0; k < aux->narg; ++k) args[k] = xlat_op(r, aux->args[k]); - w->intrinsic(w, aux->kind, dsts, aux->ndst, args, aux->narg); - break; - } - } -} - -static void replay_block(ReplayCtx* r, u32 b) { - Func* f = r->f; - if (b >= f->nblocks) return; - ensure_label_placed(r, b); - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - replay_inst(r, b, &bl->insts[i]); - } -} - -static void add_unique_reg(Reg* used, u32* nused, u32 cap, Reg r) { - for (u32 i = 0; i < *nused; ++i) { - if (used[i] == r) return; - } - if (*nused < cap) used[(*nused)++] = r; -} - -static void collect_replayed_operand_reg(const Operand* op, RegClass cls, - Reg* used, u32* nused, u32 cap) { - if (!op) return; - if (op->kind == OPK_REG) { - if (op->cls == cls) add_unique_reg(used, nused, cap, op->v.reg); - } else if (op->kind == OPK_INDIRECT) { - if (cls == RC_INT) add_unique_reg(used, nused, cap, op->v.ind.base); - } -} - -static void collect_replayed_abivalue_regs(const CGABIValue* v, RegClass cls, - Reg* used, u32* nused, u32 cap) { - if (!v) return; - collect_replayed_operand_reg(&v->storage, cls, used, nused, cap); - for (u32 i = 0; i < v->nparts; ++i) - 
collect_replayed_operand_reg(&v->parts[i].op, cls, used, nused, cap); -} - -static void collect_replayed_param_regs(Func* f, RegClass cls, Reg* used, - u32* nused, u32 cap) { - if (!f->opt_rewritten || !f->val_info) return; - for (u32 i = 0; i < f->nparams; ++i) { - IRParam* p = &f->params[i]; - if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue; - Val v = (Val)p->storage.v.reg; - if (v == VAL_NONE || v >= f->nvals) continue; - OptValInfo* vi = &f->val_info[v]; - if (vi->alloc_kind != OPT_ALLOC_HARD || vi->cls != cls) continue; - add_unique_reg(used, nused, cap, vi->hard_reg); - } -} - -static u32 collect_replayed_hard_regs(Func* f, CGTarget* w, RegClass cls, - Reg* used, u32 cap) { - u32 nused = 0; - collect_replayed_param_regs(f, cls, used, &nused, cap); - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_PARAM_DECL) continue; - for (u32 j = 0; j < in->nopnds; ++j) - collect_replayed_operand_reg(&in->opnds[j], cls, used, &nused, cap); - - switch ((IROp)in->op) { - case IR_CALL: { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (!aux) break; - if (aux->use_plan_replay) { - collect_replayed_operand_reg(&aux->plan.callee, cls, used, &nused, - cap); - for (u32 j = 0; j < aux->plan.nargs; ++j) { - collect_replayed_operand_reg(&aux->plan.args[j].src, cls, used, - &nused, cap); - if (aux->plan.args[j].dst_kind == CG_CALL_PLAN_REG && - aux->plan.args[j].cls == (u8)cls) - add_unique_reg(used, &nused, cap, aux->plan.args[j].dst_reg); - } - for (u32 j = 0; j < aux->plan.nrets; ++j) { - collect_replayed_operand_reg(&aux->plan.rets[j].dst, cls, used, - &nused, cap); - if (aux->plan.rets[j].cls == (u8)cls) - add_unique_reg(used, &nused, cap, aux->plan.rets[j].src_reg); - } - } else { - collect_replayed_operand_reg(&aux->desc.callee, cls, used, &nused, - cap); - for (u32 j = 0; j < aux->desc.nargs; ++j) - collect_replayed_abivalue_regs(&aux->desc.args[j], cls, 
used, - &nused, cap); - collect_replayed_abivalue_regs(&aux->desc.ret, cls, used, &nused, - cap); - } - break; - } - case IR_RET: { - IRRetAux* aux = (IRRetAux*)in->extra.aux; - if (aux && aux->present) - collect_replayed_abivalue_regs(&aux->val, cls, used, &nused, cap); - break; - } - case IR_SCOPE_BEGIN: { - IRScopeAux* aux = (IRScopeAux*)in->extra.aux; - if (aux) - collect_replayed_operand_reg(&aux->desc.cond, cls, used, &nused, - cap); - break; - } - case IR_ASM_BLOCK: { - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux) break; - for (u32 j = 0; j < aux->nin; ++j) - collect_replayed_operand_reg(&aux->in_ops[j], cls, used, &nused, - cap); - for (u32 j = 0; j < aux->nout; ++j) - collect_replayed_operand_reg(&aux->out_ops[j], cls, used, &nused, - cap); - break; - } - case IR_INTRINSIC: { - IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; - if (!aux) break; - for (u32 j = 0; j < aux->narg; ++j) - collect_replayed_operand_reg(&aux->args[j], cls, used, &nused, - cap); - for (u32 j = 0; j < aux->ndst; ++j) - collect_replayed_operand_reg(&aux->dsts[j], cls, used, &nused, - cap); - break; - } - default: - break; - } - } - } - if (w->resolve_reg_name) { - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op != IR_ASM_BLOCK) continue; - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux) continue; - for (u32 j = 0; j < aux->nclob; ++j) { - Reg r; - RegClass rcls; - if (w->resolve_reg_name(w, aux->clobbers[j], &r, &rcls) != 0) - continue; - if (rcls == cls) add_unique_reg(used, &nused, cap, r); - } - } - } - } - return nused; -} - -static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) { - memset(out, 0, sizeof(*out)); - FrameSlotDesc* slots = NULL; - if (f->nframe_slots) { - slots = arena_zarray(f->arena, FrameSlotDesc, f->nframe_slots); - for (u32 i = 0; i < f->nframe_slots; ++i) { - IRFrameSlot* s = &f->frame_slots[i]; - slots[i].type = 
s->type; - slots[i].name = s->name; - slots[i].loc = s->loc; - slots[i].size = s->size; - slots[i].align = s->align; - slots[i].kind = s->kind; - slots[i].flags = s->flags; - } - } - out->slots = slots; - out->nslots = f->nframe_slots; - - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_ALLOCA) { - out->has_alloca = 1; - } else if ((IROp)in->op == IR_CALL) { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (!aux) { - out->has_call = 1; - continue; - } - if ((aux->desc.flags & CG_CALL_TAIL) == 0) out->has_call = 1; - if ((aux->desc.flags & CG_CALL_TAIL) != 0) continue; - if (!w->call_stack_size) continue; - u32 need = w->call_stack_size(w, &aux->desc); - if (need > out->max_outgoing) out->max_outgoing = need; - } - } - } - out->may_omit_frame = - (!out->has_call && !out->has_alloca && out->nslots == 0 && - out->max_outgoing == 0) - ? 1u - : 0u; -} - -static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { - ReplayCtx r; - r.c = c; - r.f = f; - r.tgt = w; - r.identity_regs = identity ? 1u : 0u; - cg_simple_regalloc_init(&r.regalloc); - u32 nv = f->nvals ? f->nvals : 1u; - r.val_to_reg = arena_zarray(f->arena, Reg, nv); - for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE; - r.val_alloced = arena_zarray(f->arena, u8, nv); - r.slot_map = arena_zarray(f->arena, FrameSlot, f->nframe_slots + 1u); - for (u32 i = 0; i <= f->nframe_slots; ++i) r.slot_map[i] = FRAME_SLOT_NONE; - u32 nb = f->nblocks ? 
f->nblocks : 1u; - r.label_map = arena_zarray(f->arena, Label, nb); - for (u32 i = 0; i < f->nblocks; ++i) r.label_map[i] = LABEL_NONE; - r.scope_map = arena_zarray(f->arena, CGScope, f->nscopes + 1u); - for (u32 i = 0; i <= f->nscopes; ++i) r.scope_map[i] = CG_SCOPE_NONE; - r.block_label_placed = arena_zarray(f->arena, u8, nb); - - if (identity && w->plan_hard_regs) { - for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { - Reg used[OPT_MAX_HARD_REGS]; - u32 nused = collect_replayed_hard_regs(f, w, (RegClass)cidx, used, - OPT_MAX_HARD_REGS); - w->plan_hard_regs(w, (RegClass)cidx, used, nused); - } - } - - int known_frame = identity && w->func_begin_known_frame && w->call_stack_size; - if (known_frame) { - CGKnownFrameDesc frame; - FrameSlot* target_slots = f->nframe_slots - ? arena_zarray(f->arena, FrameSlot, - f->nframe_slots) - : NULL; - collect_known_frame(f, w, &frame); - w->func_begin_known_frame(w, &f->desc, &frame, target_slots); - for (u32 i = 0; i < f->nframe_slots; ++i) - r.slot_map[f->frame_slots[i].id] = target_slots[i]; - } else { - /* func_begin with the recorded descriptor. Parameter storage is replayed - * through target->param below after frame slots are mapped. 
*/ - w->func_begin(w, &f->desc); - } - - if (!r.identity_regs) { - for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { - const Reg* regs = NULL; - u32 nregs = 0; - if (w->get_allocable_regs) - w->get_allocable_regs(w, (RegClass)cidx, &regs, &nregs); - if (regs && nregs) - cg_simple_regalloc_set_ordered(&r.regalloc, (RegClass)cidx, regs, - nregs); - } - } - - if (!known_frame) { - for (u32 i = 0; i < f->nframe_slots; ++i) { - IRFrameSlot* s = &f->frame_slots[i]; - FrameSlotDesc d = {0}; - d.type = s->type; - d.name = s->name; - d.loc = s->loc; - d.size = s->size; - d.align = s->align; - d.kind = s->kind; - d.flags = s->flags; - r.slot_map[s->id] = w->frame_slot(w, &d); - } - } - - for (u32 i = 0; i < f->nparams; ++i) { - IRParam* p = &f->params[i]; - CGParamDesc d = {0}; - d.index = p->index; - d.name = p->name; - d.type = p->type; - d.size = p->size; - d.align = p->align; - d.flags = p->flags; - if (replay_reg_storage_unused(&r, p->storage)) { - d.storage = p->storage; - d.storage.v.reg = REG_NONE; - } else { - d.storage = xlat_storage(&r, p->storage, p->type); - } - d.abi = p->abi; - d.loc = p->loc; - (void)w->param(w, &d); - } - - /* Body in emit order — the order CG's emit cursor visited each - * block. Block-creation order can differ when label_new precedes a - * cmp_branch whose fallthrough block must physically follow. */ - for (u32 i = 0; i < f->emit_order_n; ++i) { - replay_block(&r, f->emit_order[i]); - } - - /* At -O1, opt managed allocation and emitted hard regs directly, - * bypassing backend-local allocation. Tell the backend which hard - * regs are still visible in replay so it can save the right callee-saved - * subset in prologue/epilogue. - * - * The backend records only callee-saved members of this set for - * prologue/epilogue preservation. 
*/ - if (r.identity_regs && w->reserve_hard_regs) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - Reg used[OPT_MAX_HARD_REGS]; - u32 nused = collect_replayed_hard_regs(f, w, (RegClass)c, used, - OPT_MAX_HARD_REGS); - if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); - } - } else if (!r.identity_regs && w->reserve_hard_regs) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; - u32 nused = cg_simple_regalloc_used_regs(&r.regalloc, (RegClass)c, used, - CG_SIMPLE_REGALLOC_MAX_REGS); - if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); - } - } - - w->func_end(w); -} - -static void replay_func(OptImpl* o) { - replay_func_to(o->c, o->f, o->target, 0); -} - -void opt_emit(Compiler* c, Func* f, CGTarget* target) { - replay_func_to(c, f, target, 1); -} - static u64 func_inst_count(Func* f) { u64 n = 0; if (!f) return 0; @@ -2315,14 +1264,12 @@ static void w_func_end(CGTarget* t) { metrics_scope_end(o->c, "opt.dead_def_elim"); metrics_scope_begin(o->c, "opt.regalloc"); opt_regalloc(o->f, 0); - metrics_count(o->c, "opt.alloc.used_loc_words", - o->f->opt_used_loc_words); + metrics_count(o->c, "opt.alloc.used_loc_words", o->f->opt_used_loc_words); metrics_count(o->c, "opt.alloc.hard_loc_words", o->f->opt_alloc_hard_loc_words); metrics_count(o->c, "opt.alloc.stack_loc_words", o->f->opt_alloc_stack_loc_words); - metrics_count(o->c, "opt.alloc.stack_slots", - o->f->opt_alloc_stack_slots); + metrics_count(o->c, "opt.alloc.stack_slots", o->f->opt_alloc_stack_slots); metrics_count(o->c, "opt.alloc.hard_point_visits", o->f->opt_alloc_hard_point_visits); metrics_count(o->c, "opt.alloc.stack_point_visits", @@ -2361,9 +1308,9 @@ static void w_func_end(CGTarget* t) { } else if (o->level >= 2) { opt_build_cfg(o->f); opt_build_ssa(o->f); - replay_func(o); + opt_replay(o->c, o->f, o->target); } else { - replay_func(o); + opt_replay(o->c, o->f, o->target); } o->f = NULL; o->cur = 0; diff --git a/src/opt/opt_internal.h 
b/src/opt/opt_internal.h @@ -0,0 +1,35 @@ +#ifndef CFREE_OPT_INTERNAL_H +#define CFREE_OPT_INTERNAL_H + +#include "opt/opt.h" + +typedef struct OptHardRegSet { + u32 cls[OPT_REG_CLASSES]; +} OptHardRegSet; + +typedef struct OptHardBlockLive { + OptHardRegSet live_in; + OptHardRegSet live_out; + OptHardRegSet live_use; + OptHardRegSet live_def; +} OptHardBlockLive; + +int opt_mem_observable(const MemAccess*); +u32 opt_call_clobber_mask_for(Func*, const Inst*, u8 cls); + +int opt_inst_has_side_effect(Func*, const Inst*); + +int opt_hard_empty(const OptHardRegSet*); +int opt_hard_intersects(const OptHardRegSet*, const OptHardRegSet*); +void opt_hard_live_step(OptHardRegSet* live, const OptHardRegSet* use, + const OptHardRegSet* def); +void opt_hard_inst_use_def(Func*, const Inst*, OptHardRegSet* use, + OptHardRegSet* def); +OptHardBlockLive* opt_maybe_build_hard_live(Func*); +OptHardRegSet opt_hard_live_out_for_block(const OptHardBlockLive*); +int opt_block_live_out_has_phys_reg(Func*, const OptHardBlockLive*, u32 block, + const Operand*); + +void opt_replay(Compiler*, Func*, CGTarget* target); + +#endif diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c @@ -0,0 +1,5 @@ +#include "opt/opt_internal.h" + +int opt_mem_observable(const MemAccess* m) { + return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0; +} diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c @@ -0,0 +1,552 @@ +#include "core/arena.h" +#include "opt/opt_internal.h" + +static int same_reg_operand(const Operand* a, const Operand* b) { + return a->kind == OPK_REG && b->kind == OPK_REG && a->cls == b->cls && + a->v.reg == b->v.reg; +} + +static int frame_slot_is_spill(Func* f, FrameSlot fs) { + if (fs == FRAME_SLOT_NONE || fs > f->nframe_slots) return 0; + return f->frame_slots[fs - 1u].kind == FS_SPILL; +} + +static int spill_local_slot(Func* f, const Operand* addr, const MemAccess* mem, + FrameSlot* out) { + if (!addr || addr->kind != OPK_LOCAL) return 0; + if (opt_mem_observable(mem)) return 0; + 
if (mem->alias.kind != ALIAS_LOCAL) return 0; + if (mem->alias.v.local_id != (i32)addr->v.frame_slot) return 0; + if (!frame_slot_is_spill(f, addr->v.frame_slot)) return 0; + *out = addr->v.frame_slot; + return 1; +} + +static int same_spill_access(Func* f, const Inst* a, const Inst* b, + FrameSlot* slot_out) { + FrameSlot as = FRAME_SLOT_NONE; + FrameSlot bs = FRAME_SLOT_NONE; + if (!spill_local_slot(f, &a->opnds[0], &a->extra.mem, &as)) return 0; + if (!spill_local_slot(f, &b->opnds[0], &b->extra.mem, &bs)) return 0; + if (as != bs) return 0; + if (a->extra.mem.size != b->extra.mem.size) return 0; + if (a->extra.mem.addr_space != b->extra.mem.addr_space) return 0; + if (slot_out) *slot_out = as; + return 1; +} + +static int load_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { + if ((IROp)in->op != IR_LOAD || in->nopnds < 2) return 0; + return spill_local_slot(f, &in->opnds[1], &in->extra.mem, slot_out); +} + +static int store_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { + if ((IROp)in->op != IR_STORE || in->nopnds < 2) return 0; + return spill_local_slot(f, &in->opnds[0], &in->extra.mem, slot_out); +} + +static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) { + FrameSlot as = FRAME_SLOT_NONE; + FrameSlot bs = FRAME_SLOT_NONE; + if ((IROp)a->op == IR_LOAD) { + if (!load_spill_slot(f, a, &as)) return 0; + } else if (!store_spill_slot(f, a, &as)) { + return 0; + } + if ((IROp)b->op == IR_LOAD) { + if (!load_spill_slot(f, b, &bs)) return 0; + } else if (!store_spill_slot(f, b, &bs)) { + return 0; + } + return as == bs && a->extra.mem.size == b->extra.mem.size && + a->extra.mem.addr_space == b->extra.mem.addr_space; +} + +static int same_phys_reg(const Operand* a, const Operand* b) { + return a && b && a->kind == OPK_REG && b->kind == OPK_REG && + a->cls == b->cls && a->v.reg == b->v.reg; +} + +static int operand_uses_phys_reg(const Operand* op, const Operand* r) { + if (!op || !r || r->kind != OPK_REG) return 0; + if 
(op->kind == OPK_REG) return op->cls == r->cls && op->v.reg == r->v.reg; + if (op->kind == OPK_INDIRECT) + return r->cls == RC_INT && op->v.ind.base == r->v.reg; + return 0; +} + +static int count_operand_phys_uses(const Operand* op, const Operand* r) { + return operand_uses_phys_reg(op, r) ? 1 : 0; +} + +static int abi_uses_phys_reg(const CGABIValue* v, const Operand* r) { + int n = 0; + if (!v) return 0; + n += count_operand_phys_uses(&v->storage, r); + for (u32 i = 0; i < v->nparts; ++i) + n += count_operand_phys_uses(&v->parts[i].op, r); + return n; +} + +static int inst_uses_phys_reg(const Inst* in, const Operand* r) { + int n = 0; + switch ((IROp)in->op) { + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + case IR_VA_ARG: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_LOAD: + case IR_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_ATOMIC_LOAD: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_BINOP: + case IR_CMP: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + break; + case IR_STORE: + case IR_AGG_COPY: + case IR_AGG_SET: + case IR_BITFIELD_STORE: + case IR_VA_COPY: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + if (aux->use_plan_replay) { + n += count_operand_phys_uses(&aux->plan.callee, r); + for (u32 i = 0; i < aux->plan.nargs; ++i) + n += count_operand_phys_uses(&aux->plan.args[i].src, r); + } else { + n += count_operand_phys_uses(&aux->desc.callee, r); + for (u32 i = 0; i < aux->desc.nargs; ++i) + n += abi_uses_phys_reg(&aux->desc.args[i], r); + } + break; + } + case IR_CMP_BRANCH: + case IR_CONDBR: + for (u32 i = 0; i < in->nopnds; ++i) + n += count_operand_phys_uses(&in->opnds[i], r); + 
break; + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) n += abi_uses_phys_reg(&aux->val, r); + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + if (aux) n += count_operand_phys_uses(&aux->desc.cond, r); + break; + } + case IR_ALLOCA: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_VA_START: + case IR_VA_END: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + break; + case IR_ATOMIC_STORE: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_ATOMIC_RMW: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + break; + case IR_ATOMIC_CAS: + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + if (in->nopnds >= 4) n += count_operand_phys_uses(&in->opnds[3], r); + if (in->nopnds >= 5) n += count_operand_phys_uses(&in->opnds[4], r); + break; + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->nin; ++i) + n += count_operand_phys_uses(&aux->in_ops[i], r); + break; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->narg; ++i) + n += count_operand_phys_uses(&aux->args[i], r); + break; + } + default: + break; + } + return n; +} + +static int abi_defines_phys_reg(const CGABIValue* v, const Operand* r) { + int n = 0; + if (!v) return 0; + if (same_phys_reg(&v->storage, r)) ++n; + for (u32 i = 0; i < v->nparts; ++i) + if (same_phys_reg(&v->parts[i].op, r)) ++n; + return n; +} + +static int inst_defines_phys_reg(const Inst* in, const Operand* r) { + if (!r || r->kind != OPK_REG) return 0; + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_LOAD_CONST: + case IR_COPY: + case IR_LOAD: + case 
IR_ADDR_OF: + case IR_TLS_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_BINOP: + case IR_UNOP: + case IR_CMP: + case IR_CONVERT: + case IR_ALLOCA: + case IR_VA_ARG: + case IR_ATOMIC_LOAD: + case IR_ATOMIC_RMW: + return in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r); + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) return 0; + if (aux->use_plan_replay) { + for (u32 i = 0; i < aux->plan.nargs; ++i) + if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG && + r->cls == aux->plan.args[i].cls && + r->v.reg == aux->plan.args[i].dst_reg) + return 1; + for (u32 i = 0; i < aux->plan.nrets; ++i) + if ((r->cls == aux->plan.rets[i].cls && + r->v.reg == aux->plan.rets[i].src_reg) || + same_phys_reg(&aux->plan.rets[i].dst, r)) + return 1; + return 0; + } + return abi_defines_phys_reg(&aux->desc.ret, r); + } + case IR_ATOMIC_CAS: + return (in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r)) || + (in->nopnds >= 2 && same_phys_reg(&in->opnds[1], r)); + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) return 0; + for (u32 i = 0; i < aux->nout; ++i) + if (same_phys_reg(&aux->out_ops[i], r)) return 1; + return 0; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) return 0; + for (u32 i = 0; i < aux->ndst; ++i) + if (same_phys_reg(&aux->dsts[i], r)) return 1; + return 0; + } + default: + return 0; + } +} + +static int copy_fold_slot(const Inst* in, u32 idx) { + switch ((IROp)in->op) { + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + return idx == 1; + case IR_BINOP: + case IR_CMP: + return idx == 1 || idx == 2; + case IR_CMP_BRANCH: + return idx == 0 || idx == 1; + case IR_CONDBR: + return idx == 0; + case IR_STORE: + return idx == 1; + case IR_ALLOCA: + return idx == 1; + case IR_ATOMIC_RMW: + return idx == 2; + default: + return 0; + } +} + +static int imm_fold_slot(const Inst* in, u32 idx) { + switch ((IROp)in->op) { + case IR_BINOP: + case IR_CMP: + return idx == 1 || idx == 2; + case 
IR_CMP_BRANCH: + return idx == 0 || idx == 1; + default: + return 0; + } +} + +/* Returns nonzero when a and b are both IR_CONVERT with at least two operands, the same extra.imm, and matching types on operand 1 and on operand 0. */static int identical_convert_pair(const Inst* a, const Inst* b) { + if ((IROp)a->op != IR_CONVERT || (IROp)b->op != IR_CONVERT) return 0; + if (a->nopnds < 2 || b->nopnds < 2) return 0; + if (a->extra.imm != b->extra.imm) return 0; + return a->opnds[1].type == b->opnds[1].type && + a->opnds[0].type == b->opnds[0].type; +} + +/* Returns 1 for the operators listed below (integer and float add and multiply, and, or, xor), 0 for every other operator. retarget_producer_legal uses this to decide whether BINOP sources may be exchanged. */static int binop_is_commutative(BinOp op) { + switch (op) { + case BO_IADD: + case BO_IMUL: + case BO_FADD: + case BO_FMUL: + case BO_AND: + case BO_OR: + case BO_XOR: + return 1; + default: + return 0; + } +} + +/* Returns 1 when no instruction of bl with index in the half-open range first..last uses or defines physical register r. */static int no_intervening_phys_access(Block* bl, u32 first, u32 last, + const Operand* r) { + for (u32 i = first; i < last; ++i) { + Inst* in = &bl->insts[i]; + if (inst_uses_phys_reg(in, r) || inst_defines_phys_reg(in, r)) return 0; + } + return 1; +} + +/* Decides whether producer may be changed to write copy_dst instead of its current destination. Both must be register operands of equal class and type, and producer must be IR_UNOP or IR_BINOP. *swap_binop is always written: 1 means the caller must exchange the two BINOP source operands before retargeting, 0 otherwise. */static int retarget_producer_legal(Inst* producer, const Operand* copy_dst, + int* swap_binop) { + *swap_binop = 0; + if (!copy_dst || copy_dst->kind != OPK_REG) return 0; + if (producer->nopnds < 2 || producer->opnds[0].kind != OPK_REG) return 0; + if (producer->opnds[0].cls != copy_dst->cls) return 0; + if (producer->opnds[0].type != copy_dst->type) return 0; + + switch ((IROp)producer->op) { + case IR_UNOP: + return 1; + case IR_BINOP: { + if (producer->nopnds < 3) return 0; + int dst_is_lhs = operand_uses_phys_reg(&producer->opnds[1], copy_dst); + int dst_is_rhs = operand_uses_phys_reg(&producer->opnds[2], copy_dst); + if (!dst_is_lhs && !dst_is_rhs) return 1; + if (dst_is_lhs) return 1; + /* here copy_dst is read only as operand 2; accept only when the operator allows the caller to swap the operands */if (binop_is_commutative((BinOp)producer->extra.imm)) { + *swap_binop = 1; + return 1; + } + return 0; + } + default: + return 0; + } +} + +/* Stores in *out the lowest register number in 0..31 whose bit is set in f->opt_ret_regs[cls]. Returns 1 on success and 0 when f is NULL, cls is out of range, or the mask has no bit set. */static int first_return_reg(Func* f, u8 cls, Reg* out) { + if (!f || cls >= OPT_REG_CLASSES) return 0; + u32 mask = f->opt_ret_regs[cls]; + for (Reg r = 0; r < 32; ++r) { + if (mask & (1u << r)) { + *out = r; + return 1; + } + } + return 0; +} + +static int 
/* Yields a pointer to v->storage through out when v is stored in a register and has at most one part; returns 0 without touching out otherwise. */ret_scalar_storage(CGABIValue* v, Operand** out) { + if (!v || v->storage.kind != OPK_REG) return 0; + if (v->nparts > 1) return 0; + *out = &v->storage; + return 1; +} + +/* Scans the instructions after index def_i in bl for exactly one use of register def that sits in a foldable operand slot. Slot legality is chosen by mode: conv_fold requires operand 1 of an identical IR_CONVERT, imm_fold uses imm_fold_slot, otherwise copy_fold_slot. With check_src set, a use reached after src was redefined makes the search fail. The scan stops at the first IR_CALL or redefinition of def; if neither is met, def must not be live out of the block. On success writes the instruction index and operand index to use_i_out and op_i_out and returns 1. */static int find_single_direct_use(Func* f, Block* bl, + const OptHardBlockLive* hard_live, u32 def_i, + const Operand* def, const Operand* src, + int check_src, int imm_fold, int conv_fold, + u32* use_i_out, u32* op_i_out) { + int total_uses = 0; + int source_clobbered = 0; + int killed = 0; + int found = 0; + u32 found_i = 0; + u32 found_op = 0; + + for (u32 i = def_i + 1u; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + int uses = inst_uses_phys_reg(in, def); + if (uses) { + if (check_src && source_clobbered) return 0; + total_uses += uses; + if (total_uses > 1) return 0; + for (u32 oi = 0; oi < in->nopnds; ++oi) { + int ok = + conv_fold + ? (oi == 1 && identical_convert_pair(&bl->insts[def_i], in)) + : (imm_fold ? imm_fold_slot(in, oi) : copy_fold_slot(in, oi)); + if (!ok) continue; + if (!same_phys_reg(&in->opnds[oi], def)) continue; + found_i = i; + found_op = oi; + found = 1; + } + } + + /* a call ends the scan: def is treated as killed and, when tracked, src as clobbered */if ((IROp)in->op == IR_CALL) { + if (check_src) source_clobbered = 1; + killed = 1; + break; + } + if (check_src && src && inst_defines_phys_reg(in, src)) + source_clobbered = 1; + if (inst_defines_phys_reg(in, def)) { + killed = 1; + break; + } + } + + if (total_uses != 1) return 0; + if (!found) return 0; + /* def was never redefined in this block, so it must not be live on exit */if (!killed && opt_block_live_out_has_phys_reg(f, hard_live, bl->id, def)) + return 0; + *use_i_out = found_i; + *op_i_out = found_op; + return 1; +} + +/* Walks every instruction of bl and tries a series of local rewrites, using find_single_direct_use to locate the single consumer of a result. The first rewrite, tried for IR_RET when f->opt_rewritten is set, retargets the producer of the returned register. */static void opt_combine_fold_block(Func* f, Block* bl, + const OptHardBlockLive* hard_live) { + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + u32 use_i = 0; + u32 op_i = 0; + + if (f->opt_rewritten && (IROp)in->op == IR_RET && i > 0) { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + Operand* ret_op = NULL; + Reg ret_reg = REG_NONE; + if (aux && aux->present && ret_scalar_storage(&aux->val, &ret_op) && + first_return_reg(f, 
ret_op->cls, &ret_reg) && + ret_reg != (Reg)REG_NONE && ret_reg != ret_op->v.reg) { + /* only the instruction directly before the IR_RET is tried as the producer of the returned register */Inst* producer = &bl->insts[i - 1u]; + Operand ret_dst = *ret_op; + ret_dst.v.reg = ret_reg; + int swap_binop = 0; + if (producer->nopnds >= 1 && + same_phys_reg(&producer->opnds[0], ret_op) && + retarget_producer_legal(producer, &ret_dst, &swap_binop)) { + if (swap_binop) { + Operand tmp = producer->opnds[1]; + producer->opnds[1] = producer->opnds[2]; + producer->opnds[2] = tmp; + } + /* make the producer write the return register and point the IR_RET value at it */producer->opnds[0] = ret_dst; + *ret_op = ret_dst; + continue; + } + } + } + + /* Producer retargeting: when a BINOP or UNOP result has a single use that is operand 1 of an IR_COPY into a different register, and that register is untouched in between, the producer writes the copy destination directly and the copy is rewritten to copy its destination onto itself. */if (f->opt_rewritten && + ((IROp)in->op == IR_BINOP || (IROp)in->op == IR_UNOP) && + in->nopnds >= 1 && in->opnds[0].kind == OPK_REG && + find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 0, + 0, &use_i, &op_i)) { + Inst* copy = &bl->insts[use_i]; + int swap_binop = 0; + if ((IROp)copy->op == IR_COPY && op_i == 1 && copy->nopnds >= 2 && + copy->opnds[0].kind == OPK_REG && + same_phys_reg(&copy->opnds[1], &in->opnds[0]) && + !same_phys_reg(&copy->opnds[0], &in->opnds[0]) && + no_intervening_phys_access(bl, i + 1u, use_i, &copy->opnds[0]) && + retarget_producer_legal(in, &copy->opnds[0], &swap_binop)) { + if (swap_binop) { + Operand tmp = in->opnds[1]; + in->opnds[1] = in->opnds[2]; + in->opnds[2] = tmp; + } + in->opnds[0] = copy->opnds[0]; + copy->opnds[1] = copy->opnds[0]; + continue; + } + } + + /* Copy forwarding: the single consumer of a register-to-register copy reads the copy source instead; check_src is set so the source must still be intact at the use. */if ((IROp)in->op == IR_COPY && in->nopnds >= 2 && + in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && + !same_phys_reg(&in->opnds[0], &in->opnds[1]) && + find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], + &in->opnds[1], 1, 0, 0, &use_i, &op_i)) { + bl->insts[use_i].opnds[op_i] = in->opnds[1]; + continue; + } + + /* Immediate folding: build an OPK_IMM operand carrying the constant of this IR_LOAD_IMM for its single consumer (imm_fold mode). */if ((IROp)in->op == IR_LOAD_IMM && in->nopnds >= 1 && + in->opnds[0].kind == OPK_REG && + find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 1, + 0, &use_i, &op_i)) { + Operand imm = in->opnds[0]; + imm.kind = OPK_IMM; + imm.v.imm = in->extra.imm; + 
bl->insts[use_i].opnds[op_i] = imm; + continue; + } + + /* Convert folding: the single consumer found in conv_fold mode (an identical IR_CONVERT) reads the source of this IR_CONVERT instead of its result. */if ((IROp)in->op == IR_CONVERT && in->nopnds >= 2 && + in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && + find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], + &in->opnds[1], 1, 0, 1, &use_i, &op_i)) { + bl->insts[use_i].opnds[op_i] = in->opnds[1]; + } + } +} + +/* Runs opt_combine_fold_block on every block, then compacts each block in place with write cursor w. Dropped: copies whose two operands are the same register, and an instruction made redundant by the previously kept one (STORE then LOAD, LOAD then STORE, LOAD then LOAD of the same slot, size and register, as judged by same_spill_slot_and_size). For two consecutive STOREs matching same_spill_access the later one replaces the earlier. */void opt_combine(Func* f) { + OptHardBlockLive* hard_live = opt_maybe_build_hard_live(f); + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + opt_combine_fold_block(f, bl, hard_live); + u32 w = 0; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_COPY && in->nopnds == 2 && + same_reg_operand(&in->opnds[0], &in->opnds[1])) { + continue; + } + + /* compare against the last instruction kept so far */if (w) { + Inst* prev = &bl->insts[w - 1u]; + if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_LOAD && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[1], &in->opnds[0])) { + continue; + } + if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_STORE && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[0], &in->opnds[1])) { + continue; + } + if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_LOAD && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[0], &in->opnds[0])) { + continue; + } + if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_STORE && + same_spill_access(f, prev, in, NULL)) { + /* the later store overwrites the kept earlier one */bl->insts[w - 1u] = *in; + continue; + } + } + + bl->insts[w++] = *in; + } + bl->ninsts = w; + } +} diff --git a/src/opt/pass_dce.c b/src/opt/pass_dce.c @@ -0,0 +1,93 @@ +#include "core/arena.h" +#include "opt/opt_internal.h" + +/* Classifies instruction in as having a side effect. Plain loads and bitfield loads count only when opt_mem_observable reports their access as observable; the opcodes in the long case list always count; everything else does not. f is unused. */int opt_inst_has_side_effect(Func* f, const Inst* in) { + (void)f; + switch ((IROp)in->op) { + case IR_LOAD: + return opt_mem_observable(&in->extra.mem); + case IR_BITFIELD_LOAD: { + IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; + return aux && opt_mem_observable(&aux->access.storage); + } + case IR_ALLOCA: + case IR_PARAM_DECL: + 
case IR_STORE: + case IR_AGG_COPY: + case IR_AGG_SET: + case IR_BITFIELD_STORE: + case IR_CALL: + case IR_BR: + case IR_CONDBR: + case IR_CMP_BRANCH: + case IR_RET: + case IR_SCOPE_BEGIN: + case IR_SCOPE_ELSE: + case IR_SCOPE_END: + case IR_BREAK_TO: + case IR_CONTINUE_TO: + case IR_VA_START: + case IR_VA_ARG: + case IR_VA_END: + case IR_VA_COPY: + case IR_ATOMIC_LOAD: + case IR_ATOMIC_STORE: + case IR_ATOMIC_RMW: + case IR_ATOMIC_CAS: + case IR_FENCE: + case IR_ASM_BLOCK: + case IR_INTRINSIC: + return 1; + default: + return 0; + } +} +/* Removes dead instructions from every block of f; IR_NOP is always dropped. When f->opt_rewritten is set, each block is walked backwards with a live hard-register set seeded from the block live-out data: a side-effect-free instruction is dropped when it defines registers and none of them is live, or when it defines nothing and has no operands. Survivors are collected into a new arena array in reverse order and then reversed back. Otherwise a forward in-place compaction drops side-effect-free instructions that have no def, no defs and no operands. */void opt_dce(Func* f) { + OptHardBlockLive* hard_live = opt_maybe_build_hard_live(f); + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + if (f->opt_rewritten) { + OptHardRegSet live = + opt_hard_live_out_for_block(hard_live ? &hard_live[b] : NULL); + Inst* new_insts = arena_array(f->arena, Inst, bl->ninsts); + u32 w = 0; + for (u32 ri = bl->ninsts; ri > 0; --ri) { + u32 i = ri - 1u; + Inst* in = &bl->insts[i]; + OptHardRegSet use, def; + if ((IROp)in->op == IR_NOP) continue; + opt_hard_inst_use_def(f, in, &use, &def); + if (!opt_inst_has_side_effect(f, in) && !opt_hard_empty(&def) && + !opt_hard_intersects(&def, &live)) { + continue; + } + if (!opt_inst_has_side_effect(f, in) && opt_hard_empty(&def) && + in->nopnds == 0) { + continue; + } + new_insts[w++] = *in; + opt_hard_live_step(&live, &use, &def); + } + /* survivors were appended last-to-first; restore program order */for (u32 i = 0; i < w / 2; ++i) { + Inst tmp = new_insts[i]; + new_insts[i] = new_insts[w - 1u - i]; + new_insts[w - 1u - i] = tmp; + } + bl->insts = new_insts; + bl->ninsts = w; + bl->cap = w; + continue; + } + + u32 w = 0; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_NOP) continue; + if (!opt_inst_has_side_effect(f, in) && in->def == VAL_NONE && + in->ndefs == 0 && in->nopnds == 0) + continue; + bl->insts[w++] = *in; + } + bl->ninsts = w; + } +} diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c @@ -0,0 +1,1043 @@ +#include <string.h> + +#include 
"arch/regalloc.h" +#include "core/arena.h" +#include "core/core.h" +#include "opt/ir.h" +#include "opt/opt_internal.h" + +/* State shared by the replay functions of this file: the compiler, the function being replayed, the output target, and the lookup tables from IR values, frame slots, block indices and scope ids to target registers, slots, labels and scopes. */typedef struct ReplayCtx { + Compiler* c; + Func* f; + CGTarget* tgt; + Reg* val_to_reg; + FrameSlot* slot_map; + Label* label_map; + CGScope* scope_map; + u8* val_alloced; + u8* block_label_placed; + u8 identity_regs; + CGSimpleRegAlloc regalloc; +} ReplayCtx; + +/* Maps IR value v to a target register. VAL_NONE maps to REG_NONE; with identity_regs set the value number is returned unchanged; otherwise a register is allocated from r->regalloc on first use and cached in val_to_reg. Calls compiler_panic on an out-of-range value or when the allocator returns REG_NONE. */static Reg val_to_target_reg(ReplayCtx* r, Val v) { + Func* f = r->f; + if (v == VAL_NONE) return REG_NONE; + if (r->identity_regs) return (Reg)v; + if (v >= f->nvals) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, "opt replay: Val %u out of range", v); + } + if (!r->val_alloced[v]) { + r->val_to_reg[v] = + cg_simple_regalloc_alloc(&r->regalloc, (RegClass)f->val_cls[v]); + if (r->val_to_reg[v] == (Reg)REG_NONE) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, "opt replay: hard reg pool exhausted"); + } + r->val_alloced[v] = 1; + } + return r->val_to_reg[v]; +} + +/* Maps frame slot vs through slot_map; FRAME_SLOT_NONE passes through. Calls compiler_panic when vs is greater than nframe_slots. NOTE(review): indices up to and including nframe_slots are accepted, so slot_map presumably holds nframe_slots + 1 entries - confirm at the allocation site. */static FrameSlot slot_to_target(ReplayCtx* r, FrameSlot vs) { + if (vs == FRAME_SLOT_NONE) return FRAME_SLOT_NONE; + if (vs >= r->f->nframe_slots + 1u) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, "opt replay: vslot %u out of range", + (unsigned)vs); + } + return r->slot_map[vs]; +} + +/* Translates local storage st for the target. Register storage on an identity_regs, opt_rewritten function with val_info follows the allocation recorded for the value: its hard register, its spill frame slot (the storage kind becomes FRAME), or val_to_target_reg as fallback. Other register storage goes through val_to_target_reg; non-register storage has its frame slot mapped. ty is unused. */static CGLocalStorage xlat_storage(ReplayCtx* r, CGLocalStorage st, + CfreeCgTypeId ty) { + (void)ty; + if (st.kind == CG_LOCAL_STORAGE_REG) { + Val v = (Val)st.v.reg; + if (r->identity_regs && r->f->opt_rewritten && v < r->f->nvals && + r->f->val_info) { + OptValInfo* vi = &r->f->val_info[v]; + if (vi->alloc_kind == OPT_ALLOC_HARD) { + st.v.reg = vi->hard_reg; + } else if (vi->alloc_kind == OPT_ALLOC_SPILL) { + st.kind = CG_LOCAL_STORAGE_FRAME; + st.v.frame_slot = slot_to_target(r, vi->spill_slot); + } else { + st.v.reg = val_to_target_reg(r, v); + } + } else { + st.v.reg = val_to_target_reg(r, v); + } + } else { + st.v.frame_slot = slot_to_target(r, st.v.frame_slot); + } + return st; +} + +static int 
/* Returns nonzero when st is register storage whose value has alloc_kind OPT_ALLOC_NONE or use_freq 0 in val_info. Only answers for identity_regs, opt_rewritten functions that have val_info; a NULL r, non-register storage, VAL_NONE or an out-of-range value all return 0. */replay_reg_storage_unused(ReplayCtx* r, CGLocalStorage st) { + if (!r || st.kind != CG_LOCAL_STORAGE_REG) return 0; + if (!(r->identity_regs && r->f->opt_rewritten && r->f->val_info)) return 0; + Val v = (Val)st.v.reg; + if (v == VAL_NONE || v >= r->f->nvals) return 0; + return r->f->val_info[v].alloc_kind == OPT_ALLOC_NONE || + r->f->val_info[v].use_freq == 0; +} + +/* Translates operand op for the target. Immediates and globals are returned unchanged; register operands and the base register of indirect operands are mapped with val_to_target_reg unless identity_regs and opt_rewritten are both set, in which case they are kept as they are; local operands have their frame slot mapped. */static Operand xlat_op(ReplayCtx* r, Operand op) { + switch ((OpKind)op.kind) { + case OPK_IMM: + case OPK_GLOBAL: + return op; + case OPK_REG: + if (r->identity_regs && r->f->opt_rewritten) return op; + op.v.reg = val_to_target_reg(r, (Val)op.v.reg); + return op; + case OPK_LOCAL: + op.v.frame_slot = slot_to_target(r, op.v.frame_slot); + return op; + case OPK_INDIRECT: + if (!(r->identity_regs && r->f->opt_rewritten)) + op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base); + return op; + } + return op; +} + +/* Returns a copy of in with its storage operand translated. When in has parts and parts_out is given, the translated parts are written to parts_out and referenced by the copy; otherwise parts is set to NULL. NOTE(review): in that second case nparts is still the value copied from in - confirm callers always pass a buffer when nparts is nonzero. */static CGABIValue xlat_abivalue(ReplayCtx* r, const CGABIValue* in, + CGABIPart* parts_out) { + CGABIValue out = *in; + out.storage = xlat_op(r, in->storage); + if (in->nparts && parts_out) { + for (u32 i = 0; i < in->nparts; ++i) { + parts_out[i] = in->parts[i]; + parts_out[i].op = xlat_op(r, in->parts[i].op); + } + out.parts = parts_out; + } else { + out.parts = NULL; + } + return out; +} + +/* One pending move handled by replay_parallel_moves: destination and source operands plus the call-plan details forwarded to the target hooks. done marks moves that were already emitted or need no code. */typedef struct ReplayParallelMove { + Operand dst; + Operand src; + MemAccess mem; + const CGCallPlanRet* ret; + u32 src_offset; + u32 dst_offset; + u32 stack_offset; + u8 dst_kind; + u8 src_kind; + u8 is_ret; + u8 done; +} ReplayParallelMove; + +/* Builds a zero-initialised OPK_REG operand for register r with class cls and type ty. */static Operand phys_reg_operand(Reg r, RegClass cls, CfreeCgTypeId ty) { + Operand op; + memset(&op, 0, sizeof op); + op.kind = OPK_REG; + op.cls = (u8)cls; + op.type = ty; + op.v.reg = r; + return op; +} + +/* Nonzero when a and b are both non-NULL register operands with equal class and register number. */static int operand_reg_eq(const Operand* a, const Operand* b) { + return a && b && a->kind == OPK_REG && b->kind == OPK_REG && + a->cls == b->cls && a->v.reg == b->v.reg; +} + +/* Nonzero when op reads register r: op is that same register, or op is an indirect operand whose base equals r and r is in class RC_INT. */static int operand_uses_reg_for_replay(const Operand* op, const Operand* r) { + 
if (!op || !r || r->kind != OPK_REG) return 0; + if (op->kind == OPK_REG) return operand_reg_eq(op, r); + if (op->kind == OPK_INDIRECT) + return r->cls == RC_INT && op->v.ind.base == r->v.reg; + return 0; +} + +/* Returns 1 when no other pending move still reads the destination of moves[idx], meaning that destination may be overwritten now. Moves marked done are ignored. */static int replay_move_src_ready(const ReplayParallelMove* moves, u32 n, + u32 idx) { + const Operand* dst = &moves[idx].dst; + for (u32 i = 0; i < n; ++i) { + if (i == idx || moves[i].done) continue; + if (operand_uses_reg_for_replay(&moves[i].src, dst)) return 0; + } + return 1; +} + +/* Returns the index of the first pending move whose destination is register dst, or -1 when there is none. */static int replay_find_move_dst(const ReplayParallelMove* moves, u32 n, + const Operand* dst) { + for (u32 i = 0; i < n; ++i) { + if (!moves[i].done && operand_reg_eq(&moves[i].dst, dst)) return (int)i; + } + return -1; +} + +/* Returns the first register in f->opt_scratch_regs[cls] that differs from avoid, or REG_NONE when cls is out of range or no such register exists. */static Reg replay_scratch_reg(ReplayCtx* r, RegClass cls, Reg avoid) { + if ((u32)cls >= OPT_REG_CLASSES) return REG_NONE; + for (u32 i = 0; i < r->f->opt_scratch_reg_count[cls]; ++i) { + Reg sr = r->f->opt_scratch_regs[cls][i]; + if (sr != avoid) return sr; + } + return REG_NONE; +} + +/* Emits one move through the hooks of target w. Stack destinations go through store_call_arg. Register destinations use load_call_arg for address sources or nonzero source offsets, and otherwise copy, load_imm, load or addr_of depending on the source kind. Memory destinations use store, or store_call_ret for return moves with a destination offset. */static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) { + Operand dst = move->dst; + Operand src = move->src; + MemAccess mem = move->mem; + if (move->dst_kind == CG_CALL_PLAN_STACK || + move->dst_kind == CG_CALL_PLAN_TAIL_STACK) { + CGCallPlanMove m; + memset(&m, 0, sizeof m); + m.src = src; + m.src_kind = move->src_kind; + m.dst_kind = move->dst_kind; + m.cls = dst.cls; + m.src_offset = move->src_offset; + m.stack_offset = move->stack_offset; + m.mem = mem; + w->store_call_arg(w, &m); + } else if (dst.kind == OPK_REG) { + if (move->src_kind == CG_CALL_PLAN_SRC_ADDR || move->src_offset) { + CGCallPlanMove m; + memset(&m, 0, sizeof m); + m.src = src; + m.src_kind = move->src_kind; + m.dst_kind = CG_CALL_PLAN_REG; + m.cls = dst.cls; + m.dst_reg = dst.v.reg; + m.src_offset = move->src_offset; + m.mem = mem; + w->load_call_arg(w, dst, &m); + return; + } + if (src.kind == OPK_REG) { + /* a register copied onto itself needs no code */if (!operand_reg_eq(&dst, &src)) w->copy(w, dst, src); + } else if 
(src.kind == OPK_IMM) { + w->load_imm(w, dst, src.v.imm); + } else if (src.kind == OPK_LOCAL || src.kind == OPK_INDIRECT) { + w->load(w, dst, src, mem); + } else if (src.kind == OPK_GLOBAL) { + w->addr_of(w, dst, src); + } + } else if (dst.kind == OPK_LOCAL || dst.kind == OPK_INDIRECT) { + if (move->is_ret && move->dst_offset) { + CGCallPlanRet ret = move->ret ? *move->ret : (CGCallPlanRet){0}; + ret.dst = dst; + ret.dst_offset = move->dst_offset; + ret.mem = mem; + w->store_call_ret(w, &ret, src); + return; + } + w->store(w, dst, src, mem); + } +} + +/* Emits the n moves as if they happened simultaneously. Moves whose source and destination are the same register are marked done up front. Each round emits every pending move whose destination is no longer read by another pending move. When a round emits nothing, one pending move with a non-register source is first staged into a scratch register. Failing that, a cycle is broken: the source of the first pending move is copied to a scratch register, the moves along the chain of destinations are emitted, and the first move is finished from the scratch copy. Calls compiler_panic when the cycle case finds no scratch register. NOTE(review): both fallback paths pass REG_NONE as avoid, so replay_scratch_reg hands back the same first scratch register every time - confirm a staged value cannot be overwritten by a later staging or cycle break. */static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves, + u32 n) { + CGTarget* w = r->tgt; + u32 remaining = 0; + for (u32 i = 0; i < n; ++i) { + if (operand_reg_eq(&moves[i].dst, &moves[i].src)) { + moves[i].done = 1; + } else { + ++remaining; + } + } + + while (remaining) { + int progressed = 0; + for (u32 i = 0; i < n; ++i) { + if (moves[i].done || !replay_move_src_ready(moves, n, i)) continue; + replay_emit_move(w, &moves[i]); + moves[i].done = 1; + --remaining; + progressed = 1; + } + if (progressed) continue; + + /* fallback 1: stage one non-register source into a scratch register, then retry */for (u32 i = 0; i < n; ++i) { + if (moves[i].done || moves[i].src.kind == OPK_REG) continue; + Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, REG_NONE); + if (sr == (Reg)REG_NONE) continue; + Operand tmp = + phys_reg_operand(sr, (RegClass)moves[i].dst.cls, moves[i].dst.type); + ReplayParallelMove tmp_move = moves[i]; + tmp_move.dst = tmp; + tmp_move.dst_kind = CG_CALL_PLAN_REG; + replay_emit_move(w, &tmp_move); + moves[i].src = tmp; + moves[i].src_kind = CG_CALL_PLAN_SRC_VALUE; + moves[i].src_offset = 0; + progressed = 1; + break; + } + if (progressed) continue; + + /* fallback 2: break a register cycle starting at the first pending move */u32 first = 0; + while (first < n && moves[first].done) ++first; + if (first == n) break; + Operand save = moves[first].src; + Reg sr = replay_scratch_reg(r, (RegClass)save.cls, REG_NONE); + if (sr == (Reg)REG_NONE) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, + "opt replay: no scratch register for 
parallel call move"); + } + Operand tmp = phys_reg_operand(sr, (RegClass)save.cls, save.type); + w->copy(w, tmp, save); + + Operand hole = save; + for (;;) { + int idx = replay_find_move_dst(moves, n, &hole); + if (idx < 0 || (u32)idx == first) break; + replay_emit_move(w, &moves[idx]); + hole = moves[idx].src; + moves[idx].done = 1; + --remaining; + } + moves[first].src = tmp; + moves[first].src_kind = CG_CALL_PLAN_SRC_VALUE; + moves[first].src_offset = 0; + replay_emit_move(w, &moves[first]); + moves[first].done = 1; + --remaining; + } +} + +static int replay_plan_supported(CGTarget* w, const CGCallPlan* p) { + if (!p) return 0; + for (u32 i = 0; i < p->nargs; ++i) { + if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK || + p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && + !w->store_call_arg) + return 0; + if (p->args[i].dst_kind == CG_CALL_PLAN_REG && + (p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR || + p->args[i].src_offset) && + !w->load_call_arg) + return 0; + } + for (u32 i = 0; i < p->nrets; ++i) + if (p->rets[i].dst.kind != OPK_REG && p->rets[i].dst.kind != OPK_LOCAL && + p->rets[i].dst.kind != OPK_INDIRECT) + return 0; + for (u32 i = 0; i < p->nrets; ++i) + if (p->rets[i].dst_offset && + (p->rets[i].dst.kind == OPK_LOCAL || + p->rets[i].dst.kind == OPK_INDIRECT) && + !w->store_call_ret) + return 0; + return 1; +} + +static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) { + const CGCallPlan* src_plan = &aux->plan; + CGCallPlan plan = *src_plan; + plan.callee = xlat_op(r, src_plan->callee); + plan.args = src_plan->nargs + ? arena_array(r->f->arena, CGCallPlanMove, src_plan->nargs) + : NULL; + plan.rets = src_plan->nrets + ? arena_array(r->f->arena, CGCallPlanRet, src_plan->nrets) + : NULL; + + ReplayParallelMove* arg_moves = + src_plan->nargs + ? 
arena_zarray(r->f->arena, ReplayParallelMove, src_plan->nargs) + : NULL; + u32 nargs = 0; + /* build one parallel move per planned argument: translate the source and, for tail calls, turn stack destinations into tail-stack destinations */for (u32 i = 0; i < src_plan->nargs; ++i) { + plan.args[i] = src_plan->args[i]; + plan.args[i].src = xlat_op(r, src_plan->args[i].src); + if ((src_plan->flags & CG_CALL_TAIL) && + plan.args[i].dst_kind == CG_CALL_PLAN_STACK) { + plan.args[i].dst_kind = CG_CALL_PLAN_TAIL_STACK; + } + Operand dst; + if (plan.args[i].dst_kind == CG_CALL_PLAN_REG) { + dst = phys_reg_operand(plan.args[i].dst_reg, (RegClass)plan.args[i].cls, + plan.args[i].mem.type); + } else { + memset(&dst, 0, sizeof dst); + dst.kind = OPK_LOCAL; + dst.cls = plan.args[i].cls; + dst.type = plan.args[i].mem.type; + } + arg_moves[nargs].dst = dst; + arg_moves[nargs].src = plan.args[i].src; + arg_moves[nargs].mem = plan.args[i].mem; + arg_moves[nargs].src_offset = plan.args[i].src_offset; + arg_moves[nargs].stack_offset = plan.args[i].stack_offset; + arg_moves[nargs].dst_kind = plan.args[i].dst_kind; + arg_moves[nargs].src_kind = plan.args[i].src_kind; + ++nargs; + } + + /* if an argument register is also the callee register, copy the callee to a scratch register before the argument moves overwrite it. NOTE(review): the scratch register is requested with avoid = REG_NONE and is not communicated to replay_parallel_moves - confirm the argument moves cannot pick the same scratch register. */Reg callee_scratch = REG_NONE; + if (plan.callee.kind == OPK_REG) { + for (u32 i = 0; i < nargs; ++i) { + if (arg_moves[i].dst_kind != CG_CALL_PLAN_REG || + !operand_reg_eq(&arg_moves[i].dst, &plan.callee)) + continue; + callee_scratch = replay_scratch_reg(r, RC_INT, REG_NONE); + if (callee_scratch == (Reg)REG_NONE) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, + "opt replay: no scratch register for indirect call"); + } + Operand tmp = phys_reg_operand(callee_scratch, RC_INT, plan.callee.type); + r->tgt->copy(r->tgt, tmp, plan.callee); + plan.callee = tmp; + break; + } + } + + replay_parallel_moves(r, arg_moves, nargs); + r->tgt->emit_call_plan(r->tgt, &plan); + + /* no return-value moves are emitted for tail calls */if (plan.flags & CG_CALL_TAIL) return; + + ReplayParallelMove* ret_moves = + src_plan->nrets + ? 
arena_zarray(r->f->arena, ReplayParallelMove, src_plan->nrets) + : NULL; + u32 nrets = 0; + /* one move per planned return value: from its physical source register to the translated destination */for (u32 i = 0; i < src_plan->nrets; ++i) { + plan.rets[i] = src_plan->rets[i]; + plan.rets[i].dst = xlat_op(r, src_plan->rets[i].dst); + Operand src = + phys_reg_operand(plan.rets[i].src_reg, (RegClass)plan.rets[i].cls, + plan.rets[i].mem.type); + ret_moves[nrets].dst = plan.rets[i].dst; + ret_moves[nrets].src = src; + ret_moves[nrets].mem = plan.rets[i].mem; + ret_moves[nrets].ret = &plan.rets[i]; + ret_moves[nrets].dst_offset = plan.rets[i].dst_offset; + ret_moves[nrets].dst_kind = CG_CALL_PLAN_REG; + ret_moves[nrets].src_kind = CG_CALL_PLAN_SRC_VALUE; + ret_moves[nrets].is_ret = 1; + ++nrets; + } + replay_parallel_moves(r, ret_moves, nrets); +} + +/* Returns the target label for block b, creating it with label_new on first request. Returns LABEL_NONE when b is out of range. */static Label ensure_label(ReplayCtx* r, u32 b) { + if (b >= r->f->nblocks) return LABEL_NONE; + if (r->label_map[b] == LABEL_NONE) { + r->label_map[b] = r->tgt->label_new(r->tgt); + } + return r->label_map[b]; +} + +/* Places the label of block b at most once. The entry block is marked as placed but no label is emitted for it. */static void ensure_label_placed(ReplayCtx* r, u32 b) { + if (r->block_label_placed[b]) return; + r->block_label_placed[b] = 1; + if (b == r->f->entry) return; + Label l = ensure_label(r, b); + r->tgt->label_place(r->tgt, l); +} + +/* Replays IR instruction in, which belongs to block b: sets the source location, translates the operands with xlat_op and calls the matching CGTarget hook. The opcodes in the first case group emit nothing. */static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { + CGTarget* w = r->tgt; + w->set_loc(w, in->loc); + + switch ((IROp)in->op) { + case IR_NOP: + case IR_CONST_I: + case IR_CONST_BYTES: + case IR_PARAM_DECL: + case IR_PHI: + case IR_CONDBR: + break; + /* NOTE(review): aux is dereferenced here without the NULL check that other IR_ASM_BLOCK handlers in this file perform - confirm it is always set */case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + Operand* in_ops_ = NULL; + Operand* out_ops_ = NULL; + if (aux->nin) { + in_ops_ = arena_array(r->f->arena, Operand, aux->nin); + for (u32 k = 0; k < aux->nin; ++k) { + in_ops_[k] = xlat_op(r, aux->in_ops[k]); + } + } + if (aux->nout) { + out_ops_ = arena_array(r->f->arena, Operand, aux->nout); + for (u32 k = 0; k < aux->nout; ++k) { + out_ops_[k] = xlat_op(r, aux->out_ops[k]); + } + } + w->asm_block(w, aux->tmpl, aux->outs, aux->nout, out_ops_, aux->ins, + aux->nin, 
in_ops_, aux->clobbers, aux->nclob); + break; + } + /* simple value producers: opnds[0] is the destination, the constant comes from extra */case IR_LOAD_IMM: { + Operand dst = xlat_op(r, in->opnds[0]); + w->load_imm(w, dst, in->extra.imm); + break; + } + case IR_LOAD_CONST: { + Operand dst = xlat_op(r, in->opnds[0]); + w->load_const(w, dst, in->extra.cbytes); + break; + } + case IR_COPY: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + w->copy(w, dst, src); + break; + } + /* load: opnds[0] is the destination and opnds[1] the address */case IR_LOAD: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand addr = xlat_op(r, in->opnds[1]); + w->load(w, dst, addr, in->extra.mem); + break; + } + /* store: opnds[0] is the address and opnds[1] the stored value */case IR_STORE: { + Operand addr = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + w->store(w, addr, src, in->extra.mem); + break; + } + case IR_ADDR_OF: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand lv = xlat_op(r, in->opnds[1]); + w->addr_of(w, dst, lv); + break; + } + /* the cases from here on read their extra data through aux without a NULL check */case IR_TLS_ADDR_OF: { + Operand dst = xlat_op(r, in->opnds[0]); + IRTlsAux* aux = (IRTlsAux*)in->extra.aux; + w->tls_addr_of(w, dst, aux->sym, aux->addend); + break; + } + case IR_AGG_COPY: { + Operand a = xlat_op(r, in->opnds[0]); + Operand bo = xlat_op(r, in->opnds[1]); + IRAggAux* aux = (IRAggAux*)in->extra.aux; + w->copy_bytes(w, a, bo, aux->access); + break; + } + case IR_AGG_SET: { + Operand a = xlat_op(r, in->opnds[0]); + Operand bo = xlat_op(r, in->opnds[1]); + IRAggAux* aux = (IRAggAux*)in->extra.aux; + w->set_bytes(w, a, bo, aux->access); + break; + } + case IR_BITFIELD_LOAD: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand rec_ = xlat_op(r, in->opnds[1]); + IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; + w->bitfield_load(w, dst, rec_, aux->access); + break; + } + case IR_BITFIELD_STORE: { + Operand rec_ = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; + w->bitfield_store(w, rec_, src, aux->access); + break; + } + case IR_BINOP: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand a = xlat_op(r, 
in->opnds[1]); + Operand bo = xlat_op(r, in->opnds[2]); + w->binop(w, (BinOp)in->extra.imm, dst, a, bo); + break; + } + /* for UNOP, CMP and CONVERT the operator or conversion kind is carried in extra.imm */case IR_UNOP: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand a = xlat_op(r, in->opnds[1]); + w->unop(w, (UnOp)in->extra.imm, dst, a); + break; + } + case IR_CMP: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand a = xlat_op(r, in->opnds[1]); + Operand bo = xlat_op(r, in->opnds[2]); + w->cmp(w, (CmpOp)in->extra.imm, dst, a, bo); + break; + } + case IR_CONVERT: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + w->convert(w, (ConvKind)in->extra.imm, dst, src); + break; + } + /* only calls carrying a replay plan that the target can execute are replayed; anything else goes to compiler_panic */case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (aux && aux->use_plan_replay && w->emit_call_plan && + replay_plan_supported(w, &aux->plan)) { + replay_planned_call(r, aux); + break; + } + compiler_panic(r->c, in->loc, + "opt replay: call has no supported call plan"); + break; + } + /* unconditional branch: jump to the first successor of block b, or emit nothing when the block has none */case IR_BR: { + Block* bl = &r->f->blocks[b]; + if (bl->nsucc < 1) break; + Label l = ensure_label(r, bl->succ[0]); + w->jump(w, l); + break; + } + /* NOTE(review): succ[0] is read here without the nsucc check that IR_BR performs - confirm a block ending in CMP_BRANCH always has a successor */case IR_CMP_BRANCH: { + Operand a = xlat_op(r, in->opnds[0]); + Operand bo = xlat_op(r, in->opnds[1]); + Block* bl = &r->f->blocks[b]; + Label taken = ensure_label(r, bl->succ[0]); + w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken); + break; + } + /* return: without a present value the target gets NULL, otherwise a translated copy of the ABI value whose parts live in a fresh arena array */case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (!aux || !aux->present) { + w->ret(w, NULL); + } else { + CGABIPart* parts = aux->val.nparts ? arena_array(r->f->arena, CGABIPart, + aux->val.nparts) + : NULL; + CGABIValue v = xlat_abivalue(r, &aux->val, parts); + w->ret(w, &v); + } + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + CGScopeDesc d = aux->desc; + d.cond = xlat_op(r, d.cond); + if (aux->desc.kind == SCOPE_LOOP || aux->desc.kind == SCOPE_BLOCK) { + d.break_label = aux->loop_break_block + ? 
ensure_label(r, aux->loop_break_block) + : LABEL_NONE; + /* NOTE(review): a block index of 0 is treated as no break or continue target in these two selections - confirm index 0 can never be a valid target */d.continue_label = aux->loop_continue_block + ? ensure_label(r, aux->loop_continue_block) + : LABEL_NONE; + } + CGScope cs = w->scope_begin(w, &d); + /* remember the target scope handle under the IR scope id for the matching scope instructions */r->scope_map[aux->scope_id] = cs; + break; + } + /* the next four opcodes look up the scope handle by the id held in extra.imm */case IR_SCOPE_ELSE: + w->scope_else(w, r->scope_map[(u32)in->extra.imm]); + break; + case IR_SCOPE_END: + w->scope_end(w, r->scope_map[(u32)in->extra.imm]); + break; + case IR_BREAK_TO: + w->break_to(w, r->scope_map[(u32)in->extra.imm]); + break; + case IR_CONTINUE_TO: + w->continue_to(w, r->scope_map[(u32)in->extra.imm]); + break; + case IR_ALLOCA: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand size = xlat_op(r, in->opnds[1]); + w->alloca_(w, dst, size, (u32)in->extra.imm); + break; + } + case IR_VA_START: { + Operand ap = xlat_op(r, in->opnds[0]); + w->va_start_(w, ap); + break; + } + case IR_VA_ARG: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand ap = xlat_op(r, in->opnds[1]); + /* the type id is stored directly in the aux pointer field rather than behind it */CfreeCgTypeId ty = (CfreeCgTypeId)(uintptr_t)in->extra.aux; + w->va_arg_(w, dst, ap, ty); + break; + } + case IR_VA_END: { + Operand ap = xlat_op(r, in->opnds[0]); + w->va_end_(w, ap); + break; + } + case IR_VA_COPY: { + Operand a = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + w->va_copy_(w, a, src); + break; + } + /* atomics take their memory access and ordering from the aux record, which is dereferenced without a NULL check */case IR_ATOMIC_LOAD: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand addr = xlat_op(r, in->opnds[1]); + IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; + w->atomic_load(w, dst, addr, aux->mem, aux->mo); + break; + } + case IR_ATOMIC_STORE: { + Operand addr = xlat_op(r, in->opnds[0]); + Operand src = xlat_op(r, in->opnds[1]); + IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; + w->atomic_store(w, addr, src, aux->mem, aux->mo); + break; + } + case IR_ATOMIC_RMW: { + Operand dst = xlat_op(r, in->opnds[0]); + Operand addr = xlat_op(r, in->opnds[1]); + Operand val = xlat_op(r, in->opnds[2]); + IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; + w->atomic_rmw(w, (AtomicOp)aux->op, dst, addr, 
val, aux->mem, aux->mo); + break; + } + /* compare-and-swap operand order: prior value, success flag, address, expected, desired */case IR_ATOMIC_CAS: { + Operand prior = xlat_op(r, in->opnds[0]); + Operand ok = xlat_op(r, in->opnds[1]); + Operand addr = xlat_op(r, in->opnds[2]); + Operand expected = xlat_op(r, in->opnds[3]); + Operand desired = xlat_op(r, in->opnds[4]); + IRCasAux* aux = (IRCasAux*)in->extra.aux; + w->atomic_cas(w, prior, ok, addr, expected, desired, aux->mem, + aux->success, aux->failure); + break; + } + case IR_FENCE: + w->fence(w, (MemOrder)in->extra.imm); + break; + /* intrinsic: destinations and arguments are translated into fresh arena arrays before the hook is called */case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + Operand* dsts = + aux->ndst ? arena_array(r->f->arena, Operand, aux->ndst) : NULL; + Operand* args = + aux->narg ? arena_array(r->f->arena, Operand, aux->narg) : NULL; + for (u32 k = 0; k < aux->ndst; ++k) dsts[k] = xlat_op(r, aux->dsts[k]); + for (u32 k = 0; k < aux->narg; ++k) args[k] = xlat_op(r, aux->args[k]); + w->intrinsic(w, aux->kind, dsts, aux->ndst, args, aux->narg); + break; + } + } +} + +/* Replays block b: places its label if not yet placed, then replays its instructions in order. An out-of-range b is ignored. */static void replay_block(ReplayCtx* r, u32 b) { + Func* f = r->f; + if (b >= f->nblocks) return; + ensure_label_placed(r, b); + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + replay_inst(r, b, &bl->insts[i]); + } +} + +/* Appends r to used unless it is already present. When the array already holds cap entries the register is silently dropped. */static void add_unique_reg(Reg* used, u32* nused, u32 cap, Reg r) { + for (u32 i = 0; i < *nused; ++i) { + if (used[i] == r) return; + } + if (*nused < cap) used[(*nused)++] = r; +} + +/* Records the register named by op when it belongs to class cls: a register operand of that class, or the base register of an indirect operand when cls is RC_INT. A NULL op is ignored. */static void collect_replayed_operand_reg(const Operand* op, RegClass cls, + Reg* used, u32* nused, u32 cap) { + if (!op) return; + if (op->kind == OPK_REG) { + if (op->cls == cls) add_unique_reg(used, nused, cap, op->v.reg); + } else if (op->kind == OPK_INDIRECT) { + if (cls == RC_INT) add_unique_reg(used, nused, cap, op->v.ind.base); + } +} + +/* Records the registers of class cls named by ABI value v: its storage operand and every part operand. A NULL v is ignored. */static void collect_replayed_abivalue_regs(const CGABIValue* v, RegClass cls, + Reg* used, u32* nused, u32 cap) { + if (!v) return; + collect_replayed_operand_reg(&v->storage, cls, used, nused, cap); + for (u32 i = 0; i < v->nparts; ++i) + 
collect_replayed_operand_reg(&v->parts[i].op, cls, used, nused, cap); +} + +/* Records the hard registers of class cls assigned to register-stored parameters of f. Does nothing unless f is opt_rewritten and has val_info; parameters whose value is VAL_NONE, out of range, or not allocated to a hard register of this class are skipped. */static void collect_replayed_param_regs(Func* f, RegClass cls, Reg* used, + u32* nused, u32 cap) { + if (!f->opt_rewritten || !f->val_info) return; + for (u32 i = 0; i < f->nparams; ++i) { + IRParam* p = &f->params[i]; + if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue; + Val v = (Val)p->storage.v.reg; + if (v == VAL_NONE || v >= f->nvals) continue; + OptValInfo* vi = &f->val_info[v]; + if (vi->alloc_kind != OPT_ALLOC_HARD || vi->cls != cls) continue; + add_unique_reg(used, nused, cap, vi->hard_reg); + } +} + +/* Fills used with the distinct registers of class cls referenced by f and returns how many were stored (at most cap). Sources are the parameter registers, every instruction operand except those of IR_PARAM_DECL, and the operands held in the aux records of calls, returns, scope begins, asm blocks and intrinsics. */static u32 collect_replayed_hard_regs(Func* f, CGTarget* w, RegClass cls, + Reg* used, u32 cap) { + u32 nused = 0; + collect_replayed_param_regs(f, cls, used, &nused, cap); + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_PARAM_DECL) continue; + for (u32 j = 0; j < in->nopnds; ++j) + collect_replayed_operand_reg(&in->opnds[j], cls, used, &nused, cap); + + switch ((IROp)in->op) { + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + /* planned calls contribute the callee, argument sources, register argument destinations, return destinations and return source registers */if (aux->use_plan_replay) { + collect_replayed_operand_reg(&aux->plan.callee, cls, used, &nused, + cap); + for (u32 j = 0; j < aux->plan.nargs; ++j) { + collect_replayed_operand_reg(&aux->plan.args[j].src, cls, used, + &nused, cap); + if (aux->plan.args[j].dst_kind == CG_CALL_PLAN_REG && + aux->plan.args[j].cls == (u8)cls) + add_unique_reg(used, &nused, cap, aux->plan.args[j].dst_reg); + } + for (u32 j = 0; j < aux->plan.nrets; ++j) { + collect_replayed_operand_reg(&aux->plan.rets[j].dst, cls, used, + &nused, cap); + if (aux->plan.rets[j].cls == (u8)cls) + add_unique_reg(used, &nused, cap, aux->plan.rets[j].src_reg); + } + } else { + collect_replayed_operand_reg(&aux->desc.callee, cls, used, &nused, + cap); + for (u32 j = 0; j < aux->desc.nargs; ++j) + collect_replayed_abivalue_regs(&aux->desc.args[j], cls, 
used, + &nused, cap); + collect_replayed_abivalue_regs(&aux->desc.ret, cls, used, &nused, + cap); + } + break; + } + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) + collect_replayed_abivalue_regs(&aux->val, cls, used, &nused, cap); + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + if (aux) + collect_replayed_operand_reg(&aux->desc.cond, cls, used, &nused, + cap); + break; + } + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) break; + for (u32 j = 0; j < aux->nin; ++j) + collect_replayed_operand_reg(&aux->in_ops[j], cls, used, &nused, + cap); + for (u32 j = 0; j < aux->nout; ++j) + collect_replayed_operand_reg(&aux->out_ops[j], cls, used, &nused, + cap); + break; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) break; + for (u32 j = 0; j < aux->narg; ++j) + collect_replayed_operand_reg(&aux->args[j], cls, used, &nused, cap); + for (u32 j = 0; j < aux->ndst; ++j) + collect_replayed_operand_reg(&aux->dsts[j], cls, used, &nused, cap); + break; + } + default: + break; + } + } + } + if (w->resolve_reg_name) { + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op != IR_ASM_BLOCK) continue; + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) continue; + for (u32 j = 0; j < aux->nclob; ++j) { + Reg r; + RegClass rcls; + if (w->resolve_reg_name(w, aux->clobbers[j], &r, &rcls) != 0) + continue; + if (rcls == cls) add_unique_reg(used, &nused, cap, r); + } + } + } + } + return nused; +} + +static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) { + memset(out, 0, sizeof(*out)); + FrameSlotDesc* slots = NULL; + if (f->nframe_slots) { + slots = arena_zarray(f->arena, FrameSlotDesc, f->nframe_slots); + for (u32 i = 0; i < f->nframe_slots; ++i) { + IRFrameSlot* s = &f->frame_slots[i]; + slots[i].type = s->type; 
+ slots[i].name = s->name; + slots[i].loc = s->loc; + slots[i].size = s->size; + slots[i].align = s->align; + slots[i].kind = s->kind; + slots[i].flags = s->flags; + } + } + out->slots = slots; + out->nslots = f->nframe_slots; + + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_ALLOCA) { + out->has_alloca = 1; + } else if ((IROp)in->op == IR_CALL) { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) { + out->has_call = 1; + continue; + } + if ((aux->desc.flags & CG_CALL_TAIL) == 0) out->has_call = 1; + if ((aux->desc.flags & CG_CALL_TAIL) != 0) continue; + if (!w->call_stack_size) continue; + u32 need = w->call_stack_size(w, &aux->desc); + if (need > out->max_outgoing) out->max_outgoing = need; + } + } + } + out->may_omit_frame = (!out->has_call && !out->has_alloca && + out->nslots == 0 && out->max_outgoing == 0) + ? 1u + : 0u; +} + +static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { + ReplayCtx r; + r.c = c; + r.f = f; + r.tgt = w; + r.identity_regs = identity ? 1u : 0u; + cg_simple_regalloc_init(&r.regalloc); + u32 nv = f->nvals ? f->nvals : 1u; + r.val_to_reg = arena_zarray(f->arena, Reg, nv); + for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE; + r.val_alloced = arena_zarray(f->arena, u8, nv); + r.slot_map = arena_zarray(f->arena, FrameSlot, f->nframe_slots + 1u); + for (u32 i = 0; i <= f->nframe_slots; ++i) r.slot_map[i] = FRAME_SLOT_NONE; + u32 nb = f->nblocks ? 
f->nblocks : 1u; + r.label_map = arena_zarray(f->arena, Label, nb); + for (u32 i = 0; i < f->nblocks; ++i) r.label_map[i] = LABEL_NONE; + r.scope_map = arena_zarray(f->arena, CGScope, f->nscopes + 1u); + for (u32 i = 0; i <= f->nscopes; ++i) r.scope_map[i] = CG_SCOPE_NONE; + r.block_label_placed = arena_zarray(f->arena, u8, nb); + + if (identity && w->plan_hard_regs) { + for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { + Reg used[OPT_MAX_HARD_REGS]; + u32 nused = collect_replayed_hard_regs(f, w, (RegClass)cidx, used, + OPT_MAX_HARD_REGS); + w->plan_hard_regs(w, (RegClass)cidx, used, nused); + } + } + + int known_frame = identity && w->func_begin_known_frame && w->call_stack_size; + if (known_frame) { + CGKnownFrameDesc frame; + FrameSlot* target_slots = + f->nframe_slots ? arena_zarray(f->arena, FrameSlot, f->nframe_slots) + : NULL; + collect_known_frame(f, w, &frame); + w->func_begin_known_frame(w, &f->desc, &frame, target_slots); + for (u32 i = 0; i < f->nframe_slots; ++i) + r.slot_map[f->frame_slots[i].id] = target_slots[i]; + } else { + /* func_begin with the recorded descriptor. Parameter storage is replayed + * through target->param below after frame slots are mapped. 
*/ + w->func_begin(w, &f->desc); + } + + if (!r.identity_regs) { + for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { + const Reg* regs = NULL; + u32 nregs = 0; + if (w->get_allocable_regs) + w->get_allocable_regs(w, (RegClass)cidx, &regs, &nregs); + if (regs && nregs) + cg_simple_regalloc_set_ordered(&r.regalloc, (RegClass)cidx, regs, + nregs); + } + } + + if (!known_frame) { + for (u32 i = 0; i < f->nframe_slots; ++i) { + IRFrameSlot* s = &f->frame_slots[i]; + FrameSlotDesc d = {0}; + d.type = s->type; + d.name = s->name; + d.loc = s->loc; + d.size = s->size; + d.align = s->align; + d.kind = s->kind; + d.flags = s->flags; + r.slot_map[s->id] = w->frame_slot(w, &d); + } + } + + for (u32 i = 0; i < f->nparams; ++i) { + IRParam* p = &f->params[i]; + CGParamDesc d = {0}; + d.index = p->index; + d.name = p->name; + d.type = p->type; + d.size = p->size; + d.align = p->align; + d.flags = p->flags; + if (replay_reg_storage_unused(&r, p->storage)) { + d.storage = p->storage; + d.storage.v.reg = REG_NONE; + } else { + d.storage = xlat_storage(&r, p->storage, p->type); + } + d.abi = p->abi; + d.loc = p->loc; + (void)w->param(w, &d); + } + + /* Body in emit order — the order CG's emit cursor visited each + * block. Block-creation order can differ when label_new precedes a + * cmp_branch whose fallthrough block must physically follow. */ + for (u32 i = 0; i < f->emit_order_n; ++i) { + replay_block(&r, f->emit_order[i]); + } + + /* At -O1, opt managed allocation and emitted hard regs directly, + * bypassing backend-local allocation. Tell the backend which hard + * regs are still visible in replay so it can save the right callee-saved + * subset in prologue/epilogue. + * + * The backend records only callee-saved members of this set for + * prologue/epilogue preservation. 
*/ + if (r.identity_regs && w->reserve_hard_regs) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + Reg used[OPT_MAX_HARD_REGS]; + u32 nused = collect_replayed_hard_regs(f, w, (RegClass)c, used, + OPT_MAX_HARD_REGS); + if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); + } + } else if (!r.identity_regs && w->reserve_hard_regs) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; + u32 nused = cg_simple_regalloc_used_regs(&r.regalloc, (RegClass)c, used, + CG_SIMPLE_REGALLOC_MAX_REGS); + if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); + } + } + + w->func_end(w); +} + +void opt_replay(Compiler* c, Func* f, CGTarget* target) { + replay_func_to(c, f, target, 0); +} + +void opt_emit(Compiler* c, Func* f, CGTarget* target) { + replay_func_to(c, f, target, 1); +} diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c @@ -0,0 +1,269 @@ +#include <string.h> + +#include "core/arena.h" +#include "opt/opt_internal.h" + +u32 opt_call_clobber_mask_for(Func* f, const Inst* in, u8 cls) { + if (cls >= OPT_REG_CLASSES) return 0; + if (in && (IROp)in->op == IR_CALL) { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (aux && aux->plan_valid) return aux->plan.clobber_mask[cls]; + } + return f->opt_caller_saved[cls]; +} +static void hard_add(OptHardRegSet* s, u8 cls, Reg r) { + if (cls >= OPT_REG_CLASSES || r >= 32) return; + s->cls[cls] |= 1u << r; +} + +int opt_hard_empty(const OptHardRegSet* s) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + if (s->cls[c]) return 0; + return 1; +} + +int opt_hard_intersects(const OptHardRegSet* a, const OptHardRegSet* b) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + if (a->cls[c] & b->cls[c]) return 1; + return 0; +} + +static int hard_eq(const OptHardRegSet* a, const OptHardRegSet* b) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + if (a->cls[c] != b->cls[c]) return 0; + return 1; +} + +static void hard_or(OptHardRegSet* dst, const OptHardRegSet* src) { + for (u32 c = 0; c < 
OPT_REG_CLASSES; ++c) dst->cls[c] |= src->cls[c]; +} + +static void hard_live_in_from_out(OptHardRegSet* dst, const OptHardRegSet* use, + const OptHardRegSet* out, + const OptHardRegSet* def) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + dst->cls[c] = use->cls[c] | (out->cls[c] & ~def->cls[c]); +} + +void opt_hard_live_step(OptHardRegSet* live, const OptHardRegSet* use, + const OptHardRegSet* def) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + live->cls[c] = (live->cls[c] & ~def->cls[c]) | use->cls[c]; +} + +static void hard_use_operand(OptHardRegSet* s, const Operand* op) { + if (!op) return; + if (op->kind == OPK_REG) { + hard_add(s, op->cls, op->v.reg); + } else if (op->kind == OPK_INDIRECT) { + hard_add(s, RC_INT, op->v.ind.base); + } +} + +static void hard_def_operand(OptHardRegSet* s, const Operand* op) { + if (op && op->kind == OPK_REG) hard_add(s, op->cls, op->v.reg); +} + +static void hard_use_abivalue(OptHardRegSet* use, const CGABIValue* v) { + if (!v) return; + hard_use_operand(use, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) hard_use_operand(use, &v->parts[i].op); +} + +static void hard_def_abivalue(OptHardRegSet* def, const CGABIValue* v) { + if (!v) return; + hard_def_operand(def, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) hard_def_operand(def, &v->parts[i].op); +} + +void opt_hard_inst_use_def(Func* f, const Inst* in, OptHardRegSet* use, + OptHardRegSet* def) { + memset(use, 0, sizeof *use); + memset(def, 0, sizeof *def); + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_LOAD_CONST: + case IR_TLS_ADDR_OF: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + break; + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + case IR_VA_ARG: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_LOAD: + case IR_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_ATOMIC_LOAD: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if 
(in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_BINOP: + case IR_CMP: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + break; + case IR_STORE: + case IR_AGG_COPY: + case IR_AGG_SET: + case IR_BITFIELD_STORE: + case IR_VA_COPY: + if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + if (aux->use_plan_replay) { + hard_use_operand(use, &aux->plan.callee); + for (u32 i = 0; i < aux->plan.nargs; ++i) { + hard_use_operand(use, &aux->plan.args[i].src); + if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG) + hard_add(def, aux->plan.args[i].cls, aux->plan.args[i].dst_reg); + } + for (u32 i = 0; i < aux->plan.nrets; ++i) { + hard_add(def, aux->plan.rets[i].cls, aux->plan.rets[i].src_reg); + hard_def_operand(def, &aux->plan.rets[i].dst); + } + } else { + hard_use_operand(use, &aux->desc.callee); + for (u32 i = 0; i < aux->desc.nargs; ++i) + hard_use_abivalue(use, &aux->desc.args[i]); + hard_def_abivalue(def, &aux->desc.ret); + } + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + def->cls[c] |= opt_call_clobber_mask_for(f, in, (u8)c); + break; + } + case IR_CMP_BRANCH: + case IR_CONDBR: + for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]); + break; + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) hard_use_abivalue(use, &aux->val); + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + if (aux) hard_use_operand(use, &aux->desc.cond); + break; + } + case IR_ALLOCA: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_VA_START: + case IR_VA_END: + if (in->nopnds >= 1) 
hard_use_operand(use, &in->opnds[0]); + break; + case IR_ATOMIC_STORE: + if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_ATOMIC_RMW: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + break; + case IR_ATOMIC_CAS: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_def_operand(def, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + if (in->nopnds >= 4) hard_use_operand(use, &in->opnds[3]); + if (in->nopnds >= 5) hard_use_operand(use, &in->opnds[4]); + break; + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->nin; ++i) hard_use_operand(use, &aux->in_ops[i]); + for (u32 i = 0; i < aux->nout; ++i) + hard_def_operand(def, &aux->out_ops[i]); + break; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->narg; ++i) hard_use_operand(use, &aux->args[i]); + for (u32 i = 0; i < aux->ndst; ++i) hard_def_operand(def, &aux->dsts[i]); + break; + } + default: + break; + } +} + +static void hard_live_blocks(Func* f, OptHardBlockLive* live) { + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + OptHardRegSet seen_def; + memset(&seen_def, 0, sizeof seen_def); + memset(&live[b], 0, sizeof live[b]); + for (u32 i = 0; i < bl->ninsts; ++i) { + OptHardRegSet use, def; + opt_hard_inst_use_def(f, &bl->insts[i], &use, &def); + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + live[b].live_use.cls[c] |= use.cls[c] & ~seen_def.cls[c]; + hard_or(&seen_def, &def); + hard_or(&live[b].live_def, &def); + } + } + + int changed; + do { + changed = 0; + for (u32 bi = f->nblocks; bi > 0; --bi) { + u32 b = bi - 1u; + Block* bl = &f->blocks[b]; + OptHardRegSet new_out, new_in; + 
memset(&new_out, 0, sizeof new_out); + for (u32 s = 0; s < bl->nsucc; ++s) { + u32 t = bl->succ[s]; + if (t < f->nblocks) hard_or(&new_out, &live[t].live_in); + } + hard_live_in_from_out(&new_in, &live[b].live_use, &new_out, + &live[b].live_def); + if (!hard_eq(&live[b].live_out, &new_out)) { + live[b].live_out = new_out; + changed = 1; + } + if (!hard_eq(&live[b].live_in, &new_in)) { + live[b].live_in = new_in; + changed = 1; + } + } + } while (changed); +} + +static int hard_live_out_has_phys_reg(const OptHardBlockLive* live, + const Operand* r) { + if (!live || !r || r->kind != OPK_REG || r->cls >= OPT_REG_CLASSES || + r->v.reg >= 32) + return 0; + return (live->live_out.cls[r->cls] & (1u << r->v.reg)) != 0; +} + +OptHardBlockLive* opt_maybe_build_hard_live(Func* f) { + if (!f->opt_rewritten) return NULL; + OptHardBlockLive* live = + arena_zarray(f->arena, OptHardBlockLive, f->nblocks ? f->nblocks : 1u); + hard_live_blocks(f, live); + return live; +} + +OptHardRegSet opt_hard_live_out_for_block(const OptHardBlockLive* live) { + OptHardRegSet out; + memset(&out, 0, sizeof out); + if (live) out = live->live_out; + return out; +} + +int opt_block_live_out_has_phys_reg(Func* f, const OptHardBlockLive* hard_live, + u32 block, const Operand* r) { + (void)f; + if (!hard_live || block >= f->nblocks) return 0; + return hard_live_out_has_phys_reg(&hard_live[block], r); +} diff --git a/src/opt/pass_loop.c b/src/opt/pass_loop.c @@ -0,0 +1,150 @@ +#include <string.h> + +#include "core/arena.h" +#include "opt/opt.h" + +#define OPT_BLK_NONE 0xffffffffu + +typedef struct LoopPostorderCtx { + Func* f; + u32* po; + u32* po_idx; + u8* visited; + u32 count; +} LoopPostorderCtx; + +static void loop_postorder_dfs(LoopPostorderCtx* ctx, u32 b) { + if (b >= ctx->f->nblocks || ctx->visited[b]) return; + ctx->visited[b] = 1; + Block* bl = &ctx->f->blocks[b]; + for (u32 s = 0; s < bl->nsucc; ++s) { + u32 t = bl->succ[s]; + if (t < ctx->f->nblocks) loop_postorder_dfs(ctx, t); + } + 
ctx->po[ctx->count] = b; + ctx->po_idx[b] = ctx->count; + ctx->count++; +} + +static u32 loop_dom_intersect(u32 b1, u32 b2, const u32* idom, + const u32* po_idx) { + while (b1 != b2) { + while (po_idx[b1] < po_idx[b2]) b1 = idom[b1]; + while (po_idx[b2] < po_idx[b1]) b2 = idom[b2]; + } + return b1; +} + +static u32* loop_compute_idom(Func* f, const u8* visited, const u32* po, + const u32* po_idx, u32 npo, u32 entry) { + u32* idom = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); + for (u32 b = 0; b < f->nblocks; ++b) idom[b] = OPT_BLK_NONE; + idom[entry] = entry; + + int changed = 1; + while (changed) { + changed = 0; + for (u32 ri = npo; ri > 0; --ri) { + u32 b = po[ri - 1u]; + if (b == entry) continue; + Block* bl = &f->blocks[b]; + u32 new_idom = OPT_BLK_NONE; + for (u32 p = 0; p < bl->npreds; ++p) { + u32 pp = bl->preds[p]; + if (pp >= f->nblocks || !visited[pp]) continue; + if (idom[pp] == OPT_BLK_NONE) continue; + new_idom = (new_idom == OPT_BLK_NONE) + ? pp + : loop_dom_intersect(pp, new_idom, idom, po_idx); + } + if (new_idom != OPT_BLK_NONE && idom[b] != new_idom) { + idom[b] = new_idom; + changed = 1; + } + } + } + return idom; +} + +static int loop_dominates(const u32* idom, u32 entry, u32 dom, u32 node) { + u32 cur = node; + while (cur != OPT_BLK_NONE) { + if (cur == dom) return 1; + if (cur == entry) break; + cur = idom[cur]; + } + return 0; +} + +static void loop_mark_body(Func* f, const u8* visited, u32 header, u32 latch, + u8* body, u32* stack) { + u32 sp = 0; + if (!body[header]) body[header] = 1; + if (!body[latch]) { + body[latch] = 1; + stack[sp++] = latch; + } + + while (sp) { + u32 b = stack[--sp]; + if (b == header) continue; + Block* bl = &f->blocks[b]; + for (u32 p = 0; p < bl->npreds; ++p) { + u32 pred = bl->preds[p]; + if (pred >= f->nblocks || !visited[pred] || body[pred]) continue; + body[pred] = 1; + stack[sp++] = pred; + } + } +} + +static u32 loop_frequency(u8 depth) { + u8 capped = depth > 10 ? 
10 : depth; + return 1u << capped; +} + +void opt_build_loop_tree(Func* f) { + for (u32 b = 0; b < f->nblocks; ++b) { + f->blocks[b].loop_depth = 0; + f->blocks[b].frequency = 1; + } + if (f->nblocks == 0 || f->entry >= f->nblocks) return; + + LoopPostorderCtx pctx; + memset(&pctx, 0, sizeof pctx); + pctx.f = f; + pctx.po = arena_array(f->arena, u32, f->nblocks); + pctx.po_idx = arena_array(f->arena, u32, f->nblocks); + pctx.visited = arena_zarray(f->arena, u8, f->nblocks); + loop_postorder_dfs(&pctx, f->entry); + if (pctx.count == 0) return; + + u32* idom = loop_compute_idom(f, pctx.visited, pctx.po, pctx.po_idx, + pctx.count, f->entry); + u8* body = arena_zarray(f->arena, u8, f->nblocks); + u32* stack = arena_array(f->arena, u32, f->nblocks); + + for (u32 header = 0; header < f->nblocks; ++header) { + if (!pctx.visited[header]) continue; + memset(body, 0, f->nblocks * sizeof body[0]); + int has_loop = 0; + + for (u32 latch = 0; latch < f->nblocks; ++latch) { + if (!pctx.visited[latch]) continue; + Block* lb = &f->blocks[latch]; + for (u32 s = 0; s < lb->nsucc; ++s) { + if (lb->succ[s] != header) continue; + if (!loop_dominates(idom, f->entry, header, latch)) continue; + has_loop = 1; + loop_mark_body(f, pctx.visited, header, latch, body, stack); + } + } + + if (!has_loop) continue; + for (u32 b = 0; b < f->nblocks; ++b) + if (body[b] && f->blocks[b].loop_depth < 31) ++f->blocks[b].loop_depth; + } + + for (u32 b = 0; b < f->nblocks; ++b) + f->blocks[b].frequency = loop_frequency(f->blocks[b].loop_depth); +} diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -6,7 +6,7 @@ #include "core/core.h" #include "core/metrics.h" #include "core/pool.h" -#include "opt/opt.h" +#include "opt/opt_internal.h" enum { OPT_CG_TYPE_SEG_SHIFT = 6, @@ -216,15 +216,12 @@ static int refs_has_def(const InstRefs* refs, Val v) { } static void live_update_refs_before(u64* live, const InstRefs* refs) { - for (u32 i = 0; i < refs->ndefs; ++i) - bit_clear(live, refs->defs[i]); - for 
(u32 i = 0; i < refs->nuses; ++i) - bit_set(live, refs->uses[i]); + for (u32 i = 0; i < refs->ndefs; ++i) bit_clear(live, refs->defs[i]); + for (u32 i = 0; i < refs->nuses; ++i) bit_set(live, refs->uses[i]); } static u32 live_update_refs_before_active(u64* live, u32 active_words, - u32 nwords, - const InstRefs* refs) { + u32 nwords, const InstRefs* refs) { for (u32 i = 0; i < refs->ndefs; ++i) { Val v = refs->defs[i]; if (v == VAL_NONE) continue; @@ -366,8 +363,7 @@ static void apply_param_incoming_register_hazards(Func* f) { if (has_incoming[i] && incoming_cls[i] == cls && f->val_info[v].tied_hard_reg < 0 && f->val_info[v].live_across_call_freq == 0 && - hard_available(f, cls, incoming_regs[i]) && - incoming_regs[i] < 32 && + hard_available(f, cls, incoming_regs[i]) && incoming_regs[i] < 32 && (f->val_info[v].forbidden_hard_regs & (1u << incoming_regs[i])) == 0) { f->val_info[v].tied_hard_reg = (i32)incoming_regs[i]; } @@ -378,173 +374,6 @@ static void apply_param_incoming_register_hazards(Func* f) { } } -static int mem_observable(const MemAccess* m) { - return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0; -} - -static const char* asm_constraint_body(const char* s) { - if (!s) return ""; - if (s[0] == '=' && s[1] == '&') return s + 2; - if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; - return s; -} - -static int asm_resolve_fixed_constraint(Func* f, CGTarget* target, - const char* constraint, Reg* reg_out, - RegClass* cls_out) { - const char* body = asm_constraint_body(constraint); - if (!target->resolve_reg_name) return 0; - if (body[0] != '{') return 0; - const char* end = body + 1; - while (*end && *end != '}') ++end; - if (*end != '}' || end == body + 1) return 0; - Sym name = pool_intern(f->c->global, body + 1, (size_t)(end - body - 1)); - return target->resolve_reg_name(target, name, reg_out, cls_out) == 0; -} - -static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) { - if (!aux) return; - for (u32 c = 0; c < 
OPT_REG_CLASSES; ++c) aux->clobber_mask[c] = 0; - - if (aux->nout && !aux->out_fixed_regs) { - aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout); - aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout); - for (u32 i = 0; i < aux->nout; ++i) aux->out_fixed_regs[i] = -1; - } - if (aux->nin && !aux->in_fixed_regs) { - aux->in_fixed_regs = arena_array(f->arena, i32, aux->nin); - aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin); - for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1; - } - - if (target->resolve_reg_name) { - for (u32 i = 0; i < aux->nclob; ++i) { - Reg r; - RegClass cls; - if (target->resolve_reg_name(target, aux->clobbers[i], &r, &cls) != 0) - continue; - if ((u32)cls < OPT_REG_CLASSES && r < 32) - aux->clobber_mask[cls] |= 1u << r; - } - } - - for (u32 i = 0; i < aux->nout; ++i) { - Reg r; - RegClass cls; - if (asm_resolve_fixed_constraint(f, target, aux->outs[i].str, &r, &cls)) { - aux->out_fixed_regs[i] = (i32)r; - aux->out_fixed_cls[i] = (u8)cls; - } - } - for (u32 i = 0; i < aux->nin; ++i) { - Reg r; - RegClass cls; - if (asm_resolve_fixed_constraint(f, target, aux->ins[i].str, &r, &cls)) { - aux->in_fixed_regs[i] = (i32)r; - aux->in_fixed_cls[i] = (u8)cls; - } - } -} - -static int call_plan_replay_supported(const IRCallAux* aux, - const CGTarget* target); - -void opt_machinize(Func* f, CGTarget* target) { - f->opt_target = target->c->target; - f->opt_has_target = 1; - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - f->opt_hard_reg_count[c] = 0; - f->opt_phys_reg_count[c] = 0; - f->opt_scratch_reg_count[c] = 0; - f->opt_caller_saved[c] = 0; - f->opt_callee_saved[c] = 0; - f->opt_reserved_regs[c] = 0; - f->opt_arg_regs[c] = 0; - f->opt_ret_regs[c] = 0; - } - - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_ASM_BLOCK) { - asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux); - } else if 
((IROp)in->op == IR_CALL && target->plan_call) { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (aux) { - target->plan_call(target, &aux->desc, &aux->plan); - aux->plan_valid = 1; - aux->use_plan_replay = call_plan_replay_supported(aux, target); - } - } - } - } - - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - const CGPhysRegInfo* phys = NULL; - u32 nphys = 0; - if (target->get_phys_regs) - target->get_phys_regs(target, (RegClass)c, &phys, &nphys); - if (phys) { - for (u32 i = 0; i < nphys && i < OPT_MAX_HARD_REGS; ++i) { - CGPhysRegInfo pi = phys[i]; - Reg hr = pi.reg; - if (hr < 32u) { - if (pi.flags & CG_REG_CALLER_SAVED) f->opt_caller_saved[c] |= 1u << hr; - if (pi.flags & CG_REG_CALLEE_SAVED) f->opt_callee_saved[c] |= 1u << hr; - if (pi.flags & CG_REG_RESERVED) f->opt_reserved_regs[c] |= 1u << hr; - if (pi.flags & CG_REG_ARG) f->opt_arg_regs[c] |= 1u << hr; - if (pi.flags & CG_REG_RET) f->opt_ret_regs[c] |= 1u << hr; - } - f->opt_phys_regs[c][f->opt_phys_reg_count[c]++] = pi; - if ((pi.flags & CG_REG_ALLOCABLE) && - !(pi.flags & CG_REG_RESERVED)) { - f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hr; - } - } - } else { - const Reg* hard = NULL; - u32 nhard = 0; - if (target->get_allocable_regs) - target->get_allocable_regs(target, (RegClass)c, &hard, &nhard); - for (u32 i = 0; i < nhard && i < OPT_MAX_HARD_REGS; ++i) - f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hard[i]; - } - - const Reg* scratch = NULL; - u32 nscratch = 0; - if (target->get_scratch_regs) - target->get_scratch_regs(target, (RegClass)c, &scratch, &nscratch); - for (u32 i = 0; i < nscratch && i < OPT_MAX_SCRATCH_REGS; ++i) - f->opt_scratch_regs[c][f->opt_scratch_reg_count[c]++] = scratch[i]; - - if (!phys && target->is_caller_saved) { - for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) { - Reg hr = f->opt_hard_regs[c][i]; - if (target->is_caller_saved(target, (RegClass)c, hr)) - f->opt_caller_saved[c] |= (1u << hr); - } - } - if (target->callee_save_mask) - f->opt_callee_saved[c] 
|= target->callee_save_mask(target, (RegClass)c); - } - - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) { - Reg hr = f->opt_hard_regs[c][i]; - for (u32 s = 0; s < f->opt_scratch_reg_count[c]; ++s) { - if (f->opt_scratch_regs[c][s] == hr) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(f->c, loc, - "opt_machinize: hard reg %u overlaps scratch reg " - "in class %u", - (unsigned)hr, (unsigned)c); - } - } - } - } -} - static int is_caller_saved(Func* f, u8 cls, Reg r) { if (cls >= OPT_REG_CLASSES || r >= 32) return 0; return (f->opt_caller_saved[cls] & (1u << r)) != 0; @@ -552,190 +381,6 @@ static int is_caller_saved(Func* f, u8 cls, Reg r) { typedef struct OptAllocator OptAllocator; -static u32 call_clobber_mask_for(Func* f, const Inst* in, u8 cls) { - if (cls >= OPT_REG_CLASSES) return 0; - if (in && (IROp)in->op == IR_CALL) { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (aux && aux->plan_valid) - return aux->plan.clobber_mask[cls]; - } - return f->opt_caller_saved[cls]; -} - -static int call_plan_replay_supported(const IRCallAux* aux, - const CGTarget* target) { - if (!aux || !aux->plan_valid || !target || !target->emit_call_plan) return 0; - for (u32 i = 0; i < aux->plan.nargs; ++i) { - if ((aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK || - aux->plan.args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && - !target->store_call_arg) - return 0; - if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG && - (aux->plan.args[i].src_kind == CG_CALL_PLAN_SRC_ADDR || - aux->plan.args[i].src_offset) && - !target->load_call_arg) - return 0; - } - for (u32 i = 0; i < aux->plan.nrets; ++i) - if (aux->plan.rets[i].dst.kind != OPK_REG && - aux->plan.rets[i].dst.kind != OPK_LOCAL && - aux->plan.rets[i].dst.kind != OPK_INDIRECT) - return 0; - for (u32 i = 0; i < aux->plan.nrets; ++i) - if (aux->plan.rets[i].dst_offset && - (aux->plan.rets[i].dst.kind == OPK_LOCAL || - aux->plan.rets[i].dst.kind == OPK_INDIRECT) && - 
!target->store_call_ret) - return 0; - return 1; -} - -#define OPT_BLK_NONE 0xffffffffu - -typedef struct LoopPostorderCtx { - Func* f; - u32* po; - u32* po_idx; - u8* visited; - u32 count; -} LoopPostorderCtx; - -static void loop_postorder_dfs(LoopPostorderCtx* ctx, u32 b) { - if (b >= ctx->f->nblocks || ctx->visited[b]) return; - ctx->visited[b] = 1; - Block* bl = &ctx->f->blocks[b]; - for (u32 s = 0; s < bl->nsucc; ++s) { - u32 t = bl->succ[s]; - if (t < ctx->f->nblocks) loop_postorder_dfs(ctx, t); - } - ctx->po[ctx->count] = b; - ctx->po_idx[b] = ctx->count; - ctx->count++; -} - -static u32 loop_dom_intersect(u32 b1, u32 b2, const u32* idom, - const u32* po_idx) { - while (b1 != b2) { - while (po_idx[b1] < po_idx[b2]) b1 = idom[b1]; - while (po_idx[b2] < po_idx[b1]) b2 = idom[b2]; - } - return b1; -} - -static u32* loop_compute_idom(Func* f, const u8* visited, const u32* po, - const u32* po_idx, u32 npo, u32 entry) { - u32* idom = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); - for (u32 b = 0; b < f->nblocks; ++b) idom[b] = OPT_BLK_NONE; - idom[entry] = entry; - - int changed = 1; - while (changed) { - changed = 0; - for (u32 ri = npo; ri > 0; --ri) { - u32 b = po[ri - 1u]; - if (b == entry) continue; - Block* bl = &f->blocks[b]; - u32 new_idom = OPT_BLK_NONE; - for (u32 p = 0; p < bl->npreds; ++p) { - u32 pp = bl->preds[p]; - if (pp >= f->nblocks || !visited[pp]) continue; - if (idom[pp] == OPT_BLK_NONE) continue; - new_idom = (new_idom == OPT_BLK_NONE) - ? 
pp - : loop_dom_intersect(pp, new_idom, idom, po_idx); - } - if (new_idom != OPT_BLK_NONE && idom[b] != new_idom) { - idom[b] = new_idom; - changed = 1; - } - } - } - return idom; -} - -static int loop_dominates(const u32* idom, u32 entry, u32 dom, u32 node) { - u32 cur = node; - while (cur != OPT_BLK_NONE) { - if (cur == dom) return 1; - if (cur == entry) break; - cur = idom[cur]; - } - return 0; -} - -static void loop_mark_body(Func* f, const u8* visited, u32 header, u32 latch, - u8* body, u32* stack) { - u32 sp = 0; - if (!body[header]) body[header] = 1; - if (!body[latch]) { - body[latch] = 1; - stack[sp++] = latch; - } - - while (sp) { - u32 b = stack[--sp]; - if (b == header) continue; - Block* bl = &f->blocks[b]; - for (u32 p = 0; p < bl->npreds; ++p) { - u32 pred = bl->preds[p]; - if (pred >= f->nblocks || !visited[pred] || body[pred]) continue; - body[pred] = 1; - stack[sp++] = pred; - } - } -} - -static u32 loop_frequency(u8 depth) { - u8 capped = depth > 10 ? 10 : depth; - return 1u << capped; -} - -void opt_build_loop_tree(Func* f) { - for (u32 b = 0; b < f->nblocks; ++b) { - f->blocks[b].loop_depth = 0; - f->blocks[b].frequency = 1; - } - if (f->nblocks == 0 || f->entry >= f->nblocks) return; - - LoopPostorderCtx pctx; - memset(&pctx, 0, sizeof pctx); - pctx.f = f; - pctx.po = arena_array(f->arena, u32, f->nblocks); - pctx.po_idx = arena_array(f->arena, u32, f->nblocks); - pctx.visited = arena_zarray(f->arena, u8, f->nblocks); - loop_postorder_dfs(&pctx, f->entry); - if (pctx.count == 0) return; - - u32* idom = loop_compute_idom(f, pctx.visited, pctx.po, pctx.po_idx, - pctx.count, f->entry); - u8* body = arena_zarray(f->arena, u8, f->nblocks); - u32* stack = arena_array(f->arena, u32, f->nblocks); - - for (u32 header = 0; header < f->nblocks; ++header) { - if (!pctx.visited[header]) continue; - memset(body, 0, f->nblocks * sizeof body[0]); - int has_loop = 0; - - for (u32 latch = 0; latch < f->nblocks; ++latch) { - if (!pctx.visited[latch]) continue; - 
Block* lb = &f->blocks[latch]; - for (u32 s = 0; s < lb->nsucc; ++s) { - if (lb->succ[s] != header) continue; - if (!loop_dominates(idom, f->entry, header, latch)) continue; - has_loop = 1; - loop_mark_body(f, pctx.visited, header, latch, body, stack); - } - } - - if (!has_loop) continue; - for (u32 b = 0; b < f->nblocks; ++b) - if (body[b] && f->blocks[b].loop_depth < 31) ++f->blocks[b].loop_depth; - } - - for (u32 b = 0; b < f->nblocks; ++b) - f->blocks[b].frequency = loop_frequency(f->blocks[b].loop_depth); -} - static int hard_available(Func* f, u8 cls, Reg r) { if (cls >= OPT_REG_CLASSES) return 0; for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) @@ -816,8 +461,7 @@ static u32 hard_reg_alloc_score(Func* f, const OptAllocator* a, score += 20u; } else if (!is_caller_saved(f, vi->cls, hr)) { u32 bit = hard_loc_bit(vi->cls, hr); - int already_open = bit < a->hard_loc_bits && - a->hard_used_locs[bit].n != 0; + int already_open = bit < a->hard_loc_bits && a->hard_used_locs[bit].n != 0; if (!already_open) score += pi ? pi->save_cost : 50u; } return score; @@ -909,8 +553,7 @@ static void alloc_interval_insert(Func* f, AllocIntervalVec* v, u32 start, static int alloc_ranges_overlap_vec(OptAllocator* a, const OptLiveRangeSet* ranges, Val v, - const AllocIntervalVec* vec, - u64* visits) { + const AllocIntervalVec* vec, u64* visits) { for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; r = ranges->ranges[r].next) { const OptLiveRange* lr = &ranges->ranges[r]; @@ -938,7 +581,8 @@ static void alloc_mark_vec(Func* f, OptAllocator* a, static void opt_init_val_info_from_ranges(Func* f, const OptLiveRangeSet* ranges) { OptValInfo* old = f->val_info; - OptValInfo* info = arena_zarray(f->arena, OptValInfo, f->nvals ? f->nvals : 1u); + OptValInfo* info = + arena_zarray(f->arena, OptValInfo, f->nvals ? f->nvals : 1u); for (Val v = 0; v < f->nvals; ++v) { i32 tied = old ? old[v].tied_hard_reg : -1; u32 forbidden = old ? 
old[v].forbidden_hard_regs : 0; @@ -991,8 +635,7 @@ static void live_copy_block_out(Func* f, const OptLiveInfo* live_info, u32 b, bits_clear(live, words); if (live_info) { const OptBitset* out = &live_info->blocks[b].live_out; - for (u32 w = 0; w < words && w < out->nwords; ++w) - live[w] = out->words[w]; + for (u32 w = 0; w < words && w < out->nwords; ++w) live[w] = out->words[w]; } } @@ -1056,17 +699,15 @@ static int spill_slot_compatible(Func* f, FrameSlot fs, Val v) { return 1; } -static int alloc_hard_conflicts(OptAllocator* a, - const OptLiveRangeSet* ranges, Val v, - u32 bit) { +static int alloc_hard_conflicts(OptAllocator* a, const OptLiveRangeSet* ranges, + Val v, u32 bit) { if (bit >= a->hard_loc_bits) return 1; return alloc_ranges_overlap_vec(a, ranges, v, &a->hard_used_locs[bit], &a->hard_point_visits); } -static int alloc_stack_conflicts(OptAllocator* a, - const OptLiveRangeSet* ranges, Val v, - u32 stack_idx) { +static int alloc_stack_conflicts(OptAllocator* a, const OptLiveRangeSet* ranges, + Val v, u32 stack_idx) { if (stack_idx >= a->stack_slot_count) return 1; return alloc_ranges_overlap_vec(a, ranges, v, &a->stack_used_locs[stack_idx], &a->stack_point_visits); @@ -1086,10 +727,9 @@ static void alloc_grow_stack_locs(Func* f, OptAllocator* a, u32 need_slots) { FrameSlot* ns = arena_array(f->arena, FrameSlot, ncap); AllocIntervalVec* ni = arena_zarray(f->arena, AllocIntervalVec, ncap); if (a->stack_slots) { - memcpy(ns, a->stack_slots, sizeof(a->stack_slots[0]) * - a->stack_slot_count); - memcpy(ni, a->stack_used_locs, sizeof(a->stack_used_locs[0]) * - a->stack_slot_count); + memcpy(ns, a->stack_slots, sizeof(a->stack_slots[0]) * a->stack_slot_count); + memcpy(ni, a->stack_used_locs, + sizeof(a->stack_used_locs[0]) * a->stack_slot_count); } a->stack_slots = ns; a->stack_used_locs = ni; @@ -1154,9 +794,8 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, if (ranges->first_range_by_val[v] != OPT_RANGE_NONE) ++ncands; 
a->hard_loc_words = loc_bit_words(a->hard_loc_bits); a->locs = arena_zarray(f->arena, OptLoc, f->nvals ? f->nvals : 1u); - a->hard_used_locs = - arena_zarray(f->arena, AllocIntervalVec, - a->hard_loc_bits ? a->hard_loc_bits : 1u); + a->hard_used_locs = arena_zarray(f->arena, AllocIntervalVec, + a->hard_loc_bits ? a->hard_loc_bits : 1u); a->stack_slots = NULL; a->stack_slot_cap = 0; a->stack_used_locs = NULL; @@ -1460,15 +1099,14 @@ static void rewrite_call_save_one(Val v, void* arg) { u8 cls = f->val_info[v].cls; Reg hr = f->val_info[v].hard_reg; if (cls >= OPT_REG_CLASSES || hr >= 32u) return; - if ((call_clobber_mask_for(f, c->call, cls) & (1u << hr)) == 0) return; + if ((opt_call_clobber_mask_for(f, c->call, cls) & (1u << hr)) == 0) return; if (c->emit_restore) append_load_val(f, c->out, v); else append_store_val(f, c->out, v); } -static void append_live_call_saves(Func* f, RewriteList* out, - const Inst* call, +static void append_live_call_saves(Func* f, RewriteList* out, const Inst* call, const u64* live_after, u32 live_active_words, const InstRefs* refs, const Val* call_save_vals, @@ -1526,9 +1164,8 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) { memset(&after, 0, sizeof after); memset(&call_saves, 0, sizeof call_saves); memset(&call_restores, 0, sizeof call_restores); - live_active_words = - live_copy_block_out_active(live_info, b, live, words, - live_active_words); + live_active_words = live_copy_block_out_active(live_info, b, live, words, + live_active_words); f->opt_rewrite_live_words_touched += live_active_words; for (u32 ri = bl->ninsts; ri > 0; --ri) { @@ -1565,16 +1202,15 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) { for (u32 k = 0; k < aux->desc.nargs; ++k) rewrite_call_arg_value(f, &in, (CGABIValue*)&aux->desc.args[k], &ctx); - walk_abivalue(f, &in, &aux->desc.ret, 1, rewrite_one_operand, - &ctx); + walk_abivalue(f, &in, &aux->desc.ret, 1, rewrite_one_operand, &ctx); } } } else { walk_inst_operands(f, &in, 
rewrite_one_operand, &ctx); } if ((IROp)in.op == IR_CALL) { - append_live_call_saves(f, &call_saves, &in, live, live_active_words, &refs, - call_save_vals, ncall_save_vals, 0); + append_live_call_saves(f, &call_saves, &in, live, live_active_words, + &refs, call_save_vals, ncall_save_vals, 0); append_live_call_saves(f, &call_restores, &in, live, live_active_words, &refs, call_save_vals, ncall_save_vals, 1); } @@ -1620,8 +1256,6 @@ void opt_rewrite_dump(Func* f, Writer* w) { } } -static int inst_has_side_effect(Func* f, const Inst* in); - static int all_defs_dead(Func* f, Inst* in, u64* live) { (void)f; if (in->def != VAL_NONE && bit_has(live, in->def)) return 0; @@ -1638,7 +1272,8 @@ void opt_dead_def_elim_with_live(Func* f, const OptLiveInfo* live_info) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; u64* live = arena_zarray(f->arena, u64, words); - const u64* live_out = live_info ? live_info->blocks[b].live_out.words : NULL; + const u64* live_out = + live_info ? live_info->blocks[b].live_out.words : NULL; f->opt_dde_live_words_touched += words; if (live_out) { for (u32 w = 0; w < words; ++w) live[w] = live_out[w]; @@ -1649,7 +1284,7 @@ void opt_dead_def_elim_with_live(Func* f, const OptLiveInfo* live_info) { for (u32 ri = bl->ninsts; ri > 0; --ri) { u32 i = ri - 1u; Inst* in = &bl->insts[i]; - if (!inst_has_side_effect(f, in) && all_defs_dead(f, in, live)) { + if (!opt_inst_has_side_effect(f, in) && all_defs_dead(f, in, live)) { continue; } new_insts[w++] = *in; @@ -1694,12 +1329,10 @@ void opt_regalloc(Func* f, int allow_live_range_split) { metrics_count(f->c, "opt.range_raw_points", ranges.raw_point_count); metrics_count(f->c, "opt.range_max_per_val", ranges.max_ranges_per_val); metrics_count(f->c, "opt.range_max_length", ranges.max_live_length); - metrics_count(f->c, "opt.range_whole_block_spans", - ranges.whole_block_spans); + metrics_count(f->c, "opt.range_whole_block_spans", ranges.whole_block_spans); metrics_count(f->c, 
"opt.live.bitset_words_touched", live.bitset_words_touched); - metrics_count(f->c, "opt.live.dataflow_iterations", - live.dataflow_iterations); + metrics_count(f->c, "opt.live.dataflow_iterations", live.dataflow_iterations); metrics_count(f->c, "opt.live.dataflow_block_visits", live.dataflow_block_visits); metrics_count(f->c, "opt.range.point_visits", ranges.range_point_visits); @@ -1713,917 +1346,3 @@ void opt_regalloc(Func* f, int allow_live_range_split) { opt_assign_ranges(f, &ranges, &alloc); rewrite_func(f, &live); } - -static int same_reg_operand(const Operand* a, const Operand* b) { - return a->kind == OPK_REG && b->kind == OPK_REG && a->cls == b->cls && - a->v.reg == b->v.reg; -} - -static int frame_slot_is_spill(Func* f, FrameSlot fs) { - if (fs == FRAME_SLOT_NONE || fs > f->nframe_slots) return 0; - return f->frame_slots[fs - 1u].kind == FS_SPILL; -} - -static int spill_local_slot(Func* f, const Operand* addr, const MemAccess* mem, - FrameSlot* out) { - if (!addr || addr->kind != OPK_LOCAL) return 0; - if (mem_observable(mem)) return 0; - if (mem->alias.kind != ALIAS_LOCAL) return 0; - if (mem->alias.v.local_id != (i32)addr->v.frame_slot) return 0; - if (!frame_slot_is_spill(f, addr->v.frame_slot)) return 0; - *out = addr->v.frame_slot; - return 1; -} - -static int same_spill_access(Func* f, const Inst* a, const Inst* b, - FrameSlot* slot_out) { - FrameSlot as = FRAME_SLOT_NONE; - FrameSlot bs = FRAME_SLOT_NONE; - if (!spill_local_slot(f, &a->opnds[0], &a->extra.mem, &as)) return 0; - if (!spill_local_slot(f, &b->opnds[0], &b->extra.mem, &bs)) return 0; - if (as != bs) return 0; - if (a->extra.mem.size != b->extra.mem.size) return 0; - if (a->extra.mem.addr_space != b->extra.mem.addr_space) return 0; - if (slot_out) *slot_out = as; - return 1; -} - -static int load_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { - if ((IROp)in->op != IR_LOAD || in->nopnds < 2) return 0; - return spill_local_slot(f, &in->opnds[1], &in->extra.mem, slot_out); -} - 
-static int store_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { - if ((IROp)in->op != IR_STORE || in->nopnds < 2) return 0; - return spill_local_slot(f, &in->opnds[0], &in->extra.mem, slot_out); -} - -static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) { - FrameSlot as = FRAME_SLOT_NONE; - FrameSlot bs = FRAME_SLOT_NONE; - if ((IROp)a->op == IR_LOAD) { - if (!load_spill_slot(f, a, &as)) return 0; - } else if (!store_spill_slot(f, a, &as)) { - return 0; - } - if ((IROp)b->op == IR_LOAD) { - if (!load_spill_slot(f, b, &bs)) return 0; - } else if (!store_spill_slot(f, b, &bs)) { - return 0; - } - return as == bs && a->extra.mem.size == b->extra.mem.size && - a->extra.mem.addr_space == b->extra.mem.addr_space; -} - -static int same_phys_reg(const Operand* a, const Operand* b) { - return a && b && a->kind == OPK_REG && b->kind == OPK_REG && - a->cls == b->cls && a->v.reg == b->v.reg; -} - -static int operand_uses_phys_reg(const Operand* op, const Operand* r) { - if (!op || !r || r->kind != OPK_REG) return 0; - if (op->kind == OPK_REG) return op->cls == r->cls && op->v.reg == r->v.reg; - if (op->kind == OPK_INDIRECT) - return r->cls == RC_INT && op->v.ind.base == r->v.reg; - return 0; -} - -static int count_operand_phys_uses(const Operand* op, const Operand* r) { - return operand_uses_phys_reg(op, r) ? 
1 : 0; -} - -static int abi_uses_phys_reg(const CGABIValue* v, const Operand* r) { - int n = 0; - if (!v) return 0; - n += count_operand_phys_uses(&v->storage, r); - for (u32 i = 0; i < v->nparts; ++i) - n += count_operand_phys_uses(&v->parts[i].op, r); - return n; -} - -static int inst_uses_phys_reg(const Inst* in, const Operand* r) { - int n = 0; - switch ((IROp)in->op) { - case IR_COPY: - case IR_CONVERT: - case IR_UNOP: - case IR_VA_ARG: - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - break; - case IR_LOAD: - case IR_ADDR_OF: - case IR_BITFIELD_LOAD: - case IR_ATOMIC_LOAD: - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - break; - case IR_BINOP: - case IR_CMP: - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); - break; - case IR_STORE: - case IR_AGG_COPY: - case IR_AGG_SET: - case IR_BITFIELD_STORE: - case IR_VA_COPY: - if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - break; - case IR_CALL: { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (!aux) break; - if (aux->use_plan_replay) { - n += count_operand_phys_uses(&aux->plan.callee, r); - for (u32 i = 0; i < aux->plan.nargs; ++i) - n += count_operand_phys_uses(&aux->plan.args[i].src, r); - } else { - n += count_operand_phys_uses(&aux->desc.callee, r); - for (u32 i = 0; i < aux->desc.nargs; ++i) - n += abi_uses_phys_reg(&aux->desc.args[i], r); - } - break; - } - case IR_CMP_BRANCH: - case IR_CONDBR: - for (u32 i = 0; i < in->nopnds; ++i) - n += count_operand_phys_uses(&in->opnds[i], r); - break; - case IR_RET: { - IRRetAux* aux = (IRRetAux*)in->extra.aux; - if (aux && aux->present) n += abi_uses_phys_reg(&aux->val, r); - break; - } - case IR_SCOPE_BEGIN: { - IRScopeAux* aux = (IRScopeAux*)in->extra.aux; - if (aux) n += count_operand_phys_uses(&aux->desc.cond, r); - break; - } - case 
IR_ALLOCA: - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - break; - case IR_VA_START: - case IR_VA_END: - if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); - break; - case IR_ATOMIC_STORE: - if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - break; - case IR_ATOMIC_RMW: - if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); - if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); - break; - case IR_ATOMIC_CAS: - if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); - if (in->nopnds >= 4) n += count_operand_phys_uses(&in->opnds[3], r); - if (in->nopnds >= 5) n += count_operand_phys_uses(&in->opnds[4], r); - break; - case IR_ASM_BLOCK: { - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux) break; - for (u32 i = 0; i < aux->nin; ++i) - n += count_operand_phys_uses(&aux->in_ops[i], r); - break; - } - case IR_INTRINSIC: { - IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; - if (!aux) break; - for (u32 i = 0; i < aux->narg; ++i) - n += count_operand_phys_uses(&aux->args[i], r); - break; - } - default: - break; - } - return n; -} - -static int abi_defines_phys_reg(const CGABIValue* v, const Operand* r) { - int n = 0; - if (!v) return 0; - if (same_phys_reg(&v->storage, r)) ++n; - for (u32 i = 0; i < v->nparts; ++i) - if (same_phys_reg(&v->parts[i].op, r)) ++n; - return n; -} - -static int inst_defines_phys_reg(const Inst* in, const Operand* r) { - if (!r || r->kind != OPK_REG) return 0; - switch ((IROp)in->op) { - case IR_LOAD_IMM: - case IR_LOAD_CONST: - case IR_COPY: - case IR_LOAD: - case IR_ADDR_OF: - case IR_TLS_ADDR_OF: - case IR_BITFIELD_LOAD: - case IR_BINOP: - case IR_UNOP: - case IR_CMP: - case IR_CONVERT: - case IR_ALLOCA: - case IR_VA_ARG: - case IR_ATOMIC_LOAD: - case IR_ATOMIC_RMW: - return in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r); - case IR_CALL: { - IRCallAux* aux = 
(IRCallAux*)in->extra.aux; - if (!aux) return 0; - if (aux->use_plan_replay) { - for (u32 i = 0; i < aux->plan.nargs; ++i) - if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG && - r->cls == aux->plan.args[i].cls && - r->v.reg == aux->plan.args[i].dst_reg) - return 1; - for (u32 i = 0; i < aux->plan.nrets; ++i) - if ((r->cls == aux->plan.rets[i].cls && - r->v.reg == aux->plan.rets[i].src_reg) || - same_phys_reg(&aux->plan.rets[i].dst, r)) - return 1; - return 0; - } - return abi_defines_phys_reg(&aux->desc.ret, r); - } - case IR_ATOMIC_CAS: - return (in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r)) || - (in->nopnds >= 2 && same_phys_reg(&in->opnds[1], r)); - case IR_ASM_BLOCK: { - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux) return 0; - for (u32 i = 0; i < aux->nout; ++i) - if (same_phys_reg(&aux->out_ops[i], r)) return 1; - return 0; - } - case IR_INTRINSIC: { - IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; - if (!aux) return 0; - for (u32 i = 0; i < aux->ndst; ++i) - if (same_phys_reg(&aux->dsts[i], r)) return 1; - return 0; - } - default: - return 0; - } -} - -typedef struct HardBlockLive HardBlockLive; -static HardBlockLive* maybe_build_hard_live(Func* f); -static int block_live_out_has_phys_reg(Func* f, const HardBlockLive* hard_live, - u32 block, const Operand* r); - -static int copy_fold_slot(const Inst* in, u32 idx) { - switch ((IROp)in->op) { - case IR_COPY: - case IR_CONVERT: - case IR_UNOP: - return idx == 1; - case IR_BINOP: - case IR_CMP: - return idx == 1 || idx == 2; - case IR_CMP_BRANCH: - return idx == 0 || idx == 1; - case IR_CONDBR: - return idx == 0; - case IR_STORE: - return idx == 1; - case IR_ALLOCA: - return idx == 1; - case IR_ATOMIC_RMW: - return idx == 2; - default: - return 0; - } -} - -static int imm_fold_slot(const Inst* in, u32 idx) { - switch ((IROp)in->op) { - case IR_BINOP: - case IR_CMP: - return idx == 1 || idx == 2; - case IR_CMP_BRANCH: - return idx == 0 || idx == 1; - default: - return 0; - } -} - -static int 
identical_convert_pair(const Inst* a, const Inst* b) { - if ((IROp)a->op != IR_CONVERT || (IROp)b->op != IR_CONVERT) return 0; - if (a->nopnds < 2 || b->nopnds < 2) return 0; - if (a->extra.imm != b->extra.imm) return 0; - return a->opnds[1].type == b->opnds[1].type && - a->opnds[0].type == b->opnds[0].type; -} - -static int binop_is_commutative(BinOp op) { - switch (op) { - case BO_IADD: - case BO_IMUL: - case BO_FADD: - case BO_FMUL: - case BO_AND: - case BO_OR: - case BO_XOR: - return 1; - default: - return 0; - } -} - -static int no_intervening_phys_access(Block* bl, u32 first, u32 last, - const Operand* r) { - for (u32 i = first; i < last; ++i) { - Inst* in = &bl->insts[i]; - if (inst_uses_phys_reg(in, r) || inst_defines_phys_reg(in, r)) return 0; - } - return 1; -} - -static int retarget_producer_legal(Inst* producer, const Operand* copy_dst, - int* swap_binop) { - *swap_binop = 0; - if (!copy_dst || copy_dst->kind != OPK_REG) return 0; - if (producer->nopnds < 2 || producer->opnds[0].kind != OPK_REG) return 0; - if (producer->opnds[0].cls != copy_dst->cls) return 0; - if (producer->opnds[0].type != copy_dst->type) return 0; - - switch ((IROp)producer->op) { - case IR_UNOP: - return 1; - case IR_BINOP: { - if (producer->nopnds < 3) return 0; - int dst_is_lhs = operand_uses_phys_reg(&producer->opnds[1], copy_dst); - int dst_is_rhs = operand_uses_phys_reg(&producer->opnds[2], copy_dst); - if (!dst_is_lhs && !dst_is_rhs) return 1; - if (dst_is_lhs) return 1; - if (binop_is_commutative((BinOp)producer->extra.imm)) { - *swap_binop = 1; - return 1; - } - return 0; - } - default: - return 0; - } -} - -static int first_return_reg(Func* f, u8 cls, Reg* out) { - if (!f || cls >= OPT_REG_CLASSES) return 0; - u32 mask = f->opt_ret_regs[cls]; - for (Reg r = 0; r < 32; ++r) { - if (mask & (1u << r)) { - *out = r; - return 1; - } - } - return 0; -} - -static int ret_scalar_storage(CGABIValue* v, Operand** out) { - if (!v || v->storage.kind != OPK_REG) return 0; - if 
(v->nparts > 1) return 0; - *out = &v->storage; - return 1; -} - -static int find_single_direct_use(Func* f, Block* bl, - const HardBlockLive* hard_live, u32 def_i, - const Operand* def, const Operand* src, - int check_src, int imm_fold, int conv_fold, - u32* use_i_out, u32* op_i_out) { - int total_uses = 0; - int source_clobbered = 0; - int killed = 0; - int found = 0; - u32 found_i = 0; - u32 found_op = 0; - - for (u32 i = def_i + 1u; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - int uses = inst_uses_phys_reg(in, def); - if (uses) { - if (check_src && source_clobbered) return 0; - total_uses += uses; - if (total_uses > 1) return 0; - for (u32 oi = 0; oi < in->nopnds; ++oi) { - int ok = - conv_fold - ? (oi == 1 && identical_convert_pair(&bl->insts[def_i], in)) - : (imm_fold ? imm_fold_slot(in, oi) : copy_fold_slot(in, oi)); - if (!ok) continue; - if (!same_phys_reg(&in->opnds[oi], def)) continue; - found_i = i; - found_op = oi; - found = 1; - } - } - - if ((IROp)in->op == IR_CALL) { - if (check_src) source_clobbered = 1; - killed = 1; - break; - } - if (check_src && src && inst_defines_phys_reg(in, src)) - source_clobbered = 1; - if (inst_defines_phys_reg(in, def)) { - killed = 1; - break; - } - } - - if (total_uses != 1) return 0; - if (!found) return 0; - if (!killed && block_live_out_has_phys_reg(f, hard_live, bl->id, def)) - return 0; - *use_i_out = found_i; - *op_i_out = found_op; - return 1; -} - -static void opt_combine_fold_block(Func* f, Block* bl, - const HardBlockLive* hard_live) { - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - u32 use_i = 0; - u32 op_i = 0; - - if (f->opt_rewritten && (IROp)in->op == IR_RET && i > 0) { - IRRetAux* aux = (IRRetAux*)in->extra.aux; - Operand* ret_op = NULL; - Reg ret_reg = REG_NONE; - if (aux && aux->present && ret_scalar_storage(&aux->val, &ret_op) && - first_return_reg(f, ret_op->cls, &ret_reg) && - ret_reg != (Reg)REG_NONE && ret_reg != ret_op->v.reg) { - Inst* producer = &bl->insts[i - 
1u]; - Operand ret_dst = *ret_op; - ret_dst.v.reg = ret_reg; - int swap_binop = 0; - if (producer->nopnds >= 1 && - same_phys_reg(&producer->opnds[0], ret_op) && - retarget_producer_legal(producer, &ret_dst, &swap_binop)) { - if (swap_binop) { - Operand tmp = producer->opnds[1]; - producer->opnds[1] = producer->opnds[2]; - producer->opnds[2] = tmp; - } - producer->opnds[0] = ret_dst; - *ret_op = ret_dst; - continue; - } - } - } - - if (f->opt_rewritten && - ((IROp)in->op == IR_BINOP || (IROp)in->op == IR_UNOP) && - in->nopnds >= 1 && in->opnds[0].kind == OPK_REG && - find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 0, - 0, &use_i, &op_i)) { - Inst* copy = &bl->insts[use_i]; - int swap_binop = 0; - if ((IROp)copy->op == IR_COPY && op_i == 1 && copy->nopnds >= 2 && - copy->opnds[0].kind == OPK_REG && - same_phys_reg(&copy->opnds[1], &in->opnds[0]) && - !same_phys_reg(&copy->opnds[0], &in->opnds[0]) && - no_intervening_phys_access(bl, i + 1u, use_i, &copy->opnds[0]) && - retarget_producer_legal(in, &copy->opnds[0], &swap_binop)) { - if (swap_binop) { - Operand tmp = in->opnds[1]; - in->opnds[1] = in->opnds[2]; - in->opnds[2] = tmp; - } - in->opnds[0] = copy->opnds[0]; - copy->opnds[1] = copy->opnds[0]; - continue; - } - } - - if ((IROp)in->op == IR_COPY && in->nopnds >= 2 && - in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && - !same_phys_reg(&in->opnds[0], &in->opnds[1]) && - find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], - &in->opnds[1], 1, 0, 0, &use_i, &op_i)) { - bl->insts[use_i].opnds[op_i] = in->opnds[1]; - continue; - } - - if ((IROp)in->op == IR_LOAD_IMM && in->nopnds >= 1 && - in->opnds[0].kind == OPK_REG && - find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 1, - 0, &use_i, &op_i)) { - Operand imm = in->opnds[0]; - imm.kind = OPK_IMM; - imm.v.imm = in->extra.imm; - bl->insts[use_i].opnds[op_i] = imm; - continue; - } - - if ((IROp)in->op == IR_CONVERT && in->nopnds >= 2 && - in->opnds[0].kind == 
OPK_REG && in->opnds[1].kind == OPK_REG && - find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], - &in->opnds[1], 1, 0, 1, &use_i, &op_i)) { - bl->insts[use_i].opnds[op_i] = in->opnds[1]; - } - } -} - -void opt_combine(Func* f) { - HardBlockLive* hard_live = maybe_build_hard_live(f); - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - opt_combine_fold_block(f, bl, hard_live); - u32 w = 0; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_COPY && in->nopnds == 2 && - same_reg_operand(&in->opnds[0], &in->opnds[1])) { - continue; - } - - if (w) { - Inst* prev = &bl->insts[w - 1u]; - if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_LOAD && - same_spill_slot_and_size(f, prev, in) && - same_reg_operand(&prev->opnds[1], &in->opnds[0])) { - continue; - } - if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_STORE && - same_spill_slot_and_size(f, prev, in) && - same_reg_operand(&prev->opnds[0], &in->opnds[1])) { - continue; - } - if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_LOAD && - same_spill_slot_and_size(f, prev, in) && - same_reg_operand(&prev->opnds[0], &in->opnds[0])) { - continue; - } - if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_STORE && - same_spill_access(f, prev, in, NULL)) { - bl->insts[w - 1u] = *in; - continue; - } - } - - bl->insts[w++] = *in; - } - bl->ninsts = w; - } -} - -static int inst_has_side_effect(Func* f, const Inst* in) { - (void)f; - switch ((IROp)in->op) { - case IR_LOAD: - return mem_observable(&in->extra.mem); - case IR_BITFIELD_LOAD: { - IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; - return aux && mem_observable(&aux->access.storage); - } - case IR_ALLOCA: - case IR_PARAM_DECL: - case IR_STORE: - case IR_AGG_COPY: - case IR_AGG_SET: - case IR_BITFIELD_STORE: - case IR_CALL: - case IR_BR: - case IR_CONDBR: - case IR_CMP_BRANCH: - case IR_RET: - case IR_SCOPE_BEGIN: - case IR_SCOPE_ELSE: - case IR_SCOPE_END: - case IR_BREAK_TO: - case 
IR_CONTINUE_TO: - case IR_VA_START: - case IR_VA_ARG: - case IR_VA_END: - case IR_VA_COPY: - case IR_ATOMIC_LOAD: - case IR_ATOMIC_STORE: - case IR_ATOMIC_RMW: - case IR_ATOMIC_CAS: - case IR_FENCE: - case IR_ASM_BLOCK: - case IR_INTRINSIC: - return 1; - default: - return 0; - } -} - -typedef struct HardRegSet { - u32 cls[OPT_REG_CLASSES]; -} HardRegSet; - -struct HardBlockLive { - HardRegSet live_in; - HardRegSet live_out; - HardRegSet live_use; - HardRegSet live_def; -}; - -static void hard_add(HardRegSet* s, u8 cls, Reg r) { - if (cls >= OPT_REG_CLASSES || r >= 32) return; - s->cls[cls] |= 1u << r; -} - -static int hard_empty(const HardRegSet* s) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - if (s->cls[c]) return 0; - return 1; -} - -static int hard_intersects(const HardRegSet* a, const HardRegSet* b) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - if (a->cls[c] & b->cls[c]) return 1; - return 0; -} - -static int hard_eq(const HardRegSet* a, const HardRegSet* b) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - if (a->cls[c] != b->cls[c]) return 0; - return 1; -} - -static void hard_or(HardRegSet* dst, const HardRegSet* src) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) dst->cls[c] |= src->cls[c]; -} - -static void hard_live_in_from_out(HardRegSet* dst, const HardRegSet* use, - const HardRegSet* out, - const HardRegSet* def) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - dst->cls[c] = use->cls[c] | (out->cls[c] & ~def->cls[c]); -} - -static void hard_live_step(HardRegSet* live, const HardRegSet* use, - const HardRegSet* def) { - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - live->cls[c] = (live->cls[c] & ~def->cls[c]) | use->cls[c]; -} - -static void hard_use_operand(HardRegSet* s, const Operand* op) { - if (!op) return; - if (op->kind == OPK_REG) { - hard_add(s, op->cls, op->v.reg); - } else if (op->kind == OPK_INDIRECT) { - hard_add(s, RC_INT, op->v.ind.base); - } -} - -static void hard_def_operand(HardRegSet* s, const Operand* op) { - if (op && op->kind == 
OPK_REG) hard_add(s, op->cls, op->v.reg); -} - -static void hard_use_abivalue(HardRegSet* use, const CGABIValue* v) { - if (!v) return; - hard_use_operand(use, &v->storage); - for (u32 i = 0; i < v->nparts; ++i) hard_use_operand(use, &v->parts[i].op); -} - -static void hard_def_abivalue(HardRegSet* def, const CGABIValue* v) { - if (!v) return; - hard_def_operand(def, &v->storage); - for (u32 i = 0; i < v->nparts; ++i) hard_def_operand(def, &v->parts[i].op); -} - -static void hard_inst_use_def(Func* f, const Inst* in, HardRegSet* use, - HardRegSet* def) { - memset(use, 0, sizeof *use); - memset(def, 0, sizeof *def); - switch ((IROp)in->op) { - case IR_LOAD_IMM: - case IR_LOAD_CONST: - case IR_TLS_ADDR_OF: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - break; - case IR_COPY: - case IR_CONVERT: - case IR_UNOP: - case IR_VA_ARG: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - break; - case IR_LOAD: - case IR_ADDR_OF: - case IR_BITFIELD_LOAD: - case IR_ATOMIC_LOAD: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - break; - case IR_BINOP: - case IR_CMP: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); - break; - case IR_STORE: - case IR_AGG_COPY: - case IR_AGG_SET: - case IR_BITFIELD_STORE: - case IR_VA_COPY: - if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - break; - case IR_CALL: { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (!aux) break; - if (aux->use_plan_replay) { - hard_use_operand(use, &aux->plan.callee); - for (u32 i = 0; i < aux->plan.nargs; ++i) { - hard_use_operand(use, &aux->plan.args[i].src); - if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG) - hard_add(def, aux->plan.args[i].cls, 
aux->plan.args[i].dst_reg); - } - for (u32 i = 0; i < aux->plan.nrets; ++i) { - hard_add(def, aux->plan.rets[i].cls, aux->plan.rets[i].src_reg); - hard_def_operand(def, &aux->plan.rets[i].dst); - } - } else { - hard_use_operand(use, &aux->desc.callee); - for (u32 i = 0; i < aux->desc.nargs; ++i) - hard_use_abivalue(use, &aux->desc.args[i]); - hard_def_abivalue(def, &aux->desc.ret); - } - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - def->cls[c] |= call_clobber_mask_for(f, in, (u8)c); - break; - } - case IR_CMP_BRANCH: - case IR_CONDBR: - for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]); - break; - case IR_RET: { - IRRetAux* aux = (IRRetAux*)in->extra.aux; - if (aux && aux->present) hard_use_abivalue(use, &aux->val); - break; - } - case IR_SCOPE_BEGIN: { - IRScopeAux* aux = (IRScopeAux*)in->extra.aux; - if (aux) hard_use_operand(use, &aux->desc.cond); - break; - } - case IR_ALLOCA: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - break; - case IR_VA_START: - case IR_VA_END: - if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); - break; - case IR_ATOMIC_STORE: - if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - break; - case IR_ATOMIC_RMW: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); - if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); - break; - case IR_ATOMIC_CAS: - if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); - if (in->nopnds >= 2) hard_def_operand(def, &in->opnds[1]); - if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); - if (in->nopnds >= 4) hard_use_operand(use, &in->opnds[3]); - if (in->nopnds >= 5) hard_use_operand(use, &in->opnds[4]); - break; - case IR_ASM_BLOCK: { - IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux) break; - for (u32 i = 0; i < aux->nin; ++i) 
hard_use_operand(use, &aux->in_ops[i]); - for (u32 i = 0; i < aux->nout; ++i) - hard_def_operand(def, &aux->out_ops[i]); - break; - } - case IR_INTRINSIC: { - IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; - if (!aux) break; - for (u32 i = 0; i < aux->narg; ++i) hard_use_operand(use, &aux->args[i]); - for (u32 i = 0; i < aux->ndst; ++i) hard_def_operand(def, &aux->dsts[i]); - break; - } - default: - break; - } -} - -static void hard_live_blocks(Func* f, HardBlockLive* live) { - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - HardRegSet seen_def; - memset(&seen_def, 0, sizeof seen_def); - memset(&live[b], 0, sizeof live[b]); - for (u32 i = 0; i < bl->ninsts; ++i) { - HardRegSet use, def; - hard_inst_use_def(f, &bl->insts[i], &use, &def); - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - live[b].live_use.cls[c] |= use.cls[c] & ~seen_def.cls[c]; - hard_or(&seen_def, &def); - hard_or(&live[b].live_def, &def); - } - } - - int changed; - do { - changed = 0; - for (u32 bi = f->nblocks; bi > 0; --bi) { - u32 b = bi - 1u; - Block* bl = &f->blocks[b]; - HardRegSet new_out, new_in; - memset(&new_out, 0, sizeof new_out); - for (u32 s = 0; s < bl->nsucc; ++s) { - u32 t = bl->succ[s]; - if (t < f->nblocks) hard_or(&new_out, &live[t].live_in); - } - hard_live_in_from_out(&new_in, &live[b].live_use, &new_out, - &live[b].live_def); - if (!hard_eq(&live[b].live_out, &new_out)) { - live[b].live_out = new_out; - changed = 1; - } - if (!hard_eq(&live[b].live_in, &new_in)) { - live[b].live_in = new_in; - changed = 1; - } - } - } while (changed); -} - -static int hard_live_out_has_phys_reg(const HardBlockLive* live, - const Operand* r) { - if (!live || !r || r->kind != OPK_REG || r->cls >= OPT_REG_CLASSES || - r->v.reg >= 32) - return 0; - return (live->live_out.cls[r->cls] & (1u << r->v.reg)) != 0; -} - -static HardBlockLive* maybe_build_hard_live(Func* f) { - if (!f->opt_rewritten) return NULL; - HardBlockLive* live = arena_zarray(f->arena, HardBlockLive, - 
f->nblocks ? f->nblocks : 1u); - hard_live_blocks(f, live); - return live; -} - -static HardRegSet hard_live_out_for_block(const HardBlockLive* live) { - HardRegSet out; - memset(&out, 0, sizeof out); - if (live) out = live->live_out; - return out; -} - -static int block_live_out_has_phys_reg(Func* f, const HardBlockLive* hard_live, - u32 block, const Operand* r) { - (void)f; - if (!hard_live || block >= f->nblocks) return 0; - return hard_live_out_has_phys_reg(&hard_live[block], r); -} - -void opt_dce(Func* f) { - HardBlockLive* hard_live = maybe_build_hard_live(f); - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - if (f->opt_rewritten) { - HardRegSet live = hard_live_out_for_block(hard_live ? &hard_live[b] : NULL); - Inst* new_insts = arena_array(f->arena, Inst, bl->ninsts); - u32 w = 0; - for (u32 ri = bl->ninsts; ri > 0; --ri) { - u32 i = ri - 1u; - Inst* in = &bl->insts[i]; - HardRegSet use, def; - if ((IROp)in->op == IR_NOP) continue; - hard_inst_use_def(f, in, &use, &def); - if (!inst_has_side_effect(f, in) && !hard_empty(&def) && - !hard_intersects(&def, &live)) { - continue; - } - if (!inst_has_side_effect(f, in) && hard_empty(&def) && - in->nopnds == 0) { - continue; - } - new_insts[w++] = *in; - hard_live_step(&live, &use, &def); - } - for (u32 i = 0; i < w / 2; ++i) { - Inst tmp = new_insts[i]; - new_insts[i] = new_insts[w - 1u - i]; - new_insts[w - 1u - i] = tmp; - } - bl->insts = new_insts; - bl->ninsts = w; - bl->cap = w; - continue; - } - - u32 w = 0; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_NOP) continue; - if (!inst_has_side_effect(f, in) && in->def == VAL_NONE && - in->ndefs == 0 && in->nopnds == 0) - continue; - bl->insts[w++] = *in; - } - bl->ninsts = w; - } -} diff --git a/src/opt/pass_machinize.c b/src/opt/pass_machinize.c @@ -0,0 +1,198 @@ +#include <string.h> + +#include "core/arena.h" +#include "core/core.h" +#include "core/pool.h" +#include "opt/opt_internal.h" + 
+static const char* asm_constraint_body(const char* s) { + if (!s) return ""; + if (s[0] == '=' && s[1] == '&') return s + 2; + if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; + return s; +} + +static int asm_resolve_fixed_constraint(Func* f, CGTarget* target, + const char* constraint, Reg* reg_out, + RegClass* cls_out) { + const char* body = asm_constraint_body(constraint); + if (!target->resolve_reg_name) return 0; + if (body[0] != '{') return 0; + const char* end = body + 1; + while (*end && *end != '}') ++end; + if (*end != '}' || end == body + 1) return 0; + Sym name = pool_intern(f->c->global, body + 1, (size_t)(end - body - 1)); + return target->resolve_reg_name(target, name, reg_out, cls_out) == 0; +} + +static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) { + if (!aux) return; + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) aux->clobber_mask[c] = 0; + + if (aux->nout && !aux->out_fixed_regs) { + aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout); + aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout); + for (u32 i = 0; i < aux->nout; ++i) aux->out_fixed_regs[i] = -1; + } + if (aux->nin && !aux->in_fixed_regs) { + aux->in_fixed_regs = arena_array(f->arena, i32, aux->nin); + aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin); + for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1; + } + + if (target->resolve_reg_name) { + for (u32 i = 0; i < aux->nclob; ++i) { + Reg r; + RegClass cls; + if (target->resolve_reg_name(target, aux->clobbers[i], &r, &cls) != 0) + continue; + if ((u32)cls < OPT_REG_CLASSES && r < 32) + aux->clobber_mask[cls] |= 1u << r; + } + } + + for (u32 i = 0; i < aux->nout; ++i) { + Reg r; + RegClass cls; + if (asm_resolve_fixed_constraint(f, target, aux->outs[i].str, &r, &cls)) { + aux->out_fixed_regs[i] = (i32)r; + aux->out_fixed_cls[i] = (u8)cls; + } + } + for (u32 i = 0; i < aux->nin; ++i) { + Reg r; + RegClass cls; + if (asm_resolve_fixed_constraint(f, target, 
aux->ins[i].str, &r, &cls)) { + aux->in_fixed_regs[i] = (i32)r; + aux->in_fixed_cls[i] = (u8)cls; + } + } +} + +static int call_plan_replay_supported(const IRCallAux* aux, + const CGTarget* target); + +void opt_machinize(Func* f, CGTarget* target) { + f->opt_target = target->c->target; + f->opt_has_target = 1; + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + f->opt_hard_reg_count[c] = 0; + f->opt_phys_reg_count[c] = 0; + f->opt_scratch_reg_count[c] = 0; + f->opt_caller_saved[c] = 0; + f->opt_callee_saved[c] = 0; + f->opt_reserved_regs[c] = 0; + f->opt_arg_regs[c] = 0; + f->opt_ret_regs[c] = 0; + } + + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_ASM_BLOCK) { + asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux); + } else if ((IROp)in->op == IR_CALL && target->plan_call) { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (aux) { + target->plan_call(target, &aux->desc, &aux->plan); + aux->plan_valid = 1; + aux->use_plan_replay = call_plan_replay_supported(aux, target); + } + } + } + } + + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + const CGPhysRegInfo* phys = NULL; + u32 nphys = 0; + if (target->get_phys_regs) + target->get_phys_regs(target, (RegClass)c, &phys, &nphys); + if (phys) { + for (u32 i = 0; i < nphys && i < OPT_MAX_HARD_REGS; ++i) { + CGPhysRegInfo pi = phys[i]; + Reg hr = pi.reg; + if (hr < 32u) { + if (pi.flags & CG_REG_CALLER_SAVED) + f->opt_caller_saved[c] |= 1u << hr; + if (pi.flags & CG_REG_CALLEE_SAVED) + f->opt_callee_saved[c] |= 1u << hr; + if (pi.flags & CG_REG_RESERVED) f->opt_reserved_regs[c] |= 1u << hr; + if (pi.flags & CG_REG_ARG) f->opt_arg_regs[c] |= 1u << hr; + if (pi.flags & CG_REG_RET) f->opt_ret_regs[c] |= 1u << hr; + } + f->opt_phys_regs[c][f->opt_phys_reg_count[c]++] = pi; + if ((pi.flags & CG_REG_ALLOCABLE) && !(pi.flags & CG_REG_RESERVED)) { + f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hr; + } + } 
+ } else { + const Reg* hard = NULL; + u32 nhard = 0; + if (target->get_allocable_regs) + target->get_allocable_regs(target, (RegClass)c, &hard, &nhard); + for (u32 i = 0; i < nhard && i < OPT_MAX_HARD_REGS; ++i) + f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hard[i]; + } + + const Reg* scratch = NULL; + u32 nscratch = 0; + if (target->get_scratch_regs) + target->get_scratch_regs(target, (RegClass)c, &scratch, &nscratch); + for (u32 i = 0; i < nscratch && i < OPT_MAX_SCRATCH_REGS; ++i) + f->opt_scratch_regs[c][f->opt_scratch_reg_count[c]++] = scratch[i]; + + if (!phys && target->is_caller_saved) { + for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) { + Reg hr = f->opt_hard_regs[c][i]; + if (target->is_caller_saved(target, (RegClass)c, hr)) + f->opt_caller_saved[c] |= (1u << hr); + } + } + if (target->callee_save_mask) + f->opt_callee_saved[c] |= target->callee_save_mask(target, (RegClass)c); + } + + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) { + Reg hr = f->opt_hard_regs[c][i]; + for (u32 s = 0; s < f->opt_scratch_reg_count[c]; ++s) { + if (f->opt_scratch_regs[c][s] == hr) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt_machinize: hard reg %u overlaps scratch reg " + "in class %u", + (unsigned)hr, (unsigned)c); + } + } + } + } +} + +static int call_plan_replay_supported(const IRCallAux* aux, + const CGTarget* target) { + if (!aux || !aux->plan_valid || !target || !target->emit_call_plan) return 0; + for (u32 i = 0; i < aux->plan.nargs; ++i) { + if ((aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK || + aux->plan.args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && + !target->store_call_arg) + return 0; + if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG && + (aux->plan.args[i].src_kind == CG_CALL_PLAN_SRC_ADDR || + aux->plan.args[i].src_offset) && + !target->load_call_arg) + return 0; + } + for (u32 i = 0; i < aux->plan.nrets; ++i) + if (aux->plan.rets[i].dst.kind != OPK_REG && + 
aux->plan.rets[i].dst.kind != OPK_LOCAL && + aux->plan.rets[i].dst.kind != OPK_INDIRECT) + return 0; + for (u32 i = 0; i < aux->plan.nrets; ++i) + if (aux->plan.rets[i].dst_offset && + (aux->plan.rets[i].dst.kind == OPK_LOCAL || + aux->plan.rets[i].dst.kind == OPK_INDIRECT) && + !target->store_call_ret) + return 0; + return 1; +}