kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ef0ffdfc2b24e392e8a4d6e9dac6f6f74ca0f1b1
parent 7484601482123847d836cc711d250f7cbef65d46
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 15:41:17 -0700

arch,cg: introduce NativeTarget and NativeDirectTarget interfaces

Add src/arch/native_target.h defining the physical native emission vtable
(NativeTarget) that replaces the old CGTarget approach for optimized and
direct-O0 codegen.  Every hook receives caller-selected, target-legal
physical operands; the target must not pick replacement registers or spill
behind the caller's back.

Add src/cg/native_direct_target.{h,c} implementing NativeDirectTarget, which
adapts the semantic CgTarget surface to a NativeTarget backend by choosing
scratch registers, materializing semantic operands, and routing frame layout
through the native target's func_begin/frame_slot/spill/reload hooks.

Update doc/CGTARGET.md to document the NativeTarget contract, the
NativeDirectTarget adapter path, and the frame-patching lifecycle.

Diffstat:
M doc/CGTARGET.md | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
A src/arch/native_target.h | 356 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cg/native_direct_target.c | 1462 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cg/native_direct_target.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 2050 insertions(+), 16 deletions(-)

diff --git a/doc/CGTARGET.md b/doc/CGTARGET.md @@ -349,7 +349,9 @@ contract. `NativeTarget` is the physical native emission interface. Optimized code uses it post-machinize and post-regalloc, where it speaks final machine locations -and selected/native operations: +and selected/native operations. Direct `-O0` uses the same interface only after +`NativeDirectTarget` has chosen scratch registers and materialized semantic +operands. ```text MIR_LOC_REG hard physical register @@ -359,23 +361,77 @@ MIR_LOC_GLOBAL symbol plus addend MIR_LOC_ADDR final addressing mode ``` -A compact native emission surface can be shaped around final MIR records: +The central rule is that `NativeTarget` is not a register allocator. Every +instruction hook receives caller-selected, target-legal physical operands: +register destinations are `NATIVE_LOC_REG`, arithmetic sources are registers or +target-legal immediates, and memory addresses have already had base/index +values materialized into legal registers when the architecture requires that. +The native target may validate this contract and panic on invalid input, but it +must not pick replacement registers or spill behind the caller's back. 
+ +The surface is a low-level vtable rather than a semantic target: ```c typedef struct NativeTarget NativeTarget; struct NativeTarget { + Compiler *c; + ObjBuilder *obj; + MCEmitter *mc; const NativeRegInfo *regs; + NativeAllocClass (*class_for_type)(NativeTarget *, CfreeCgTypeId); + int (*imm_legal)(NativeTarget *, NativeImmUse, u32 op, + CfreeCgTypeId type, i64 value); + int (*addr_legal)(NativeTarget *, const NativeAddr *, MemAccess); + + void (*func_begin)(NativeTarget *, const CGFuncDesc *); void (*func_begin_known_frame)(NativeTarget *, const CGFuncDesc *, - const NativeFrameDesc *); - void (*emit)(NativeTarget *, const MIRInst *); + const NativeKnownFrameDesc *, + NativeFrameSlot *out_slots); + void (*note_frame_state)(NativeTarget *, const NativeFramePatchState *); void (*func_end)(NativeTarget *); - void (*plan_call)(NativeTarget *, const CGCallDesc *, NativeCallPlan *); + NativeFrameSlot (*frame_slot)(NativeTarget *, + const NativeFrameSlotDesc *); + + void (*move)(NativeTarget *, NativeLoc dst_reg, NativeLoc src_reg); + void (*load_imm)(NativeTarget *, NativeLoc dst_reg, i64 imm); + void (*load_addr)(NativeTarget *, NativeLoc dst_reg, NativeAddr addr); + void (*load)(NativeTarget *, NativeLoc dst_reg, NativeAddr addr, + MemAccess); + void (*store)(NativeTarget *, NativeAddr addr, NativeLoc src_reg, + MemAccess); + void (*binop)(NativeTarget *, BinOp, NativeLoc dst_reg, + NativeLoc a_reg, NativeLoc b_reg_or_imm); + void (*cmp)(NativeTarget *, CmpOp, NativeLoc dst_reg, + NativeLoc a_reg, NativeLoc b_reg_or_imm); + void (*convert)(NativeTarget *, ConvKind, NativeLoc dst_reg, + NativeLoc src_reg); + + void (*spill)(NativeTarget *, NativeLoc src_reg, NativeFrameSlot, + MemAccess); + void (*reload)(NativeTarget *, NativeLoc dst_reg, NativeFrameSlot, + MemAccess); + + void (*plan_call)(NativeTarget *, const NativeCallDesc *, + NativeCallPlan *); + void (*emit_call)(NativeTarget *, const NativeCallPlan *); + void (*plan_ret)(NativeTarget *, const CGFuncDesc *, 
+ const NativeLoc *values, u32 nvalues, + NativeCallPlanRet **out_rets, u32 *out_nrets); + void (*ret)(NativeTarget *); + + void (*patch_add)(NativeTarget *, const NativePatch *); + void (*patch_apply)(NativeTarget *); }; ``` +The real header has the full operation set, including labels, aggregate and +bitfield operations, atomics, intrinsics, inline assembly, traps, source +locations, finalize, and destroy. The important shape is the same for every +hook: it emits one selected native operation using already-legal operands. + The shared direct path may also use the same `NativeTarget`, but it does so through `NativeDirectTarget` and the small `NativeOps` adapter. This keeps the semantic `CGTarget` surface maximally reused while avoiding a large @@ -384,7 +440,7 @@ arch-specific direct `CGTarget` implementation per native backend. This interface owns the machine-level concerns removed from semantic `CGTarget`: -- concrete frame and spill slots; +- concrete frame, spill, save, alloca, and outgoing slots; - known-frame layout and max outgoing call area; - callee-save reservation and prologue/epilogue patching; - hard-register operands and final addressing modes; @@ -392,7 +448,8 @@ This interface owns the machine-level concerns removed from semantic - selected two-address and arch-specific instruction forms; - direct, indirect, and tail call emission after ABI routing; - CFI and unwind emission; -- inline-asm constraint binding and clobber handling. +- inline-asm constraint binding and clobber handling; +- backend register discovery and legality queries. Static register-file metadata belongs in `NativeRegInfo`: @@ -433,20 +490,41 @@ assembly dialects later affect name resolution, the callback can take a small dialect context. Call-specific answers should not be static register metadata when they depend -on ABI, calling convention, variadic state, vector ABI, or attributes. 
-Those belong to native call planning: +on ABI, calling convention, variadic state, vector ABI, or attributes. Those +belong to native call planning and must be expressed in native locations: ```text -call_clobber_mask(call, class) -return_locations(function ABI) -argument register/stack routing -tail-call stack routing +NativeCallDesc: + callee: native location or address + args/results: semantic values already represented as NativeLoc homes + +NativeCallPlan: + argument moves into hard argument registers or outgoing stack slots + return moves from hard return registers or result memory to result homes + clobber/return masks per allocation class + stack_arg_size for late frame patching ``` For direct `-O0`, `NativeOps` may expose call planning as an adapter because -`NativeDirectTarget` starts from semantic `CGCallDesc` values. For optimized -code, call planning belongs on `NativeTarget` or in MIR lowering; the optimizer -does not call `NativeOps`. +`NativeDirectTarget` starts from semantic `CGCallDesc` values and frame homes. +The adapter must still return legal native destinations, not semantic storage +decisions. For optimized code, call planning belongs on `NativeTarget` or in +MIR lowering; the optimizer does not call `NativeOps`. + +Frame size and outgoing-call size are allowed to be unknown when prologue code +is first emitted. The native target exposes explicit patch points: + +```text +func_begin or func_begin_known_frame + -> emit provisional prologue and record NativePatch records +body emission + -> NativeDirectTarget or allocated MIR updates NativeFramePatchState +func_end + -> note_frame_state, patch_apply, then final epilogue/finalization +``` + +This gives the single-pass direct path a clean escape hatch without moving +frame layout or register allocation back into `NativeTarget`. ## CfreeCg Value Stack @@ -530,6 +608,8 @@ move machine concepts downward: 1. Define semantic `CGLocal` and semantic `Operand` without `OPK_REG`. 2. 
Move frame slots, call plans, hard-register metadata, and spill/reload hooks out of semantic `CGTarget`. + The new internal contracts start in `src/arch/native_target.h` and + `src/cg/native_direct_target.h`. 3. Convert `CfreeCg` stack entries from physical register/frame ownership to semantic locals, lvalues, immediates, constants, and delayed compares. 4. Implement shared `NativeDirectTarget` as the native semantic `CGTarget`, diff --git a/src/arch/native_target.h b/src/arch/native_target.h @@ -0,0 +1,356 @@ +#ifndef CFREE_ARCH_NATIVE_TARGET_H +#define CFREE_ARCH_NATIVE_TARGET_H + +#include <string.h> + +#include "arch/arch.h" +#include "arch/regalloc.h" +#include "cg/cgtarget.h" +#include "core/core.h" + +/* NativeTarget is the physical native-emission contract. It is driven after + * semantic CG has been either direct-lowered by NativeDirectTarget or recorded, + * optimized, machinized, and allocated. It must not speak in semantic CGLocal + * ids except where a descriptor is carried for diagnostics or ABI queries. 
*/ + +typedef u32 NativeFrameSlot; +#define NATIVE_FRAME_SLOT_NONE 0u + +typedef enum NativeFrameSlotKind { + NATIVE_FRAME_SLOT_LOCAL, + NATIVE_FRAME_SLOT_PARAM, + NATIVE_FRAME_SLOT_SPILL, + NATIVE_FRAME_SLOT_ALLOCA, + NATIVE_FRAME_SLOT_OUTGOING, + NATIVE_FRAME_SLOT_SAVE, +} NativeFrameSlotKind; + +typedef enum NativeFrameSlotFlag { + NATIVE_FRAME_SLOT_NONE_FLAG = 0, + NATIVE_FRAME_SLOT_ADDR_TAKEN = 1u << 0, + NATIVE_FRAME_SLOT_MEMORY_REQUIRED = 1u << 1, + NATIVE_FRAME_SLOT_FIXED_OFFSET = 1u << 2, +} NativeFrameSlotFlag; + +typedef struct NativeFrameSlotDesc { + CfreeCgTypeId type; + Sym name; + SrcLoc loc; + u32 size; + u32 align; + i32 fixed_offset; + u8 kind; /* NativeFrameSlotKind */ + u8 pad; + u16 flags; /* NativeFrameSlotFlag */ +} NativeFrameSlotDesc; + +typedef struct NativeKnownFrameDesc { + const NativeFrameSlotDesc* slots; + u32 nslots; + u32 max_outgoing; + u32 align; +} NativeKnownFrameDesc; + +typedef enum NativeRegFlag { + NATIVE_REG_NONE = 0, + NATIVE_REG_ALLOCABLE = 1u << 0, + NATIVE_REG_CALLER_SAVED = 1u << 1, + NATIVE_REG_CALLEE_SAVED = 1u << 2, + NATIVE_REG_ARG = 1u << 3, + NATIVE_REG_RET = 1u << 4, + NATIVE_REG_RESERVED = 1u << 5, + NATIVE_REG_TEMP_PREFERRED = 1u << 6, +} NativeRegFlag; + +typedef struct NativePhysRegInfo { + Reg reg; + u8 cls; /* NativeAllocClass */ + u8 abi_index; /* 0xff when not an ordered ABI arg/ret register */ + u16 flags; /* NativeRegFlag */ + u16 spill_cost; + u16 copy_cost; +} NativePhysRegInfo; + +typedef struct NativeAllocClassInfo { + u8 cls; /* NativeAllocClass */ + u8 pad[3]; + + const Reg* allocable; + u32 nallocable; + + const Reg* scratch; + u32 nscratch; + + const NativePhysRegInfo* phys; + u32 nphys; + + u32 caller_saved_mask; + u32 callee_saved_mask; + u32 arg_mask; + u32 ret_mask; + u32 reserved_mask; +} NativeAllocClassInfo; + +typedef struct NativeRegInfo NativeRegInfo; +struct NativeRegInfo { + const NativeAllocClassInfo* classes; + u32 nclasses; + + int (*resolve_name)(const NativeRegInfo*, Sym name, 
Reg* out, + NativeAllocClass* cls_out); + const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg); + u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg); +}; + +typedef enum NativeLocKind { + NATIVE_LOC_NONE, + NATIVE_LOC_REG, + NATIVE_LOC_FRAME, + NATIVE_LOC_STACK, + NATIVE_LOC_IMM, + NATIVE_LOC_GLOBAL, + NATIVE_LOC_ADDR, +} NativeLocKind; + +typedef enum NativeAddrBaseKind { + NATIVE_ADDR_BASE_NONE, + NATIVE_ADDR_BASE_REG, + NATIVE_ADDR_BASE_FRAME, + NATIVE_ADDR_BASE_FRAME_VALUE, + NATIVE_ADDR_BASE_GLOBAL, +} NativeAddrBaseKind; + +typedef enum NativeAddrIndexKind { + NATIVE_ADDR_INDEX_NONE, + NATIVE_ADDR_INDEX_REG, + NATIVE_ADDR_INDEX_FRAME_VALUE, +} NativeAddrIndexKind; + +typedef enum NativeImmUse { + NATIVE_IMM_MOVE, + NATIVE_IMM_BINOP, + NATIVE_IMM_CMP, + NATIVE_IMM_ADDR_OFFSET, +} NativeImmUse; + +typedef struct NativeAddr { + u8 base_kind; /* NativeAddrBaseKind */ + u8 cls; /* NativeAllocClass for base value */ + u8 index_kind; /* NativeAddrIndexKind */ + u8 index_cls; /* NativeAllocClass for index value */ + u8 log2_scale; + u8 pad[3]; + CfreeCgTypeId base_type; + CfreeCgTypeId index_type; + union { + Reg reg; + NativeFrameSlot frame; + struct { + ObjSymId sym; + i64 addend; + } global; + } base; + union { + Reg reg; + NativeFrameSlot frame; + } index; + i32 offset; +} NativeAddr; + +typedef struct NativeLoc { + u8 kind; /* NativeLocKind */ + u8 cls; /* NativeAllocClass for register-like locations */ + u8 pad[2]; + CfreeCgTypeId type; + union { + Reg reg; + NativeFrameSlot frame; + struct { + NativeFrameSlot slot; + i32 offset; + } stack; + i64 imm; + struct { + ObjSymId sym; + i64 addend; + } global; + NativeAddr addr; + } v; +} NativeLoc; + +typedef struct NativeInst NativeInst; + +typedef enum NativePatchKind { + NATIVE_PATCH_FRAME_SIZE, + NATIVE_PATCH_MAX_OUTGOING, + NATIVE_PATCH_ARCH = 0x1000, +} NativePatchKind; + +typedef struct NativePatch { + u32 kind; /* NativePatchKind or arch-private */ + u32 section_id; + u32 offset; 
+ u32 width; + i64 addend; + u64 value; +} NativePatch; + +typedef struct NativeFramePatchState { + u32 max_outgoing; + u32 max_align; +} NativeFramePatchState; + +#define NATIVE_CALL_PLAN_CLASSES 3u + +typedef struct NativeCallDesc { + CfreeCgTypeId fn_type; + NativeLoc callee; + const NativeLoc* args; + const NativeLoc* results; + u32 nargs; + u32 nresults; + u16 flags; /* CGCallFlag */ + u8 tail_policy; /* CfreeCgTailPolicy */ + u8 pad; + CfreeCgInlinePolicy inline_policy; +} NativeCallDesc; + +typedef enum NativeCallPlanMoveKind { + NATIVE_CALL_MOVE_NONE, + NATIVE_CALL_MOVE_VALUE, + NATIVE_CALL_MOVE_ADDR, +} NativeCallPlanMoveKind; + +typedef struct NativeCallPlanMove { + NativeLoc src; + NativeLoc dst; + MemAccess mem; + u8 src_kind; /* NativeCallPlanMoveKind */ + u8 dst_kind; /* NativeLocKind */ + u8 pad[2]; +} NativeCallPlanMove; + +typedef struct NativeCallPlanRet { + NativeLoc src; + NativeLoc dst; + MemAccess mem; +} NativeCallPlanRet; + +typedef struct NativeCallPlan { + NativeLoc callee; + NativeCallPlanMove* args; + NativeCallPlanRet* rets; + u32 nargs; + u32 nrets; + u32 stack_arg_size; + u32 clobber_mask[NATIVE_CALL_PLAN_CLASSES]; + u32 return_mask[NATIVE_CALL_PLAN_CLASSES]; + u16 flags; /* CGCallFlag */ + u8 has_sret; + u8 is_variadic; +} NativeCallPlan; + +typedef struct NativeTarget NativeTarget; +struct NativeTarget { + Compiler* c; + ObjBuilder* obj; + MCEmitter* mc; + const NativeRegInfo* regs; + + NativeAllocClass (*class_for_type)(NativeTarget*, CfreeCgTypeId); + int (*imm_legal)(NativeTarget*, NativeImmUse, u32 op, CfreeCgTypeId, i64); + int (*addr_legal)(NativeTarget*, const NativeAddr*, MemAccess); + + void (*func_begin)(NativeTarget*, const CGFuncDesc*); + void (*func_begin_known_frame)(NativeTarget*, const CGFuncDesc*, + const NativeKnownFrameDesc*, + NativeFrameSlot* out_slots); + void (*note_frame_state)(NativeTarget*, const NativeFramePatchState*); + void (*func_end)(NativeTarget*); + + NativeFrameSlot (*frame_slot)(NativeTarget*, 
const NativeFrameSlotDesc*); + void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeFrameSlot home); + + MCLabel (*label_new)(NativeTarget*); + void (*label_place)(NativeTarget*, MCLabel); + void (*jump)(NativeTarget*, MCLabel); + void (*cmp_branch)(NativeTarget*, CmpOp, NativeLoc a, NativeLoc b, + MCLabel target); + void (*indirect_branch)(NativeTarget*, NativeLoc addr, + const MCLabel* valid_targets, u32 ntargets); + void (*load_label_addr)(NativeTarget*, NativeLoc dst, MCLabel target); + + void (*emit)(NativeTarget*, const NativeInst*); + /* All instruction-emission hooks require caller-selected legal physical + * operands. In particular, dst values are NATIVE_LOC_REG, arithmetic sources + * are NATIVE_LOC_REG or target-legal immediates, and memory base/index + * registers in NativeAddr must already be materialized. NativeTarget may + * validate and assert, but it must not allocate registers. */ + void (*move)(NativeTarget*, NativeLoc dst_reg, NativeLoc src_reg); + void (*load_imm)(NativeTarget*, NativeLoc dst_reg, i64 imm); + void (*load_const)(NativeTarget*, NativeLoc dst_reg, ConstBytes); + void (*load_addr)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr); + void (*load)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr, MemAccess); + void (*store)(NativeTarget*, NativeAddr addr, NativeLoc src_reg, MemAccess); + void (*tls_addr_of)(NativeTarget*, NativeLoc dst_reg, ObjSymId sym, + i64 addend); + void (*copy_bytes)(NativeTarget*, NativeAddr dst, NativeAddr src, + AggregateAccess); + void (*set_bytes)(NativeTarget*, NativeAddr dst, NativeLoc byte_value, + AggregateAccess); + void (*bitfield_load)(NativeTarget*, NativeLoc dst_reg, + NativeAddr record_addr, BitFieldAccess); + void (*bitfield_store)(NativeTarget*, NativeAddr record_addr, + NativeLoc src_reg, BitFieldAccess); + void (*binop)(NativeTarget*, BinOp, NativeLoc dst_reg, NativeLoc a_reg, + NativeLoc b_reg_or_imm); + void (*unop)(NativeTarget*, UnOp, NativeLoc dst_reg, NativeLoc src_reg); + void 
(*cmp)(NativeTarget*, CmpOp, NativeLoc dst_reg, NativeLoc a_reg, + NativeLoc b_reg_or_imm); + void (*convert)(NativeTarget*, ConvKind, NativeLoc dst_reg, + NativeLoc src_reg); + void (*alloca_)(NativeTarget*, NativeLoc dst_reg, NativeLoc size_reg, + u32 align); + + void (*spill)(NativeTarget*, NativeLoc src_reg, NativeFrameSlot, MemAccess); + void (*reload)(NativeTarget*, NativeLoc dst_reg, NativeFrameSlot, MemAccess); + + void (*plan_call)(NativeTarget*, const NativeCallDesc*, NativeCallPlan*); + void (*emit_call)(NativeTarget*, const NativeCallPlan*); + void (*plan_ret)(NativeTarget*, const CGFuncDesc*, const NativeLoc* values, + u32 nvalues, NativeCallPlanRet** out_rets, u32* out_nrets); + void (*ret)(NativeTarget*); + + void (*atomic_load)(NativeTarget*, NativeLoc dst, NativeAddr addr, MemAccess, + MemOrder); + void (*atomic_store)(NativeTarget*, NativeAddr addr, NativeLoc src, MemAccess, + MemOrder); + void (*atomic_rmw)(NativeTarget*, AtomicOp, NativeLoc dst, NativeAddr addr, + NativeLoc val, MemAccess, MemOrder); + void (*atomic_cas)(NativeTarget*, NativeLoc prior, NativeLoc ok, + NativeAddr addr, NativeLoc expected, NativeLoc desired, + MemAccess, MemOrder success, MemOrder failure); + void (*fence)(NativeTarget*, MemOrder); + void (*intrinsic)(NativeTarget*, IntrinKind, const NativeLoc* dsts, u32 ndst, + const NativeLoc* args, u32 narg); + void (*asm_block)(NativeTarget*, const char* tmpl, const AsmConstraint* outs, + u32 nout, NativeLoc* out_locs, const AsmConstraint* ins, + u32 nin, const NativeLoc* in_locs, const Sym* clobbers, + u32 nclob); + void (*file_scope_asm)(NativeTarget*, const char* src, size_t len); + void (*patch_add)(NativeTarget*, const NativePatch*); + void (*patch_apply)(NativeTarget*); + void (*trap)(NativeTarget*); + void (*set_loc)(NativeTarget*, SrcLoc); + void (*finalize)(NativeTarget*); + void (*destroy)(NativeTarget*); +}; + +static inline NativeLoc native_loc_none(void) { + NativeLoc loc; + memset(&loc, 0, sizeof loc); + loc.kind 
= NATIVE_LOC_NONE; + return loc; +} + +#endif diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c @@ -0,0 +1,1462 @@ +#include "cg/native_direct_target.h" + +/* NativeDirectTarget is intentionally single-pass: semantic CG calls are + * lowered immediately to NativeTarget operations, MCEmitter owns label fixups, + * and function-end calls note_frame_state()/patch_apply() let the native + * backend patch deferred frame/prologue details after max outgoing space is + * known. Direct lowering currently forwards final frame state but does not + * author generic NativePatch records itself. + * + * Remaining direct/backend cutover work: stack arguments, tail/musttail, + * varargs, typed inline-asm register/memory bindings and outputs, + * label-address data and computed gotos, records/sret/large aggregates, FP and + * rounding conversions, fuller scalar intrinsics, and production-grade atomic + * RMW/CAS lowering. */ + +#include <string.h> + +#include "cg/type.h" +#include "core/arena.h" +#include "core/pool.h" +#include "core/slice.h" + +#define NATIVE_DIRECT_MAGIC 0x4e445447u + +static NativeDirectTarget* nd_of(CgTarget* t) { return (NativeDirectTarget*)t; } + +static _Noreturn void nd_panic(NativeDirectTarget* d, const char* what) { + compiler_panic(d->base.c, d->loc, "native direct target: %s", what); +} + +static void* nd_arena(NativeDirectTarget* d, size_t size, size_t align) { + void* p = arena_zalloc(d->base.c->tu, size, align); + if (!p) nd_panic(d, "out of memory"); + return p; +} + +static void nd_grow_locals(NativeDirectTarget* d, u32 want) { + NativeDirectLocal* next; + u32 cap; + if (d->locals_cap >= want) return; + cap = d->locals_cap ? 
d->locals_cap : 32u; + while (cap < want) cap *= 2u; + next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectLocal)); + if (d->locals) memcpy(next, d->locals, sizeof(*next) * d->nlocals); + d->locals = next; + d->locals_cap = cap; +} + +static void nd_grow_labels(NativeDirectTarget* d, u32 want) { + MCLabel* next; + u32 cap; + if (d->labels_cap >= want) return; + cap = d->labels_cap ? d->labels_cap : 32u; + while (cap < want) cap *= 2u; + next = nd_arena(d, sizeof(*next) * cap, _Alignof(MCLabel)); + if (d->labels) memcpy(next, d->labels, sizeof(*next) * d->labels_cap); + d->labels = next; + d->labels_cap = cap; +} + +static void nd_grow_scopes(NativeDirectTarget* d, u32 want) { + NativeDirectScope* next; + u32 cap; + if (d->scopes_cap >= want) return; + cap = d->scopes_cap ? d->scopes_cap : 16u; + while (cap < want) cap *= 2u; + next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectScope)); + if (d->scopes) memcpy(next, d->scopes, sizeof(*next) * d->nscopes); + d->scopes = next; + d->scopes_cap = cap; +} + +static NativeDirectLocal* nd_local(NativeDirectTarget* d, CGLocal local) { + if (local == CG_LOCAL_NONE || local > d->nlocals) + nd_panic(d, "bad semantic local"); + return &d->locals[local - 1u]; +} + +static NativeAllocClass nd_class_for_type(NativeDirectTarget* d, + CfreeCgTypeId type) { + if (d->ops && d->ops->class_for_type) return d->ops->class_for_type(d, type); + if (d->native && d->native->class_for_type) + return d->native->class_for_type(d->native, type); + return NATIVE_REG_INT; +} + +static const NativeAllocClassInfo* nd_class_info(NativeDirectTarget* d, + NativeAllocClass cls) { + const NativeRegInfo* ri = + d->ops && d->ops->reg_info ? 
d->ops->reg_info(d) : NULL; + if (!ri && d->native) ri = d->native->regs; + if (!ri) nd_panic(d, "target has no register info"); + for (u32 i = 0; i < ri->nclasses; ++i) + if ((NativeAllocClass)ri->classes[i].cls == cls) return &ri->classes[i]; + nd_panic(d, "target has no requested register class"); +} + +static NativeLoc nd_reg_loc(Reg reg, NativeAllocClass cls, CfreeCgTypeId type) { + NativeLoc out; + memset(&out, 0, sizeof out); + out.kind = NATIVE_LOC_REG; + out.cls = (u8)cls; + out.type = type; + out.v.reg = reg; + return out; +} + +static Reg nd_scratch_acquire(NativeDirectTarget* d, NativeAllocClass cls) { + const NativeAllocClassInfo* ci = nd_class_info(d, cls); + const Reg* regs = ci->scratch; + u32 nregs = ci->nscratch; + for (u32 pass = 0; pass < 2u; ++pass) { + for (u32 i = 0; i < nregs; ++i) { + Reg r = regs[i]; + if (r >= 32u) continue; + if ((d->scratch_used[cls] & (1u << r)) == 0) { + d->scratch_used[cls] |= 1u << r; + return r; + } + } + regs = ci->allocable; + nregs = ci->nallocable; + } + nd_panic(d, "out of scratch registers"); +} + +static void nd_scratch_release(NativeDirectTarget* d, NativeAllocClass cls, + Reg reg) { + if (reg < 32u) d->scratch_used[cls] &= ~(1u << reg); +} + +static NativeFrameSlot nd_alloc_frame_slot(NativeDirectTarget* d, + const NativeFrameSlotDesc* desc) { + NativeFrameSlot slot = NATIVE_FRAME_SLOT_NONE; + if (d->ops && d->ops->alloc_frame_slot) + slot = d->ops->alloc_frame_slot(d, desc); + else if (d->native && d->native->frame_slot) + slot = d->native->frame_slot(d->native, desc); + else + nd_panic(d, "target does not allocate frame slots"); + if (slot == NATIVE_FRAME_SLOT_NONE) + nd_panic(d, "frame slot allocation failed"); + return slot; +} + +static NativeFrameSlotDesc nd_slot_desc_local(const CGLocalDesc* in) { + NativeFrameSlotDesc out; + memset(&out, 0, sizeof out); + out.type = in->type; + out.name = in->name; + out.loc = in->loc; + out.size = in->size; + out.align = in->align; + out.kind = 
NATIVE_FRAME_SLOT_LOCAL; + if (in->flags & CG_LOCAL_ADDR_TAKEN) + out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN; + if (in->flags & CG_LOCAL_MEMORY_REQUIRED) + out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED; + return out; +} + +static NativeFrameSlotDesc nd_slot_desc_param(const CGParamDesc* in) { + NativeFrameSlotDesc out; + memset(&out, 0, sizeof out); + out.type = in->type; + out.name = in->name; + out.loc = in->loc; + out.size = in->size; + out.align = in->align; + out.kind = NATIVE_FRAME_SLOT_PARAM; + if (in->flags & CG_LOCAL_ADDR_TAKEN) + out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN; + if (in->flags & CG_LOCAL_MEMORY_REQUIRED) + out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED; + return out; +} + +static CGLocal nd_alloc_local(NativeDirectTarget* d, const CGLocalDesc* desc) { + NativeDirectLocal* l; + NativeFrameSlotDesc fsd; + CGLocal id; + nd_grow_locals(d, d->nlocals + 1u); + id = d->nlocals + 1u; + l = &d->locals[d->nlocals++]; + memset(l, 0, sizeof *l); + l->type = desc->type; + l->size = desc->size; + l->align = desc->align; + l->flags = desc->flags; + l->reg = REG_NONE; + l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0; + l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0; + l->cls = (u8)nd_class_for_type(d, desc->type); + fsd = nd_slot_desc_local(desc); + l->home = nd_alloc_frame_slot(d, &fsd); + return id; +} + +static MCLabel nd_mc_label(NativeDirectTarget* d, Label label) { + if (label == LABEL_NONE || label > d->nlabels || !d->labels[label]) + nd_panic(d, "bad label"); + return d->labels[label]; +} + +static Label nd_label_new_raw(NativeDirectTarget* d) { + Label id; + if (!d->native || !d->native->label_new) + nd_panic(d, "target does not allocate labels"); + id = d->nlabels + 1u; + nd_grow_labels(d, id + 1u); + d->labels[id] = d->native->label_new(d->native); + d->nlabels = id; + return id; +} + +static NativeLoc nd_loc_frame(NativeDirectTarget* d, CGLocal local, + CfreeCgTypeId type) { + NativeDirectLocal* l = nd_local(d, 
local); + NativeLoc out; + memset(&out, 0, sizeof out); + out.kind = NATIVE_LOC_FRAME; + out.cls = l->cls; + out.type = type ? type : l->type; + out.v.frame = l->home; + return out; +} + +static NativeLoc nd_loc_imm(i64 imm, CfreeCgTypeId type) { + NativeLoc out; + memset(&out, 0, sizeof out); + out.kind = NATIVE_LOC_IMM; + out.type = type; + out.v.imm = imm; + return out; +} + +static NativeLoc nd_loc_global(ObjSymId sym, i64 addend, CfreeCgTypeId type) { + NativeLoc out; + memset(&out, 0, sizeof out); + out.kind = NATIVE_LOC_GLOBAL; + out.type = type; + out.v.global.sym = sym; + out.v.global.addend = addend; + return out; +} + +static NativeLoc nd_loc_operand(NativeDirectTarget* d, Operand op) { + switch ((OpKind)op.kind) { + case OPK_IMM: + return nd_loc_imm(op.v.imm, op.type); + case OPK_LOCAL: + return nd_loc_frame(d, op.v.local, op.type); + case OPK_GLOBAL: + return nd_loc_global(op.v.global.sym, op.v.global.addend, op.type); + case OPK_INDIRECT: { + NativeLoc out; + memset(&out, 0, sizeof out); + out.kind = NATIVE_LOC_ADDR; + out.type = op.type; + out.v.addr.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE; + out.v.addr.base.frame = nd_local(d, op.v.ind.base)->home; + out.v.addr.cls = nd_local(d, op.v.ind.base)->cls; + out.v.addr.base_type = nd_local(d, op.v.ind.base)->type; + if (op.v.ind.index != CG_LOCAL_NONE) { + out.v.addr.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE; + out.v.addr.index.frame = nd_local(d, op.v.ind.index)->home; + out.v.addr.index_cls = nd_local(d, op.v.ind.index)->cls; + out.v.addr.index_type = nd_local(d, op.v.ind.index)->type; + } + out.v.addr.log2_scale = op.v.ind.log2_scale; + out.v.addr.offset = op.v.ind.ofs; + return out; + } + default: + nd_panic(d, "bad operand kind"); + } +} + +static NativeAddr nd_addr_storage(NativeDirectTarget* d, Operand op) { + NativeAddr out; + memset(&out, 0, sizeof out); + switch ((OpKind)op.kind) { + case OPK_LOCAL: + out.base_kind = NATIVE_ADDR_BASE_FRAME; + out.base.frame = nd_local(d, op.v.local)->home; + 
out.cls = nd_local(d, op.v.local)->cls; + out.base_type = nd_local(d, op.v.local)->type; + return out; + case OPK_GLOBAL: + out.base_kind = NATIVE_ADDR_BASE_GLOBAL; + out.base.global.sym = op.v.global.sym; + out.base.global.addend = op.v.global.addend; + out.base_type = op.type; + return out; + case OPK_INDIRECT: + out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE; + out.base.frame = nd_local(d, op.v.ind.base)->home; + out.cls = nd_local(d, op.v.ind.base)->cls; + out.base_type = nd_local(d, op.v.ind.base)->type; + if (op.v.ind.index != CG_LOCAL_NONE) { + out.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE; + out.index.frame = nd_local(d, op.v.ind.index)->home; + out.index_cls = nd_local(d, op.v.ind.index)->cls; + out.index_type = nd_local(d, op.v.ind.index)->type; + } + out.log2_scale = op.v.ind.log2_scale; + out.offset = op.v.ind.ofs; + return out; + default: + nd_panic(d, "operand is not addressable storage"); + } +} + +static NativeAddr nd_addr_pointer(NativeDirectTarget* d, Operand op) { + NativeAddr out; + memset(&out, 0, sizeof out); + switch ((OpKind)op.kind) { + case OPK_LOCAL: + if (cg_type_is_ptr(d->base.c, op.type)) { + out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE; + out.base.frame = nd_local(d, op.v.local)->home; + out.cls = nd_local(d, op.v.local)->cls; + out.base_type = nd_local(d, op.v.local)->type; + } else { + out.base_kind = NATIVE_ADDR_BASE_FRAME; + out.base.frame = nd_local(d, op.v.local)->home; + out.cls = nd_local(d, op.v.local)->cls; + out.base_type = nd_local(d, op.v.local)->type; + } + return out; + case OPK_GLOBAL: + out.base_kind = NATIVE_ADDR_BASE_GLOBAL; + out.base.global.sym = op.v.global.sym; + out.base.global.addend = op.v.global.addend; + out.base_type = op.type; + return out; + case OPK_INDIRECT: + return nd_addr_storage(d, op); + default: + nd_panic(d, "operand is not a pointer address"); + } +} + +#define ND_REQUIRE_NATIVE(d, member, name) \ + do { \ + if (!(d)->native || !(d)->native->member) nd_panic((d), (name)); \ + } while (0) + 
+typedef struct NdAddrTemps { + Reg base; + Reg index; + NativeAllocClass base_cls; + NativeAllocClass index_cls; +} NdAddrTemps; + +static void nd_addr_temps_release(NativeDirectTarget* d, + const NdAddrTemps* temps); + +static MemAccess nd_scalar_mem(CfreeCgTypeId type, u32 size, u32 align) { + MemAccess mem; + memset(&mem, 0, sizeof mem); + mem.type = type; + mem.size = size; + mem.align = align; + return mem; +} + +static MemAccess nd_type_mem(NativeDirectTarget* d, CfreeCgTypeId type) { + u64 size; + if (!type) type = builtin_id(CFREE_CG_BUILTIN_I64); + size = cg_type_size(d->base.c, type); + if (size > 0xffffffffu) nd_panic(d, "scalar type is too large"); + return nd_scalar_mem(type, (u32)size, cg_type_align(d->base.c, type)); +} + +static void nd_barrier(NativeDirectTarget* d, u32 flags) { + if (d->ops && d->ops->barrier) d->ops->barrier(d, flags); +} + +static void nd_load_frame_to_reg(NativeDirectTarget* d, NativeLoc dst, + NativeFrameSlot frame, CfreeCgTypeId type) { + NativeAddr addr; + MemAccess mem; + memset(&addr, 0, sizeof addr); + addr.base_kind = NATIVE_ADDR_BASE_FRAME; + addr.base.frame = frame; + addr.base_type = type; + mem = nd_type_mem(d, type); + ND_REQUIRE_NATIVE(d, load, "target does not emit loads"); + d->native->load(d->native, dst, addr, mem); +} + +static void nd_store_reg_to_frame(NativeDirectTarget* d, NativeFrameSlot frame, + CfreeCgTypeId type, NativeLoc src) { + NativeAddr addr; + MemAccess mem; + memset(&addr, 0, sizeof addr); + addr.base_kind = NATIVE_ADDR_BASE_FRAME; + addr.base.frame = frame; + addr.base_type = type; + mem = nd_type_mem(d, type); + ND_REQUIRE_NATIVE(d, store, "target does not emit stores"); + d->native->store(d->native, addr, src, mem); +} + +static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst, NativeLoc src); +static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc); + +static NativeAddr nd_addr_materialize(NativeDirectTarget* d, NativeAddr in, + NdAddrTemps* temps, MemAccess mem) 
{ + NativeAddr out = in; + memset(temps, 0, sizeof *temps); + temps->base = REG_NONE; + temps->index = REG_NONE; + if (out.base_kind == NATIVE_ADDR_BASE_FRAME_VALUE) { + NativeAllocClass cls = (NativeAllocClass)out.cls; + Reg r = nd_scratch_acquire(d, cls); + NativeLoc dst = nd_reg_loc(r, cls, out.base_type); + nd_load_frame_to_reg(d, dst, out.base.frame, out.base_type); + out.base_kind = NATIVE_ADDR_BASE_REG; + out.base.reg = r; + temps->base = r; + temps->base_cls = cls; + } + if (out.index_kind == NATIVE_ADDR_INDEX_FRAME_VALUE) { + NativeAllocClass cls = (NativeAllocClass)out.index_cls; + Reg r = nd_scratch_acquire(d, cls); + NativeLoc dst = nd_reg_loc(r, cls, out.index_type); + nd_load_frame_to_reg(d, dst, out.index.frame, out.index_type); + out.index_kind = NATIVE_ADDR_INDEX_REG; + out.index.reg = r; + temps->index = r; + temps->index_cls = cls; + } + if ((d->ops && d->ops->addr_legal && !d->ops->addr_legal(d, &out, mem)) || + ((!d->ops || !d->ops->addr_legal) && d->native && d->native->addr_legal && + !d->native->addr_legal(d->native, &out, mem))) { + NativeAllocClass cls = NATIVE_REG_INT; + Reg r = nd_scratch_acquire(d, cls); + NativeLoc dst = nd_reg_loc( + r, cls, + out.base_type ? 
out.base_type : builtin_id(CFREE_CG_BUILTIN_I64)); + ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); + d->native->load_addr(d->native, dst, out); + nd_addr_temps_release(d, temps); + memset(temps, 0, sizeof *temps); + temps->base = r; + temps->index = REG_NONE; + temps->base_cls = cls; + memset(&out, 0, sizeof out); + out.base_kind = NATIVE_ADDR_BASE_REG; + out.base.reg = r; + out.cls = (u8)cls; + out.base_type = dst.type; + if ((d->ops && d->ops->addr_legal && !d->ops->addr_legal(d, &out, mem)) || + ((!d->ops || !d->ops->addr_legal) && d->native && + d->native->addr_legal && !d->native->addr_legal(d->native, &out, mem))) + nd_panic(d, "native address is not legal"); + } + return out; +} + +static void nd_addr_temps_release(NativeDirectTarget* d, + const NdAddrTemps* temps) { + if (temps->base != REG_NONE) + nd_scratch_release(d, temps->base_cls, temps->base); + if (temps->index != REG_NONE) + nd_scratch_release(d, temps->index_cls, temps->index); +} + +static NativeLoc nd_materialize_loc(NativeDirectTarget* d, NativeLoc src, + NativeAllocClass cls, CfreeCgTypeId type) { + Reg r; + NativeLoc dst; + if (src.kind == NATIVE_LOC_REG) return src; + r = nd_scratch_acquire(d, cls); + dst = nd_reg_loc(r, cls, type ? 
type : src.type); + nd_copy_to_reg(d, dst, src); + return dst; +} +/* Emit native code that brings src into the register dst, dispatching on src.kind. Panics when dst is not a register or when src.kind is not one of the handled kinds. */ +static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst, + NativeLoc src) { + if (dst.kind != NATIVE_LOC_REG) nd_panic(d, "copy destination is not a reg"); + switch ((NativeLocKind)src.kind) { + case NATIVE_LOC_REG: +/* Same register and same class: no move is emitted. */ if (src.v.reg != dst.v.reg || src.cls != dst.cls) { + ND_REQUIRE_NATIVE(d, move, "target does not emit register moves"); + d->native->move(d->native, dst, src); + } + break; + case NATIVE_LOC_FRAME: + nd_load_frame_to_reg(d, dst, src.v.frame, dst.type); + break; + case NATIVE_LOC_STACK: { +/* Load from the frame slot named by the stack location, at its offset, using a scalar access sized for dst.type. */ NativeAddr addr; + MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size, + d->base.c->target.ptr_align); + memset(&addr, 0, sizeof addr); + addr.base_kind = NATIVE_ADDR_BASE_FRAME; + addr.base.frame = src.v.stack.slot; + addr.base_type = dst.type; + addr.offset = src.v.stack.offset; + ND_REQUIRE_NATIVE(d, load, "target does not emit loads"); + d->native->load(d->native, dst, addr, mem); + break; + } + case NATIVE_LOC_IMM: + ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates"); + d->native->load_imm(d->native, dst, src.v.imm); + break; + case NATIVE_LOC_GLOBAL: { +/* Produce the symbol address (sym plus addend) through load_addr. */ NativeAddr addr; + memset(&addr, 0, sizeof addr); + addr.base_kind = NATIVE_ADDR_BASE_GLOBAL; + addr.base.global.sym = src.v.global.sym; + addr.base.global.addend = src.v.global.addend; + addr.base_type = dst.type; + ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); + d->native->load_addr(d->native, dst, addr); + break; + } + case NATIVE_LOC_ADDR: { +/* Legalize the address, then hand it to load_addr (not load); any temporaries it needed are released afterwards. */ NdAddrTemps temps; + MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size, + d->base.c->target.ptr_align); + NativeAddr addr = nd_addr_materialize(d, src.v.addr, &temps, mem); + ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); + d->native->load_addr(d->native, dst, addr); + nd_addr_temps_release(d, &temps); + break; + } + default: + nd_panic(d, "cannot materialize native location"); + } +} + 
+static void nd_write_loc(NativeDirectTarget* d, NativeLoc dst, NativeLoc src, + MemAccess mem) { +/* Write src to dst. Register destinations are filled directly; frame, stack and address destinations first bring src into a register and then store it. Panics on any other destination kind. */ switch ((NativeLocKind)dst.kind) { + case NATIVE_LOC_REG: + nd_copy_to_reg(d, dst, src); + break; + case NATIVE_LOC_FRAME: { + NativeLoc val = + nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type); + nd_store_reg_to_frame(d, dst.v.frame, dst.type, val); + nd_release_materialized(d, val); + break; + } + case NATIVE_LOC_STACK: { + NativeAddr addr; + NativeLoc val = + nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type); + memset(&addr, 0, sizeof addr); + addr.base_kind = NATIVE_ADDR_BASE_FRAME; + addr.base.frame = dst.v.stack.slot; + addr.base_type = dst.type; + addr.offset = dst.v.stack.offset; + ND_REQUIRE_NATIVE(d, store, "target does not emit stores"); + d->native->store(d->native, addr, val, mem); + nd_release_materialized(d, val); + break; + } + case NATIVE_LOC_ADDR: { +/* Unlike the frame and stack cases, the value register class and type are taken from src here, not from dst. */ NdAddrTemps temps; + NativeAddr addr = nd_addr_materialize(d, dst.v.addr, &temps, mem); + NativeAllocClass cls = nd_class_for_type(d, src.type); + NativeLoc val = nd_materialize_loc(d, src, cls, src.type); + ND_REQUIRE_NATIVE(d, store, "target does not emit stores"); + d->native->store(d->native, addr, val, mem); + nd_release_materialized(d, val); + nd_addr_temps_release(d, &temps); + break; + } + default: + nd_panic(d, "unsupported write destination"); + } +} +/* Release loc's register through nd_scratch_release when loc is a register location; other kinds are ignored. NOTE(review): nd_materialize_loc returns register inputs unchanged, so this can be handed a register that was never acquired as scratch; confirm nd_scratch_release tolerates that. */ +static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc) { + if (loc.kind == NATIVE_LOC_REG) + nd_scratch_release(d, (NativeAllocClass)loc.cls, loc.v.reg); +} +/* Convert the semantic operand op to a native location and materialize it into a register of the class chosen for op.type. */ +static NativeLoc nd_materialize_operand(NativeDirectTarget* d, Operand op) { + NativeAllocClass cls = nd_class_for_type(d, op.type); + return nd_materialize_loc(d, nd_loc_operand(d, op), cls, op.type); +} +/* Acquire a scratch register of the class chosen for dst.type and return it as a register location typed dst.type. */ +static NativeLoc nd_dst_scratch(NativeDirectTarget* d, Operand dst) { + NativeAllocClass cls = nd_class_for_type(d, dst.type); + Reg r = nd_scratch_acquire(d, cls); + return nd_reg_loc(r, cls, dst.type); +} + +static void 
nd_store_operand_from_reg(NativeDirectTarget* d, Operand dst, + NativeLoc src) { +/* Store register src into the frame home of the semantic local dst; panics unless dst.kind is OPK_LOCAL. */ if (dst.kind != OPK_LOCAL) nd_panic(d, "destination is not a semantic local"); + nd_store_reg_to_frame(d, nd_local(d, dst.v.local)->home, dst.type, src); +} +/* CgTarget func_begin hook: remember fd, reset the local/label/scope counters, max_outgoing and scratch bookkeeping, then call the optional adapter hook followed by the optional native hook. */ +static void nd_func_begin(CgTarget* t, const CGFuncDesc* fd) { + NativeDirectTarget* d = nd_of(t); + d->func = fd; + d->nlocals = 0; + d->nlabels = 0; + d->nscopes = 0; + d->max_outgoing = 0; + memset(d->scratch_used, 0, sizeof d->scratch_used); + if (d->ops && d->ops->func_begin) d->ops->func_begin(d, fd); + if (d->native && d->native->func_begin) d->native->func_begin(d->native, fd); +} +/* CgTarget func_end hook: pass d->max_outgoing to note_frame_state, run patch_apply, then the adapter func_end and the native func_end (each only when provided), and finally clear d->func. */ +static void nd_func_end(CgTarget* t) { + NativeDirectTarget* d = nd_of(t); + NativeFramePatchState frame; + memset(&frame, 0, sizeof frame); + frame.max_outgoing = d->max_outgoing; + if (d->native && d->native->note_frame_state) + d->native->note_frame_state(d->native, &frame); + if (d->native && d->native->patch_apply) d->native->patch_apply(d->native); + if (d->ops && d->ops->func_end) d->ops->func_end(d); + if (d->native && d->native->func_end) d->native->func_end(d->native); + d->func = NULL; +} +/* CgTarget alias hook: does nothing; every argument is ignored. */ +static void nd_alias(CgTarget* t, ObjSymId alias_sym, ObjSymId target_sym, + CfreeCgTypeId type) { + (void)t; + (void)alias_sym; + (void)target_sym; + (void)type; +} +/* CgTarget local hook: allocate a semantic local through nd_alloc_local and return its id. */ +static CGLocal nd_local_new(CgTarget* t, const CGLocalDesc* desc) { + return nd_alloc_local(nd_of(t), desc); +} +/* CgTarget local_addr hook: flag the local as address-taken, compute the address of its storage into a scratch register and store that register to dst. desc is unused. */ +static void nd_local_addr(CgTarget* t, Operand dst, const CGLocalDesc* desc, + CGLocal local) { + NativeDirectTarget* d = nd_of(t); + NativeDirectLocal* l = nd_local(d, local); + Operand lv; + (void)desc; + l->address_taken = 1; + l->flags |= CG_LOCAL_ADDR_TAKEN; + memset(&lv, 0, sizeof lv); + lv.kind = OPK_LOCAL; + lv.type = l->type; + lv.v.local = local; + { + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); + d->native->load_addr(d->native, reg, nd_addr_storage(d, lv)); + 
nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); + } +} +/* CgTarget param hook: append a local for the parameter described by desc, allocate a frame slot as its home and let the optional adapter bind_param hook see it. Returns the 1-based local id. */ +static CGLocal nd_param(CgTarget* t, const CGParamDesc* desc) { + NativeDirectTarget* d = nd_of(t); + NativeDirectLocal* l; + NativeFrameSlotDesc fsd; + CGLocal id; + nd_grow_locals(d, d->nlocals + 1u); + id = d->nlocals + 1u; + l = &d->locals[d->nlocals++]; + memset(l, 0, sizeof *l); + l->type = desc->type; + l->size = desc->size; + l->align = desc->align; + l->flags = desc->flags; + l->reg = REG_NONE; + l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0; + l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0; + l->cls = (u8)nd_class_for_type(d, desc->type); + fsd = nd_slot_desc_param(desc); + l->home = nd_alloc_frame_slot(d, &fsd); + if (d->ops && d->ops->bind_param) d->ops->bind_param(d, desc, id, l); + return id; +} +/* CgTarget label_new hook: forwards to nd_label_new_raw. */ +static Label nd_label_new(CgTarget* t) { return nd_label_new_raw(nd_of(t)); } +/* CgTarget label_place hook: place the native label that nd_mc_label returns for label. */ +static void nd_label_place(CgTarget* t, Label label) { + NativeDirectTarget* d = nd_of(t); + ND_REQUIRE_NATIVE(d, label_place, "target does not place labels"); + d->native->label_place(d->native, nd_mc_label(d, label)); +} +/* CgTarget jump hook: emit a native jump to label. */ +static void nd_jump(CgTarget* t, Label label) { + NativeDirectTarget* d = nd_of(t); + ND_REQUIRE_NATIVE(d, jump, "target does not emit jumps"); + d->native->jump(d->native, nd_mc_label(d, label)); +} +/* CgTarget cmp_branch hook: bring a and b into registers, emit the native compare-and-branch to label, then release the two registers in reverse order of acquisition. */ +static void nd_cmp_branch(CgTarget* t, CmpOp op, Operand a, Operand b, + Label label) { + NativeDirectTarget* d = nd_of(t); + NativeLoc ar = nd_materialize_operand(d, a); + NativeLoc br = nd_materialize_operand(d, b); + ND_REQUIRE_NATIVE(d, cmp_branch, "target does not emit compare branches"); + d->native->cmp_branch(d->native, op, ar, br, nd_mc_label(d, label)); + nd_release_materialized(d, br); + nd_release_materialized(d, ar); +} +/* CgTarget switch hook: delegate to cg_lower_switch_default. */ +static void nd_switch(CgTarget* t, const CGSwitchDesc* desc) { + cg_lower_switch_default(t, desc); +} +/* CgTarget indirect_branch hook: bring addr into a register, translate valid_targets to native labels (a NULL array when ntargets is 0) and emit the native indirect branch. */ +static void nd_indirect_branch(CgTarget* t, Operand addr, + const Label* valid_targets, u32 ntargets) { 
+ NativeDirectTarget* d = nd_of(t); + MCLabel* native_targets; + NativeLoc addr_reg = nd_materialize_operand(d, addr); + ND_REQUIRE_NATIVE(d, indirect_branch, + "target does not emit indirect branches"); + native_targets = ntargets ? nd_arena(d, sizeof(*native_targets) * ntargets, + _Alignof(MCLabel)) + : NULL; + for (u32 i = 0; i < ntargets; ++i) + native_targets[i] = nd_mc_label(d, valid_targets[i]); + d->native->indirect_branch(d->native, addr_reg, native_targets, ntargets); + nd_release_materialized(d, addr_reg); +} +/* CgTarget load_label_addr hook: load the address of label into a scratch register and store that register to dst. */ +static void nd_load_label_addr(CgTarget* t, Operand dst, Label label) { + NativeDirectTarget* d = nd_of(t); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load_label_addr, + "target does not materialize label addresses"); + d->native->load_label_addr(d->native, reg, nd_mc_label(d, label)); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); +} +/* CgTarget local_static_data_begin hook: pick the output section (desc->attrs.section when set, otherwise .rodata for read-only data and .data for the rest), align it to desc->align (1 when zero) and start recording bytes for desc->sym. Returns 0 when the native target, its mc or desc is missing and 1 otherwise; panics if a definition is already active. */ +static int nd_local_static_data_begin(CgTarget* t, + const CGLocalStaticDataDesc* desc) { + NativeDirectTarget* d = nd_of(t); + Sym name; + SecKind kind; + u16 flags; + if (!d->native || !d->native->mc || !desc) return 0; + if (d->local_static_active) nd_panic(d, "nested local static data"); + if (desc->attrs.section) { + name = (Sym)desc->attrs.section; + kind = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SEC_RODATA + : SEC_DATA; + flags = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SF_ALLOC + : (SF_ALLOC | SF_WRITE); + } else if (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) { + name = pool_intern_slice(t->c->global, SLICE_LIT(".rodata")); + kind = SEC_RODATA; + flags = SF_ALLOC; + } else { + name = pool_intern_slice(t->c->global, SLICE_LIT(".data")); + kind = SEC_DATA; + flags = SF_ALLOC | SF_WRITE; + } + d->local_static_sec = + obj_section(t->obj, name, kind, flags, desc->align ? desc->align : 1u); + d->local_static_base = + obj_align_to(t->obj, d->local_static_sec, desc->align ? 
desc->align : 1u); + d->local_static_size = 0; + d->local_static_sym = desc->sym; + d->local_static_active = 1; + return 1; +} +/* CgTarget local_static_data_write hook: append len bytes to the active local static definition; a NULL data pointer writes zeros in chunks of at most sizeof zero bytes. Does nothing when no definition is active or len is 0. NOTE(review): len is narrowed with a (u32) cast when added to the running size; confirm callers never exceed that range. */ +static void nd_local_static_data_write(CgTarget* t, const u8* data, u64 len) { + NativeDirectTarget* d = nd_of(t); + u8 zero[64]; + u64 orig_len = len; + if (!d->local_static_active || !len) return; + if (data) { + obj_write(t->obj, d->local_static_sec, data, (size_t)len); + } else { + memset(zero, 0, sizeof zero); + while (len >= sizeof zero) { + obj_write(t->obj, d->local_static_sec, zero, sizeof zero); + len -= sizeof zero; + } + if (len) obj_write(t->obj, d->local_static_sec, zero, (size_t)len); + } + d->local_static_size += (u32)orig_len; +} +/* CgTarget local_static_data_label_addr hook: write width zero bytes into the active definition and attach an R_ABS64 relocation against target with addend at that offset. Only width 8 is accepted; address_space is ignored. Panics outside an active definition. NOTE(review): the (void)width cast is stale because width is used below. */ +static void nd_local_static_data_label_addr(CgTarget* t, Label target, + i64 addend, u32 width, + u32 address_space) { + NativeDirectTarget* d = nd_of(t); + u32 off; + u8 zero[8]; + (void)width; + (void)address_space; + if (!d->local_static_active) nd_panic(d, "label address outside local static data"); + if (width != 8u) nd_panic(d, "unsupported local static label address width"); + memset(zero, 0, sizeof zero); + off = d->local_static_base + d->local_static_size; + obj_write(t->obj, d->local_static_sec, zero, width); + d->native->mc->emit_label_data_reloc(d->native->mc, d->local_static_sec, off, + nd_mc_label(d, target), R_ABS64, width, + addend); + d->local_static_size += width; +} +/* CgTarget local_static_data_end hook: define the recorded symbol at local_static_base with the accumulated size, then reset the local static state. Does nothing when no definition is active. */ +static void nd_local_static_data_end(CgTarget* t) { + NativeDirectTarget* d = nd_of(t); + if (!d->local_static_active) return; + obj_symbol_define(t->obj, d->local_static_sym, d->local_static_sec, + d->local_static_base, d->local_static_size); + d->local_static_active = 0; + d->local_static_sec = OBJ_SEC_NONE; + d->local_static_sym = OBJ_SYM_NONE; + d->local_static_base = 0; + d->local_static_size = 0; +} +/* CgTarget data_label_addr_unsupported_msg hook: always returns NULL. NOTE(review): presumably NULL means label addresses in data are supported; confirm against the CgTarget contract. */ +static const char* nd_data_label_addr_unsupported_msg(CgTarget* t) { + (void)t; + return NULL; +} +/* CgTarget scope_begin hook: push a scope record and return its 1-based id. A break label is created when desc supplies none, loop scopes without a continue label get a fresh one, and if-scopes branch to a fresh else label when desc->cond compares equal to zero. */ +static CGScope nd_scope_begin(CgTarget* t, const CGScopeDesc* desc) { + NativeDirectTarget* d = 
nd_of(t); + NativeDirectScope* s; + CGScope id; + nd_grow_scopes(d, d->nscopes + 1u); + id = d->nscopes + 1u; + s = &d->scopes[d->nscopes++]; + memset(s, 0, sizeof *s); + s->kind = desc->kind; +/* owns_break records that the break label was created here, so nd_scope_end knows it has to place it. */ s->owns_break = desc->break_label == LABEL_NONE; + s->break_label = desc->break_label ? desc->break_label : nd_label_new_raw(d); + s->continue_label = desc->continue_label; + if (desc->kind == SCOPE_LOOP && s->continue_label == LABEL_NONE) + s->continue_label = nd_label_new_raw(d); + if (desc->kind == SCOPE_IF) { + Operand zero; + s->else_label = nd_label_new_raw(d); + s->end_label = nd_label_new_raw(d); + memset(&zero, 0, sizeof zero); + zero.kind = OPK_IMM; + zero.type = desc->cond.type; + zero.v.imm = 0; + nd_cmp_branch(t, CMP_EQ, desc->cond, zero, s->else_label); + } + return id; +} +/* Look up the scope record for a 1-based scope id; panics on CG_SCOPE_NONE or on an id above d->nscopes. */ +static NativeDirectScope* nd_scope(NativeDirectTarget* d, CGScope scope) { + if (scope == CG_SCOPE_NONE || scope > d->nscopes) nd_panic(d, "bad scope"); + return &d->scopes[scope - 1u]; +} +/* CgTarget scope_else hook: mark the if-scope as having an else part, jump to end_label and place else_label. Panics on a non-if scope. */ +static void nd_scope_else(CgTarget* t, CGScope scope) { + NativeDirectTarget* d = nd_of(t); + NativeDirectScope* s = nd_scope(d, scope); + if (s->kind != SCOPE_IF) nd_panic(d, "scope_else on non-if scope"); + s->has_else = 1; + nd_jump(t, s->end_label); + nd_label_place(t, s->else_label); +} +/* CgTarget scope_end hook: for if-scopes place else_label (only when no else part was seen) and end_label; then place break_label when owns_break is set. */ +static void nd_scope_end(CgTarget* t, CGScope scope) { + NativeDirectTarget* d = nd_of(t); + NativeDirectScope* s = nd_scope(d, scope); + if (s->kind == SCOPE_IF) { + if (!s->has_else) nd_label_place(t, s->else_label); + nd_label_place(t, s->end_label); + } + if (s->owns_break) nd_label_place(t, s->break_label); +} +/* CgTarget break_to hook: jump to the break label of scope. */ +static void nd_break_to(CgTarget* t, CGScope scope) { + nd_jump(t, nd_scope(nd_of(t), scope)->break_label); +} +/* CgTarget continue_to hook: jump to the continue label of scope; panics when the scope has none. */ +static void nd_continue_to(CgTarget* t, CGScope scope) { + NativeDirectScope* s = nd_scope(nd_of(t), scope); + if (s->continue_label == LABEL_NONE) + nd_panic(nd_of(t), "continue_to on scope without continue label"); + nd_jump(t, s->continue_label); +} + +static void 
nd_load_imm(CgTarget* t, Operand dst, i64 imm) { +/* CgTarget load_imm hook: load imm into a scratch register and store that register to dst. */ NativeDirectTarget* d = nd_of(t); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates"); + d->native->load_imm(d->native, reg, imm); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); +} +/* CgTarget load_const hook: load the constant bytes into a scratch register and store that register to dst. */ +static void nd_load_const(CgTarget* t, Operand dst, ConstBytes cbytes) { + NativeDirectTarget* d = nd_of(t); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load_const, "target does not emit byte constants"); + d->native->load_const(d->native, reg, cbytes); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); +} +/* CgTarget copy hook: when dst's type is larger than the target pointer size, copy storage to storage with copy_bytes; otherwise bring src into a register and store it to dst. */ +static void nd_copy(CgTarget* t, Operand dst, Operand src) { + NativeDirectTarget* d = nd_of(t); + u64 size = dst.type ? cg_type_size(t->c, dst.type) : 0; + if (size > (u64)t->c->target.ptr_size) { + NdAddrTemps dt, st; + AggregateAccess access; + memset(&access, 0, sizeof access); + access.type = dst.type; + access.size = (u32)size; + access.align = dst.type ? cg_type_align(t->c, dst.type) + : (u32)t->c->target.ptr_align; + access.mem.type = dst.type; + access.mem.size = access.size; + access.mem.align = access.align; + NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, + access.mem); + NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st, + access.mem); + ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); + d->native->copy_bytes(d->native, da, sa, access); + nd_addr_temps_release(d, &st); + nd_addr_temps_release(d, &dt); + return; + } + NativeLoc val = nd_materialize_operand(d, src); + nd_store_operand_from_reg(d, dst, val); + nd_release_materialized(d, val); +} +/* CgTarget load hook: load from the storage described by addr into dst. Volatile accesses call nd_barrier first; accesses larger than the pointer size become a copy_bytes into dst's storage. The size is mem.size, or the size of mem.type when mem.size is 0. */ +static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + u64 size = mem.size ? mem.size : (mem.type ? 
cg_type_size(t->c, mem.type) : 0); + if (mem.flags & MF_VOLATILE) + nd_barrier(d, + NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem); + if (size > (u64)t->c->target.ptr_size) { + NdAddrTemps dt; + AggregateAccess access; + memset(&access, 0, sizeof access); + access.type = mem.type ? mem.type : dst.type; + access.size = (u32)size; + access.align = mem.align; + access.mem = mem; + NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, mem); + ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); + d->native->copy_bytes(d->native, da, naddr, access); + nd_addr_temps_release(d, &dt); + nd_addr_temps_release(d, &temps); + return; + } + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load, "target does not emit loads"); + d->native->load(d->native, reg, naddr, mem); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); + nd_addr_temps_release(d, &temps); +} +/* CgTarget store hook: store src to the storage described by addr. Volatile accesses call nd_barrier first; accesses larger than the pointer size become a copy_bytes from src's storage. The size is mem.size, or the size of mem.type when mem.size is 0. */ +static void nd_store(CgTarget* t, Operand addr, Operand src, MemAccess mem) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + u64 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); + if (mem.flags & MF_VOLATILE) + nd_barrier(d, + NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem); + if (size > (u64)t->c->target.ptr_size) { + NdAddrTemps st; + AggregateAccess access; + memset(&access, 0, sizeof access); + access.type = mem.type ? 
mem.type : src.type; + access.size = (u32)size; + access.align = mem.align; + access.mem = mem; + NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st, mem); + ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); + d->native->copy_bytes(d->native, naddr, sa, access); + nd_addr_temps_release(d, &st); + nd_addr_temps_release(d, &temps); + return; + } + NativeLoc val = nd_materialize_operand(d, src); + ND_REQUIRE_NATIVE(d, store, "target does not emit stores"); + d->native->store(d->native, naddr, val, mem); + nd_release_materialized(d, val); + nd_addr_temps_release(d, &temps); +} +/* CgTarget addr_of hook: compute the address of lv's storage into a scratch register and store that register to dst. */ +static void nd_addr_of(CgTarget* t, Operand dst, Operand lv) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size, + d->base.c->target.ptr_align); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_storage(d, lv), &temps, mem); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); + d->native->load_addr(d->native, reg, naddr); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); + nd_addr_temps_release(d, &temps); +} +/* CgTarget tls_addr_of hook: have the native target materialize the TLS address of sym plus addend into a scratch register and store that register to dst. */ +static void nd_tls_addr_of(CgTarget* t, Operand dst, ObjSymId sym, i64 addend) { + NativeDirectTarget* d = nd_of(t); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, tls_addr_of, + "target does not materialize TLS addresses"); + d->native->tls_addr_of(d->native, reg, sym, addend); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); +} +/* CgTarget copy_bytes hook: build both addresses with nd_addr_pointer, legalize them and emit the native byte copy described by access. */ +static void nd_copy_bytes(CgTarget* t, Operand dst_addr, Operand src_addr, + AggregateAccess access) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps dt, st; + NativeAddr dst = + nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &dt, access.mem); + NativeAddr src = + nd_addr_materialize(d, nd_addr_pointer(d, src_addr), &st, access.mem); + ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); + 
d->native->copy_bytes(d->native, dst, src, access); + nd_addr_temps_release(d, &st); + nd_addr_temps_release(d, &dt); +} +/* CgTarget set_bytes hook: legalize the destination address built with nd_addr_pointer, bring byte_value into a register and emit the native fill described by access. */ +static void nd_set_bytes(CgTarget* t, Operand dst_addr, Operand byte_value, + AggregateAccess access) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + NativeAddr dst = + nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &temps, access.mem); + NativeLoc byte = nd_materialize_operand(d, byte_value); + ND_REQUIRE_NATIVE(d, set_bytes, "target does not set bytes"); + d->native->set_bytes(d->native, dst, byte, access); + nd_release_materialized(d, byte); + nd_addr_temps_release(d, &temps); +} +/* CgTarget bitfield_load hook: legalize the record's storage address for access.storage, load the bitfield into a scratch register and store that register to dst. */ +static void nd_bitfield_load(CgTarget* t, Operand dst, Operand record_addr, + BitFieldAccess access) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + NativeAddr addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), + &temps, access.storage); + NativeLoc reg = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, bitfield_load, "target does not load bitfields"); + d->native->bitfield_load(d->native, reg, addr, access); + nd_store_operand_from_reg(d, dst, reg); + nd_release_materialized(d, reg); + nd_addr_temps_release(d, &temps); +} +/* CgTarget bitfield_store hook: legalize the record's storage address for access.storage, bring src into a register and emit the native bitfield store. */ +static void nd_bitfield_store(CgTarget* t, Operand record_addr, Operand src, + BitFieldAccess access) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + NativeAddr addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), + &temps, access.storage); + NativeLoc val = nd_materialize_operand(d, src); + ND_REQUIRE_NATIVE(d, bitfield_store, "target does not store bitfields"); + d->native->bitfield_store(d->native, addr, val, access); + nd_release_materialized(d, val); + nd_addr_temps_release(d, &temps); +} +/* CgTarget binop hook: bring a and b into registers, compute op into a scratch destination register, store it to dst and release the registers in reverse order. */ +static void nd_binop(CgTarget* t, BinOp op, Operand dst, Operand a, Operand b) { + NativeDirectTarget* d = nd_of(t); + NativeLoc ar = nd_materialize_operand(d, a); + NativeLoc br = nd_materialize_operand(d, b); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, binop, 
"target does not emit binary ops"); + d->native->binop(d->native, op, dr, ar, br); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, br); + nd_release_materialized(d, ar); +} +/* CgTarget unop hook: bring a into a register, compute op into a scratch destination register and store it to dst. */ +static void nd_unop(CgTarget* t, UnOp op, Operand dst, Operand a) { + NativeDirectTarget* d = nd_of(t); + NativeLoc ar = nd_materialize_operand(d, a); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, unop, "target does not emit unary ops"); + d->native->unop(d->native, op, dr, ar); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, ar); +} +/* CgTarget cmp hook: bring a and b into registers, emit the native compare into a scratch destination register and store it to dst. */ +static void nd_cmp(CgTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { + NativeDirectTarget* d = nd_of(t); + NativeLoc ar = nd_materialize_operand(d, a); + NativeLoc br = nd_materialize_operand(d, b); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, cmp, "target does not emit compares"); + d->native->cmp(d->native, op, dr, ar, br); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, br); + nd_release_materialized(d, ar); +} +/* CgTarget convert hook: bring src into a register, emit the native conversion into a scratch destination register and store it to dst. */ +static void nd_convert(CgTarget* t, ConvKind op, Operand dst, Operand src) { + NativeDirectTarget* d = nd_of(t); + NativeLoc sr = nd_materialize_operand(d, src); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, convert, "target does not emit converts"); + d->native->convert(d->native, op, dr, sr); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, sr); +} +/* CgTarget call hook: call nd_barrier, describe the call using frame locations for the arguments and results, let the adapter plan_call (or else the native plan_call) plan it, write the planned argument moves, emit the call and write the planned result moves. A callee held in the frame is loaded into a scratch register that is released at the end. */ +static void nd_call(CgTarget* t, const CGCallDesc* desc) { + NativeDirectTarget* d = nd_of(t); + NativeCallPlan plan; + NativeCallDesc nd; + NativeLoc* args; + NativeLoc* results; + int release_callee = 0; + nd_barrier(d, NATIVE_DIRECT_BARRIER_CALL | NATIVE_DIRECT_BARRIER_MEMORY); + memset(&plan, 0, sizeof plan); + memset(&nd, 0, sizeof nd); + args = desc->nargs + ? 
nd_arena(d, sizeof(*args) * desc->nargs, _Alignof(NativeLoc)) + : NULL; + results = desc->nresults ? nd_arena(d, sizeof(*results) * desc->nresults, + _Alignof(NativeLoc)) + : NULL; + for (u32 i = 0; i < desc->nargs; ++i) + args[i] = nd_loc_frame(d, desc->args[i], 0); + for (u32 i = 0; i < desc->nresults; ++i) + results[i] = nd_loc_frame(d, desc->results[i], 0); + nd.fn_type = desc->fn_type; + nd.callee = nd_loc_operand(d, desc->callee); + nd.args = args; + nd.results = results; + nd.nargs = desc->nargs; + nd.nresults = desc->nresults; + nd.flags = desc->flags; + nd.tail_policy = desc->tail_policy; + nd.inline_policy = desc->inline_policy; + +/* The adapter's plan_call takes precedence; the native plan_call is required only when the adapter has none. */ if (d->ops && d->ops->plan_call) + d->ops->plan_call(d, &nd, &plan); + else { + ND_REQUIRE_NATIVE(d, plan_call, "target does not plan calls"); + d->native->plan_call(d->native, &nd, &plan); + } +/* Keep the largest stack_arg_size seen in this function in d->max_outgoing. */ if (plan.stack_arg_size > d->max_outgoing) + d->max_outgoing = plan.stack_arg_size; + for (u32 i = 0; i < plan.nargs; ++i) + nd_write_loc(d, plan.args[i].dst, plan.args[i].src, plan.args[i].mem); + if (plan.callee.kind == NATIVE_LOC_FRAME) { + NativeLoc callee = nd_materialize_loc( + d, plan.callee, (NativeAllocClass)plan.callee.cls, plan.callee.type); + plan.callee = callee; + release_callee = 1; + } + if (d->ops && d->ops->emit_call) + d->ops->emit_call(d, &plan); + else { + ND_REQUIRE_NATIVE(d, emit_call, "target does not emit calls"); + d->native->emit_call(d->native, &plan); + } + for (u32 i = 0; i < plan.nrets; ++i) + nd_write_loc(d, plan.rets[i].dst, plan.rets[i].src, plan.rets[i].mem); + if (release_callee) + nd_scratch_release(d, (NativeAllocClass)plan.callee.cls, plan.callee.v.reg); +} +/* CgTarget tail_call_unrealizable_reason hook: ask the adapter when it implements the query; otherwise return a fixed string saying direct tail-call lowering is not exposed. */ +static const char* nd_tail_call_unrealizable_reason(CgTarget* t, + const CGCallDesc* desc) { + NativeDirectTarget* d = nd_of(t); + if (d->ops && d->ops->tail_call_unrealizable_reason) + return d->ops->tail_call_unrealizable_reason(d, desc); + return "target does not expose direct tail-call lowering"; +} +/* CgTarget ret hook: defer entirely to the adapter's emit_ret when present; otherwise describe the values as frame locations, let the native plan_ret plan the moves, write them and emit the native return. */ +static void nd_ret(CgTarget* t, 
const CGLocal* values, u32 nvalues) { + NativeDirectTarget* d = nd_of(t); + NativeLoc* locs = NULL; + NativeCallPlanRet* rets = NULL; + u32 nrets = 0; + if (d->ops && d->ops->emit_ret) { + d->ops->emit_ret(d, values, nvalues); + return; + } + locs = nvalues ? nd_arena(d, sizeof(*locs) * nvalues, _Alignof(NativeLoc)) + : NULL; + for (u32 i = 0; i < nvalues; ++i) locs[i] = nd_loc_frame(d, values[i], 0); + ND_REQUIRE_NATIVE(d, plan_ret, "target does not plan returns"); + d->native->plan_ret(d->native, d->func, locs, nvalues, &rets, &nrets); + for (u32 i = 0; i < nrets; ++i) + nd_write_loc(d, rets[i].dst, rets[i].src, rets[i].mem); + ND_REQUIRE_NATIVE(d, ret, "target does not emit returns"); + d->native->ret(d->native); +} +/* CgTarget alloca hook: bring size into a register, emit the native alloca with the given align into a scratch destination register and store it to dst. */ +static void nd_alloca(CgTarget* t, Operand dst, Operand size, u32 align) { + NativeDirectTarget* d = nd_of(t); + NativeLoc sr = nd_materialize_operand(d, size); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, alloca_, "target does not emit alloca"); + d->native->alloca_(d->native, dr, sr, align); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, sr); +} +/* CgTarget va_start hook: forwarded to the adapter's va_start_; panics when the adapter does not provide it. */ +static void nd_va_start(CgTarget* t, Operand ap_addr) { + NativeDirectTarget* d = nd_of(t); + if (!d->ops || !d->ops->va_start_) + nd_panic(d, "target does not emit va_start"); + d->ops->va_start_(d, ap_addr); +} +/* CgTarget va_arg hook: forwarded to the adapter's va_arg_; panics when the adapter does not provide it. */ +static void nd_va_arg(CgTarget* t, Operand dst, Operand ap_addr, + CfreeCgTypeId type) { + NativeDirectTarget* d = nd_of(t); + if (!d->ops || !d->ops->va_arg_) nd_panic(d, "target does not emit va_arg"); + d->ops->va_arg_(d, dst, ap_addr, type); +} +/* CgTarget va_end hook: forwarded to the adapter's va_end_; panics when the adapter does not provide it. */ +static void nd_va_end(CgTarget* t, Operand ap_addr) { + NativeDirectTarget* d = nd_of(t); + if (!d->ops || !d->ops->va_end_) nd_panic(d, "target does not emit va_end"); + d->ops->va_end_(d, ap_addr); +} +/* CgTarget va_copy hook: forwarded to the adapter's va_copy_; panics when the adapter does not provide it. */ +static void nd_va_copy(CgTarget* t, Operand dst_ap_addr, Operand src_ap_addr) { + NativeDirectTarget* d = nd_of(t); + if (!d->ops || !d->ops->va_copy_) nd_panic(d, 
"target does not emit va_copy"); + d->ops->va_copy_(d, dst_ap_addr, src_ap_addr); +} +/* CgTarget atomic_load hook: call nd_barrier, legalize the address built with nd_addr_pointer, emit the native atomic load into a scratch register and store it to dst. */ +static void nd_atomic_load(CgTarget* t, Operand dst, Operand addr, + MemAccess mem, MemOrder order) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, atomic_load, "target does not emit atomic loads"); + d->native->atomic_load(d->native, dr, naddr, mem, order); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_addr_temps_release(d, &temps); +} +/* CgTarget atomic_store hook: call nd_barrier, legalize the address built with nd_addr_pointer, bring src into a register and emit the native atomic store. */ +static void nd_atomic_store(CgTarget* t, Operand addr, Operand src, + MemAccess mem, MemOrder order) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); + NativeLoc sr = nd_materialize_operand(d, src); + ND_REQUIRE_NATIVE(d, atomic_store, "target does not emit atomic stores"); + d->native->atomic_store(d->native, naddr, sr, mem, order); + nd_release_materialized(d, sr); + nd_addr_temps_release(d, &temps); +} +/* CgTarget atomic_rmw hook: call nd_barrier, legalize the address, bring val into a register, emit the native read-modify-write into a scratch destination register and store it to dst. */ +static void nd_atomic_rmw(CgTarget* t, AtomicOp op, Operand dst, Operand addr, + Operand val, MemAccess mem, MemOrder order) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); + NativeLoc vr = nd_materialize_operand(d, val); + NativeLoc dr = nd_dst_scratch(d, dst); + ND_REQUIRE_NATIVE(d, atomic_rmw, "target does not emit atomic rmw"); + d->native->atomic_rmw(d->native, op, dr, naddr, vr, mem, order); + nd_store_operand_from_reg(d, dst, dr); + nd_release_materialized(d, dr); + nd_release_materialized(d, vr); + 
nd_addr_temps_release(d, &temps); +} +/* CgTarget atomic_cas hook: call nd_barrier, legalize the address, bring expected and desired into registers, emit the native compare-exchange into two scratch registers and store them to prior and ok. Registers are released in reverse order of acquisition. */ +static void nd_atomic_cas(CgTarget* t, Operand prior, Operand ok, Operand addr, + Operand expected, Operand desired, MemAccess mem, + MemOrder success, MemOrder failure) { + NativeDirectTarget* d = nd_of(t); + NdAddrTemps temps; + nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); + NativeAddr naddr = + nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); + NativeLoc er = nd_materialize_operand(d, expected); + NativeLoc dr = nd_materialize_operand(d, desired); + NativeLoc pr = nd_dst_scratch(d, prior); + NativeLoc kr = nd_dst_scratch(d, ok); + ND_REQUIRE_NATIVE(d, atomic_cas, + "target does not emit atomic compare-exchange"); + d->native->atomic_cas(d->native, pr, kr, naddr, er, dr, mem, success, + failure); + nd_store_operand_from_reg(d, prior, pr); + nd_store_operand_from_reg(d, ok, kr); + nd_release_materialized(d, kr); + nd_release_materialized(d, pr); + nd_release_materialized(d, dr); + nd_release_materialized(d, er); + nd_addr_temps_release(d, &temps); +} +/* CgTarget fence hook: emit the native fence for order. */ +static void nd_fence(CgTarget* t, MemOrder order) { + NativeDirectTarget* d = nd_of(t); + ND_REQUIRE_NATIVE(d, fence, "target does not emit fences"); + d->native->fence(d->native, order); +} +/* CgTarget intrinsic hook: give every destination a scratch register, pass OPK_IMM arguments through as native locations and bring all other arguments into registers, emit the native intrinsic, then store each destination register to its operand and release all registers. */ +static void nd_intrinsic(CgTarget* t, IntrinKind kind, Operand* dsts, u32 ndst, + const Operand* args, u32 narg) { + NativeDirectTarget* d = nd_of(t); + NativeLoc* ndsts = + ndst ? nd_arena(d, sizeof(*ndsts) * ndst, _Alignof(NativeLoc)) : NULL; + NativeLoc* nargs = + narg ? nd_arena(d, sizeof(*nargs) * narg, _Alignof(NativeLoc)) : NULL; + ND_REQUIRE_NATIVE(d, intrinsic, "target does not emit compiler intrinsics"); + for (u32 i = 0; i < ndst; ++i) ndsts[i] = nd_dst_scratch(d, dsts[i]); + for (u32 i = 0; i < narg; ++i) { + nargs[i] = args[i].kind == OPK_IMM ? 
nd_loc_operand(d, args[i]) + : nd_materialize_operand(d, args[i]); + } + d->native->intrinsic(d->native, kind, ndsts, ndst, nargs, narg); + for (u32 i = 0; i < ndst; ++i) { + nd_store_operand_from_reg(d, dsts[i], ndsts[i]); + nd_release_materialized(d, ndsts[i]); + } + for (u32 i = 0; i < narg; ++i) nd_release_materialized(d, nargs[i]); +} +/* CgTarget asm_block hook: call nd_barrier, then forward every argument to the adapter's asm_block; panics when the adapter does not provide it. */ +static void nd_asm_block(CgTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 nout, Operand* out_ops, + const AsmConstraint* ins, u32 nin, + const Operand* in_ops, const Sym* clobbers, + u32 nclob) { + NativeDirectTarget* d = nd_of(t); + nd_barrier(d, + NATIVE_DIRECT_BARRIER_INLINE_ASM | NATIVE_DIRECT_BARRIER_MEMORY); + if (d->ops && d->ops->asm_block) { + d->ops->asm_block(d, tmpl, outs, nout, out_ops, ins, nin, in_ops, clobbers, + nclob); + return; + } + nd_panic(d, "target does not emit inline asm"); +} +/* CgTarget file_scope_asm hook: forward src and len to the native target's file_scope_asm. */ +static void nd_file_scope_asm(CgTarget* t, const char* src, size_t len) { + NativeDirectTarget* d = nd_of(t); + ND_REQUIRE_NATIVE(d, file_scope_asm, "target does not emit file-scope asm"); + d->native->file_scope_asm(d->native, src, len); +} +/* CgTarget set_loc hook: remember loc in d->loc and forward it to the native target when it has a set_loc hook. */ +static void nd_set_loc(CgTarget* t, SrcLoc loc) { + NativeDirectTarget* d = nd_of(t); + d->loc = loc; + if (d->native && d->native->set_loc) d->native->set_loc(d->native, loc); +} +/* CgTarget finalize hook: forward to the native target's finalize when present. */ +static void nd_finalize(CgTarget* t) { + NativeDirectTarget* d = nd_of(t); + if (d->native && d->native->finalize) d->native->finalize(d->native); +} +/* CgTarget destroy hook: forward to the native target's destroy when present. */ +static void nd_destroy(CgTarget* t) { + NativeDirectTarget* d = nd_of(t); + if (d->native && d->native->destroy) d->native->destroy(d->native); +} +/* Create a NativeDirectTarget with arena_znew on c->tu, fill in every CgTarget hook with the nd_* functions and return it as a CgTarget. Panics when c, cfg or cfg->native is missing; returns NULL when the allocation returns NULL. NOTE(review): the panic path passes c to compiler_panic even when c is NULL; confirm compiler_panic tolerates that. */ +CgTarget* native_direct_target_new(Compiler* c, ObjBuilder* obj, + const NativeDirectTargetConfig* cfg) { + NativeDirectTarget* d; + if (!c || !cfg || !cfg->native) + compiler_panic(c, (SrcLoc){0, 0, 0}, + "native_direct_target_new: missing native target"); + d = arena_znew(c->tu, NativeDirectTarget); + if (!d) return NULL; + d->base.c = c; + d->base.obj = obj; + d->magic = NATIVE_DIRECT_MAGIC; 
+ d->native = cfg->native; + d->ops = cfg->ops; + d->user = cfg->user; + + d->base.func_begin = nd_func_begin; + d->base.func_end = nd_func_end; + d->base.alias = nd_alias; + d->base.local = nd_local_new; + d->base.local_addr = nd_local_addr; + d->base.param = nd_param; + d->base.label_new = nd_label_new; + d->base.label_place = nd_label_place; + d->base.jump = nd_jump; + d->base.cmp_branch = nd_cmp_branch; + d->base.switch_ = nd_switch; + d->base.indirect_branch = nd_indirect_branch; + d->base.load_label_addr = nd_load_label_addr; + d->base.local_static_data_begin = nd_local_static_data_begin; + d->base.local_static_data_write = nd_local_static_data_write; + d->base.local_static_data_label_addr = nd_local_static_data_label_addr; + d->base.local_static_data_end = nd_local_static_data_end; + d->base.data_label_addr_unsupported_msg = nd_data_label_addr_unsupported_msg; + d->base.scope_begin = nd_scope_begin; + d->base.scope_else = nd_scope_else; + d->base.scope_end = nd_scope_end; + d->base.break_to = nd_break_to; + d->base.continue_to = nd_continue_to; + d->base.load_imm = nd_load_imm; + d->base.load_const = nd_load_const; + d->base.copy = nd_copy; + d->base.load = nd_load; + d->base.store = nd_store; + d->base.addr_of = nd_addr_of; + d->base.tls_addr_of = nd_tls_addr_of; + d->base.copy_bytes = nd_copy_bytes; + d->base.set_bytes = nd_set_bytes; + d->base.bitfield_load = nd_bitfield_load; + d->base.bitfield_store = nd_bitfield_store; + d->base.binop = nd_binop; + d->base.unop = nd_unop; + d->base.cmp = nd_cmp; + d->base.convert = nd_convert; + d->base.call = nd_call; + d->base.tail_call_unrealizable_reason = nd_tail_call_unrealizable_reason; + d->base.ret = nd_ret; + d->base.alloca_ = nd_alloca; + d->base.va_start_ = nd_va_start; + d->base.va_arg_ = nd_va_arg; + d->base.va_end_ = nd_va_end; + d->base.va_copy_ = nd_va_copy; + d->base.atomic_load = nd_atomic_load; + d->base.atomic_store = nd_atomic_store; + d->base.atomic_rmw = nd_atomic_rmw; + d->base.atomic_cas = 
nd_atomic_cas; + d->base.fence = nd_fence; + d->base.intrinsic = nd_intrinsic; + d->base.asm_block = nd_asm_block; + d->base.file_scope_asm = nd_file_scope_asm; + d->base.set_loc = nd_set_loc; + d->base.finalize = nd_finalize; + d->base.destroy = nd_destroy; + return &d->base; +} + +NativeTarget* native_direct_target_native(CgTarget* t) { + NativeDirectTarget* d = t ? nd_of(t) : NULL; + return d && d->magic == NATIVE_DIRECT_MAGIC ? d->native : NULL; +} diff --git a/src/cg/native_direct_target.h b/src/cg/native_direct_target.h @@ -0,0 +1,136 @@ +#ifndef CFREE_CG_NATIVE_DIRECT_TARGET_H +#define CFREE_CG_NATIVE_DIRECT_TARGET_H + +#include "arch/native_target.h" +#include "cg/cgtarget.h" + +/* NativeDirectTarget is the shared direct -O0 implementation of CgTarget for + * native backends. It owns semantic local homes, direct-mode scratch policy, + * local register caching, and conservative flushes. Arch code supplies a + * NativeTarget plus this small adapter for ABI/frame/legality decisions. 
*/ + /* Forward declaration; the full struct appears further down in this header. */ +typedef struct NativeDirectTarget NativeDirectTarget; + /* Element type of NativeDirectTarget.scopes: the scope kind, two flag bytes, and the break, continue, else, and end labels. */ +typedef struct NativeDirectScope { + u8 kind; /* ScopeKind */ + u8 has_else; + u8 owns_break; + u8 pad; + Label break_label; + Label continue_label; + Label else_label; + Label end_label; +} NativeDirectScope; + /* Barrier reason bits. Each value is a distinct single bit, so they can be OR-ed together into a u32 flags word such as the one NativeOps.barrier takes. */ +typedef enum NativeDirectBarrier { + NATIVE_DIRECT_BARRIER_CALL = 1u << 0, + NATIVE_DIRECT_BARRIER_MEMORY = 1u << 1, + NATIVE_DIRECT_BARRIER_VOLATILE = 1u << 2, + NATIVE_DIRECT_BARRIER_ATOMIC = 1u << 3, + NATIVE_DIRECT_BARRIER_INLINE_ASM = 1u << 4, +} NativeDirectBarrier; + /* Element type of NativeDirectTarget.locals. Carries the type, size, alignment and CGLocalFlag bits, the frame slot in home, a register with its NativeAllocClass, and the dirty, address_taken, and memory_required bytes. NOTE(review): presumably reg caches the value and dirty marks it as not yet written back to home; confirm against native_direct_target.c. */ +typedef struct NativeDirectLocal { + CfreeCgTypeId type; + u32 size; + u32 align; + u32 flags; /* CGLocalFlag */ + + NativeFrameSlot home; + Reg reg; + u8 cls; /* NativeAllocClass */ + u8 dirty; + u8 address_taken; + u8 memory_required; +} NativeDirectLocal; + /* Three-way answer returned by NativeOps.semantic_addr_legal. */ +typedef enum NativeDirectAddrLegality { + NATIVE_DIRECT_ADDR_ILLEGAL, + NATIVE_DIRECT_ADDR_LEGAL, + NATIVE_DIRECT_ADDR_LEGAL_IF_UNSCALED, +} NativeDirectAddrLegality; + /* Function-pointer table installed through NativeDirectTargetConfig.ops. Every hook receives the NativeDirectTarget as its first argument. nd_asm_block in native_direct_target.c checks asm_block for NULL before calling it; whether the other hooks may be NULL is not visible in this header. */ +typedef struct NativeOps NativeOps; +struct NativeOps { + const NativeRegInfo* (*reg_info)(NativeDirectTarget*); + /* Function begin and end. */ + void (*func_begin)(NativeDirectTarget*, const CGFuncDesc*); + void (*func_end)(NativeDirectTarget*); + /* Frame slot allocation and parameter binding. */ + NativeFrameSlot (*alloc_frame_slot)(NativeDirectTarget*, + const NativeFrameSlotDesc*); + void (*bind_param)(NativeDirectTarget*, const CGParamDesc*, CGLocal, + NativeDirectLocal*); + /* Allocation class and operand or address legality queries. */ + NativeAllocClass (*class_for_type)(NativeDirectTarget*, CfreeCgTypeId); + int (*operand_legal)(NativeDirectTarget*, const Operand*, NativeAllocClass); + int (*addr_legal)(NativeDirectTarget*, const NativeAddr*, MemAccess); + NativeDirectAddrLegality (*semantic_addr_legal)(NativeDirectTarget*, + Operand addr, MemAccess); + /* Call planning, call emission, and return emission. */ + void (*plan_call)(NativeDirectTarget*, const NativeCallDesc*, + NativeCallPlan*); + const char* (*tail_call_unrealizable_reason)(NativeDirectTarget*, + const CGCallDesc*); + void (*emit_call)(NativeDirectTarget*, const NativeCallPlan*); + void (*emit_ret)(NativeDirectTarget*, 
const CGLocal* values, u32 nvalues); + /* Variadic argument hooks. */ + void (*va_start_)(NativeDirectTarget*, Operand ap_addr); + void (*va_arg_)(NativeDirectTarget*, Operand dst, Operand ap_addr, + CfreeCgTypeId type); + void (*va_end_)(NativeDirectTarget*, Operand ap_addr); + void (*va_copy_)(NativeDirectTarget*, Operand dst_ap_addr, + Operand src_ap_addr); + /* Inline asm; nd_asm_block forwards its arguments here unchanged. */ + void (*asm_block)(NativeDirectTarget*, const char* tmpl, + const AsmConstraint* outs, u32 nout, Operand* out_ops, + const AsmConstraint* ins, u32 nin, const Operand* in_ops, + const Sym* clobbers, u32 nclob); + + void (*barrier)(NativeDirectTarget*, u32 flags); +}; + /* Arguments for native_direct_target_new. native must be non-NULL or the constructor panics; native, ops, and user are copied into the new target. NOTE(review): the constructor shown in native_direct_target.c does not read flags. */ +typedef struct NativeDirectTargetConfig { + NativeTarget* native; + const NativeOps* ops; + void* user; + u32 flags; +} NativeDirectTargetConfig; + /* Adapter state. base is the CgTarget whose address native_direct_target_new returns, and magic is the tag native_direct_target_native checks before it exposes native. native, ops, and user come from the config; loc is updated by nd_set_loc. NOTE(review): the locals, labels, and scopes pointers each come with a count and a cap field, presumably growable arrays; the roles of the remaining fields are defined in native_direct_target.c. */ +struct NativeDirectTarget { + CgTarget base; + u32 magic; + NativeTarget* native; + const NativeOps* ops; + void* user; + + const CGFuncDesc* func; + SrcLoc loc; + + NativeDirectLocal* locals; + u32 nlocals; + u32 locals_cap; + + MCLabel* labels; + u32 nlabels; + u32 labels_cap; + + NativeDirectScope* scopes; + u32 nscopes; + u32 scopes_cap; + + u32 scratch_used[3]; + u32 max_outgoing; + + ObjSecId local_static_sec; + ObjSymId local_static_sym; + u32 local_static_base; + u32 local_static_size; + u8 local_static_active; +}; + /* Public entry points; both are defined in native_direct_target.c. */ +CgTarget* native_direct_target_new(Compiler*, ObjBuilder*, + const NativeDirectTargetConfig*); +NativeTarget* native_direct_target_native(CgTarget*); + +#endif