native_argmove.h (2767B)
1 #ifndef KIT_CG_NATIVE_ARGMOVE_H 2 #define KIT_CG_NATIVE_ARGMOVE_H 3 4 /* Shared parallel-copy register-move scheduler for the native backends. 5 * 6 * Marshalling call arguments (and, on the optimizer path, binding incoming 7 * parameters) means realizing a set of register `dst <- src` moves as a 8 * *parallel* copy: every register must be read by all moves that source it 9 * before any move overwrites it. The allocator usually arranges a conflict-free 10 * order, but not always (notably variadic args, and tail-call / entry 11 * permutations the allocator is free to rotate). When a true cycle remains — 12 * a rotation like rdi<-rdx, rsi<-rdi, rdx<-rsi — it is broken by stashing one 13 * member's register into a scratch and redirecting that value's readers. 14 * 15 * The scheduling (topological emission + cycle detection + the scratch break) 16 * is identical across aa64/rv64/x64; only the leaf operations differ — how one 17 * move is emitted and which register is the scratch. Those come in via the ops 18 * struct, so all three backends (and the entry-bind path) share one scheduler. 19 */ 20 21 #include <string.h> 22 23 #include "arch/native_target.h" 24 #include "core/core.h" 25 26 /* A single register-destination move. `src` may be a register (the common, 27 * order-constraining case), a frame/stack slot, an immediate, or — when 28 * `is_addr` — an address to materialize into `dst`. Identical to the per-arch 29 * arg-move structs it replaces. */ 30 typedef struct NativeArgMove { 31 NativeLoc dst; 32 NativeLoc src; 33 u32 src_offset; 34 u32 size; 35 int is_addr; 36 /* Optional emit hints, opaque to the scheduler (it only reads dst/src/is_addr 37 * for ordering). x64 uses these for the Win64 "variadic FP arg also 38 * duplicated into the matching GPR" rule; other backends leave them zero. */ 39 int dup_to_gpr; 40 Reg dup_gpr; 41 } NativeArgMove; 42 43 typedef struct NativeArgShuffle { 44 NativeTarget* t; 45 /* Emit one move as-is: an address-of for is_addr, else a load/copy of `size` 46 * bytes at `src_offset` from `src` into `dst`. */ 47 void (*emit_one)(NativeTarget* t, const NativeArgMove* m); 48 /* A plain register-to-register copy, used to stash a cycle victim into the 49 * scratch register before redirecting its readers. */ 50 void (*reg_move)(NativeTarget* t, NativeLoc dst, NativeLoc src); 51 /* Scratch register per NativeAllocClass (index by class id) for breaking 52 * cycles. A cycle's ring is register-to-register, so the scratch needs no 53 * address arithmetic — a bare register suffices. */ 54 Reg scratch[NATIVE_REG_VEC + 1]; 55 } NativeArgShuffle; 56 57 /* Emit `moves[0..n)` as a parallel register copy via `s`. Mutates `moves` 58 * (redirects a broken cycle's readers to the scratch register). */ 59 void native_arg_shuffle(const NativeArgShuffle* s, NativeArgMove* moves, u32 n); 60 61 #endif