asm.c (12710B)
1 #include "cg/internal.h" 2 3 const char* api_sym_cstr(KitCg* g, KitSym sym) { 4 const char* s; 5 if (!sym) return ""; 6 s = pool_slice(g->c->global, (Sym)sym).s; 7 return s ? s : ""; 8 } 9 10 int api_asm_parse_match_index(const char* s) { 11 int n; 12 if (!s || s[0] < '0' || s[0] > '9') return -1; 13 n = 0; 14 for (const char* p = s; *p >= '0' && *p <= '9'; ++p) { 15 n = n * 10 + (*p - '0'); 16 } 17 return n; 18 } 19 20 const char* api_asm_constraint_body(const char* s) { 21 if (!s) return ""; 22 if (s[0] == '=' && s[1] == '&') return s + 2; 23 if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; 24 return s; 25 } 26 27 int api_asm_is_early_clobber(const char* s) { 28 if (!s) return 0; 29 return (s[0] == '=' && s[1] == '&') || s[0] == '&'; 30 } 31 32 /* Does this constraint body name a register operand (one that binds to a temp 33 * local, as opposed to 'i' immediate or 'm' memory)? 'r' is the architecture- 34 * neutral general-register class; 'f' (riscv), 'x' (x86 SSE) and 'w' (aarch64 35 * SIMD/FP) are the per-target FP/vector register classes. The temp local's type 36 * selects the actual NativeAllocClass downstream, and the target's asm hook 37 * rejects a letter that does not apply to it, so listing all three here is safe 38 * across backends. A hard-register pin (AsmConstraint.reg, from a GNU local 39 * register variable) rides alongside such a register operand and does not 40 * change this classification — the constraint letter stays "r". */ 41 int api_asm_is_reg_constraint(char c) { 42 return c == 'r' || c == 'f' || c == 'x' || c == 'w'; 43 } 44 45 static int api_asm_constraint_is_reg(KitCg* g, const char* constraint) { 46 const char* body = api_asm_constraint_body(constraint); 47 if (api_asm_is_reg_constraint(body[0])) return 1; 48 if (g && g->target && g->target->asm_is_reg_constraint) 49 return g->target->asm_is_reg_constraint(g->target, constraint); 50 return 0; 51 } 52 53 /* A register ('r'/'f'/'x'/'w') asm operand must live in a single hardware 54 * register. A 64-bit scalar on a 32-bit target does not fit one: it would need 55 * a register pair, which this inline-asm lowering does not model, so binding it 56 * to a single register would silently truncate to the low word. Reject it up 57 * front; the source can use a memory ("m") constraint (the value is already 58 * memory-resident) or split it into two 32-bit operands instead. Wider scalars 59 * on a 64-bit target (i128/f128) take a different lowering and are not this 60 * helper's concern. */ 61 static void api_asm_reject_wide_reg(KitCg* g, KitCgTypeId ty) { 62 if (api_is_wide8_scalar_type(g->c, ty)) { 63 compiler_panic(g->c, g->cur_loc, 64 "KitCg: 64-bit value in a register asm constraint is not " 65 "supported on a 32-bit target; use a memory (\"m\") " 66 "constraint or split into two 32-bit operands"); 67 } 68 } 69 70 void api_asm_memory_clobber_sv(KitCg* g, ApiSValue* sv, CGLocal local_id) { 71 (void)g; 72 (void)sv; 73 (void)local_id; 74 } 75 76 void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { 77 static const char* const match_strs[10] = {"0", "1", "2", "3", "4", 78 "5", "6", "7", "8", "9"}; 79 CgTarget* T; 80 Heap* h; 81 KitCgTypeId fallback_ty; 82 AsmConstraint* outs; 83 AsmConstraint* ins; 84 Sym* clobs; 85 ApiSValue* in_svs; 86 Operand* in_ops; 87 Operand* out_ops; 88 u8* out_local_owned; 89 const char* tmpl_str; 90 Sym sym_memory; 91 int has_memory_clobber; 92 uint32_t ninout; 93 uint32_t total_inputs; 94 KitSym tmpl = asm_block.tmpl; 95 const KitCgAsmOperand* outputs = asm_block.outputs; 96 uint32_t noutputs = asm_block.noutputs; 97 const KitCgAsmOperand* inputs = asm_block.inputs; 98 uint32_t ninputs = asm_block.ninputs; 99 const KitSym* clobbers = asm_block.clobbers; 100 uint32_t nclobbers = asm_block.nclobbers; 101 uint32_t clobber_abi_sets = asm_block.clobber_abi_sets; 102 (void)asm_block.flags; 103 if (!g) return; 104 api_local_const_memory_boundary(g); 105 T = g->target; 106 h = g->c->ctx->heap; 107 fallback_ty = builtin_id(KIT_CG_BUILTIN_I64); 108 tmpl_str = api_sym_cstr(g, tmpl); 109 ninout = 0; 110 111 outs = NULL; 112 ins = NULL; 113 clobs = NULL; 114 in_svs = NULL; 115 in_ops = NULL; 116 out_ops = NULL; 117 out_local_owned = NULL; 118 119 if (noutputs) { 120 outs = (AsmConstraint*)h->alloc(h, sizeof(*outs) * noutputs, 121 _Alignof(AsmConstraint)); 122 memset(outs, 0, sizeof(*outs) * noutputs); 123 for (u32 i = 0; i < noutputs; ++i) { 124 outs[i].str = api_sym_cstr(g, outputs[i].constraint); 125 outs[i].name = (Sym)outputs[i].name; 126 outs[i].type = resolve_type(g->c, outputs[i].type); 127 outs[i].reg = (Sym)outputs[i].reg; 128 outs[i].dir = (u8)outputs[i].dir; 129 if (!outs[i].type) outs[i].type = fallback_ty; 130 if (outs[i].reg && !api_asm_constraint_is_reg(g, outs[i].str)) { 131 compiler_panic(g->c, g->cur_loc, 132 "KitCg: asm hard-register output requires a register " 133 "constraint"); 134 } 135 if (outs[i].dir == KIT_CG_ASM_INOUT) { 136 if (i >= 10) { 137 compiler_panic(g->c, g->cur_loc, 138 "KitCg: asm inout output index exceeds matching " 139 "constraint range"); 140 } 141 ninout++; 142 } 143 } 144 out_ops = 145 (Operand*)h->alloc(h, sizeof(*out_ops) * noutputs, _Alignof(Operand)); 146 memset(out_ops, 0, sizeof(*out_ops) * noutputs); 147 out_local_owned = (u8*)h->alloc(h, noutputs, 1); 148 memset(out_local_owned, 0, noutputs); 149 } 150 151 total_inputs = ninputs + ninout; 152 if (total_inputs) { 153 uint32_t inout_index; 154 ins = (AsmConstraint*)h->alloc(h, sizeof(*ins) * total_inputs, 155 _Alignof(AsmConstraint)); 156 memset(ins, 0, sizeof(*ins) * total_inputs); 157 in_svs = (ApiSValue*)h->alloc(h, sizeof(*in_svs) * total_inputs, 158 _Alignof(ApiSValue)); 159 in_ops = (Operand*)h->alloc(h, sizeof(*in_ops) * total_inputs, 160 _Alignof(Operand)); 161 memset(in_ops, 0, sizeof(*in_ops) * total_inputs); 162 for (u32 i = 0; i < ninputs; ++i) { 163 ins[i].str = api_sym_cstr(g, inputs[i].constraint); 164 ins[i].name = (Sym)inputs[i].name; 165 ins[i].type = resolve_type(g->c, inputs[i].type); 166 ins[i].reg = (Sym)inputs[i].reg; 167 ins[i].dir = (u8)inputs[i].dir; 168 if (!ins[i].type) ins[i].type = fallback_ty; 169 if (ins[i].reg && !api_asm_constraint_is_reg(g, ins[i].str)) { 170 compiler_panic(g->c, g->cur_loc, 171 "KitCg: asm hard-register input requires a register " 172 "constraint"); 173 } 174 } 175 inout_index = ninputs; 176 for (u32 i = 0; i < noutputs; ++i) { 177 if (outs[i].dir != KIT_CG_ASM_INOUT) continue; 178 ins[inout_index].str = match_strs[i]; 179 ins[inout_index].type = outs[i].type ? outs[i].type : fallback_ty; 180 ins[inout_index].dir = KIT_CG_ASM_IN; 181 inout_index++; 182 } 183 for (u32 i = 0; i < total_inputs; ++i) { 184 u32 idx = total_inputs - 1u - i; 185 in_svs[idx] = api_pop(g); 186 api_ensure_local(g, &in_svs[idx]); 187 } 188 } 189 190 if (nclobbers) { 191 clobs = (Sym*)h->alloc(h, sizeof(*clobs) * nclobbers, _Alignof(Sym)); 192 for (u32 i = 0; i < nclobbers; ++i) clobs[i] = (Sym)clobbers[i]; 193 } 194 195 for (u32 i = 0; i < noutputs; ++i) { 196 if (api_asm_is_early_clobber(outs[i].str)) continue; 197 /* A register constraint binds to a temp local; the local's type selects the 198 * register class (integer vs FP), so the backend hook places an FP-class 199 * output (riscv 'f', x86 'x', aarch64 'w') in an FP register. */ 200 if (api_asm_constraint_is_reg(g, outs[i].str)) { 201 KitCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; 202 CGLocal r; 203 api_asm_reject_wide_reg(g, oty); 204 r = api_alloc_temp_local(g, oty); 205 out_ops[i] = api_op_local(r, oty); 206 out_local_owned[i] = 1; 207 } else { 208 compiler_panic(g->c, g->cur_loc, 209 "KitCg: unsupported asm output constraint"); 210 } 211 } 212 213 for (u32 i = 0; i < total_inputs; ++i) { 214 const char* s = ins[i].str ? ins[i].str : ""; 215 int matched = api_asm_parse_match_index(s); 216 KitCgTypeId ity = api_sv_type(&in_svs[i]); 217 if (matched >= 0) { 218 Operand bound; 219 if ((u32)matched >= noutputs) { 220 compiler_panic(g->c, g->cur_loc, 221 "KitCg: asm matching constraint out of range"); 222 continue; 223 } 224 if (api_asm_is_early_clobber(outs[matched].str)) { 225 compiler_panic(g->c, g->cur_loc, 226 "KitCg: asm matching input uses early-clobber output"); 227 continue; 228 } 229 bound = out_ops[matched]; 230 if (api_sv_op_is(&in_svs[i], OPK_LOCAL) && 231 in_svs[i].op.v.local == bound.v.local) { 232 } else if (api_sv_op_is(&in_svs[i], OPK_IMM)) { 233 T->load_imm(T, bound, in_svs[i].op.v.imm); 234 } else { 235 Operand src = api_force_local(g, &in_svs[i], ity); 236 T->copy(T, bound, src); 237 } 238 in_ops[i] = bound; 239 } else if (api_asm_constraint_is_reg(g, s)) { 240 api_asm_reject_wide_reg(g, ity); 241 in_ops[i] = api_force_local(g, &in_svs[i], ity); 242 } else if (s[0] == 'i') { 243 if (!api_sv_op_is(&in_svs[i], OPK_IMM)) { 244 compiler_panic(g->c, g->cur_loc, 245 "KitCg: asm 'i' constraint requires an immediate"); 246 } 247 in_ops[i] = in_svs[i].op; 248 } else if (s[0] == 'm') { 249 if (api_sv_op_is(&in_svs[i], OPK_INDIRECT)) { 250 in_ops[i] = in_svs[i].op; 251 } else if (api_is_lvalue_sv(&in_svs[i])) { 252 KitCgTypeId pty = 253 cg_type_ptr_to(g->c, ity ? ity : builtin_id(KIT_CG_BUILTIN_VOID)); 254 Operand dst = api_lvalue_addr(g, &in_svs[i], pty); 255 in_svs[i].op = api_op_indirect(dst.v.local, 0, ity); 256 in_svs[i].res = RES_LOCAL; 257 in_ops[i] = in_svs[i].op; 258 } else { 259 compiler_panic(g->c, g->cur_loc, 260 "KitCg: asm 'm' constraint requires an lvalue"); 261 } 262 } else { 263 compiler_panic(g->c, g->cur_loc, 264 "KitCg: unsupported asm input constraint"); 265 } 266 } 267 268 for (u32 i = 0; i < noutputs; ++i) { 269 KitCgTypeId oty; 270 CGLocal r; 271 if (!api_asm_is_early_clobber(outs[i].str)) continue; 272 if (!api_asm_constraint_is_reg(g, outs[i].str)) { 273 compiler_panic(g->c, g->cur_loc, 274 "KitCg: unsupported early-clobber asm output"); 275 continue; 276 } 277 oty = outs[i].type ? outs[i].type : fallback_ty; 278 api_asm_reject_wide_reg(g, oty); 279 r = api_alloc_temp_local(g, oty); 280 for (u32 k = 0; k < total_inputs; ++k) { 281 if ((in_ops[k].kind == OPK_LOCAL && in_ops[k].v.local == r) || 282 (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r)) { 283 compiler_panic(g->c, g->cur_loc, 284 "KitCg: asm early-clobber local collision"); 285 } 286 } 287 out_ops[i] = api_op_local(r, oty); 288 out_local_owned[i] = 1; 289 } 290 291 sym_memory = pool_intern_slice(g->c->global, SLICE_LIT("memory")); 292 has_memory_clobber = 0; 293 for (u32 i = 0; i < nclobbers; ++i) { 294 if (clobs[i] == sym_memory) has_memory_clobber = 1; 295 } 296 if (has_memory_clobber) { 297 for (u32 i = 0; i < g->sp; ++i) { 298 ApiSValue* sv = &g->stack[i]; 299 CGLocal local_id; 300 if (sv->res != RES_LOCAL) continue; 301 local_id = api_local_of_sv(sv); 302 api_asm_memory_clobber_sv(g, sv, local_id); 303 } 304 } 305 306 T->asm_block(T, tmpl_str, outs, noutputs, out_ops, ins, total_inputs, in_ops, 307 clobs, nclobbers, clobber_abi_sets); 308 309 for (u32 i = 0; i < total_inputs; ++i) api_release(g, &in_svs[i]); 310 for (u32 i = 0; i < noutputs; ++i) { 311 KitCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; 312 ApiSValue sv = api_make_sv(out_ops[i], oty); 313 if (!out_local_owned[i] && sv.res == RES_LOCAL) sv.res = RES_INHERENT; 314 api_push(g, sv); 315 } 316 317 if (outs) h->free(h, outs, sizeof(*outs) * noutputs); 318 if (ins) h->free(h, ins, sizeof(*ins) * total_inputs); 319 if (clobs) h->free(h, clobs, sizeof(*clobs) * nclobbers); 320 if (in_svs) h->free(h, in_svs, sizeof(*in_svs) * total_inputs); 321 if (in_ops) h->free(h, in_ops, sizeof(*in_ops) * total_inputs); 322 if (out_ops) h->free(h, out_ops, sizeof(*out_ops) * noutputs); 323 if (out_local_owned) h->free(h, out_local_owned, noutputs); 324 } 325 326 void kit_cg_file_scope_asm(KitCg* g, KitSlice asm_source) { 327 if (!g || !asm_source.s) return; 328 if (g->check_only) return; 329 if (g->target && g->target->file_scope_asm) { 330 g->target->file_scope_asm(g->target, asm_source.s, asm_source.len); 331 return; 332 } 333 compiler_panic(g->c, api_no_loc(), 334 "KitCg: file-scope asm requires target support"); 335 } 336 337 /* ============================================================ 338 * Labels / branches 339 * ============================================================ */