commit 15e2effc7193b2495ed95320ca05dab0fe2acdb0 parent 7eaf7bf92d5dc73af08024a55e2495bd71439b25 Author: Ryan Sepassi <rsepassi@gmail.com> Date: Tue, 2 Jun 2026 06:13:16 -0700 cg: supports_intrinsic query + diagnostic + CPU intrinsics (Track 4c) Diffstat:
42 files changed, 543 insertions(+), 63 deletions(-)
diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -239,6 +239,7 @@ CFREE_API int cfree_cg_target_supports_call_conv(CfreeCompiler*, CfreeCgCallConv); CFREE_API int cfree_cg_target_supports_symbol_feature(CfreeCompiler*, CfreeCgSymbolFeature); +/* cfree_cg_target_supports_intrinsic is declared after CfreeCgIntrinsic. */ CFREE_API uint64_t cfree_cg_target_backend_features(CfreeCompiler*); /* ============================================================ @@ -952,6 +953,14 @@ typedef enum CfreeCgBarrierScope { CFREE_API void cfree_cg_intrinsic(CfreeCg*, CfreeCgIntrinsic, uint32_t nargs, CfreeCgTypeId result_type); +/* Capability query for cfree_cg_intrinsic: true when the selected target has a + * legal lowering for this intrinsic (see cfree_cg_target_supports_call_conv for + * the contract). Frontends test this before requesting a baremetal/CPU + * intrinsic so they can emit their own unsupported-feature diagnostic instead + * of tripping the CG fatal. */ +CFREE_API int cfree_cg_target_supports_intrinsic(CfreeCompiler*, + CfreeCgIntrinsic); + /* ============================================================ * Fixed-Sized Memory Operations * ============================================================ */ diff --git a/lang/toy/builtins.c b/lang/toy/builtins.c @@ -493,6 +493,8 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name, intrin = CFREE_CG_INTRIN_ISB; if (!toy_parser_expect(p, TOK_LPAREN) || !toy_parser_expect(p, TOK_RPAREN)) return CFREE_CG_TYPE_NONE; + if (!cfree_cg_target_supports_intrinsic(p->c, intrin)) + return toy_unsupported_intrinsic(p); cfree_cg_intrinsic(p->cg, intrin, 0, toy_builtin_type(p, CFREE_CG_BUILTIN_VOID)); return toy_builtin_type(p, CFREE_CG_BUILTIN_VOID); @@ -686,6 +688,18 @@ CfreeCgTypeId toy_parse_builtin_call(ToyParser* p, CfreeSym name, return toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL); } + if (toy_sym_is(p, name, "supports_intrinsic")) { + CfreeCgIntrinsic intrin; + if (!toy_parser_expect(p, TOK_LPAREN) || + 
!toy_parse_intrinsic_const(p, &intrin) || + !toy_parser_expect(p, TOK_RPAREN)) + return CFREE_CG_TYPE_NONE; + cfree_cg_push_int( + p->cg, cfree_cg_target_supports_intrinsic(p->c, intrin) ? 1u : 0u, + toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL)); + return toy_builtin_type(p, CFREE_CG_BUILTIN_BOOL); + } + if (toy_sym_is(p, name, "has_backend_feature")) { uint64_t feature; if (!toy_parser_expect(p, TOK_LPAREN) || @@ -1450,11 +1464,30 @@ CfreeCgTypeId toy_parse_low_level_builtin_call(ToyParser* p, CfreeSym name, return toy_unsupported_intrinsic(p); } - if (toy_sym_is(p, name, "irq_save") || toy_sym_is(p, name, "wfi") || - toy_sym_is(p, name, "wfe") || toy_sym_is(p, name, "sev")) { + if (toy_sym_is(p, name, "wfi") || toy_sym_is(p, name, "wfe") || + toy_sym_is(p, name, "sev")) { + CfreeCgIntrinsic intrin = CFREE_CG_INTRIN_WFI; + if (toy_sym_is(p, name, "wfe")) + intrin = CFREE_CG_INTRIN_WFE; + else if (toy_sym_is(p, name, "sev")) + intrin = CFREE_CG_INTRIN_SEV; if (!toy_parser_expect(p, TOK_LPAREN) || !toy_parser_expect(p, TOK_RPAREN)) return CFREE_CG_TYPE_NONE; - return toy_unsupported_intrinsic(p); + if (!cfree_cg_target_supports_intrinsic(p->c, intrin)) + return toy_unsupported_intrinsic(p); + cfree_cg_intrinsic(p->cg, intrin, 0, + toy_builtin_type(p, CFREE_CG_BUILTIN_VOID)); + return toy_builtin_type(p, CFREE_CG_BUILTIN_VOID); + } + + if (toy_sym_is(p, name, "irq_save")) { + if (!toy_parser_expect(p, TOK_LPAREN) || !toy_parser_expect(p, TOK_RPAREN)) + return CFREE_CG_TYPE_NONE; + if (!cfree_cg_target_supports_intrinsic(p->c, CFREE_CG_INTRIN_IRQ_SAVE)) + return toy_unsupported_intrinsic(p); + /* Returns the saved interrupt-mask state as a usize (toy int type). 
*/ + cfree_cg_intrinsic(p->cg, CFREE_CG_INTRIN_IRQ_SAVE, 0, p->int_type); + return p->int_type; } if (toy_sym_is(p, name, "irq_restore")) { @@ -1467,17 +1500,29 @@ CfreeCgTypeId toy_parse_low_level_builtin_call(ToyParser* p, CfreeSym name, toy_error(p, p->cur.loc, "irq_restore expects integer state"); return CFREE_CG_TYPE_NONE; } - return toy_unsupported_intrinsic(p); + if (!cfree_cg_target_supports_intrinsic(p->c, CFREE_CG_INTRIN_IRQ_RESTORE)) + return toy_unsupported_intrinsic(p); + cfree_cg_intrinsic(p->cg, CFREE_CG_INTRIN_IRQ_RESTORE, 1, + toy_builtin_type(p, CFREE_CG_BUILTIN_VOID)); + return toy_builtin_type(p, CFREE_CG_BUILTIN_VOID); } if (toy_sym_is(p, name, "dmb") || toy_sym_is(p, name, "dsb")) { + CfreeCgIntrinsic intrin = + toy_sym_is(p, name, "dsb") ? CFREE_CG_INTRIN_DSB : CFREE_CG_INTRIN_DMB; CfreeCgBarrierScope scope; if (!toy_parser_expect(p, TOK_LPAREN) || !toy_parse_barrier_scope(p, &scope) || !toy_parser_expect(p, TOK_RPAREN)) return CFREE_CG_TYPE_NONE; - (void)scope; - return toy_unsupported_intrinsic(p); + if (!cfree_cg_target_supports_intrinsic(p->c, intrin)) + return toy_unsupported_intrinsic(p); + /* The barrier domain rides as an immediate operand the backend maps onto + * the arch's barrier-option field. 
*/ + cfree_cg_push_int(p->cg, (uint64_t)scope, p->int_type); + cfree_cg_intrinsic(p->cg, intrin, 1, + toy_builtin_type(p, CFREE_CG_BUILTIN_VOID)); + return toy_builtin_type(p, CFREE_CG_BUILTIN_VOID); } if (toy_sym_is(p, name, "dcache_clean") || diff --git a/lang/toy/expr.c b/lang/toy/expr.c @@ -412,6 +412,49 @@ int toy_parse_backend_feature_const(ToyParser* p, uint64_t* out) { return 1; } +int toy_parse_intrinsic_const(ToyParser* p, CfreeCgIntrinsic* out) { + CfreeSym name; + if (!toy_parser_expect(p, TOK_DOT) || p->cur.kind != TOK_IDENT) { + toy_error(p, p->cur.loc, "expected intrinsic name"); + return 0; + } + name = toy_tok_sym(p, p->cur); + toy_parser_advance(p); + if (toy_sym_is(p, name, "cpu_nop")) + *out = CFREE_CG_INTRIN_CPU_NOP; + else if (toy_sym_is(p, name, "cpu_yield")) + *out = CFREE_CG_INTRIN_CPU_YIELD; + else if (toy_sym_is(p, name, "wfi")) + *out = CFREE_CG_INTRIN_WFI; + else if (toy_sym_is(p, name, "wfe")) + *out = CFREE_CG_INTRIN_WFE; + else if (toy_sym_is(p, name, "sev")) + *out = CFREE_CG_INTRIN_SEV; + else if (toy_sym_is(p, name, "isb")) + *out = CFREE_CG_INTRIN_ISB; + else if (toy_sym_is(p, name, "dmb")) + *out = CFREE_CG_INTRIN_DMB; + else if (toy_sym_is(p, name, "dsb")) + *out = CFREE_CG_INTRIN_DSB; + else if (toy_sym_is(p, name, "irq_save")) + *out = CFREE_CG_INTRIN_IRQ_SAVE; + else if (toy_sym_is(p, name, "irq_restore")) + *out = CFREE_CG_INTRIN_IRQ_RESTORE; + else if (toy_sym_is(p, name, "irq_enable")) + *out = CFREE_CG_INTRIN_IRQ_ENABLE; + else if (toy_sym_is(p, name, "irq_disable")) + *out = CFREE_CG_INTRIN_IRQ_DISABLE; + else if (toy_sym_is(p, name, "syscall")) + *out = CFREE_CG_INTRIN_SYSCALL; + else if (toy_sym_is(p, name, "coro_switch")) + *out = CFREE_CG_INTRIN_CORO_SWITCH; + else { + toy_error(p, p->cur.loc, "unknown intrinsic"); + return 0; + } + return 1; +} + int toy_parse_rounding_const(ToyParser* p, CfreeCgRounding* out) { CfreeSym name; if (!toy_parse_attr_dot_name(p, &name)) return 0; diff --git a/lang/toy/internal.h 
b/lang/toy/internal.h @@ -333,6 +333,7 @@ int toy_parse_attr_int_arg(ToyParser* p, int64_t* out); int toy_parse_callconv_const(ToyParser* p, CfreeCgCallConv* out); int toy_parse_symbol_feature_const(ToyParser* p, CfreeCgSymbolFeature* out); int toy_parse_backend_feature_const(ToyParser* p, uint64_t* out); +int toy_parse_intrinsic_const(ToyParser* p, CfreeCgIntrinsic* out); int toy_parse_rounding_const(ToyParser* p, CfreeCgRounding* out); int toy_parse_attr_list(ToyParser* p, ToyAttrSet* attrs, CfreeSymBind default_bind); diff --git a/src/arch/aa64/isa.h b/src/arch/aa64/isa.h @@ -962,7 +962,13 @@ static inline u32 aa64_ldp64_post(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { #define AA64_HINT_FAMILY_MATCH 0xD503201Fu #define AA64_HINT_FAMILY_MASK 0xFFFFF01Fu /* CRm + op2 vary */ -#define AA64_HINT_OP_NOP 0u /* CRm=0, op2=0 */ +/* HINT #N with CRm=0: op2 selects the variant. */ +#define AA64_HINT_OP_NOP 0u /* CRm=0, op2=0 */ +#define AA64_HINT_OP_YIELD 1u /* CRm=0, op2=1 */ +#define AA64_HINT_OP_WFE 2u /* CRm=0, op2=2 */ +#define AA64_HINT_OP_WFI 3u /* CRm=0, op2=3 */ +#define AA64_HINT_OP_SEV 4u /* CRm=0, op2=4 */ +#define AA64_HINT_OP_SEVL 5u /* CRm=0, op2=5 */ typedef struct AA64Hint { u32 CRm, op2; @@ -972,6 +978,10 @@ static inline u32 aa64_hint_pack(AA64Hint f) { return AA64_HINT_FAMILY_MATCH | ((f.CRm & 0xfu) << 8) | ((f.op2 & 7u) << 5); } +static inline u32 aa64_hint(u32 op2) { + return aa64_hint_pack((AA64Hint){.CRm = 0, .op2 = op2}); +} + static inline AA64Hint aa64_hint_unpack(u32 w) { AA64Hint f; f.CRm = (w >> 8) & 0xfu; @@ -1050,6 +1060,26 @@ static inline u32 aa64_clrex(u32 opt) { } /* ==================================================================== + * Interrupt-mask (DAIF) system register access. Used by the IRQ-control + * intrinsics; privileged at EL0. Only the encodings the backend emits live + * here (they are not registered in the disassembler's mnemonic table). 
+ * MRS Xt, DAIF : 1101 0101 0011 1011 0100 0010 001 Rt -> 0xD53B4220|Rt + * MSR DAIF, Xt : 1101 0101 0001 1011 0100 0010 001 Rt -> 0xD51B4220|Rt + * MSR DAIFSet, #imm4 : op1=011, op2=110 -> 0xD50340DF | (imm4 << 8) + * MSR DAIFClr, #imm4 : op1=011, op2=111 -> 0xD50340FF | (imm4 << 8) + * imm4 = 0xF masks/unmasks D,A,I,F together. ==================== */ +#define AA64_DAIF_ALL 0xfu + +static inline u32 aa64_mrs_daif(u32 rt) { return 0xD53B4220u | (rt & 0x1fu); } +static inline u32 aa64_msr_daif(u32 rt) { return 0xD51B4220u | (rt & 0x1fu); } +static inline u32 aa64_msr_daifset(u32 imm4) { + return 0xD50340DFu | ((imm4 & 0xfu) << 8); +} +static inline u32 aa64_msr_daifclr(u32 imm4) { + return 0xD50340FFu | ((imm4 & 0xfu) << 8); +} + +/* ==================================================================== * Load/store pair, signed-offset (STP / LDP, no pre/post-increment). * opc(2) 101 V(1) 010 L(1) imm7 Rt2 Rn Rt (bit 23 = 0) * diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -3121,6 +3121,22 @@ static void aa_fence(NativeTarget* t, CfreeCgMemOrder order) { if (order != CFREE_CG_MO_RELAXED) aa_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); } +/* Map a CfreeCgBarrierScope (passed as an immediate arg to DMB/DSB) onto an + * AArch64 barrier domain option. Defaults to full-system (SY) when the scope + * is absent or unrecognized. 
*/ +static u32 aa_barrier_opt(const NativeLoc* args, u32 narg) { + if (narg < 1u || args[0].kind != NATIVE_LOC_IMM) return AA64_BARRIER_OPT_SY; + switch ((CfreeCgBarrierScope)args[0].v.imm) { + case CFREE_CG_BARRIER_FULL: return AA64_BARRIER_OPT_SY; + case CFREE_CG_BARRIER_INNER: return AA64_BARRIER_OPT_ISH; + case CFREE_CG_BARRIER_INNER_STORE: return AA64_BARRIER_OPT_ISHST; + case CFREE_CG_BARRIER_OUTER: return AA64_BARRIER_OPT_OSH; + case CFREE_CG_BARRIER_OUTER_STORE: return AA64_BARRIER_OPT_OSHST; + case CFREE_CG_BARRIER_NON_SHARE: return AA64_BARRIER_OPT_NSH; + } + return AA64_BARRIER_OPT_SY; +} + static void aa_intrinsic(NativeTarget* t, IntrinKind kind, const NativeLoc* dsts, u32 ndst, const NativeLoc* args, u32 narg) { @@ -3349,6 +3365,46 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind, case INTRIN_TRAP: aa_trap(t); return; + case INTRIN_CPU_NOP: + aa_emit32(t->mc, aa64_hint(AA64_HINT_OP_NOP)); + return; + case INTRIN_CPU_YIELD: + aa_emit32(t->mc, aa64_hint(AA64_HINT_OP_YIELD)); + return; + case INTRIN_WFI: + aa_emit32(t->mc, aa64_hint(AA64_HINT_OP_WFI)); + return; + case INTRIN_WFE: + aa_emit32(t->mc, aa64_hint(AA64_HINT_OP_WFE)); + return; + case INTRIN_SEV: + aa_emit32(t->mc, aa64_hint(AA64_HINT_OP_SEV)); + return; + case INTRIN_ISB: + aa_emit32(t->mc, aa64_isb(AA64_BARRIER_OPT_SY)); + return; + case INTRIN_DMB: + aa_emit32(t->mc, aa64_dmb(aa_barrier_opt(args, narg))); + return; + case INTRIN_DSB: + aa_emit32(t->mc, aa64_dsb(aa_barrier_opt(args, narg))); + return; + case INTRIN_IRQ_SAVE: + /* Read the interrupt-mask state, then mask D,A,I,F. 
*/ + if (ndst == 1u) { + aa_emit32(t->mc, aa64_mrs_daif(loc_reg(dsts[0]))); + aa_emit32(t->mc, aa64_msr_daifset(AA64_DAIF_ALL)); + } + return; + case INTRIN_IRQ_RESTORE: + if (narg == 1u) aa_emit32(t->mc, aa64_msr_daif(loc_reg(args[0]))); + return; + case INTRIN_IRQ_DISABLE: + aa_emit32(t->mc, aa64_msr_daifset(AA64_DAIF_ALL)); + return; + case INTRIN_IRQ_ENABLE: + aa_emit32(t->mc, aa64_msr_daifclr(AA64_DAIF_ALL)); + return; default: aa_panic(aa_of(t), "unsupported compiler intrinsic"); } diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h @@ -364,11 +364,20 @@ static inline u32 rv_nop(void) { return RV_NOP; } /* System */ static inline u32 rv_ecall(void) { return rv_i(0, 0, 0, 0, RV_SYSTEM); } static inline u32 rv_ebreak(void) { return rv_i(1, 0, 0, 0, RV_SYSTEM); } +/* WFI: wait-for-interrupt, SYSTEM funct12=0x105 (privileged). */ +static inline u32 rv_wfi(void) { return 0x10500073u; } /* FENCE: pred/succ each 4 bits in imm[11:8]/imm[7:4]. fm bits 11:8 of imm */ static inline u32 rv_fence_rw_rw(void) { return rv_i((i32)0x033, 0, 0, 0, RV_FENCE); } +/* FENCE.I: instruction-stream sync (Zifencei). funct3=1 in the MISC-MEM major + * opcode (0x0F). Used to lower the ISB intrinsic. */ +static inline u32 rv_fence_i(void) { return 0x0000100Fu; } +/* PAUSE (Zihintpause): a FENCE with pred=W, succ=none. Used for cpu_yield; + * decodes as a plain FENCE on hardware lacking the extension, which is a safe + * (stronger) no-op hint. */ +static inline u32 rv_pause(void) { return 0x0100000Fu; } /* ---- FP (F + D extensions) ---- * funct7 layout: bits[6:2] op-major (e.g. 
0x00 FADD, 0x01 FSUB, ...); diff --git a/src/arch/rv64/native.c b/src/arch/rv64/native.c @@ -2885,6 +2885,22 @@ static void rv_intrinsic(NativeTarget* t, IntrinKind kind, const NativeLoc* dsts } return; } + case INTRIN_CPU_NOP: + rv64_emit32(mc, rv_nop()); + return; + case INTRIN_CPU_YIELD: + rv64_emit32(mc, rv_pause()); + return; + case INTRIN_ISB: + rv64_emit32(mc, rv_fence_i()); + return; + case INTRIN_DMB: + case INTRIN_DSB: + rv64_emit32(mc, rv_fence_rw_rw()); + return; + case INTRIN_WFI: + rv64_emit32(mc, rv_wfi()); + return; default: break; } diff --git a/src/arch/wasm/emit.c b/src/arch/wasm/emit.c @@ -1606,6 +1606,30 @@ static const char* intrin_name(IntrinKind k) { return "__builtin_smul_overflow"; case INTRIN_UMUL_OVERFLOW: return "__builtin_umul_overflow"; + case INTRIN_CPU_NOP: + return "cpu_nop"; + case INTRIN_CPU_YIELD: + return "cpu_yield"; + case INTRIN_WFI: + return "wfi"; + case INTRIN_WFE: + return "wfe"; + case INTRIN_SEV: + return "sev"; + case INTRIN_ISB: + return "isb"; + case INTRIN_DMB: + return "dmb"; + case INTRIN_DSB: + return "dsb"; + case INTRIN_IRQ_SAVE: + return "irq_save"; + case INTRIN_IRQ_RESTORE: + return "irq_restore"; + case INTRIN_IRQ_ENABLE: + return "irq_enable"; + case INTRIN_IRQ_DISABLE: + return "irq_disable"; } return "<unknown>"; } @@ -1764,6 +1788,21 @@ void wasm_intrinsic(CGTarget* tg, IntrinKind k, Operand* dst, u32 ndst, intrin_name(k)); return; + /* Baremetal/CPU-control intrinsics have no wasm lowering; + * cfree_cg_target_supports_intrinsic reports them false so frontends + * diagnose before reaching here. Fall through to the generic panic. 
*/ + case INTRIN_CPU_NOP: + case INTRIN_CPU_YIELD: + case INTRIN_WFI: + case INTRIN_WFE: + case INTRIN_SEV: + case INTRIN_ISB: + case INTRIN_DMB: + case INTRIN_DSB: + case INTRIN_IRQ_SAVE: + case INTRIN_IRQ_RESTORE: + case INTRIN_IRQ_ENABLE: + case INTRIN_IRQ_DISABLE: case INTRIN_NONE: break; } diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c @@ -3333,6 +3333,32 @@ static void x64_intrinsic(NativeTarget* t, IntrinKind kind, } return; } + case INTRIN_CPU_NOP: { + u8 b = 0x90; /* NOP */ + mc->emit_bytes(mc, &b, 1); + return; + } + case INTRIN_CPU_YIELD: { + u8 b[2] = {0xF3, 0x90}; /* PAUSE */ + mc->emit_bytes(mc, b, 2); + return; + } + case INTRIN_DMB: + case INTRIN_DSB: { + u8 b[3] = {0x0F, 0xAE, 0xF0}; /* MFENCE: full-system memory barrier */ + mc->emit_bytes(mc, b, 3); + return; + } + case INTRIN_IRQ_DISABLE: { + u8 b = 0xFA; /* CLI (privileged) */ + mc->emit_bytes(mc, &b, 1); + return; + } + case INTRIN_IRQ_ENABLE: { + u8 b = 0xFB; /* STI (privileged) */ + mc->emit_bytes(mc, &b, 1); + return; + } case INTRIN_MEMSET: { u32 dr, n; if (narg != 3u || args[0].kind != NATIVE_LOC_REG || diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -821,24 +821,36 @@ IntrinKind api_map_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, return INTRIN_EXPECT; case CFREE_CG_INTRIN_ASSUME_ALIGNED: return INTRIN_ASSUME_ALIGNED; - case CFREE_CG_INTRIN_FMA: - case CFREE_CG_INTRIN_SYSCALL: + case CFREE_CG_INTRIN_CPU_NOP: + return INTRIN_CPU_NOP; + case CFREE_CG_INTRIN_CPU_YIELD: + return INTRIN_CPU_YIELD; + case CFREE_CG_INTRIN_WFI: + return INTRIN_WFI; + case CFREE_CG_INTRIN_WFE: + return INTRIN_WFE; + case CFREE_CG_INTRIN_SEV: + return INTRIN_SEV; + case CFREE_CG_INTRIN_ISB: + return INTRIN_ISB; + case CFREE_CG_INTRIN_DMB: + return INTRIN_DMB; + case CFREE_CG_INTRIN_DSB: + return INTRIN_DSB; case CFREE_CG_INTRIN_IRQ_SAVE: + return INTRIN_IRQ_SAVE; case CFREE_CG_INTRIN_IRQ_RESTORE: - case CFREE_CG_INTRIN_IRQ_DISABLE: + return INTRIN_IRQ_RESTORE; case CFREE_CG_INTRIN_IRQ_ENABLE: - case 
CFREE_CG_INTRIN_DMB: - case CFREE_CG_INTRIN_DSB: - case CFREE_CG_INTRIN_ISB: + return INTRIN_IRQ_ENABLE; + case CFREE_CG_INTRIN_IRQ_DISABLE: + return INTRIN_IRQ_DISABLE; + case CFREE_CG_INTRIN_FMA: + case CFREE_CG_INTRIN_SYSCALL: case CFREE_CG_INTRIN_DCACHE_CLEAN: case CFREE_CG_INTRIN_DCACHE_INVALIDATE: case CFREE_CG_INTRIN_DCACHE_CLEAN_INVALIDATE: case CFREE_CG_INTRIN_ICACHE_INVALIDATE: - case CFREE_CG_INTRIN_CPU_NOP: - case CFREE_CG_INTRIN_CPU_YIELD: - case CFREE_CG_INTRIN_WFI: - case CFREE_CG_INTRIN_WFE: - case CFREE_CG_INTRIN_SEV: case CFREE_CG_INTRIN_CORO_SWITCH: return INTRIN_NONE; } @@ -846,8 +858,25 @@ IntrinKind api_map_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, } int api_intrinsic_is_void(CfreeCgIntrinsic intrin) { - return intrin == CFREE_CG_INTRIN_TRAP || intrin == CFREE_CG_INTRIN_LONGJMP || - intrin == CFREE_CG_INTRIN_PREFETCH; + switch (intrin) { + case CFREE_CG_INTRIN_TRAP: + case CFREE_CG_INTRIN_LONGJMP: + case CFREE_CG_INTRIN_PREFETCH: + case CFREE_CG_INTRIN_CPU_NOP: + case CFREE_CG_INTRIN_CPU_YIELD: + case CFREE_CG_INTRIN_WFI: + case CFREE_CG_INTRIN_WFE: + case CFREE_CG_INTRIN_SEV: + case CFREE_CG_INTRIN_ISB: + case CFREE_CG_INTRIN_DMB: + case CFREE_CG_INTRIN_DSB: + case CFREE_CG_INTRIN_IRQ_RESTORE: + case CFREE_CG_INTRIN_IRQ_ENABLE: + case CFREE_CG_INTRIN_IRQ_DISABLE: + return 1; + default: + return 0; + } } int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin) { @@ -859,6 +888,61 @@ int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin) { intrin == CFREE_CG_INTRIN_UMUL_OVERFLOW; } +const char* api_arch_name(CfreeArchKind arch) { + switch (arch) { + case CFREE_ARCH_X86_32: return "x86"; + case CFREE_ARCH_X86_64: return "x86_64"; + case CFREE_ARCH_ARM_32: return "arm"; + case CFREE_ARCH_ARM_64: return "aarch64"; + case CFREE_ARCH_RV32: return "riscv32"; + case CFREE_ARCH_RV64: return "riscv64"; + case CFREE_ARCH_WASM: return "wasm"; + } + return "target"; +} + +const char* api_intrinsic_name(CfreeCgIntrinsic intrin) { + switch (intrin) { + 
case CFREE_CG_INTRIN_TRAP: return "trap"; + case CFREE_CG_INTRIN_CLZ: return "clz"; + case CFREE_CG_INTRIN_CTZ: return "ctz"; + case CFREE_CG_INTRIN_POPCOUNT: return "popcount"; + case CFREE_CG_INTRIN_BSWAP: return "bswap"; + case CFREE_CG_INTRIN_SETJMP: return "setjmp"; + case CFREE_CG_INTRIN_LONGJMP: return "longjmp"; + case CFREE_CG_INTRIN_SADD_OVERFLOW: return "sadd_overflow"; + case CFREE_CG_INTRIN_UADD_OVERFLOW: return "uadd_overflow"; + case CFREE_CG_INTRIN_SSUB_OVERFLOW: return "ssub_overflow"; + case CFREE_CG_INTRIN_USUB_OVERFLOW: return "usub_overflow"; + case CFREE_CG_INTRIN_SMUL_OVERFLOW: return "smul_overflow"; + case CFREE_CG_INTRIN_UMUL_OVERFLOW: return "umul_overflow"; + case CFREE_CG_INTRIN_FMA: return "fma"; + case CFREE_CG_INTRIN_PREFETCH: return "prefetch"; + case CFREE_CG_INTRIN_EXPECT: return "expect"; + case CFREE_CG_INTRIN_ASSUME_ALIGNED: return "assume_aligned"; + case CFREE_CG_INTRIN_SYSCALL: return "syscall"; + case CFREE_CG_INTRIN_IRQ_SAVE: return "irq_save"; + case CFREE_CG_INTRIN_IRQ_RESTORE: return "irq_restore"; + case CFREE_CG_INTRIN_IRQ_DISABLE: return "irq_disable"; + case CFREE_CG_INTRIN_IRQ_ENABLE: return "irq_enable"; + case CFREE_CG_INTRIN_DMB: return "dmb"; + case CFREE_CG_INTRIN_DSB: return "dsb"; + case CFREE_CG_INTRIN_ISB: return "isb"; + case CFREE_CG_INTRIN_DCACHE_CLEAN: return "dcache_clean"; + case CFREE_CG_INTRIN_DCACHE_INVALIDATE: return "dcache_invalidate"; + case CFREE_CG_INTRIN_DCACHE_CLEAN_INVALIDATE: + return "dcache_clean_invalidate"; + case CFREE_CG_INTRIN_ICACHE_INVALIDATE: return "icache_invalidate"; + case CFREE_CG_INTRIN_CPU_NOP: return "cpu_nop"; + case CFREE_CG_INTRIN_CPU_YIELD: return "cpu_yield"; + case CFREE_CG_INTRIN_WFI: return "wfi"; + case CFREE_CG_INTRIN_WFE: return "wfe"; + case CFREE_CG_INTRIN_SEV: return "sev"; + case CFREE_CG_INTRIN_CORO_SWITCH: return "coro_switch"; + } + return "intrinsic"; +} + void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, CfreeCgTypeId 
result_type) { CgTarget* T; @@ -876,8 +960,11 @@ void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, rty = resolve_type(g->c, result_type); int_ty = builtin_id(CFREE_CG_BUILTIN_I32); kind = api_map_intrinsic(g, intrin, result_type); - if (kind == INTRIN_NONE) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported intrinsic"); + if (!cfree_cg_target_supports_intrinsic(g->c, intrin) || + kind == INTRIN_NONE) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: target '%s' does not support intrinsic '%s'", + api_arch_name(g->c->target.arch), api_intrinsic_name(intrin)); return; } @@ -895,7 +982,8 @@ void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, if (api_sv_op_is(&svs[idx], OPK_IMM) && (intrin == CFREE_CG_INTRIN_EXPECT || intrin == CFREE_CG_INTRIN_ASSUME_ALIGNED || - intrin == CFREE_CG_INTRIN_PREFETCH)) { + intrin == CFREE_CG_INTRIN_PREFETCH || + intrin == CFREE_CG_INTRIN_DMB || intrin == CFREE_CG_INTRIN_DSB)) { args[idx] = svs[idx].op; } else { args[idx] = api_force_local(g, &svs[idx], aty); diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h @@ -144,6 +144,24 @@ typedef enum IntrinKind { INTRIN_USUB_OVERFLOW, INTRIN_SMUL_OVERFLOW, INTRIN_UMUL_OVERFLOW, + + /* baremetal CPU control — one or two instructions each. dsts/args are + * empty except DMB/DSB (args[0] = optional barrier-scope immediate), + * IRQ_SAVE (dsts[0] = saved state) and IRQ_RESTORE (args[0] = state to + * restore). Privileged forms (WFI/WFE/SEV, IRQ family) may trap at user + * level; backends still emit them, frontends gate use on a capability test. 
*/ + INTRIN_CPU_NOP, + INTRIN_CPU_YIELD, + INTRIN_WFI, + INTRIN_WFE, + INTRIN_SEV, + INTRIN_ISB, + INTRIN_DMB, + INTRIN_DSB, + INTRIN_IRQ_SAVE, + INTRIN_IRQ_RESTORE, + INTRIN_IRQ_ENABLE, + INTRIN_IRQ_DISABLE, } IntrinKind; typedef enum OpKind { diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -225,6 +225,8 @@ IntrinKind api_map_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, CfreeCgTypeId result_type); int api_intrinsic_is_void(CfreeCgIntrinsic intrin); int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin); +const char* api_intrinsic_name(CfreeCgIntrinsic intrin); +const char* api_arch_name(CfreeArchKind arch); void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, CfreeCgTypeId result_type); CfreeCgTypeId api_atomic_pointee(CfreeCg* g, CfreeCgTypeId pty, diff --git a/src/cg/type.c b/src/cg/type.c @@ -995,6 +995,72 @@ int cfree_cg_target_supports_symbol_feature(CfreeCompiler* c, return 0; } +int cfree_cg_target_supports_intrinsic(CfreeCompiler* c, + CfreeCgIntrinsic intrin) { + CfreeArchKind arch; + if (!c) return 0; + arch = c->target.arch; + switch (intrin) { + /* Portable intrinsics every backend (native + wasm + C-source) lowers. + * The C-source backend runs under the host's native arch, so it is covered + * by the native arches here. 
*/ + case CFREE_CG_INTRIN_TRAP: + case CFREE_CG_INTRIN_CLZ: + case CFREE_CG_INTRIN_CTZ: + case CFREE_CG_INTRIN_POPCOUNT: + case CFREE_CG_INTRIN_BSWAP: + case CFREE_CG_INTRIN_SADD_OVERFLOW: + case CFREE_CG_INTRIN_UADD_OVERFLOW: + case CFREE_CG_INTRIN_SSUB_OVERFLOW: + case CFREE_CG_INTRIN_USUB_OVERFLOW: + case CFREE_CG_INTRIN_SMUL_OVERFLOW: + case CFREE_CG_INTRIN_UMUL_OVERFLOW: + case CFREE_CG_INTRIN_PREFETCH: + case CFREE_CG_INTRIN_EXPECT: + case CFREE_CG_INTRIN_ASSUME_ALIGNED: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_X86_64 || + arch == CFREE_ARCH_RV64 || arch == CFREE_ARCH_WASM; + + /* Single-instruction CPU control: NOP / YIELD exist on all three native + * arches; the wait/event/barrier/IRQ forms are arch-specific (see the + * per-backend nd_intrinsic switch). */ + case CFREE_CG_INTRIN_CPU_NOP: + case CFREE_CG_INTRIN_CPU_YIELD: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_X86_64 || + arch == CFREE_ARCH_RV64; + case CFREE_CG_INTRIN_ISB: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_RV64; + case CFREE_CG_INTRIN_DMB: + case CFREE_CG_INTRIN_DSB: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_X86_64 || + arch == CFREE_ARCH_RV64; + case CFREE_CG_INTRIN_WFI: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_RV64; + case CFREE_CG_INTRIN_WFE: + case CFREE_CG_INTRIN_SEV: + return arch == CFREE_ARCH_ARM_64; + case CFREE_CG_INTRIN_IRQ_SAVE: + case CFREE_CG_INTRIN_IRQ_RESTORE: + return arch == CFREE_ARCH_ARM_64; + case CFREE_CG_INTRIN_IRQ_ENABLE: + case CFREE_CG_INTRIN_IRQ_DISABLE: + return arch == CFREE_ARCH_ARM_64 || arch == CFREE_ARCH_X86_64; + + /* Not yet implemented on any native backend. 
*/ + case CFREE_CG_INTRIN_SETJMP: + case CFREE_CG_INTRIN_LONGJMP: + case CFREE_CG_INTRIN_FMA: + case CFREE_CG_INTRIN_SYSCALL: + case CFREE_CG_INTRIN_DCACHE_CLEAN: + case CFREE_CG_INTRIN_DCACHE_INVALIDATE: + case CFREE_CG_INTRIN_DCACHE_CLEAN_INVALIDATE: + case CFREE_CG_INTRIN_ICACHE_INVALIDATE: + case CFREE_CG_INTRIN_CORO_SWITCH: + return 0; + } + return 0; +} + uint64_t cfree_cg_target_backend_features(CfreeCompiler* c) { uint64_t out = 0; if (!c) return 0; diff --git a/src/interp/engine.c b/src/interp/engine.c @@ -1423,6 +1423,14 @@ static int interp_intrinsic(InterpStack* st, InterpFunc* fn, u64* regs, return 1; case INTRIN_PREFETCH: return 1; + /* CPU hints and memory barriers have no observable effect in the + * single-threaded interpreter model: treat them as no-ops. */ + case INTRIN_CPU_NOP: + case INTRIN_CPU_YIELD: + case INTRIN_ISB: + case INTRIN_DMB: + case INTRIN_DSB: + return 1; case INTRIN_TRAP: fault(st, "__builtin_trap"); return 0; diff --git a/test/toy/cases/143_baremetal_hints.cbackend.skip b/test/toy/cases/143_baremetal_hints.cbackend.skip @@ -0,0 +1 @@ +C source target has no lowering for baremetal CPU/barrier intrinsics diff --git a/test/toy/cases/143_baremetal_hints.expected b/test/toy/cases/143_baremetal_hints.expected @@ -0,0 +1 @@ +42 diff --git a/test/toy/cases/143_baremetal_hints.toy b/test/toy/cases/143_baremetal_hints.toy @@ -0,0 +1,12 @@ +fn __user_main(): i64 { + // CPU hints and memory barriers shared by all native backends: single + // instructions with no architectural side effect at user level, so they + // execute inline and the function still returns its sentinel. 
+ @cpu_nop(); + @cpu_yield(); + @dmb(.inner); + @dsb(.full); + return 42; +} + +fn main(): i32 { return __user_main() as i32; } diff --git a/test/toy/cases/143_baremetal_hints.wasm.skip b/test/toy/cases/143_baremetal_hints.wasm.skip @@ -0,0 +1 @@ +wasm has no lowering for baremetal CPU/barrier/IRQ intrinsics diff --git a/test/toy/cases/144_intrinsic_capability_query.expected b/test/toy/cases/144_intrinsic_capability_query.expected @@ -0,0 +1 @@ +42 diff --git a/test/toy/cases/144_intrinsic_capability_query.toy b/test/toy/cases/144_intrinsic_capability_query.toy @@ -0,0 +1,20 @@ +fn __user_main(): i64 { + // @supports_intrinsic is a compile-time capability query: it folds to a + // boolean constant for the selected target. nop/yield are lowerable on every + // native backend; the dmb/dsb barriers likewise. This case only queries (it + // emits no arch-specific instruction) so it runs on every native target. + let nop_ok: bool = @supports_intrinsic(.cpu_nop); + let yield_ok: bool = @supports_intrinsic(.cpu_yield); + let dmb_ok: bool = @supports_intrinsic(.dmb); + let dsb_ok: bool = @supports_intrinsic(.dsb); + // syscall/coro_switch have no native lowering yet on any backend. 
+ let syscall_unsupported: bool = !@supports_intrinsic(.syscall); + let coro_unsupported: bool = !@supports_intrinsic(.coro_switch); + if nop_ok and yield_ok and dmb_ok and dsb_ok and + syscall_unsupported and coro_unsupported { + return 42; + } + return 1; +} + +fn main(): i32 { return __user_main() as i32; } diff --git a/test/toy/cases/144_intrinsic_capability_query.wasm.skip b/test/toy/cases/144_intrinsic_capability_query.wasm.skip @@ -0,0 +1 @@ +wasm has no lowering for baremetal CPU/barrier/IRQ intrinsics diff --git a/test/toy/cases/145_baremetal_privileged_aa64.cbackend.skip b/test/toy/cases/145_baremetal_privileged_aa64.cbackend.skip @@ -0,0 +1 @@ +C source target has no lowering for baremetal CPU/barrier/IRQ intrinsics diff --git a/test/toy/cases/145_baremetal_privileged_aa64.expected b/test/toy/cases/145_baremetal_privileged_aa64.expected @@ -0,0 +1 @@ +42 diff --git a/test/toy/cases/145_baremetal_privileged_aa64.toy b/test/toy/cases/145_baremetal_privileged_aa64.toy @@ -0,0 +1,24 @@ +// aarch64-only: the wait/event and interrupt-mask (DAIF) intrinsics are real +// single instructions but are privileged at EL0 (or stall), so they are +// emitted inside a runtime-false branch — compiled and lowered through CG, but +// never executed by the harness. Gated to aarch64 because wfe/sev and the +// irq_save/irq_restore DAIF pair have no x64/rv64 lowering today. +// @target_arch() is 1 on aarch64; this case only ever compiles for aarch64 +// (default host paths), where the guard below is statically false at runtime. +fn __user_main(): i64 { + let arch: i64 = @target_arch(); + if arch != 1 { + // Dead at runtime on aarch64, still fully code-generated at -O0. 
+ @wfi(); + @wfe(); + @sev(); + @isb(); + @irq_disable(); + @irq_enable(); + let flags: usize = @irq_save(); + @irq_restore(flags); + } + return 42; +} + +fn main(): i32 { return __user_main() as i32; } diff --git a/test/toy/cases/145_baremetal_privileged_aa64.wasm.skip b/test/toy/cases/145_baremetal_privileged_aa64.wasm.skip @@ -0,0 +1 @@ +wasm has no lowering for baremetal CPU/barrier/IRQ intrinsics diff --git a/test/toy/err/unsupported_cpu_nop.expected b/test/toy/err/unsupported_cpu_nop.expected @@ -1 +0,0 @@ -unsupported intrinsic diff --git a/test/toy/err/unsupported_cpu_nop.toy b/test/toy/err/unsupported_cpu_nop.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @cpu_nop(); - return 42; -} diff --git a/test/toy/err/unsupported_dmb.expected b/test/toy/err/unsupported_dmb.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_dmb.toy b/test/toy/err/unsupported_dmb.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @dmb(.inner); - return 0; -} diff --git a/test/toy/err/unsupported_dsb.expected b/test/toy/err/unsupported_dsb.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_dsb.toy b/test/toy/err/unsupported_dsb.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @dsb(.full); - return 0; -} diff --git a/test/toy/err/unsupported_irq_restore.expected b/test/toy/err/unsupported_irq_restore.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_irq_restore.toy b/test/toy/err/unsupported_irq_restore.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @irq_restore(0); - return 0; -} diff --git a/test/toy/err/unsupported_irq_save.expected b/test/toy/err/unsupported_irq_save.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_irq_save.toy b/test/toy/err/unsupported_irq_save.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - let flags: usize = @irq_save(); - return flags as i64; -} diff --git a/test/toy/err/unsupported_sev.expected b/test/toy/err/unsupported_sev.expected @@ -1 
+0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_sev.toy b/test/toy/err/unsupported_sev.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @sev(); - return 0; -} diff --git a/test/toy/err/unsupported_wfe.expected b/test/toy/err/unsupported_wfe.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_wfe.toy b/test/toy/err/unsupported_wfe.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @wfe(); - return 0; -} diff --git a/test/toy/err/unsupported_wfi.expected b/test/toy/err/unsupported_wfi.expected @@ -1 +0,0 @@ -unsupported target intrinsic diff --git a/test/toy/err/unsupported_wfi.toy b/test/toy/err/unsupported_wfi.toy @@ -1,4 +0,0 @@ -fn main(): i64 { - @wfi(); - return 0; -}