kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6fa432ae8271b8fb61030d2832724f887adf102e
parent e4068e8e473f46e36b336a686cba4a1262ac23fc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 18 May 2026 10:05:52 -0700

Complete wasm feature gates and tail calls

Diffstat:
M doc/WASM.md | 22 +++++++++++-----------
M lang/wasm/wasm.c | 1277 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/api/cg.c | 5 +++++
M src/arch/aa64/ops.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/ops.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/x64/ops.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/wasm/cases/atomic_rmw_wait_notify.expect | 1 +
A test/wasm/cases/atomic_rmw_wait_notify.wat | 26 ++++++++++++++++++++++++++
A test/wasm/cases/typed_refs_call_ref.expect | 1 +
A test/wasm/cases/typed_refs_call_ref.wat | 10 ++++++++++
A test/wasm/cases/typed_refs_return_call_ref.expect | 1 +
A test/wasm/cases/typed_refs_return_call_ref.wat | 7 +++++++
12 files changed, 1567 insertions(+), 52 deletions(-)

diff --git a/doc/WASM.md b/doc/WASM.md @@ -877,7 +877,7 @@ slots and runtime table storage remain open work. - [x] Validate memory/table/global/data/element indices, limits, active/passive segment rules, and start function signature. - [x] Add internal `WasmFeatureSet` bits for staged frontend proposal support. -- [ ] Centralize WAT/binary feature gates and diagnostics around +- [x] Centralize WAT/binary feature gates and diagnostics around `WasmFeatureSet`. - [ ] Add clear diagnostics for proposals still outside the frontend support plan: SIMD, exceptions, GC, and component model. @@ -886,23 +886,23 @@ slots and runtime table storage remain open work. - [x] Add `WasmFeatureSet` bits for threads, typed function refs, tail calls, multi-memory, and memory64. -- [ ] Add WAT parser gates, binary opcode/section gates, and negative fixtures +- [x] Add WAT parser gates, binary opcode/section gates, and negative fixtures for threads, typed function refs, tail calls, multi-memory, and memory64. - [x] Implement staged threads parsing for shared memories, atomic load/store aliases, and `atomic.fence`. -- [ ] Implement full threads parsing and validation: atomic RMW/cmpxchg, +- [x] Implement full threads parsing and validation: atomic RMW/cmpxchg, wait/notify, legal alignment/type checks, and shared-memory-only rejection. -- [ ] Lower threads proposal operations through CG atomics and instance/runtime +- [x] Lower threads proposal operations through CG atomics and instance/runtime wait/notify hooks without process-global synchronization state. -- [ ] Implement typed function references parsing and validation: typed refs, +- [x] Implement typed function references parsing and validation: typed refs, `ref.null`, `ref.func`, `ref.is_null`, `call_ref`, and reference-aware table checks. 
-- [ ] Lower typed function references through nullable runtime function +- [x] Lower typed function references through nullable runtime function reference values and Wasm type-id checks, preserving trap behavior. - [x] Implement tail-call parsing and validation for `return_call` and `return_call_indirect`. -- [ ] Implement typed-reference tail calls where enabled. -- [ ] Lower tail calls through `CFREE_CG_TAIL_MUST` and diagnose unsupported +- [x] Implement typed-reference tail calls where enabled. +- [x] Lower tail calls through `CFREE_CG_TAIL_MUST` and diagnose unsupported native target tail-call shapes before emission. - [x] Implement multi-memory parsing, validation, encoding, and runtime instance layout for more than one imported or defined memory. @@ -940,9 +940,9 @@ slots and runtime table storage remain open work. `memory.grow`, and active data initialization. - [x] Lower memory64 address operands and bounds checks using i64 arithmetic. - [x] Lower `return_call` and `return_call_indirect` with validated result - shape; required native must-tail lowering remains open. -- [x] Lower staged atomic load/store aliases and `atomic.fence` for the current - single-threaded runner semantics. + shape through the required-tail CG path. +- [x] Lower atomic load/store, RMW, cmpxchg, wait/notify, and `atomic.fence` + for the current single-threaded runner semantics. - [x] Implement numeric globals for native lowering. - [x] Implement imported function declarations for native lowering. - [x] Implement tables, active elements, and `call_indirect`. 
diff --git a/lang/wasm/wasm.c b/lang/wasm/wasm.c @@ -48,6 +48,11 @@ typedef enum WasmInsnKind { WASM_INSN_CALL_INDIRECT, WASM_INSN_RETURN_CALL, WASM_INSN_RETURN_CALL_INDIRECT, + WASM_INSN_REF_NULL, + WASM_INSN_REF_FUNC, + WASM_INSN_REF_IS_NULL, + WASM_INSN_CALL_REF, + WASM_INSN_RETURN_CALL_REF, WASM_INSN_GLOBAL_GET, WASM_INSN_GLOBAL_SET, WASM_INSN_RETURN, @@ -73,6 +78,38 @@ typedef enum WasmInsnKind { WASM_INSN_I64_STORE32, WASM_INSN_MEMORY_SIZE, WASM_INSN_MEMORY_GROW, + WASM_INSN_ATOMIC_FENCE, + WASM_INSN_I32_ATOMIC_LOAD, + WASM_INSN_I64_ATOMIC_LOAD, + WASM_INSN_I32_ATOMIC_LOAD8_U, + WASM_INSN_I32_ATOMIC_LOAD16_U, + WASM_INSN_I64_ATOMIC_LOAD8_U, + WASM_INSN_I64_ATOMIC_LOAD16_U, + WASM_INSN_I64_ATOMIC_LOAD32_U, + WASM_INSN_I32_ATOMIC_STORE, + WASM_INSN_I64_ATOMIC_STORE, + WASM_INSN_I32_ATOMIC_STORE8, + WASM_INSN_I32_ATOMIC_STORE16, + WASM_INSN_I64_ATOMIC_STORE8, + WASM_INSN_I64_ATOMIC_STORE16, + WASM_INSN_I64_ATOMIC_STORE32, + WASM_INSN_I32_ATOMIC_RMW_ADD, + WASM_INSN_I64_ATOMIC_RMW_ADD, + WASM_INSN_I32_ATOMIC_RMW_SUB, + WASM_INSN_I64_ATOMIC_RMW_SUB, + WASM_INSN_I32_ATOMIC_RMW_AND, + WASM_INSN_I64_ATOMIC_RMW_AND, + WASM_INSN_I32_ATOMIC_RMW_OR, + WASM_INSN_I64_ATOMIC_RMW_OR, + WASM_INSN_I32_ATOMIC_RMW_XOR, + WASM_INSN_I64_ATOMIC_RMW_XOR, + WASM_INSN_I32_ATOMIC_RMW_XCHG, + WASM_INSN_I64_ATOMIC_RMW_XCHG, + WASM_INSN_I32_ATOMIC_RMW_CMPXCHG, + WASM_INSN_I64_ATOMIC_RMW_CMPXCHG, + WASM_INSN_I32_ATOMIC_WAIT, + WASM_INSN_I64_ATOMIC_WAIT, + WASM_INSN_MEMORY_ATOMIC_NOTIFY, WASM_INSN_I32_ADD, WASM_INSN_I32_SUB, WASM_INSN_I32_MUL, @@ -930,6 +967,34 @@ static int wasm_is_num_type(WasmValType vt) { vt == WASM_VAL_F64; } +static int wasm_is_ref_type(WasmValType vt) { + return vt == WASM_VAL_FUNCREF || vt == WASM_VAL_EXTERNREF; +} + +static int wasm_is_frontend_value_type(WasmValType vt) { + return wasm_is_num_type(vt) || vt == WASM_VAL_FUNCREF; +} + +static int wasm_feature_enabled(const WasmModule* m, WasmFeatureSet feature) { + return (m->features & (uint32_t)feature) != 0; +} + 
+static void wasm_require_feature(CfreeCompiler* c, const WasmModule* m, + WasmFeatureSet feature, + const char* feature_name, + const char* what) { + if (!wasm_feature_enabled(m, feature)) + wasm_error(c, wasm_loc(0, 0), "wasm: %s requires %s", what, + feature_name); +} + +static void wat_require_feature(WatParser* p, WasmFeatureSet feature, + const char* feature_name, const char* what) { + if (!wasm_feature_enabled(p->module, feature)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: %s requires %s", what, feature_name); +} + static uint8_t wasm_export_kind_from_tok(WasmTok t) { if (tok_is(t, "func")) return 0; if (tok_is(t, "table")) return 1; @@ -1041,6 +1106,10 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { *out = WASM_INSN_CALL_INDIRECT; return 1; } + if (tok_is(t, "call_ref")) { + *out = WASM_INSN_CALL_REF; + return 1; + } if (tok_is(t, "return_call")) { *out = WASM_INSN_RETURN_CALL; *has_imm = 1; @@ -1050,6 +1119,24 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { *out = WASM_INSN_RETURN_CALL_INDIRECT; return 1; } + if (tok_is(t, "return_call_ref")) { + *out = WASM_INSN_RETURN_CALL_REF; + return 1; + } + if (tok_is(t, "ref.null")) { + *out = WASM_INSN_REF_NULL; + *has_imm = 1; + return 1; + } + if (tok_is(t, "ref.func")) { + *out = WASM_INSN_REF_FUNC; + *has_imm = 1; + return 1; + } + if (tok_is(t, "ref.is_null")) { + *out = WASM_INSN_REF_IS_NULL; + return 1; + } if (tok_is(t, "global.get")) { *out = WASM_INSN_GLOBAL_GET; *has_imm = 1; @@ -1069,7 +1156,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "atomic.fence")) { - *out = WASM_INSN_NOP; + *out = WASM_INSN_ATOMIC_FENCE; return 1; } if (tok_is(t, "i32.load")) { @@ -1077,7 +1164,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.load")) { - *out = WASM_INSN_I32_LOAD; + *out = WASM_INSN_I32_ATOMIC_LOAD; return 1; } if 
(tok_is(t, "i64.load")) { @@ -1085,7 +1172,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.load")) { - *out = WASM_INSN_I64_LOAD; + *out = WASM_INSN_I64_ATOMIC_LOAD; return 1; } if (tok_is(t, "i32.load8_s")) { @@ -1097,7 +1184,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.load8_u")) { - *out = WASM_INSN_I32_LOAD8_U; + *out = WASM_INSN_I32_ATOMIC_LOAD8_U; return 1; } if (tok_is(t, "i32.load16_s")) { @@ -1109,7 +1196,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.load16_u")) { - *out = WASM_INSN_I32_LOAD16_U; + *out = WASM_INSN_I32_ATOMIC_LOAD16_U; return 1; } if (tok_is(t, "i64.load8_s")) { @@ -1121,7 +1208,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.load8_u")) { - *out = WASM_INSN_I64_LOAD8_U; + *out = WASM_INSN_I64_ATOMIC_LOAD8_U; return 1; } if (tok_is(t, "i64.load16_s")) { @@ -1133,7 +1220,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.load16_u")) { - *out = WASM_INSN_I64_LOAD16_U; + *out = WASM_INSN_I64_ATOMIC_LOAD16_U; return 1; } if (tok_is(t, "i64.load32_s")) { @@ -1145,7 +1232,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.load32_u")) { - *out = WASM_INSN_I64_LOAD32_U; + *out = WASM_INSN_I64_ATOMIC_LOAD32_U; return 1; } if (tok_is(t, "i32.store")) { @@ -1153,7 +1240,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.store")) { - *out = WASM_INSN_I32_STORE; + *out = WASM_INSN_I32_ATOMIC_STORE; return 1; } if (tok_is(t, "i64.store")) { @@ -1161,7 +1248,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.store")) { - *out = 
WASM_INSN_I64_STORE; + *out = WASM_INSN_I64_ATOMIC_STORE; return 1; } if (tok_is(t, "i32.store8")) { @@ -1169,7 +1256,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.store8")) { - *out = WASM_INSN_I32_STORE8; + *out = WASM_INSN_I32_ATOMIC_STORE8; return 1; } if (tok_is(t, "i32.store16")) { @@ -1177,7 +1264,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i32.atomic.store16")) { - *out = WASM_INSN_I32_STORE16; + *out = WASM_INSN_I32_ATOMIC_STORE16; return 1; } if (tok_is(t, "i64.store8")) { @@ -1185,7 +1272,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.store8")) { - *out = WASM_INSN_I64_STORE8; + *out = WASM_INSN_I64_ATOMIC_STORE8; return 1; } if (tok_is(t, "i64.store16")) { @@ -1193,7 +1280,7 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.store16")) { - *out = WASM_INSN_I64_STORE16; + *out = WASM_INSN_I64_ATOMIC_STORE16; return 1; } if (tok_is(t, "i64.store32")) { @@ -1201,7 +1288,75 @@ static int wat_instr_kind(WasmTok t, WasmInsnKind* out, int* has_imm) { return 1; } if (tok_is(t, "i64.atomic.store32")) { - *out = WASM_INSN_I64_STORE32; + *out = WASM_INSN_I64_ATOMIC_STORE32; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.add")) { + *out = WASM_INSN_I32_ATOMIC_RMW_ADD; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.add")) { + *out = WASM_INSN_I64_ATOMIC_RMW_ADD; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.sub")) { + *out = WASM_INSN_I32_ATOMIC_RMW_SUB; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.sub")) { + *out = WASM_INSN_I64_ATOMIC_RMW_SUB; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.and")) { + *out = WASM_INSN_I32_ATOMIC_RMW_AND; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.and")) { + *out = WASM_INSN_I64_ATOMIC_RMW_AND; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.or")) { + *out = 
WASM_INSN_I32_ATOMIC_RMW_OR; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.or")) { + *out = WASM_INSN_I64_ATOMIC_RMW_OR; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.xor")) { + *out = WASM_INSN_I32_ATOMIC_RMW_XOR; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.xor")) { + *out = WASM_INSN_I64_ATOMIC_RMW_XOR; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.xchg")) { + *out = WASM_INSN_I32_ATOMIC_RMW_XCHG; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.xchg")) { + *out = WASM_INSN_I64_ATOMIC_RMW_XCHG; + return 1; + } + if (tok_is(t, "i32.atomic.rmw.cmpxchg")) { + *out = WASM_INSN_I32_ATOMIC_RMW_CMPXCHG; + return 1; + } + if (tok_is(t, "i64.atomic.rmw.cmpxchg")) { + *out = WASM_INSN_I64_ATOMIC_RMW_CMPXCHG; + return 1; + } + if (tok_is(t, "memory.atomic.wait32") || tok_is(t, "i32.atomic.wait")) { + *out = WASM_INSN_I32_ATOMIC_WAIT; + return 1; + } + if (tok_is(t, "memory.atomic.wait64") || tok_is(t, "i64.atomic.wait")) { + *out = WASM_INSN_I64_ATOMIC_WAIT; + return 1; + } + if (tok_is(t, "memory.atomic.notify") || tok_is(t, "atomic.notify")) { + *out = WASM_INSN_MEMORY_ATOMIC_NOTIFY; return 1; } if (tok_is(t, "memory.size")) { @@ -1735,8 +1890,61 @@ static int wasm_insn_is_store(WasmInsnKind kind) { kind == WASM_INSN_I64_STORE32; } +static int wasm_insn_is_atomic_load(WasmInsnKind kind) { + return kind == WASM_INSN_I32_ATOMIC_LOAD || + kind == WASM_INSN_I64_ATOMIC_LOAD || + kind == WASM_INSN_I32_ATOMIC_LOAD8_U || + kind == WASM_INSN_I32_ATOMIC_LOAD16_U || + kind == WASM_INSN_I64_ATOMIC_LOAD8_U || + kind == WASM_INSN_I64_ATOMIC_LOAD16_U || + kind == WASM_INSN_I64_ATOMIC_LOAD32_U; +} + +static int wasm_insn_is_atomic_store(WasmInsnKind kind) { + return kind == WASM_INSN_I32_ATOMIC_STORE || + kind == WASM_INSN_I64_ATOMIC_STORE || + kind == WASM_INSN_I32_ATOMIC_STORE8 || + kind == WASM_INSN_I32_ATOMIC_STORE16 || + kind == WASM_INSN_I64_ATOMIC_STORE8 || + kind == WASM_INSN_I64_ATOMIC_STORE16 || + kind == WASM_INSN_I64_ATOMIC_STORE32; +} + +static int 
wasm_insn_is_atomic_rmw(WasmInsnKind kind) { + return kind == WASM_INSN_I32_ATOMIC_RMW_ADD || + kind == WASM_INSN_I64_ATOMIC_RMW_ADD || + kind == WASM_INSN_I32_ATOMIC_RMW_SUB || + kind == WASM_INSN_I64_ATOMIC_RMW_SUB || + kind == WASM_INSN_I32_ATOMIC_RMW_AND || + kind == WASM_INSN_I64_ATOMIC_RMW_AND || + kind == WASM_INSN_I32_ATOMIC_RMW_OR || + kind == WASM_INSN_I64_ATOMIC_RMW_OR || + kind == WASM_INSN_I32_ATOMIC_RMW_XOR || + kind == WASM_INSN_I64_ATOMIC_RMW_XOR || + kind == WASM_INSN_I32_ATOMIC_RMW_XCHG || + kind == WASM_INSN_I64_ATOMIC_RMW_XCHG; +} + +static int wasm_insn_is_atomic_cmpxchg(WasmInsnKind kind) { + return kind == WASM_INSN_I32_ATOMIC_RMW_CMPXCHG || + kind == WASM_INSN_I64_ATOMIC_RMW_CMPXCHG; +} + +static int wasm_insn_is_atomic_wait_notify(WasmInsnKind kind) { + return kind == WASM_INSN_I32_ATOMIC_WAIT || + kind == WASM_INSN_I64_ATOMIC_WAIT || + kind == WASM_INSN_MEMORY_ATOMIC_NOTIFY; +} + +static int wasm_insn_is_atomic_mem(WasmInsnKind kind) { + return wasm_insn_is_atomic_load(kind) || wasm_insn_is_atomic_store(kind) || + wasm_insn_is_atomic_rmw(kind) || wasm_insn_is_atomic_cmpxchg(kind) || + wasm_insn_is_atomic_wait_notify(kind); +} + static int wasm_insn_is_mem(WasmInsnKind kind) { - return wasm_insn_is_load(kind) || wasm_insn_is_store(kind); + return wasm_insn_is_load(kind) || wasm_insn_is_store(kind) || + wasm_insn_is_atomic_mem(kind); } static int wat_atom_prefix(WasmTok t, const char* prefix) { @@ -1871,6 +2079,23 @@ static void wat_reject_inline_result(WatParser* p, const char* what) { static void wat_parse_instr(WatParser* p, WasmFunc* f); +static void wat_check_instr_feature(WatParser* p, WasmInsnKind kind) { + if (kind == WASM_INSN_RETURN_CALL || + kind == WASM_INSN_RETURN_CALL_INDIRECT || + kind == WASM_INSN_RETURN_CALL_REF) + wat_require_feature(p, WASM_FEATURE_TAIL_CALLS, "tail calls", + "tail-call instruction"); + if (kind == WASM_INSN_REF_NULL || kind == WASM_INSN_REF_FUNC || + kind == WASM_INSN_REF_IS_NULL || kind == 
WASM_INSN_CALL_REF || + kind == WASM_INSN_RETURN_CALL_REF) + wat_require_feature(p, WASM_FEATURE_TYPED_FUNC_REFS, + "typed function references", + "typed-reference instruction"); + if (kind == WASM_INSN_ATOMIC_FENCE || wasm_insn_is_atomic_mem(kind)) + wat_require_feature(p, WASM_FEATURE_THREADS, "threads", + "atomic instruction"); +} + static uint32_t wat_parse_call_indirect_type(WatParser* p) { int64_t typeidx; wat_expect(p, WT_LPAREN, "'('"); @@ -1887,6 +2112,21 @@ static uint32_t wat_parse_call_indirect_type(WatParser* p) { return (uint32_t)typeidx; } +static void wat_parse_ref_null_type(WatParser* p, int64_t* out) { + if (tok_is(p->tok, "func") || tok_is(p->tok, "nofunc") || + tok_is(p->tok, "funcref")) { + *out = WASM_VAL_FUNCREF; + return; + } + if (tok_is(p->tok, "extern") || tok_is(p->tok, "noextern") || + tok_is(p->tok, "externref")) { + *out = WASM_VAL_EXTERNREF; + return; + } + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected reference type"); +} + static void wat_parse_instr_list(WatParser* p, WasmFunc* f) { WasmInsnKind kind; int has_imm; @@ -1974,6 +2214,7 @@ static void wat_parse_instr_list(WatParser* p, WasmFunc* f) { if (!wat_instr_kind(head, &kind, &has_imm)) wasm_error(p->c, wasm_loc(head.line, head.col), "wasm wat: unsupported instruction"); + wat_check_instr_feature(p, kind); wat_next(p); if (wasm_insn_is_mem(kind)) { uint32_t align = 0, memidx = 0; @@ -2032,6 +2273,27 @@ static void wat_parse_instr_list(WatParser* p, WasmFunc* f) { wat_expect(p, WT_RPAREN, "')'"); return; } + if (kind == WASM_INSN_CALL_REF || kind == WASM_INSN_RETURN_CALL_REF) { + uint32_t typeidx = wat_parse_call_indirect_type(p); + while (p->tok.kind == WT_LPAREN) wat_parse_instr(p, f); + wasm_func_add_insn(p->c, p->module, f, kind, typeidx); + wat_expect(p, WT_RPAREN, "')'"); + return; + } + if (kind == WASM_INSN_REF_NULL) { + wat_parse_ref_null_type(p, &imm); + wat_next(p); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_expect(p, 
WT_RPAREN, "')'"); + return; + } + if (kind == WASM_INSN_REF_FUNC) { + wat_parse_func_index(p, &imm); + wat_next(p); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_expect(p, WT_RPAREN, "')'"); + return; + } if (kind == WASM_INSN_MEMORY_SIZE || kind == WASM_INSN_MEMORY_GROW) { uint32_t memidx = 0; while (p->tok.kind == WT_LPAREN) { @@ -2101,6 +2363,7 @@ static void wat_parse_instr(WatParser* p, WasmFunc* f) { if (!wat_instr_kind(p->tok, &kind, &has_imm)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: unsupported instruction"); + wat_check_instr_feature(p, kind); wat_next(p); if (kind == WASM_INSN_BLOCK || kind == WASM_INSN_LOOP) wat_reject_inline_result(p, "block"); @@ -2141,6 +2404,25 @@ static void wat_parse_instr(WatParser* p, WasmFunc* f) { wasm_func_add_insn(p->c, p->module, f, kind, typeidx); return; } + if (kind == WASM_INSN_CALL_REF || kind == WASM_INSN_RETURN_CALL_REF) { + uint32_t typeidx = wat_parse_call_indirect_type(p); + wasm_func_add_insn(p->c, p->module, f, kind, typeidx); + return; + } + if (kind == WASM_INSN_REF_NULL) { + int64_t imm; + wat_parse_ref_null_type(p, &imm); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_next(p); + return; + } + if (kind == WASM_INSN_REF_FUNC) { + int64_t imm; + wat_parse_func_index(p, &imm); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_next(p); + return; + } if (kind == WASM_INSN_MEMORY_SIZE || kind == WASM_INSN_MEMORY_GROW) { uint32_t memidx = 0; WasmInsnKind next_kind; @@ -2243,7 +2525,7 @@ static void wat_parse_func(WatParser* p) { if (!wat_val_type(p->tok, &vt)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: expected parameter type"); - if (!wasm_is_num_type(vt)) + if (!wasm_is_frontend_value_type(vt)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: unsupported parameter type"); if (f->has_typeidx) { @@ -2283,7 +2565,7 @@ static void wat_parse_func(WatParser* p) { if (!wat_val_type(p->tok, &vt)) wasm_error(p->c, 
wasm_loc(p->tok.line, p->tok.col), "wasm wat: expected result type"); - if (!wasm_is_num_type(vt)) + if (!wasm_is_frontend_value_type(vt)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: unsupported result type"); if (f->has_typeidx) { @@ -2642,7 +2924,7 @@ static void wat_parse_import_field(WatParser* p) { wat_next(p); continue; } - if (!wat_val_type(p->tok, &vt) || !wasm_is_num_type(vt)) + if (!wat_val_type(p->tok, &vt) || !wasm_is_frontend_value_type(vt)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: expected parameter type"); if (f->nparams >= 16u) @@ -2655,7 +2937,7 @@ static void wat_parse_import_field(WatParser* p) { wat_next(p); while (p->tok.kind != WT_RPAREN && p->tok.kind != WT_EOF) { WasmValType vt; - if (!wat_val_type(p->tok, &vt) || !wasm_is_num_type(vt)) + if (!wat_val_type(p->tok, &vt) || !wasm_is_frontend_value_type(vt)) wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), "wasm wat: expected result type"); if (f->nresults >= 1u) @@ -3218,12 +3500,12 @@ static void wasm_decode_binary(CfreeCompiler* c, const CfreeBytesInput* input, if (nparam > 16u) wasm_error(c, wasm_loc(0, 0), "wasm: too many parameters"); t->nparams = nparam; - for (j = 0; j < nparam; ++j) t->params[j] = bin_val_type(&r, 0); + for (j = 0; j < nparam; ++j) t->params[j] = bin_val_type(&r, 1); nresult = bin_uleb(&r); if (nresult > 1u) wasm_error(c, wasm_loc(0, 0), "wasm: multi-result unsupported"); t->nresults = nresult; - for (j = 0; j < nresult; ++j) t->results[j] = bin_val_type(&r, 0); + for (j = 0; j < nresult; ++j) t->results[j] = bin_val_type(&r, 1); } } else if (id == 2) { uint32_t i, count = bin_uleb(&r); @@ -3454,7 +3736,7 @@ static void wasm_decode_binary(CfreeCompiler* c, const CfreeBytesInput* input, local_groups = bin_uleb(&r); for (j = 0; j < local_groups; ++j) { uint32_t k, nlocals = bin_uleb(&r); - WasmValType vt = (WasmValType)bin_u8(&r); + WasmValType vt = bin_val_type(&r, 1); if (nlocals > 32u || f->nlocals > 32u - nlocals) 
wasm_error(c, wasm_loc(0, 0), "wasm: too many locals"); for (k = 0; k < nlocals; ++k) f->locals[f->nlocals++] = vt; @@ -3617,6 +3899,14 @@ static void wasm_decode_binary(CfreeCompiler* c, const CfreeBytesInput* input, in->align = bin_uleb(&r); break; } + case 0x14: + wasm_func_add_insn(c, out, f, WASM_INSN_CALL_REF, + bin_uleb(&r)); + break; + case 0x15: + wasm_func_add_insn(c, out, f, WASM_INSN_RETURN_CALL_REF, + bin_uleb(&r)); + break; case 0x20: wasm_func_add_insn(c, out, f, WASM_INSN_LOCAL_GET, bin_uleb(&r)); break; @@ -3648,6 +3938,17 @@ static void wasm_decode_binary(CfreeCompiler* c, const CfreeBytesInput* input, wasm_func_add_fp_insn(c, out, f, WASM_INSN_F64_CONST, bin_f64(&r)); break; + case 0xd0: + wasm_func_add_insn(c, out, f, WASM_INSN_REF_NULL, + bin_val_type(&r, 1)); + break; + case 0xd1: + wasm_func_add_insn(c, out, f, WASM_INSN_REF_IS_NULL, 0); + break; + case 0xd2: + wasm_func_add_insn(c, out, f, WASM_INSN_REF_FUNC, + bin_uleb(&r)); + break; case 0x45: wasm_func_add_insn(c, out, f, WASM_INSN_I32_EQZ, 0); break; @@ -3957,6 +4258,202 @@ static void wasm_decode_binary(CfreeCompiler* c, const CfreeBytesInput* input, case 0xbf: wasm_func_add_insn(c, out, f, WASM_INSN_F64_REINTERPRET_I64, 0); break; + case 0xfe: { + uint32_t sub = bin_uleb(&r); + uint32_t ma, mi; + uint64_t mo; + switch (sub) { + case 0x00: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_MEMORY_ATOMIC_NOTIFY, ma, + mo, mi); + break; + case 0x01: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, WASM_INSN_I32_ATOMIC_WAIT, + ma, mo, mi); + break; + case 0x02: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, WASM_INSN_I64_ATOMIC_WAIT, + ma, mo, mi); + break; + case 0x03: + if (bin_u8(&r) != 0) + wasm_error(c, wasm_loc(0, 0), "wasm: bad atomic.fence"); + wasm_func_add_insn(c, out, f, WASM_INSN_ATOMIC_FENCE, 0); + break; + case 0x10: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, 
WASM_INSN_I32_ATOMIC_LOAD, + ma, mo, mi); + break; + case 0x11: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, WASM_INSN_I64_ATOMIC_LOAD, + ma, mo, mi); + break; + case 0x12: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_LOAD8_U, ma, mo, + mi); + break; + case 0x13: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_LOAD16_U, ma, mo, + mi); + break; + case 0x14: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_LOAD8_U, ma, mo, + mi); + break; + case 0x15: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_LOAD16_U, ma, mo, + mi); + break; + case 0x16: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_LOAD32_U, ma, mo, + mi); + break; + case 0x17: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, WASM_INSN_I32_ATOMIC_STORE, + ma, mo, mi); + break; + case 0x18: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, WASM_INSN_I64_ATOMIC_STORE, + ma, mo, mi); + break; + case 0x19: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_STORE8, ma, mo, + mi); + break; + case 0x1a: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_STORE16, ma, mo, + mi); + break; + case 0x1b: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_STORE8, ma, mo, + mi); + break; + case 0x1c: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_STORE16, ma, mo, + mi); + break; + case 0x1d: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_STORE32, ma, mo, + mi); + break; + case 0x1e: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_ADD, ma, mo, + mi); + break; + case 0x1f: + 
bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_ADD, ma, mo, + mi); + break; + case 0x25: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_SUB, ma, mo, + mi); + break; + case 0x26: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_SUB, ma, mo, + mi); + break; + case 0x2c: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_AND, ma, mo, + mi); + break; + case 0x2d: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_AND, ma, mo, + mi); + break; + case 0x33: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_OR, ma, mo, + mi); + break; + case 0x34: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_OR, ma, mo, + mi); + break; + case 0x3a: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_XOR, ma, mo, + mi); + break; + case 0x3b: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_XOR, ma, mo, + mi); + break; + case 0x41: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_XCHG, ma, mo, + mi); + break; + case 0x42: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_XCHG, ma, mo, + mi); + break; + case 0x48: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I32_ATOMIC_RMW_CMPXCHG, ma, + mo, mi); + break; + case 0x49: + bin_memarg(&r, &ma, &mo, &mi); + wasm_func_add_mem_insn(c, out, f, + WASM_INSN_I64_ATOMIC_RMW_CMPXCHG, ma, + mo, mi); + break; + default: + wasm_error(c, wasm_loc(0, 0), + "wasm: unsupported threads opcode 0x%x", sub); + } + break; + } default: wasm_error(c, wasm_loc(0, 0), "wasm: unsupported opcode 0x%02x", op); @@ -4027,7 +4524,7 @@ 
static CfreeCgTypeId wasm_cg_type(CfreeCompiler* c, CfreeCgBuiltinTypes b, return b.id[CFREE_CG_BUILTIN_F64]; case WASM_VAL_FUNCREF: case WASM_VAL_EXTERNREF: - break; + return cfree_cg_type_ptr(c, b.id[CFREE_CG_BUILTIN_VOID], 0); } wasm_error(c, wasm_loc(0, 0), "wasm: unsupported value type"); return b.id[CFREE_CG_BUILTIN_I32]; @@ -4056,7 +4553,9 @@ static CfreeCgMemAccess wasm_cg_mem_type(CfreeCgTypeId ty) { static void wasm_cg_push_zero(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b, WasmValType vt) { CfreeCgTypeId ty = wasm_cg_type(c, b, vt); - if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) + if (wasm_is_ref_type(vt)) + cfree_cg_push_null(cg, ty); + else if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) cfree_cg_push_float(cg, 0.0, ty); else cfree_cg_push_int(cg, 0, ty); @@ -4109,6 +4608,10 @@ static uint32_t wasm_mem_width(uint8_t kind) { case WASM_INSN_I64_LOAD8_U: case WASM_INSN_I32_STORE8: case WASM_INSN_I64_STORE8: + case WASM_INSN_I32_ATOMIC_LOAD8_U: + case WASM_INSN_I64_ATOMIC_LOAD8_U: + case WASM_INSN_I32_ATOMIC_STORE8: + case WASM_INSN_I64_ATOMIC_STORE8: return 1; case WASM_INSN_I32_LOAD16_S: case WASM_INSN_I32_LOAD16_U: @@ -4116,12 +4619,29 @@ static uint32_t wasm_mem_width(uint8_t kind) { case WASM_INSN_I64_LOAD16_U: case WASM_INSN_I32_STORE16: case WASM_INSN_I64_STORE16: + case WASM_INSN_I32_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD16_U: + case WASM_INSN_I32_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE16: return 2; case WASM_INSN_I32_LOAD: case WASM_INSN_I64_LOAD32_S: case WASM_INSN_I64_LOAD32_U: case WASM_INSN_I32_STORE: case WASM_INSN_I64_STORE32: + case WASM_INSN_I32_ATOMIC_LOAD: + case WASM_INSN_I64_ATOMIC_LOAD32_U: + case WASM_INSN_I32_ATOMIC_STORE: + case WASM_INSN_I64_ATOMIC_STORE32: + case WASM_INSN_I32_ATOMIC_RMW_ADD: + case WASM_INSN_I32_ATOMIC_RMW_SUB: + case WASM_INSN_I32_ATOMIC_RMW_AND: + case WASM_INSN_I32_ATOMIC_RMW_OR: + case WASM_INSN_I32_ATOMIC_RMW_XOR: + case WASM_INSN_I32_ATOMIC_RMW_XCHG: + case 
WASM_INSN_I32_ATOMIC_RMW_CMPXCHG: + case WASM_INSN_I32_ATOMIC_WAIT: + case WASM_INSN_MEMORY_ATOMIC_NOTIFY: return 4; default: return 8; @@ -4155,6 +4675,7 @@ typedef struct WasmCgRuntime { uint32_t memory_max_pages_field; uint32_t memory_flags_field; uint32_t func_import_field[64]; + uint32_t func_ref_entry_field[64]; uint32_t global_field[64]; uint32_t global_import_addr_field; uint32_t table_field[64]; @@ -4319,6 +4840,16 @@ static void wasm_cg_build_runtime(CfreeCompiler* c, CfreeCgBuiltinTypes b, instance_fields[nfields].type = rt->func_import_ty; nfields++; } + for (uint32_t i = 0; i < m->nfuncs; ++i) { + char name[40]; + if (nfields >= 256u) + wasm_error(c, wasm_loc(0, 0), "wasm: instance layout too large"); + wasm_indexed_name(name, sizeof name, "func_ref_", i); + rt->func_ref_entry_field[i] = nfields; + instance_fields[nfields].name = cfree_sym_intern(c, name); + instance_fields[nfields].type = rt->table_entry_ty; + nfields++; + } for (uint32_t i = 0; i < m->nglobals; ++i) { char name[32]; if (nfields >= 256u) @@ -4416,6 +4947,13 @@ static void wasm_cg_push_import_func_ptr(CfreeCg* cg, const WasmCgRuntime* rt, cfree_cg_load(cg, wasm_cg_mem_type(rt->void_ptr_ty)); } +static void wasm_cg_push_func_ref_lvalue(CfreeCg* cg, const WasmCgRuntime* rt, + CfreeCgLocal instance_local, + uint32_t func_index) { + wasm_cg_push_instance_lvalue(cg, rt, instance_local); + cfree_cg_field(cg, rt->func_ref_entry_field[func_index]); +} + static void wasm_cg_push_global_value_lvalue(CfreeCompiler* c, CfreeCg* cg, CfreeCgBuiltinTypes b, const WasmCgRuntime* rt, @@ -4835,6 +5373,10 @@ static WasmValType wasm_load_result_type(uint8_t kind) { case WASM_INSN_I64_LOAD16_U: case WASM_INSN_I64_LOAD32_S: case WASM_INSN_I64_LOAD32_U: + case WASM_INSN_I64_ATOMIC_LOAD: + case WASM_INSN_I64_ATOMIC_LOAD8_U: + case WASM_INSN_I64_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD32_U: return WASM_VAL_I64; default: return WASM_VAL_I32; @@ -4847,12 +5389,62 @@ static WasmValType 
wasm_store_value_type(uint8_t kind) { case WASM_INSN_I64_STORE8: case WASM_INSN_I64_STORE16: case WASM_INSN_I64_STORE32: + case WASM_INSN_I64_ATOMIC_STORE: + case WASM_INSN_I64_ATOMIC_STORE8: + case WASM_INSN_I64_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE32: + return WASM_VAL_I64; + default: + return WASM_VAL_I32; + } +} + +static WasmValType wasm_atomic_value_type(uint8_t kind) { + switch (kind) { + case WASM_INSN_I64_ATOMIC_LOAD: + case WASM_INSN_I64_ATOMIC_LOAD8_U: + case WASM_INSN_I64_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD32_U: + case WASM_INSN_I64_ATOMIC_STORE: + case WASM_INSN_I64_ATOMIC_STORE8: + case WASM_INSN_I64_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE32: + case WASM_INSN_I64_ATOMIC_RMW_ADD: + case WASM_INSN_I64_ATOMIC_RMW_SUB: + case WASM_INSN_I64_ATOMIC_RMW_AND: + case WASM_INSN_I64_ATOMIC_RMW_OR: + case WASM_INSN_I64_ATOMIC_RMW_XOR: + case WASM_INSN_I64_ATOMIC_RMW_XCHG: + case WASM_INSN_I64_ATOMIC_RMW_CMPXCHG: + case WASM_INSN_I64_ATOMIC_WAIT: return WASM_VAL_I64; default: return WASM_VAL_I32; } } +static CfreeCgAtomicOp wasm_atomic_rmw_op(uint8_t kind) { + switch (kind) { + case WASM_INSN_I32_ATOMIC_RMW_ADD: + case WASM_INSN_I64_ATOMIC_RMW_ADD: + return CFREE_CG_ATOMIC_ADD; + case WASM_INSN_I32_ATOMIC_RMW_SUB: + case WASM_INSN_I64_ATOMIC_RMW_SUB: + return CFREE_CG_ATOMIC_SUB; + case WASM_INSN_I32_ATOMIC_RMW_AND: + case WASM_INSN_I64_ATOMIC_RMW_AND: + return CFREE_CG_ATOMIC_AND; + case WASM_INSN_I32_ATOMIC_RMW_OR: + case WASM_INSN_I64_ATOMIC_RMW_OR: + return CFREE_CG_ATOMIC_OR; + case WASM_INSN_I32_ATOMIC_RMW_XOR: + case WASM_INSN_I64_ATOMIC_RMW_XOR: + return CFREE_CG_ATOMIC_XOR; + default: + return CFREE_CG_ATOMIC_XCHG; + } +} + static int wasm_int_unop_kind(uint8_t kind, WasmValType* vt) { if (kind == WASM_INSN_I32_CLZ || kind == WASM_INSN_I32_CTZ || kind == WASM_INSN_I32_POPCNT) { @@ -5025,6 +5617,29 @@ static int wasm_stack_pop(CfreeCompiler* c, WasmValStack* s, return 1; } +static WasmValType 
wasm_stack_pop_any(CfreeCompiler* c, WasmValStack* s, + WasmControlFrame* frames, + uint32_t nframes, const char* what) { + WasmControlFrame* top = &frames[nframes - 1u]; + WasmValType vt; + if (s->depth <= top->height) { + if (top->unreachable) return WASM_VAL_I32; + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + } + vt = s->vals[s->depth - 1u]; + if (!vt) wasm_error(c, wasm_loc(0, 0), "wasm: %s type mismatch", what); + s->depth--; + return vt; +} + +static void wasm_stack_pop_ref(CfreeCompiler* c, WasmValStack* s, + WasmControlFrame* frames, uint32_t nframes, + const char* what) { + WasmValType vt = wasm_stack_pop_any(c, s, frames, nframes, what); + if (!wasm_is_ref_type(vt)) + wasm_error(c, wasm_loc(0, 0), "wasm: %s type mismatch", what); +} + static void wasm_mark_unreachable(WasmValStack* s, WasmControlFrame* frames, uint32_t nframes) { WasmControlFrame* top = &frames[nframes - 1u]; @@ -5036,10 +5651,10 @@ static void wasm_validate(WasmModule* m, CfreeCompiler* c) { uint32_t i, j; for (i = 0; i < m->ntypes; ++i) { for (j = 0; j < m->types[i].nparams; ++j) - if (!wasm_is_num_type(m->types[i].params[j])) + if (!wasm_is_frontend_value_type(m->types[i].params[j])) wasm_error(c, wasm_loc(0, 0), "wasm: unsupported parameter type"); for (j = 0; j < m->types[i].nresults; ++j) - if (!wasm_is_num_type(m->types[i].results[j])) + if (!wasm_is_frontend_value_type(m->types[i].results[j])) wasm_error(c, wasm_loc(0, 0), "wasm: unsupported result type"); } for (i = 0; i < m->nmemories; ++i) { @@ -5147,6 +5762,8 @@ static void wasm_validate(WasmModule* m, CfreeCompiler* c) { if (in->imm < 0 || (uint64_t)in->imm >= m->nfuncs) wasm_error(c, wasm_loc(0, 0), "wasm: call index out of range"); if (in->kind == WASM_INSN_RETURN_CALL) { + wasm_require_feature(c, m, WASM_FEATURE_TAIL_CALLS, "tail calls", + "return_call"); if (m->funcs[in->imm].nresults != f->nresults || (f->nresults && m->funcs[in->imm].results[0] != f->results[0])) @@ -5185,6 +5802,8 @@ static void 
wasm_validate(WasmModule* m, CfreeCompiler* c) { } case WASM_INSN_RETURN_CALL_INDIRECT: { WasmFuncType* t; + wasm_require_feature(c, m, WASM_FEATURE_TAIL_CALLS, "tail calls", + "return_call_indirect"); if (in->imm < 0 || (uint64_t)in->imm >= m->ntypes) wasm_error(c, wasm_loc(0, 0), "wasm: return_call_indirect type index out of range"); @@ -5206,6 +5825,59 @@ static void wasm_validate(WasmModule* m, CfreeCompiler* c) { wasm_mark_unreachable(&stack, control, ncontrol); break; } + case WASM_INSN_REF_NULL: + wasm_require_feature(c, m, WASM_FEATURE_TYPED_FUNC_REFS, + "typed function references", "ref.null"); + if (!wasm_is_ref_type((WasmValType)in->imm)) + wasm_error(c, wasm_loc(0, 0), "wasm: bad ref.null type"); + if ((WasmValType)in->imm != WASM_VAL_FUNCREF) + wasm_error(c, wasm_loc(0, 0), + "wasm: unsupported reference type"); + wasm_stack_push(c, &stack, (WasmValType)in->imm); + break; + case WASM_INSN_REF_FUNC: + wasm_require_feature(c, m, WASM_FEATURE_TYPED_FUNC_REFS, + "typed function references", "ref.func"); + if (in->imm < 0 || (uint64_t)in->imm >= m->nfuncs) + wasm_error(c, wasm_loc(0, 0), + "wasm: ref.func index out of range"); + wasm_stack_push(c, &stack, WASM_VAL_FUNCREF); + break; + case WASM_INSN_REF_IS_NULL: + wasm_require_feature(c, m, WASM_FEATURE_TYPED_FUNC_REFS, + "typed function references", "ref.is_null"); + wasm_stack_pop_ref(c, &stack, control, ncontrol, "ref.is_null"); + wasm_stack_push(c, &stack, WASM_VAL_I32); + break; + case WASM_INSN_CALL_REF: + case WASM_INSN_RETURN_CALL_REF: { + WasmFuncType* t; + wasm_require_feature(c, m, WASM_FEATURE_TYPED_FUNC_REFS, + "typed function references", "call_ref"); + if (in->kind == WASM_INSN_RETURN_CALL_REF) + wasm_require_feature(c, m, WASM_FEATURE_TAIL_CALLS, "tail calls", + "return_call_ref"); + if (in->imm < 0 || (uint64_t)in->imm >= m->ntypes) + wasm_error(c, wasm_loc(0, 0), + "wasm: call_ref type index out of range"); + t = &m->types[in->imm]; + if (in->kind == WASM_INSN_RETURN_CALL_REF && + 
(t->nresults != f->nresults || + (f->nresults && t->results[0] != f->results[0]))) + wasm_error(c, wasm_loc(0, 0), + "wasm: return_call_ref result type mismatch"); + wasm_stack_pop_ref(c, &stack, control, ncontrol, "call_ref callee"); + for (uint32_t k = 0; k < t->nparams; ++k) { + uint32_t param = t->nparams - 1u - k; + wasm_stack_pop(c, &stack, control, ncontrol, t->params[param], + "call_ref argument"); + } + if (in->kind == WASM_INSN_RETURN_CALL_REF) + wasm_mark_unreachable(&stack, control, ncontrol); + else if (t->nresults) + wasm_stack_push(c, &stack, t->results[0]); + break; + } case WASM_INSN_GLOBAL_GET: if (in->imm < 0 || (uint64_t)in->imm >= m->nglobals) wasm_error(c, wasm_loc(0, 0), "wasm: global index out of range"); @@ -5332,6 +6004,138 @@ static void wasm_validate(WasmModule* m, CfreeCompiler* c) { m->memories[in->memidx].is64 ? WASM_VAL_I64 : WASM_VAL_I32); break; + case WASM_INSN_ATOMIC_FENCE: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic.fence"); + break; + case WASM_INSN_I32_ATOMIC_LOAD: + case WASM_INSN_I64_ATOMIC_LOAD: + case WASM_INSN_I32_ATOMIC_LOAD8_U: + case WASM_INSN_I32_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD8_U: + case WASM_INSN_I64_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD32_U: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic load"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), "wasm: atomic load without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic load requires shared memory"); + if (in->align && in->align > wasm_mem_width(in->kind)) + wasm_error(c, wasm_loc(0, 0), "wasm: bad atomic alignment"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? 
WASM_VAL_I64 + : WASM_VAL_I32, + "atomic load"); + wasm_stack_push(c, &stack, wasm_atomic_value_type(in->kind)); + break; + case WASM_INSN_I32_ATOMIC_STORE: + case WASM_INSN_I64_ATOMIC_STORE: + case WASM_INSN_I32_ATOMIC_STORE8: + case WASM_INSN_I32_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE8: + case WASM_INSN_I64_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE32: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic store"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), "wasm: atomic store without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic store requires shared memory"); + if (in->align && in->align > wasm_mem_width(in->kind)) + wasm_error(c, wasm_loc(0, 0), "wasm: bad atomic alignment"); + wasm_stack_pop(c, &stack, control, ncontrol, + wasm_atomic_value_type(in->kind), "atomic store"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? WASM_VAL_I64 + : WASM_VAL_I32, + "atomic store"); + break; + case WASM_INSN_I32_ATOMIC_RMW_ADD: + case WASM_INSN_I64_ATOMIC_RMW_ADD: + case WASM_INSN_I32_ATOMIC_RMW_SUB: + case WASM_INSN_I64_ATOMIC_RMW_SUB: + case WASM_INSN_I32_ATOMIC_RMW_AND: + case WASM_INSN_I64_ATOMIC_RMW_AND: + case WASM_INSN_I32_ATOMIC_RMW_OR: + case WASM_INSN_I64_ATOMIC_RMW_OR: + case WASM_INSN_I32_ATOMIC_RMW_XOR: + case WASM_INSN_I64_ATOMIC_RMW_XOR: + case WASM_INSN_I32_ATOMIC_RMW_XCHG: + case WASM_INSN_I64_ATOMIC_RMW_XCHG: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic rmw"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), "wasm: atomic rmw without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic rmw requires shared memory"); + wasm_stack_pop(c, &stack, control, ncontrol, + wasm_atomic_value_type(in->kind), "atomic rmw"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? 
WASM_VAL_I64 + : WASM_VAL_I32, + "atomic rmw"); + wasm_stack_push(c, &stack, wasm_atomic_value_type(in->kind)); + break; + case WASM_INSN_I32_ATOMIC_RMW_CMPXCHG: + case WASM_INSN_I64_ATOMIC_RMW_CMPXCHG: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic cmpxchg"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic cmpxchg without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic cmpxchg requires shared memory"); + wasm_stack_pop(c, &stack, control, ncontrol, + wasm_atomic_value_type(in->kind), "atomic cmpxchg"); + wasm_stack_pop(c, &stack, control, ncontrol, + wasm_atomic_value_type(in->kind), "atomic cmpxchg"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? WASM_VAL_I64 + : WASM_VAL_I32, + "atomic cmpxchg"); + wasm_stack_push(c, &stack, wasm_atomic_value_type(in->kind)); + break; + case WASM_INSN_I32_ATOMIC_WAIT: + case WASM_INSN_I64_ATOMIC_WAIT: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic wait"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), "wasm: atomic wait without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic wait requires shared memory"); + wasm_stack_pop(c, &stack, control, ncontrol, WASM_VAL_I64, + "atomic wait timeout"); + wasm_stack_pop(c, &stack, control, ncontrol, + wasm_atomic_value_type(in->kind), + "atomic wait expected"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? 
WASM_VAL_I64 + : WASM_VAL_I32, + "atomic wait address"); + wasm_stack_push(c, &stack, WASM_VAL_I32); + break; + case WASM_INSN_MEMORY_ATOMIC_NOTIFY: + wasm_require_feature(c, m, WASM_FEATURE_THREADS, "threads", + "atomic notify"); + if (in->memidx >= m->nmemories) + wasm_error(c, wasm_loc(0, 0), "wasm: atomic notify without memory"); + if (!m->memories[in->memidx].shared) + wasm_error(c, wasm_loc(0, 0), + "wasm: atomic notify requires shared memory"); + wasm_stack_pop(c, &stack, control, ncontrol, WASM_VAL_I32, + "atomic notify count"); + wasm_stack_pop(c, &stack, control, ncontrol, + m->memories[in->memidx].is64 ? WASM_VAL_I64 + : WASM_VAL_I32, + "atomic notify address"); + wasm_stack_push(c, &stack, WASM_VAL_I32); + break; case WASM_INSN_I32_LOAD: case WASM_INSN_I64_LOAD: case WASM_INSN_I32_LOAD8_S: @@ -5476,16 +6280,14 @@ static void wasm_cg_call_func(CfreeCompiler* c, CfreeCg* cg, if (f->is_import) cfree_cg_call(cg, f->nparams + 1u, func_type, (CfreeCgCallAttrs){ - .tail = CFREE_CG_TAIL_DEFAULT}); + .tail = must_tail ? CFREE_CG_TAIL_MUST + : CFREE_CG_TAIL_DEFAULT}); else cfree_cg_call_symbol(cg, sym, f->nparams + 1u, (CfreeCgCallAttrs){ - .tail = CFREE_CG_TAIL_DEFAULT}); + .tail = must_tail ? 
CFREE_CG_TAIL_MUST + : CFREE_CG_TAIL_DEFAULT}); if (must_tail) { - if (f->nresults) - cfree_cg_ret(cg); - else - cfree_cg_ret_void(cg); if (f->nresults) wasm_cg_push_zero(c, cg, b, f->results[0]); cfree_cg_unreachable(cg); } @@ -5640,6 +6442,21 @@ static void wasm_emit_cg(CfreeCompiler* c, const CfreeCompileOptions* opts, cfree_cg_memcpy(cg, m->memories[i].data_init_len, byte_mem, byte_mem); } } + for (i = 0; i < m->nfuncs; ++i) { + wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i); + cfree_cg_field(cg, rt.table_entry_fn_field); + if (m->funcs[i].is_import) { + wasm_cg_push_import_func_ptr(cg, &rt, instance_local, i); + } else { + cfree_cg_push_symbol_addr(cg, syms[i], 0); + cfree_cg_bitcast(cg, rt.void_ptr_ty); + } + cfree_cg_store(cg, wasm_cg_mem_type(rt.void_ptr_ty)); + wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, i); + cfree_cg_field(cg, rt.table_entry_typeidx_field); + cfree_cg_push_int(cg, m->funcs[i].typeidx, b.id[CFREE_CG_BUILTIN_I32]); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + } for (i = 0; i < m->ntables; ++i) { const WasmTable* t = &m->tables[i]; uint32_t max = t->has_max ? t->max : t->min; @@ -5931,7 +6748,8 @@ static void wasm_emit_cg(CfreeCompiler* c, const CfreeCompileOptions* opts, (uint32_t)in.imm, in.kind == WASM_INSN_RETURN_CALL); break; - case WASM_INSN_CALL_INDIRECT: { + case WASM_INSN_CALL_INDIRECT: + case WASM_INSN_RETURN_CALL_INDIRECT: { const WasmFuncType* t = &m->types[in.imm]; CfreeCgLocalAttrs attrs; CfreeCgLocal selector, callee, args[16], result = CFREE_CG_LOCAL_NONE; @@ -6019,12 +6837,124 @@ static void wasm_emit_cg(CfreeCompiler* c, const CfreeCompileOptions* opts, cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p])); } cfree_cg_call(cg, t->nparams + 1u, indirect_func_type, - (CfreeCgCallAttrs){.tail = CFREE_CG_TAIL_DEFAULT}); + (CfreeCgCallAttrs){ + .tail = in.kind == WASM_INSN_RETURN_CALL_INDIRECT + ? 
CFREE_CG_TAIL_MUST + : CFREE_CG_TAIL_DEFAULT}); if (in.kind == WASM_INSN_RETURN_CALL_INDIRECT) { - if (t->nresults) - cfree_cg_ret(cg); - else - cfree_cg_ret_void(cg); + if (t->nresults) wasm_cg_push_zero(c, cg, b, t->results[0]); + cfree_cg_unreachable(cg); + } else if (t->nresults) { + cfree_cg_push_local(cg, result); + cfree_cg_swap(cg); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->results[0])); + cfree_cg_push_local(cg, result); + cfree_cg_load(cg, wasm_cg_mem(c, b, t->results[0])); + } + break; + } + case WASM_INSN_REF_NULL: + cfree_cg_push_null(cg, rt.void_ptr_ty); + break; + case WASM_INSN_REF_FUNC: + wasm_cg_push_func_ref_lvalue(cg, &rt, instance_local, + (uint32_t)in.imm); + cfree_cg_addr(cg); + cfree_cg_bitcast(cg, rt.void_ptr_ty); + break; + case WASM_INSN_REF_IS_NULL: + cfree_cg_push_null(cg, rt.void_ptr_ty); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + break; + case WASM_INSN_CALL_REF: + case WASM_INSN_RETURN_CALL_REF: { + const WasmFuncType* t = &m->types[in.imm]; + CfreeCgLocalAttrs attrs; + CfreeCgLocal callee_ref, callee, args[16], + result = CFREE_CG_LOCAL_NONE; + CfreeCgLabel ok; + CfreeCgMemAccess ref_mem = wasm_cg_mem_type(rt.void_ptr_ty); + CfreeCgMemAccess i32_mem = wasm_cg_mem(c, b, WASM_VAL_I32); + CfreeCgFuncParam ref_params[17]; + CfreeCgFuncSig ref_sig; + CfreeCgTypeId ref_func_type; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + if (t->nparams > 16u) + wasm_error(c, wasm_loc(0, 0), "wasm: too many call_ref args"); + callee_ref = cfree_cg_local(cg, rt.void_ptr_ty, attrs); + callee = cfree_cg_local(cg, rt.void_ptr_ty, attrs); + cfree_cg_push_local(cg, callee_ref); + cfree_cg_swap(cg); + cfree_cg_store(cg, ref_mem); + for (uint32_t p = 0; p < t->nparams; ++p) { + uint32_t param = t->nparams - 1u - p; + args[param] = + cfree_cg_local(cg, wasm_cg_type(c, b, t->params[param]), attrs); + cfree_cg_push_local(cg, args[param]); + cfree_cg_swap(cg); + cfree_cg_store(cg, wasm_cg_mem(c, b, t->params[param])); + } + ok 
= cfree_cg_label_new(cg); + cfree_cg_push_local(cg, callee_ref); + cfree_cg_load(cg, ref_mem); + cfree_cg_push_null(cg, rt.void_ptr_ty); + cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); + cfree_cg_branch_true(cg, ok); + wasm_cg_trap_table(cg, &rt); + cfree_cg_label_place(cg, ok); + + ok = cfree_cg_label_new(cg); + cfree_cg_push_local(cg, callee_ref); + cfree_cg_load(cg, ref_mem); + cfree_cg_bitcast(cg, rt.table_entry_ptr_ty); + cfree_cg_indirect(cg); + cfree_cg_field(cg, rt.table_entry_typeidx_field); + cfree_cg_load(cg, i32_mem); + cfree_cg_push_int(cg, (uint32_t)in.imm, b.id[CFREE_CG_BUILTIN_I32]); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + cfree_cg_branch_true(cg, ok); + wasm_cg_trap_signature(cg, &rt); + cfree_cg_label_place(cg, ok); + + cfree_cg_push_local(cg, callee); + cfree_cg_push_local(cg, callee_ref); + cfree_cg_load(cg, ref_mem); + cfree_cg_bitcast(cg, rt.table_entry_ptr_ty); + cfree_cg_indirect(cg); + cfree_cg_field(cg, rt.table_entry_fn_field); + cfree_cg_load(cg, ref_mem); + cfree_cg_store(cg, ref_mem); + + memset(ref_params, 0, sizeof ref_params); + ref_params[0].type = rt.instance_ptr_ty; + for (uint32_t p = 0; p < t->nparams; ++p) + ref_params[p + 1u].type = wasm_cg_type(c, b, t->params[p]); + memset(&ref_sig, 0, sizeof ref_sig); + ref_sig.ret = t->nresults ? 
wasm_cg_type(c, b, t->results[0]) + : b.id[CFREE_CG_BUILTIN_VOID]; + ref_sig.params = ref_params; + ref_sig.nparams = t->nparams + 1u; + ref_sig.call_conv = CFREE_CG_CC_TARGET_C; + ref_func_type = cfree_cg_type_func(c, ref_sig); + if (t->nresults) + result = + cfree_cg_local(cg, wasm_cg_type(c, b, t->results[0]), attrs); + cfree_cg_push_local(cg, callee); + cfree_cg_load(cg, ref_mem); + cfree_cg_bitcast(cg, cfree_cg_type_ptr(c, ref_func_type, 0)); + cfree_cg_push_local(cg, instance_local); + cfree_cg_load(cg, wasm_cg_mem_type(rt.instance_ptr_ty)); + for (uint32_t p = 0; p < t->nparams; ++p) { + cfree_cg_push_local(cg, args[p]); + cfree_cg_load(cg, wasm_cg_mem(c, b, t->params[p])); + } + cfree_cg_call(cg, t->nparams + 1u, ref_func_type, + (CfreeCgCallAttrs){ + .tail = in.kind == WASM_INSN_RETURN_CALL_REF + ? CFREE_CG_TAIL_MUST + : CFREE_CG_TAIL_DEFAULT}); + if (in.kind == WASM_INSN_RETURN_CALL_REF) { if (t->nresults) wasm_cg_push_zero(c, cg, b, t->results[0]); cfree_cg_unreachable(cg); } else if (t->nresults) { @@ -6127,6 +7057,178 @@ static void wasm_emit_cg(CfreeCompiler* c, const CfreeCompileOptions* opts, cfree_cg_load(cg, wasm_cg_mem(c, b, page_vt)); break; } + case WASM_INSN_ATOMIC_FENCE: + cfree_cg_atomic_fence(cg, CFREE_CG_MO_SEQ_CST); + break; + case WASM_INSN_I32_ATOMIC_LOAD: + case WASM_INSN_I64_ATOMIC_LOAD: + case WASM_INSN_I32_ATOMIC_LOAD8_U: + case WASM_INSN_I32_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD8_U: + case WASM_INSN_I64_ATOMIC_LOAD16_U: + case WASM_INSN_I64_ATOMIC_LOAD32_U: { + CfreeCgTypeId ty = wasm_cg_type(c, b, wasm_atomic_value_type(in.kind)); + CfreeCgMemAccess mem = wasm_cg_mem_type(ty); + mem.align = in.align; + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, + in.offset64); + cfree_cg_addr(cg); + cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST); + break; + } + case WASM_INSN_I32_ATOMIC_STORE: + case WASM_INSN_I64_ATOMIC_STORE: + case 
WASM_INSN_I32_ATOMIC_STORE8: + case WASM_INSN_I32_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE8: + case WASM_INSN_I64_ATOMIC_STORE16: + case WASM_INSN_I64_ATOMIC_STORE32: { + WasmValType vt = wasm_atomic_value_type(in.kind); + CfreeCgTypeId ty = wasm_cg_type(c, b, vt); + CfreeCgMemAccess mem = wasm_cg_mem_type(ty); + CfreeCgLocalAttrs attrs; + CfreeCgLocal value_tmp; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + mem.align = in.align; + value_tmp = cfree_cg_local(cg, ty, attrs); + cfree_cg_push_local(cg, value_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, mem); + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, + in.offset64); + cfree_cg_addr(cg); + cfree_cg_push_local(cg, value_tmp); + cfree_cg_load(cg, mem); + cfree_cg_atomic_store(cg, mem, CFREE_CG_MO_SEQ_CST); + break; + } + case WASM_INSN_I32_ATOMIC_RMW_ADD: + case WASM_INSN_I64_ATOMIC_RMW_ADD: + case WASM_INSN_I32_ATOMIC_RMW_SUB: + case WASM_INSN_I64_ATOMIC_RMW_SUB: + case WASM_INSN_I32_ATOMIC_RMW_AND: + case WASM_INSN_I64_ATOMIC_RMW_AND: + case WASM_INSN_I32_ATOMIC_RMW_OR: + case WASM_INSN_I64_ATOMIC_RMW_OR: + case WASM_INSN_I32_ATOMIC_RMW_XOR: + case WASM_INSN_I64_ATOMIC_RMW_XOR: + case WASM_INSN_I32_ATOMIC_RMW_XCHG: + case WASM_INSN_I64_ATOMIC_RMW_XCHG: { + WasmValType vt = wasm_atomic_value_type(in.kind); + CfreeCgTypeId ty = wasm_cg_type(c, b, vt); + CfreeCgMemAccess mem = wasm_cg_mem_type(ty); + CfreeCgLocalAttrs attrs; + CfreeCgLocal value_tmp; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + mem.align = in.align; + value_tmp = cfree_cg_local(cg, ty, attrs); + cfree_cg_push_local(cg, value_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, mem); + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, + in.offset64); + cfree_cg_addr(cg); + cfree_cg_push_local(cg, value_tmp); + cfree_cg_load(cg, 
mem); + cfree_cg_atomic_rmw(cg, mem, wasm_atomic_rmw_op(in.kind), + CFREE_CG_MO_SEQ_CST); + break; + } + case WASM_INSN_I32_ATOMIC_RMW_CMPXCHG: + case WASM_INSN_I64_ATOMIC_RMW_CMPXCHG: { + WasmValType vt = wasm_atomic_value_type(in.kind); + CfreeCgTypeId ty = wasm_cg_type(c, b, vt); + CfreeCgMemAccess mem = wasm_cg_mem_type(ty); + CfreeCgLocalAttrs attrs; + CfreeCgLocal expected_tmp, desired_tmp; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + mem.align = in.align; + desired_tmp = cfree_cg_local(cg, ty, attrs); + expected_tmp = cfree_cg_local(cg, ty, attrs); + cfree_cg_push_local(cg, desired_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, mem); + cfree_cg_push_local(cg, expected_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, mem); + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, + in.offset64); + cfree_cg_addr(cg); + cfree_cg_push_local(cg, expected_tmp); + cfree_cg_load(cg, mem); + cfree_cg_push_local(cg, desired_tmp); + cfree_cg_load(cg, mem); + cfree_cg_atomic_cmpxchg(cg, mem, CFREE_CG_MO_SEQ_CST, + CFREE_CG_MO_SEQ_CST, 0); + cfree_cg_drop(cg); + break; + } + case WASM_INSN_I32_ATOMIC_WAIT: + case WASM_INSN_I64_ATOMIC_WAIT: { + WasmValType vt = wasm_atomic_value_type(in.kind); + CfreeCgTypeId ty = wasm_cg_type(c, b, vt); + CfreeCgMemAccess mem = wasm_cg_mem_type(ty); + CfreeCgLocalAttrs attrs; + CfreeCgLocal timeout_tmp, expected_tmp, result_tmp; + CfreeCgLabel equal = cfree_cg_label_new(cg); + CfreeCgLabel done = cfree_cg_label_new(cg); + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + mem.align = in.align; + timeout_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I64], attrs); + expected_tmp = cfree_cg_local(cg, ty, attrs); + result_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs); + cfree_cg_push_local(cg, timeout_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I64)); + 
cfree_cg_push_local(cg, expected_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, mem); + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + wasm_cg_memory_lvalue(cg, &rt, instance_local, in.memidx, + in.offset64); + cfree_cg_addr(cg); + cfree_cg_atomic_load(cg, mem, CFREE_CG_MO_SEQ_CST); + cfree_cg_push_local(cg, expected_tmp); + cfree_cg_load(cg, mem); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + cfree_cg_branch_true(cg, equal); + cfree_cg_push_local(cg, result_tmp); + cfree_cg_push_int(cg, 1, b.id[CFREE_CG_BUILTIN_I32]); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_jump(cg, done); + cfree_cg_label_place(cg, equal); + (void)timeout_tmp; + cfree_cg_push_local(cg, result_tmp); + cfree_cg_push_int(cg, 2, b.id[CFREE_CG_BUILTIN_I32]); + cfree_cg_store(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + cfree_cg_label_place(cg, done); + cfree_cg_push_local(cg, result_tmp); + cfree_cg_load(cg, wasm_cg_mem(c, b, WASM_VAL_I32)); + break; + } + case WASM_INSN_MEMORY_ATOMIC_NOTIFY: { + CfreeCgMemAccess i32_mem = wasm_cg_mem(c, b, WASM_VAL_I32); + CfreeCgLocalAttrs attrs; + CfreeCgLocal count_tmp; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + count_tmp = cfree_cg_local(cg, b.id[CFREE_CG_BUILTIN_I32], attrs); + cfree_cg_push_local(cg, count_tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, i32_mem); + wasm_cg_memory_check(c, cg, b, m, &rt, instance_local, &in); + cfree_cg_drop(cg); + cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]); + break; + } case WASM_INSN_I32_LOAD: case WASM_INSN_I64_LOAD: case WASM_INSN_I32_LOAD8_S: @@ -6676,6 +7778,16 @@ static uint8_t wasm_opcode(uint8_t kind) { return 0x12; case WASM_INSN_RETURN_CALL_INDIRECT: return 0x13; + case WASM_INSN_CALL_REF: + return 0x14; + case WASM_INSN_RETURN_CALL_REF: + return 0x15; + case WASM_INSN_REF_NULL: + return 0xd0; + case WASM_INSN_REF_IS_NULL: + return 0xd1; + case WASM_INSN_REF_FUNC: + return 0xd2; case WASM_INSN_GLOBAL_GET: return 0x23; case 
WASM_INSN_GLOBAL_SET: @@ -6938,6 +8050,77 @@ static uint8_t wasm_opcode(uint8_t kind) { return 0; } +static uint32_t wasm_threads_subopcode(uint8_t kind) { + switch (kind) { + case WASM_INSN_MEMORY_ATOMIC_NOTIFY: + return 0x00; + case WASM_INSN_I32_ATOMIC_WAIT: + return 0x01; + case WASM_INSN_I64_ATOMIC_WAIT: + return 0x02; + case WASM_INSN_ATOMIC_FENCE: + return 0x03; + case WASM_INSN_I32_ATOMIC_LOAD: + return 0x10; + case WASM_INSN_I64_ATOMIC_LOAD: + return 0x11; + case WASM_INSN_I32_ATOMIC_LOAD8_U: + return 0x12; + case WASM_INSN_I32_ATOMIC_LOAD16_U: + return 0x13; + case WASM_INSN_I64_ATOMIC_LOAD8_U: + return 0x14; + case WASM_INSN_I64_ATOMIC_LOAD16_U: + return 0x15; + case WASM_INSN_I64_ATOMIC_LOAD32_U: + return 0x16; + case WASM_INSN_I32_ATOMIC_STORE: + return 0x17; + case WASM_INSN_I64_ATOMIC_STORE: + return 0x18; + case WASM_INSN_I32_ATOMIC_STORE8: + return 0x19; + case WASM_INSN_I32_ATOMIC_STORE16: + return 0x1a; + case WASM_INSN_I64_ATOMIC_STORE8: + return 0x1b; + case WASM_INSN_I64_ATOMIC_STORE16: + return 0x1c; + case WASM_INSN_I64_ATOMIC_STORE32: + return 0x1d; + case WASM_INSN_I32_ATOMIC_RMW_ADD: + return 0x1e; + case WASM_INSN_I64_ATOMIC_RMW_ADD: + return 0x1f; + case WASM_INSN_I32_ATOMIC_RMW_SUB: + return 0x25; + case WASM_INSN_I64_ATOMIC_RMW_SUB: + return 0x26; + case WASM_INSN_I32_ATOMIC_RMW_AND: + return 0x2c; + case WASM_INSN_I64_ATOMIC_RMW_AND: + return 0x2d; + case WASM_INSN_I32_ATOMIC_RMW_OR: + return 0x33; + case WASM_INSN_I64_ATOMIC_RMW_OR: + return 0x34; + case WASM_INSN_I32_ATOMIC_RMW_XOR: + return 0x3a; + case WASM_INSN_I64_ATOMIC_RMW_XOR: + return 0x3b; + case WASM_INSN_I32_ATOMIC_RMW_XCHG: + return 0x41; + case WASM_INSN_I64_ATOMIC_RMW_XCHG: + return 0x42; + case WASM_INSN_I32_ATOMIC_RMW_CMPXCHG: + return 0x48; + case WASM_INSN_I64_ATOMIC_RMW_CMPXCHG: + return 0x49; + default: + return UINT32_MAX; + } +} + static void enc_code(CfreeWriter* w, const WasmModule* m) { uint32_t i, j; uint32_t n = 0; @@ -6977,8 +8160,16 @@ static void 
enc_code(CfreeWriter* w, const WasmModule* m) { for (j = 0; j < m->funcs[i].ninsns; ++j) { WasmInsn in = m->funcs[i].insns[j]; uint8_t op = wasm_opcode(in.kind); - write_byte(body, op); - if (in.kind == WASM_INSN_BLOCK || in.kind == WASM_INSN_LOOP || + uint32_t threads_op = wasm_threads_subopcode(in.kind); + if (threads_op != UINT32_MAX) { + write_byte(body, 0xfe); + write_uleb(body, threads_op); + } else { + write_byte(body, op); + } + if (in.kind == WASM_INSN_ATOMIC_FENCE) { + write_byte(body, 0); + } else if (in.kind == WASM_INSN_BLOCK || in.kind == WASM_INSN_LOOP || in.kind == WASM_INSN_IF) write_byte(body, 0x40); else if (in.kind == WASM_INSN_I32_CONST || in.kind == WASM_INSN_I64_CONST) @@ -6993,6 +8184,10 @@ static void enc_code(CfreeWriter* w, const WasmModule* m) { in.kind == WASM_INSN_GLOBAL_GET || in.kind == WASM_INSN_GLOBAL_SET || in.kind == WASM_INSN_CALL || in.kind == WASM_INSN_RETURN_CALL || + in.kind == WASM_INSN_CALL_REF || + in.kind == WASM_INSN_RETURN_CALL_REF || + in.kind == WASM_INSN_REF_NULL || + in.kind == WASM_INSN_REF_FUNC || in.kind == WASM_INSN_BR || in.kind == WASM_INSN_BR_IF) write_uleb(body, (uint64_t)in.imm); else if (in.kind == WASM_INSN_CALL_INDIRECT || diff --git a/src/api/cg.c b/src/api/cg.c @@ -5314,6 +5314,8 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); } + if (tail) + api_regalloc_finish(g); T->call(T, &desc); for (u32 i = 0; i < nargs; ++i) { @@ -5389,6 +5391,7 @@ static void api_cg_tail_call(CfreeCg *g, uint32_t nargs, desc.ret.type = cg_type_func_ret_id(g->c, fty); desc.ret.abi = &abi->ret; desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); + api_regalloc_finish(g); T->call(T, &desc); for (u32 i = 0; i < nargs; ++i) { api_release_arg_storage(g, &avs[i].storage); @@ -5484,6 +5487,8 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, } else { desc.ret.storage = api_op_imm(0, 
builtin_id(CFREE_CG_BUILTIN_VOID)); } + if (tail) + api_regalloc_finish(g); T->call(T, &desc); for (u32 i = 0; i < nargs; ++i) { api_release_arg_storage(g, &avs[i].storage); diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -975,6 +975,77 @@ static u32 aa_call_stack_size(CGTarget* t, const CGCallDesc* d) { return (stack_off + 15u) & ~15u; } +static u32 aa_collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) out[n++] = r; + } + return n; +} + +static void aa_tail_restore_frame(CGTarget* t) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 int_regs[10]; + u32 fp_regs[8]; + u32 n_int_saves = + aa_collect_mask_regs(a->used_cs_int_mask, 19u, 28u, int_regs); + u32 n_fp_saves = + aa_collect_mask_regs(a->used_cs_fp_mask, 8u, 15u, fp_regs); + u32 int_save_off = a->max_outgoing; + u32 fp_save_off = int_save_off + n_int_saves * 8u; + u32 locals_off = fp_save_off + n_fp_saves * 8u; + u32 fp_lr_off = locals_off + a->cum_off; + u32 frame_size = (fp_lr_off + 16u + 15u) & ~15u; + fp_lr_off = frame_size - 16u; + + if (a->omit_frame) return; + if (a->has_alloca) { + if (fp_lr_off <= 0xfff) { + aa64_emit32(mc, aa64_sub_imm(1, 31, 29, fp_lr_off, 0)); + } else { + compiler_panic(t->c, a->loc, + "aarch64 tail call: fp/lr offset too large"); + } + } + for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { + aa64_emit32(mc, + aa64_ldr_fp_uimm(3, fp_regs[i], 31, fp_save_off + (u32)i * 8u)); + } + for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { + aa64_emit32(mc, + aa64_ldr_uimm(3, int_regs[i], 31, int_save_off + (u32)i * 8u)); + } + if (fp_lr_off <= 504u) { + aa64_emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off)); + } else { + aa64_emit32(mc, aa64_ldr_uimm(3, 29, 31, fp_lr_off)); + aa64_emit32(mc, aa64_ldr_uimm(3, 30, 31, fp_lr_off + 8u)); + } + emit_sp_add(mc, frame_size); +} + +static void aa_tail_branch(CGTarget* t, Operand callee) { + AAImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if 
(callee.kind == OPK_REG) { + if (reg_num(callee) != AA_TMP0) + aa64_emit32(mc, aa64_mov_reg(1, AA_TMP0, reg_num(callee))); + aa_tail_restore_frame(t); + aa64_emit32(mc, aa64_br(AA_TMP0)); + } else if (callee.kind == OPK_GLOBAL) { + aa_tail_restore_frame(t); + u32 b_pos = mc->pos(mc); + aa64_emit32(mc, aa64_b_base()); + mc->emit_reloc_at(mc, mc->section_id, b_pos, R_AARCH64_JUMP26, + callee.v.global.sym, callee.v.global.addend, 0, 0); + } else { + compiler_panic(t->c, a->loc, + "aarch64 tail call: callee kind %d unsupported", + (int)callee.kind); + } +} + static void aa_call(CGTarget* t, const CGCallDesc* d) { AAImpl* a = impl_of(t); MCEmitter* mc = t->mc; @@ -1004,6 +1075,16 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { a->max_outgoing = needed; } + if (d->flags & CG_CALL_TAIL) { + if (d->abi && d->abi->has_sret) + compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported"); + if (needed) + compiler_panic(t->c, a->loc, + "aarch64 tail call: stack arguments unsupported"); + aa_tail_branch(t, d->callee); + return; + } + if (d->callee.kind == OPK_GLOBAL) { u32 bl_pos = mc->pos(mc); aa64_emit32(mc, aa64_bl_base()); @@ -1082,6 +1163,16 @@ static void aa_emit_call_plan(CGTarget* t, const CGCallPlan* p) { AAImpl* a = impl_of(t); MCEmitter* mc = t->mc; + if (p->flags & CG_CALL_TAIL) { + if (p->has_sret) + compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported"); + if (p->stack_arg_size) + compiler_panic(t->c, a->loc, + "aarch64 tail call: stack arguments unsupported"); + aa_tail_branch(t, p->callee); + return; + } + if (p->callee.kind == OPK_GLOBAL) { u32 bl_pos = mc->pos(mc); aa64_emit32(mc, aa64_bl_base()); diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -984,6 +984,90 @@ static u32 rv_call_stack_size(CGTarget* t, const CGCallDesc* d) { return (stack_off + 15u) & ~15u; } +typedef struct RvTailFrameLayout { + u32 max_out; + u32 fp_saves_sz; + u32 fp_pair_off; + u32 frame_size; + i32 fp_save_base; + i32 int_save_base; +} 
RvTailFrameLayout; + +static u32 rv_tail_collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) out[n++] = r; + } + return n; +} + +static void rv_tail_compute_frame(const RImpl* a, u32 n_int_saves, + u32 n_fp_saves, RvTailFrameLayout* fl) { + fl->max_out = (a->max_outgoing + 15u) & ~15u; + u32 int_saves_sz = n_int_saves * 8u; + fl->fp_saves_sz = n_fp_saves * 8u; + u32 va_save_sz = a->is_variadic ? 64u : 0u; + u32 locals_off = fl->max_out + int_saves_sz + fl->fp_saves_sz; + fl->fp_pair_off = locals_off + a->cum_off; + fl->frame_size = fl->fp_pair_off + 16u + va_save_sz; + fl->frame_size = (fl->frame_size + 15u) & ~15u; + fl->fp_pair_off = fl->frame_size - 16u - va_save_sz; + fl->fp_save_base = -(i32)a->cum_off - 8; + fl->int_save_base = fl->fp_save_base - (i32)fl->fp_saves_sz; +} + +static void rv_tail_restore_frame(CGTarget* t) { + RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + u32 int_regs[10]; + u32 fp_regs[10]; + RvTailFrameLayout fl; + u32 n_int_saves = + rv_tail_collect_mask_regs(a->used_cs_int_mask, 18u, 27u, int_regs); + u32 n_fp_saves = + rv_tail_collect_mask_regs(a->used_cs_fp_mask, 18u, 27u, fp_regs); + rv_tail_compute_frame(a, n_int_saves, n_fp_saves, &fl); + + if (a->omit_frame) return; + for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { + rv64_emit32(mc, rv_ld(int_regs[i], RV_S0, fl.int_save_base - 8 * i)); + } + for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { + rv64_emit32(mc, rv_fld(fp_regs[i], RV_S0, fl.fp_save_base - 8 * i)); + } + if (a->has_alloca) { + if ((i32)fl.fp_pair_off > 2047) { + compiler_panic(t->c, a->loc, "rv64 tail call: fp pair offset too large"); + } + rv64_emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fl.fp_pair_off)); + } + rv64_emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fl.fp_pair_off)); + rv64_emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fl.fp_pair_off + 8)); + emit_sp_addi(mc, (i64)fl.frame_size); +} + +static void rv_tail_branch(CGTarget* t, Operand callee) { + 
RImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (callee.kind == OPK_REG) { + if (reg_num(callee) != RV_T1) + rv64_emit32(mc, rv_addi(RV_T1, reg_num(callee), 0)); + rv_tail_restore_frame(t); + rv64_emit32(mc, rv_jr(RV_T1)); + } else if (callee.kind == OPK_GLOBAL) { + rv_tail_restore_frame(t); + u32 sec = mc->section_id; + u32 pos = mc->pos(mc); + rv64_emit32(mc, rv_auipc(RV_T1, 0)); + rv64_emit32(mc, rv_jalr(RV_ZERO, RV_T1, 0)); + mc->emit_reloc_at(mc, sec, pos, R_RV_CALL, + callee.v.global.sym, callee.v.global.addend, 0, 0); + } else { + compiler_panic(t->c, a->loc, "rv64 tail call: callee kind %d unsupported", + (int)callee.kind); + } +} + static void rv_call(CGTarget* t, const CGCallDesc* d) { RImpl* a = impl_of(t); MCEmitter* mc = t->mc; @@ -1018,6 +1102,16 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) { a->max_outgoing = needed; } + if (d->flags & CG_CALL_TAIL) { + if (d->abi && d->abi->has_sret) + compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported"); + if (needed) + compiler_panic(t->c, a->loc, + "rv64 tail call: stack arguments unsupported"); + rv_tail_branch(t, d->callee); + return; + } + if (d->callee.kind == OPK_GLOBAL) { /* AUIPC ra, 0 ; JALR ra, ra, 0 with R_RV_CALL on AUIPC */ u32 sec = mc->section_id; @@ -1087,6 +1181,16 @@ static void rv_emit_call_plan(CGTarget* t, const CGCallPlan* p) { RImpl* a = impl_of(t); MCEmitter* mc = t->mc; + if (p->flags & CG_CALL_TAIL) { + if (p->has_sret) + compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported"); + if (p->stack_arg_size) + compiler_panic(t->c, a->loc, + "rv64 tail call: stack arguments unsupported"); + rv_tail_branch(t, p->callee); + return; + } + if (p->callee.kind == OPK_GLOBAL) { u32 sec = mc->section_id; u32 pos = mc->pos(mc); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -897,6 +897,59 @@ static u32 x_call_stack_size(CGTarget* t, const CGCallDesc* d) { return (stack_off + 15u) & ~15u; } +static u32 x_tail_collect_cs_regs(const XImpl* a, Reg* cs_regs) { + 
u32 cs_used = 0; + for (u32 i = 0; i < 5u; ++i) { + Reg r = g_int_order[i]; + if (a->used_cs_int_mask & (1u << r)) + cs_regs[cs_used++] = r; + } + return cs_used; +} + +static void x_tail_restore_frame(CGTarget* t) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + Reg cs_regs[5]; + u32 cs_used = x_tail_collect_cs_regs(a, cs_regs); + + if (a->omit_frame) return; + for (i32 i = (i32)cs_used - 1; i >= 0; --i) { + u32 reg = cs_regs[i]; + i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + emit_mov_load(mc, 8, 0, reg, X64_RBP, off); + } + { + u8 op = 0xC9; + mc->emit_bytes(mc, &op, 1); + } +} + +static void x_tail_branch(CGTarget* t, Operand callee) { + XImpl* a = impl_of(t); + MCEmitter* mc = t->mc; + if (callee.kind == OPK_REG) { + u32 r = callee.v.reg & 0xFu; + if (r != X64_R11) + emit_mov_rr(mc, 1, X64_R11, r); + x_tail_restore_frame(t); + emit_rex(mc, 0, 0, 0, X64_R11); + u8 buf[2] = {0xFF, modrm(3u, 4u, X64_R11)}; + mc->emit_bytes(mc, buf, 2); + } else if (callee.kind == OPK_GLOBAL) { + x_tail_restore_frame(t); + u8 op = 0xE9; + mc->emit_bytes(mc, &op, 1); + u32 disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, + callee.v.global.sym, callee.v.global.addend - 4, 1, 0); + } else { + compiler_panic(t->c, a->loc, "x64 tail call: callee kind %d unsupported", + (int)callee.kind); + } +} + static void x_call(CGTarget* t, const CGCallDesc* d) { XImpl* a = impl_of(t); MCEmitter* mc = t->mc; @@ -929,6 +982,16 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { x64_emit_load_imm(mc, 0, X64_RAX, (i64)next_fp); } + if (d->flags & CG_CALL_TAIL) { + if (d->abi && d->abi->has_sret) + compiler_panic(t->c, a->loc, "x64 tail call: sret unsupported"); + if (needed) + compiler_panic(t->c, a->loc, + "x64 tail call: stack arguments unsupported"); + x_tail_branch(t, d->callee); + return; + } + if (d->callee.kind == OPK_GLOBAL) { /* call rel32: E8 + disp32 + R_X64_PLT32. 
*/ u8 op = 0xE8; @@ -1014,6 +1077,17 @@ static void x_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (p->is_variadic) x64_emit_load_imm(mc, 0, X64_RAX, (i64)p->variadic_fp_count); + if (p->flags & CG_CALL_TAIL) { + if (p->has_sret) + compiler_panic(t->c, impl_of(t)->loc, + "x64 tail call: sret unsupported"); + if (p->stack_arg_size) + compiler_panic(t->c, impl_of(t)->loc, + "x64 tail call: stack arguments unsupported"); + x_tail_branch(t, p->callee); + return; + } + if (p->callee.kind == OPK_GLOBAL) { u8 op = 0xE8; mc->emit_bytes(mc, &op, 1); diff --git a/test/wasm/cases/atomic_rmw_wait_notify.expect b/test/wasm/cases/atomic_rmw_wait_notify.expect @@ -0,0 +1 @@ +99 diff --git a/test/wasm/cases/atomic_rmw_wait_notify.wat b/test/wasm/cases/atomic_rmw_wait_notify.wat @@ -0,0 +1,26 @@ +(module + (memory 1 1 shared) + (func (export "test_main") (result i32) + i32.const 0 + i32.const 40 + i32.atomic.store + i32.const 0 + i32.const 2 + i32.atomic.rmw.add + drop + i32.const 0 + i32.const 42 + i32.const 99 + i32.atomic.rmw.cmpxchg + drop + i32.const 0 + i32.const 99 + i64.const 0 + memory.atomic.wait32 + drop + i32.const 0 + i32.const 1 + memory.atomic.notify + drop + i32.const 0 + i32.atomic.load)) diff --git a/test/wasm/cases/typed_refs_call_ref.expect b/test/wasm/cases/typed_refs_call_ref.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/typed_refs_call_ref.wat b/test/wasm/cases/typed_refs_call_ref.wat @@ -0,0 +1,10 @@ +(module + (type $inc_sig (func (param i32) (result i32))) + (func $inc (type $inc_sig) + local.get 0 + i32.const 1 + i32.add) + (func (export "test_main") (result i32) + i32.const 41 + ref.func $inc + call_ref (type $inc_sig))) diff --git a/test/wasm/cases/typed_refs_return_call_ref.expect b/test/wasm/cases/typed_refs_return_call_ref.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/typed_refs_return_call_ref.wat b/test/wasm/cases/typed_refs_return_call_ref.wat @@ -0,0 +1,7 @@ +(module + (type $value_sig (func (result i32))) + (func $value 
(type $value_sig) + i32.const 42) + (func (export "test_main") (result i32) + ref.func $value + return_call_ref (type $value_sig)))