kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5630fc443be3e72d1562851d2a98e71b1e2e9447
parent 00158c36b4bbbeaa9197b7142bdbc4c33936c9b0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 17 May 2026 20:38:02 -0700

Add initial wasm frontend

Diffstat:
M Makefile | 6 ++++++
M doc/WASM.md | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/cc.c | 8 +++++---
M driver/dbg.c | 11 +++++++++++
M driver/env.c | 3 +++
M driver/inputs.c | 3 ++-
M driver/inputs.h | 3 ++-
M driver/run.c | 1 +
M include/cfree.h | 10 ++++++----
A lang/wasm/wasm.c | 1843 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lang/wasm/wasm.h | 15 +++++++++++++++
M src/api/pipeline.c | 5 +++++
M test/test.mk | 10 +++++++++-
A test/wasm/cases/call_chain.expect | 1 +
A test/wasm/cases/call_chain.wat | 9 +++++++++
A test/wasm/cases/folded.expect | 1 +
A test/wasm/cases/folded.wat | 5 +++++
A test/wasm/cases/int_ops.expect | 1 +
A test/wasm/cases/int_ops.wat | 15 +++++++++++++++
A test/wasm/cases/locals_params.expect | 1 +
A test/wasm/cases/locals_params.wat | 14 ++++++++++++++
A test/wasm/cases/named_refs.expect | 1 +
A test/wasm/cases/named_refs.wat | 7 +++++++
A test/wasm/cases/return42.expect | 1 +
A test/wasm/cases/return42.wat | 5 +++++
A test/wasm/harness/wasm_tool.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/wasm/run.sh | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
27 files changed, 2411 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile @@ -21,9 +21,11 @@ LANG_CFLAGS = $(CFLAGS_COMMON) -Iinclude LIB_SRCS = $(shell find src -name '*.c') LANG_C_SRCS = $(shell find lang/c -name '*.c' 2>/dev/null) +LANG_WASM_SRCS = $(shell find lang/wasm -name '*.c' 2>/dev/null) LIB_ASMS = $(shell find src -name '*.S') LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) \ $(patsubst lang/c/%.c,build/lang/c/%.o,$(LANG_C_SRCS)) \ + $(patsubst lang/wasm/%.c,build/lang/wasm/%.o,$(LANG_WASM_SRCS)) \ $(patsubst src/%.S,build/lib/%.o,$(LIB_ASMS)) LIB_DEPS = $(LIB_OBJS:.o=.d) @@ -70,6 +72,10 @@ build/lang/c/%.o: lang/c/%.c @mkdir -p $(dir $@) $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/c $(DEPFLAGS) -c $< -o $@ +build/lang/wasm/%.o: lang/wasm/%.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/wasm $(DEPFLAGS) -c $< -o $@ + build/lib/%.o: src/%.S @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@ diff --git a/doc/WASM.md b/doc/WASM.md @@ -751,6 +751,138 @@ Acceptance: - simple numeric/control modules preserve behavior. - unsupported features still fail before emission. +## Remaining Feature Checklist + +This checklist tracks the path from the initial Wasm/WAT frontend subset to a +complete implementation. Keep each item tied to a small named fixture or +targeted test target. + +### Frontend Source and Driver + +- [x] Add `CFREE_LANG_WASM` and suffix inference for `.wat`/`.wasm`. +- [x] Register the Wasm frontend in driver-created compilers and pipelines. +- [x] Add `make test-wasm-front` and a small WAT-to-Wasm test helper. +- [ ] Add explicit negative frontend tests for malformed WAT, malformed Wasm, + bad indices, stack underflow, unsupported sections, and unsupported opcodes. +- [ ] Decide stdin language selection for WAT input instead of treating `-` as + C-only. +- [ ] Add dbg smoke coverage for `:language wasm` / `:language wat`. 
+ +### WAT Reader + +- [x] Parse modules, functions, exports, params, results, locals, folded + expressions, numeric indices, and `$name` function/local references. +- [x] Parse line comments. +- [ ] Parse block comments. +- [ ] Parse standard WAT string escapes and byte escapes. +- [ ] Parse integer literals with signs, underscores, hex notation, and + boundary diagnostics. +- [ ] Parse float literals for `f32.const` and `f64.const`. +- [ ] Parse module-level type definitions and `(func (type N) ...)`. +- [ ] Parse imports, memories, tables, globals, elements, data segments, start, + and custom/name sections. +- [ ] Preserve source locations through validation and lowering diagnostics. + +### Binary Reader and Encoder + +- [x] Decode and encode the current executable-module subset: type, function, + export, code, locals, constants, calls, local ops, and integer ops. +- [x] Reject `linking` custom sections as frontend input. +- [ ] Move shared binary mechanics into `src/wasm` with decode/encode contexts. +- [ ] Validate section length, ordering, count, and index-space edge cases with + direct format tests. +- [ ] Decode and encode imports, memories, tables, globals, elements, data, + start, custom/name sections, and target-feature metadata. +- [ ] Preserve unknown custom sections when the caller requests preservation. +- [ ] Add deterministic fixtures for malformed LEB128 and truncated bodies. + +### Validation + +- [x] Validate basic stack depth, direct call indices, local indices, and + integer-only locals/params for the current subset. +- [ ] Replace depth-only validation with typed operand and control stacks. +- [ ] Validate exact function result stack shape, unreachable polymorphism, and + fallthrough after `return`/`unreachable`. +- [ ] Validate blocks, loops, if/else, branch depths, branch result arity, and + `br_table`. +- [ ] Validate memory/table/global/data/element indices, limits, active/passive + segment rules, and start function signature. 
+- [ ] Centralize feature gating in a `WasmFeatureSet`. +- [ ] Add clear diagnostics for every deferred proposal: SIMD, threads, + exceptions, GC, typed function refs, tail calls, multi-memory, and memory64. + +### Native Lowering + +- [x] Lower straight-line exported functions to native object/JIT through + `CfreeCg`. +- [x] Lower i32/i64 constants, local get/set/tee, direct calls, returns, drops, + integer arithmetic, shifts, bitwise ops, and integer comparisons. +- [ ] Lower `unreachable` and deterministic traps. +- [ ] Lower `select`. +- [ ] Lower `local.set`/`local.tee` for address-taken or spilled locals without + depending on backend-local optimization behavior. +- [ ] Lower structured control flow: `block`, `loop`, `if`, `else`, `br`, + `br_if`, `br_table`, and `return`. +- [ ] Lower full i32/i64 integer ops, including count/rotate/popcount/clz/ctz + and all conversions. +- [ ] Lower f32/f64 arithmetic, comparisons, constants, conversions, and + reinterpret ops. +- [ ] Implement memory state in an explicit instance context, with checked + loads/stores, `memory.size`, `memory.grow`, and active data initialization. +- [ ] Implement globals through the instance context. +- [ ] Implement imports through explicit import slots in the instance context. +- [ ] Implement tables and `call_indirect`. +- [ ] Define and implement the C-facing exported wrapper ABI that keeps the + instance parameter explicit. + +### Runtime and Instance Model + +- [ ] Add internal `CfreeWasmInstance`, memory, table, global, and import-slot + structs under `lang/wasm` or `rt`. +- [ ] Add trap helpers for unreachable, division traps, invalid conversion, + bounds checks, table checks, and indirect-call signature checks. +- [ ] Define ownership and initialization for memories, tables, active data, + elements, mutable globals, and start functions. +- [ ] Add tests that instantiate modules with memory/import/global state + without process-global state. 
+ +### Wasm Target Backend + +- [ ] Register `arch_impl_wasm` and a wasm32 ABI vtable. +- [ ] Implement scalar wasm32 BasicCABI classification. +- [ ] Implement a Wasm `CGTarget` that emits function bodies, locals, type + entries, data segments, exports, and relocations. +- [ ] Diagnose unsupported target features: wasm64, WASI startup, varargs, + atomics, SIMD, TLS, inline asm, irreducible control flow, and native-only + ABI hooks. +- [ ] Add `make test-wasm-target`. + +### Object Read/Write and Link + +- [ ] Add `ObjBuilder` extension payload hooks. +- [ ] Add `WasmObjMeta` for module graph, symbols, relocations, data segment + metadata, target features, and init functions. +- [ ] Implement `emit_wasm` for tool-conventions relocatable objects. +- [ ] Implement `read_wasm` for relocatable objects and symbol inspection. +- [ ] Implement reloc custom sections for the current `R_WASM_*` relocation + kinds, including padded LEB rewrites. +- [ ] Add `make test-wasm-obj`. +- [ ] Add a Wasm-specific linker path that emits `WasmLinkImage`, not native + `LinkImage`. +- [ ] Merge type/import/function/table/global/element/data/custom sections and + apply relocations. +- [ ] Support archives, demand loading, undefined imports, export policy, + constructors, memory layout, and table layout. +- [ ] Add `make test-wasm-link`. + +### Wasm-to-Wasm + +- [ ] Use the frontend plus Wasm target to normalize simple modules. +- [ ] Validate emitted modules with cfree's own validator and optional external + validators when available. +- [ ] Preserve behavior for numeric/control/memory fixtures while allowing the + byte representation to change. + ## Testing Strategy Prefer small, named fixtures over broad corpus runs: diff --git a/driver/cc.c b/driver/cc.c @@ -149,9 +149,10 @@ void driver_help_cc(void) { "compile\n" "\n" "DESCRIPTION\n" - " Compiles C11 sources and links them with .o/.a inputs. Inputs are\n" + " Compiles C11/Wasm sources and links them with .o/.a inputs. 
Inputs are\n" " classified by suffix:\n" " .c C source\n" + " .wat .wasm WebAssembly source module\n" " .o .obj object file (link-time input)\n" " .a static archive (link-time input)\n" " - read C source from stdin (single source only)\n" @@ -383,9 +384,10 @@ static int cc_record_wl(CcOptions* o, const char* arg) { return 0; } -/* Suffix predicate: is `s` a recognized C source suffix? */ +/* Suffix predicate: is `s` a recognized source suffix? */ static int cc_is_c_source(const char* s) { - return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".toy"); + return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".toy") || + driver_has_suffix(s, ".wat") || driver_has_suffix(s, ".wasm"); } /* Decimal uint64 parse for SOURCE_DATE_EPOCH. Stops at the first non-digit; diff --git a/driver/dbg.c b/driver/dbg.c @@ -1514,6 +1514,8 @@ static const char *dbg_jit_language_name(CfreeLanguage lang) { return "asm"; case CFREE_LANG_TOY: return "toy"; + case CFREE_LANG_WASM: + return "wasm"; case CFREE_LANG_C: case CFREE_LANG_COUNT: break; @@ -1527,6 +1529,8 @@ static const char *dbg_jit_language_suffix(CfreeLanguage lang) { return ".s"; case CFREE_LANG_TOY: return ".toy"; + case CFREE_LANG_WASM: + return ".wat"; case CFREE_LANG_C: case CFREE_LANG_COUNT: break; @@ -1540,6 +1544,8 @@ static const char *dbg_jit_default_name(CfreeLanguage lang) { return "<dbg-jit.s>"; case CFREE_LANG_TOY: return "<dbg-jit.toy>"; + case CFREE_LANG_WASM: + return "<dbg-jit.wat>"; case CFREE_LANG_C: case CFREE_LANG_COUNT: break; @@ -1569,6 +1575,11 @@ static CfreeLanguage dbg_jit_language_for_tag(DbgState *s, const char *tag, *name_out = "<dbg-jit.s>"; return CFREE_LANG_ASM; } + if (driver_streq(tag, "wasm") || driver_streq(tag, "wat")) { + if (name_out) + *name_out = "<dbg-jit.wat>"; + return CFREE_LANG_WASM; + } if (name_out) *name_out = tag; return cfree_language_for_path(tag); diff --git a/driver/env.c b/driver/env.c @@ -39,6 +39,7 @@ #include "driver.h" #include "lang/c/c.h" #include 
"lang/toy/toy.h" +#include "lang/wasm/wasm.h" /* Dual-mapping back-ends for strict W^X. Picks per-platform: * @@ -145,6 +146,7 @@ CfreeCompiler *driver_compiler_new(CfreeTarget t, const CfreeEnv *env) { if (c) { cfree_c_register(c); (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile); + cfree_wasm_register(c); driver_diag_set_compiler(c); } return c; @@ -164,6 +166,7 @@ CfreePipeline *driver_pipeline_new(CfreeTarget t, const CfreeEnv *env) { CfreeCompiler *c = cfree_pipeline_compiler(p); cfree_c_register(c); (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile); + cfree_wasm_register(c); driver_diag_set_compiler(c); } return p; diff --git a/driver/inputs.c b/driver/inputs.c @@ -62,7 +62,8 @@ int driver_inputs_classify(DriverInputs *in, const char *arg) { if (driver_streq(arg, "-")) return inputs_record_stdin(in); if (driver_has_suffix(arg, ".c") || driver_has_suffix(arg, ".toy") || - driver_has_suffix(arg, ".s")) { + driver_has_suffix(arg, ".s") || driver_has_suffix(arg, ".wat") || + driver_has_suffix(arg, ".wasm")) { in->sources[in->nsources++] = arg; return 1; } diff --git a/driver/inputs.h b/driver/inputs.h @@ -53,7 +53,8 @@ void driver_inputs_release(DriverInputs *); /* Try to consume one positional input at `arg`. 
Recognized: * "-" read C source from stdin (at most once) - * *.c *.toy *.s source path + * *.c *.toy *.s *.wat *.wasm + * source path * *.o *.obj object-file path * *.a static-archive path * diff --git a/driver/run.c b/driver/run.c @@ -126,6 +126,7 @@ void driver_help_run(void) { "\n" " Inputs are classified by suffix:\n" " .c .cc .cpp C source\n" + " .wat .wasm WebAssembly source module\n" " .o .obj object file\n" " .a static archive\n" " - read C source from stdin (single source only)\n" diff --git a/include/cfree.h b/include/cfree.h @@ -729,7 +729,8 @@ typedef enum CfreeLanguage { CFREE_LANG_C = 0, CFREE_LANG_ASM = 1, CFREE_LANG_TOY = 2, - CFREE_LANG_COUNT = 3, + CFREE_LANG_WASM = 3, + CFREE_LANG_COUNT = 4, } CfreeLanguage; typedef int (*CfreeCompileFn)(CfreeCompiler *, const CfreeCompileOptions *, @@ -752,9 +753,10 @@ struct CfreeBytesInput { }; /* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM, `.toy` -> - * CFREE_LANG_TOY, `.c`, `.cc`, `.cpp` and any other suffix (including a path - * with no suffix) -> CFREE_LANG_C. `.S` (preprocessed asm) is not recognized - * — drivers must preprocess first and submit the result as CFREE_LANG_ASM. */ + * CFREE_LANG_TOY, `.wat`/`.wasm` -> CFREE_LANG_WASM, `.c`, `.cc`, `.cpp` and + * any other suffix (including a path with no suffix) -> CFREE_LANG_C. `.S` + * (preprocessed asm) is not recognized — drivers must preprocess first and + * submit the result as CFREE_LANG_ASM. */ CfreeLanguage cfree_language_for_path(const char *path); /* Preprocessor configuration shared by compile_* and the convenience run. 
*/ diff --git a/lang/wasm/wasm.c b/lang/wasm/wasm.c @@ -0,0 +1,1843 @@ +#include "wasm.h" + +#include <cfree/cg.h> +#include <cfree/frontend.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +typedef enum WasmValType { + WASM_VAL_I32 = 0x7f, + WASM_VAL_I64 = 0x7e, + WASM_VAL_F32 = 0x7d, + WASM_VAL_F64 = 0x7c, +} WasmValType; + +typedef enum WasmInsnKind { + WASM_INSN_I32_CONST, + WASM_INSN_I64_CONST, + WASM_INSN_LOCAL_GET, + WASM_INSN_LOCAL_SET, + WASM_INSN_LOCAL_TEE, + WASM_INSN_CALL, + WASM_INSN_RETURN, + WASM_INSN_DROP, + WASM_INSN_I32_ADD, + WASM_INSN_I32_SUB, + WASM_INSN_I32_MUL, + WASM_INSN_I32_DIV_S, + WASM_INSN_I32_DIV_U, + WASM_INSN_I32_REM_S, + WASM_INSN_I32_REM_U, + WASM_INSN_I32_AND, + WASM_INSN_I32_OR, + WASM_INSN_I32_XOR, + WASM_INSN_I32_SHL, + WASM_INSN_I32_SHR_S, + WASM_INSN_I32_SHR_U, + WASM_INSN_I32_EQZ, + WASM_INSN_I32_EQ, + WASM_INSN_I32_NE, + WASM_INSN_I32_LT_S, + WASM_INSN_I32_LT_U, + WASM_INSN_I32_GT_S, + WASM_INSN_I32_GT_U, + WASM_INSN_I32_LE_S, + WASM_INSN_I32_LE_U, + WASM_INSN_I32_GE_S, + WASM_INSN_I32_GE_U, + WASM_INSN_I64_ADD, + WASM_INSN_I64_SUB, + WASM_INSN_I64_MUL, + WASM_INSN_I64_AND, + WASM_INSN_I64_OR, + WASM_INSN_I64_XOR, + WASM_INSN_I64_SHL, + WASM_INSN_I64_SHR_S, + WASM_INSN_I64_SHR_U, + WASM_INSN_I64_EQZ, + WASM_INSN_I64_EQ, + WASM_INSN_I64_NE, + WASM_INSN_I64_LT_S, + WASM_INSN_I64_LT_U, + WASM_INSN_I64_GT_S, + WASM_INSN_I64_GT_U, + WASM_INSN_I64_LE_S, + WASM_INSN_I64_LE_U, + WASM_INSN_I64_GE_S, + WASM_INSN_I64_GE_U, +} WasmInsnKind; + +typedef struct WasmInsn { + uint8_t kind; + int64_t imm; +} WasmInsn; + +typedef struct WasmFunc { + char *name; + WasmValType params[16]; + uint32_t nparams; + WasmValType locals[32]; + uint32_t nlocals; + char *local_names[48]; + WasmValType results[1]; + uint32_t nresults; + char *export_name; + WasmInsn *insns; + uint32_t ninsns; + uint32_t cap_insns; +} WasmFunc; + +typedef struct WasmModule { + CfreeHeap *heap; + WasmFunc *funcs; + uint32_t nfuncs; + 
uint32_t cap_funcs; +} WasmModule; + +typedef struct WasmTok { + const char *p; + size_t len; + uint32_t line; + uint32_t col; + uint8_t kind; +} WasmTok; + +enum { + WT_EOF = 0, + WT_LPAREN, + WT_RPAREN, + WT_ATOM, + WT_STRING, +}; + +typedef struct WatParser { + CfreeCompiler *c; + const char *name; + const char *src; + size_t len; + size_t pos; + uint32_t line; + uint32_t col; + WasmTok tok; + WasmModule *module; +} WatParser; + +typedef struct BinReader { + CfreeCompiler *c; + const uint8_t *data; + size_t len; + size_t pos; + WasmModule *module; +} BinReader; + +static CfreeSrcLoc wasm_loc(uint32_t line, uint32_t col) { + CfreeSrcLoc loc; + loc.file_id = 0; + loc.line = line; + loc.col = col; + return loc; +} + +static void wasm_error(CfreeCompiler *c, CfreeSrcLoc loc, const char *fmt, + ...) { + va_list ap; + va_start(ap, fmt); + cfree_frontend_vfatal(c, loc, fmt, ap); +} + +static void *wasm_realloc(CfreeHeap *h, void *p, size_t old_n, size_t new_n) { + return h->realloc(h, p, old_n ? old_n : 1u, new_n ? 
new_n : 1u, + _Alignof(max_align_t)); +} + +static char *wasm_strdup(CfreeHeap *h, const char *s, size_t len) { + char *out = (char *)h->alloc(h, len + 1u, 1); + if (!out) + return NULL; + if (len) + memcpy(out, s, len); + out[len] = '\0'; + return out; +} + +static void wasm_module_init(WasmModule *m, CfreeHeap *heap) { + memset(m, 0, sizeof *m); + m->heap = heap; +} + +static void wasm_module_free(WasmModule *m) { + uint32_t i; + if (!m || !m->heap) + return; + for (i = 0; i < m->nfuncs; ++i) { + WasmFunc *f = &m->funcs[i]; + if (f->name) + m->heap->free(m->heap, f->name, strlen(f->name) + 1u); + if (f->export_name) + m->heap->free(m->heap, f->export_name, strlen(f->export_name) + 1u); + for (uint32_t j = 0; j < f->nparams + f->nlocals; ++j) + if (f->local_names[j]) + m->heap->free(m->heap, f->local_names[j], + strlen(f->local_names[j]) + 1u); + if (f->insns) + m->heap->free(m->heap, f->insns, sizeof(*f->insns) * f->cap_insns); + } + if (m->funcs) + m->heap->free(m->heap, m->funcs, sizeof(*m->funcs) * m->cap_funcs); + memset(m, 0, sizeof *m); +} + +static WasmFunc *wasm_add_func(CfreeCompiler *c, WasmModule *m) { + WasmFunc *f; + if (m->nfuncs == m->cap_funcs) { + uint32_t new_cap = m->cap_funcs ? m->cap_funcs * 2u : 4u; + void *p = wasm_realloc(m->heap, m->funcs, sizeof(*m->funcs) * m->cap_funcs, + sizeof(*m->funcs) * new_cap); + if (!p) + wasm_error(c, wasm_loc(0, 0), "wasm: out of memory"); + m->funcs = (WasmFunc *)p; + memset(m->funcs + m->cap_funcs, 0, + sizeof(*m->funcs) * (new_cap - m->cap_funcs)); + m->cap_funcs = new_cap; + } + f = &m->funcs[m->nfuncs++]; + memset(f, 0, sizeof *f); + return f; +} + +static void wasm_func_add_insn(CfreeCompiler *c, WasmModule *m, WasmFunc *f, + WasmInsnKind kind, int64_t imm) { + if (f->ninsns == f->cap_insns) { + uint32_t new_cap = f->cap_insns ? 
f->cap_insns * 2u : 16u; + void *p = wasm_realloc(m->heap, f->insns, sizeof(*f->insns) * f->cap_insns, + sizeof(*f->insns) * new_cap); + if (!p) + wasm_error(c, wasm_loc(0, 0), "wasm: out of memory"); + f->insns = (WasmInsn *)p; + f->cap_insns = new_cap; + } + f->insns[f->ninsns].kind = (uint8_t)kind; + f->insns[f->ninsns].imm = imm; + f->ninsns++; +} + +static int tok_is(WasmTok t, const char *s) { + size_t n = strlen(s); + return t.kind == WT_ATOM && t.len == n && memcmp(t.p, s, n) == 0; +} + +static int wasm_name_eq(const char *name, WasmTok t) { + size_t n; + if (!name || t.kind != WT_ATOM) + return 0; + n = strlen(name); + return t.len == n && memcmp(name, t.p, n) == 0; +} + +static void wat_next(WatParser *p) { + const char *s = p->src; + while (p->pos < p->len) { + char ch = s[p->pos]; + if (ch == '\n') { + p->pos++; + p->line++; + p->col = 1; + continue; + } + if (ch == ' ' || ch == '\t' || ch == '\r') { + p->pos++; + p->col++; + continue; + } + if (ch == ';' && p->pos + 1u < p->len && s[p->pos + 1u] == ';') { + while (p->pos < p->len && s[p->pos] != '\n') { + p->pos++; + p->col++; + } + continue; + } + break; + } + p->tok.p = s + p->pos; + p->tok.len = 0; + p->tok.line = p->line; + p->tok.col = p->col; + p->tok.kind = WT_EOF; + if (p->pos >= p->len) + return; + if (s[p->pos] == '(') { + p->tok.kind = WT_LPAREN; + p->tok.len = 1; + p->pos++; + p->col++; + return; + } + if (s[p->pos] == ')') { + p->tok.kind = WT_RPAREN; + p->tok.len = 1; + p->pos++; + p->col++; + return; + } + if (s[p->pos] == '"') { + size_t start = ++p->pos; + p->col++; + p->tok.kind = WT_STRING; + p->tok.p = s + start; + while (p->pos < p->len && s[p->pos] != '"') { + if ((unsigned char)s[p->pos] < 0x20) + wasm_error(p->c, wasm_loc(p->line, p->col), + "wasm wat: unsupported string escape/control character"); + p->pos++; + p->col++; + } + if (p->pos >= p->len) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: unterminated string"); + p->tok.len = (size_t)(s + p->pos - 
p->tok.p); + p->pos++; + p->col++; + return; + } + p->tok.kind = WT_ATOM; + while (p->pos < p->len) { + char ch = s[p->pos]; + if (ch == '(' || ch == ')' || ch == ' ' || ch == '\t' || ch == '\r' || + ch == '\n') + break; + p->pos++; + p->col++; + } + p->tok.len = (size_t)(s + p->pos - p->tok.p); +} + +static void wat_expect(WatParser *p, uint8_t kind, const char *what) { + if (p->tok.kind != kind) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected %s", what); + wat_next(p); +} + +static int wat_parse_i64(WatParser *p, int64_t *out) { + const char *s = p->tok.p; + size_t n = p->tok.len, i = 0; + uint64_t v = 0; + int neg = 0; + if (p->tok.kind != WT_ATOM || n == 0) + return 0; + if (s[0] == '-') { + neg = 1; + i = 1; + } + if (i == n) + return 0; + for (; i < n; ++i) { + unsigned d; + if (s[i] < '0' || s[i] > '9') + return 0; + d = (unsigned)(s[i] - '0'); + if (v > (UINT64_MAX - d) / 10u) + return 0; + v = v * 10u + d; + } + *out = neg ? -(int64_t)v : (int64_t)v; + return 1; +} + +static int wat_val_type(WasmTok t, WasmValType *out) { + if (tok_is(t, "i32")) { + *out = WASM_VAL_I32; + return 1; + } + if (tok_is(t, "i64")) { + *out = WASM_VAL_I64; + return 1; + } + if (tok_is(t, "f32")) { + *out = WASM_VAL_F32; + return 1; + } + if (tok_is(t, "f64")) { + *out = WASM_VAL_F64; + return 1; + } + return 0; +} + +static void wat_skip_list(WatParser *p) { + uint32_t depth = 1; + while (depth && p->tok.kind != WT_EOF) { + if (p->tok.kind == WT_LPAREN) + depth++; + else if (p->tok.kind == WT_RPAREN) + depth--; + wat_next(p); + } +} + +static int wat_instr_kind(WasmTok t, WasmInsnKind *out, int *has_imm) { + *has_imm = 0; + if (tok_is(t, "i32.const")) { + *out = WASM_INSN_I32_CONST; + *has_imm = 1; + return 1; + } + if (tok_is(t, "i64.const")) { + *out = WASM_INSN_I64_CONST; + *has_imm = 1; + return 1; + } + if (tok_is(t, "local.get")) { + *out = WASM_INSN_LOCAL_GET; + *has_imm = 1; + return 1; + } + if (tok_is(t, "local.set")) { + *out = 
WASM_INSN_LOCAL_SET; + *has_imm = 1; + return 1; + } + if (tok_is(t, "local.tee")) { + *out = WASM_INSN_LOCAL_TEE; + *has_imm = 1; + return 1; + } + if (tok_is(t, "call")) { + *out = WASM_INSN_CALL; + *has_imm = 1; + return 1; + } + if (tok_is(t, "return")) { + *out = WASM_INSN_RETURN; + return 1; + } + if (tok_is(t, "drop")) { + *out = WASM_INSN_DROP; + return 1; + } + if (tok_is(t, "i32.add")) { + *out = WASM_INSN_I32_ADD; + return 1; + } + if (tok_is(t, "i32.sub")) { + *out = WASM_INSN_I32_SUB; + return 1; + } + if (tok_is(t, "i32.mul")) { + *out = WASM_INSN_I32_MUL; + return 1; + } + if (tok_is(t, "i32.div_s")) { + *out = WASM_INSN_I32_DIV_S; + return 1; + } + if (tok_is(t, "i32.div_u")) { + *out = WASM_INSN_I32_DIV_U; + return 1; + } + if (tok_is(t, "i32.rem_s")) { + *out = WASM_INSN_I32_REM_S; + return 1; + } + if (tok_is(t, "i32.rem_u")) { + *out = WASM_INSN_I32_REM_U; + return 1; + } + if (tok_is(t, "i32.and")) { + *out = WASM_INSN_I32_AND; + return 1; + } + if (tok_is(t, "i32.or")) { + *out = WASM_INSN_I32_OR; + return 1; + } + if (tok_is(t, "i32.xor")) { + *out = WASM_INSN_I32_XOR; + return 1; + } + if (tok_is(t, "i32.shl")) { + *out = WASM_INSN_I32_SHL; + return 1; + } + if (tok_is(t, "i32.shr_s")) { + *out = WASM_INSN_I32_SHR_S; + return 1; + } + if (tok_is(t, "i32.shr_u")) { + *out = WASM_INSN_I32_SHR_U; + return 1; + } + if (tok_is(t, "i32.eqz")) { + *out = WASM_INSN_I32_EQZ; + return 1; + } + if (tok_is(t, "i32.eq")) { + *out = WASM_INSN_I32_EQ; + return 1; + } + if (tok_is(t, "i32.ne")) { + *out = WASM_INSN_I32_NE; + return 1; + } + if (tok_is(t, "i32.lt_s")) { + *out = WASM_INSN_I32_LT_S; + return 1; + } + if (tok_is(t, "i32.lt_u")) { + *out = WASM_INSN_I32_LT_U; + return 1; + } + if (tok_is(t, "i32.gt_s")) { + *out = WASM_INSN_I32_GT_S; + return 1; + } + if (tok_is(t, "i32.gt_u")) { + *out = WASM_INSN_I32_GT_U; + return 1; + } + if (tok_is(t, "i32.le_s")) { + *out = WASM_INSN_I32_LE_S; + return 1; + } + if (tok_is(t, "i32.le_u")) { + *out = 
WASM_INSN_I32_LE_U; + return 1; + } + if (tok_is(t, "i32.ge_s")) { + *out = WASM_INSN_I32_GE_S; + return 1; + } + if (tok_is(t, "i32.ge_u")) { + *out = WASM_INSN_I32_GE_U; + return 1; + } + if (tok_is(t, "i64.add")) { + *out = WASM_INSN_I64_ADD; + return 1; + } + if (tok_is(t, "i64.sub")) { + *out = WASM_INSN_I64_SUB; + return 1; + } + if (tok_is(t, "i64.mul")) { + *out = WASM_INSN_I64_MUL; + return 1; + } + if (tok_is(t, "i64.and")) { + *out = WASM_INSN_I64_AND; + return 1; + } + if (tok_is(t, "i64.or")) { + *out = WASM_INSN_I64_OR; + return 1; + } + if (tok_is(t, "i64.xor")) { + *out = WASM_INSN_I64_XOR; + return 1; + } + if (tok_is(t, "i64.shl")) { + *out = WASM_INSN_I64_SHL; + return 1; + } + if (tok_is(t, "i64.shr_s")) { + *out = WASM_INSN_I64_SHR_S; + return 1; + } + if (tok_is(t, "i64.shr_u")) { + *out = WASM_INSN_I64_SHR_U; + return 1; + } + if (tok_is(t, "i64.eqz")) { + *out = WASM_INSN_I64_EQZ; + return 1; + } + if (tok_is(t, "i64.eq")) { + *out = WASM_INSN_I64_EQ; + return 1; + } + if (tok_is(t, "i64.ne")) { + *out = WASM_INSN_I64_NE; + return 1; + } + if (tok_is(t, "i64.lt_s")) { + *out = WASM_INSN_I64_LT_S; + return 1; + } + if (tok_is(t, "i64.lt_u")) { + *out = WASM_INSN_I64_LT_U; + return 1; + } + if (tok_is(t, "i64.gt_s")) { + *out = WASM_INSN_I64_GT_S; + return 1; + } + if (tok_is(t, "i64.gt_u")) { + *out = WASM_INSN_I64_GT_U; + return 1; + } + if (tok_is(t, "i64.le_s")) { + *out = WASM_INSN_I64_LE_S; + return 1; + } + if (tok_is(t, "i64.le_u")) { + *out = WASM_INSN_I64_LE_U; + return 1; + } + if (tok_is(t, "i64.ge_s")) { + *out = WASM_INSN_I64_GE_S; + return 1; + } + if (tok_is(t, "i64.ge_u")) { + *out = WASM_INSN_I64_GE_U; + return 1; + } + return 0; +} + +static void wat_parse_func_index(WatParser *p, int64_t *out) { + uint32_t i; + if (p->tok.kind == WT_ATOM && p->tok.len && p->tok.p[0] == '$') { + for (i = 0; i < p->module->nfuncs; ++i) { + if (wasm_name_eq(p->module->funcs[i].name, p->tok)) { + *out = i; + return; + } + } + wasm_error(p->c, 
wasm_loc(p->tok.line, p->tok.col), + "wasm wat: unknown function name"); + } + if (!wat_parse_i64(p, out)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected instruction immediate"); +} + +static void wat_parse_local_index(WatParser *p, WasmFunc *f, int64_t *out) { + uint32_t i, nlocals = f->nparams + f->nlocals; + if (p->tok.kind == WT_ATOM && p->tok.len && p->tok.p[0] == '$') { + for (i = 0; i < nlocals; ++i) { + if (wasm_name_eq(f->local_names[i], p->tok)) { + *out = i; + return; + } + } + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: unknown local name"); + } + if (!wat_parse_i64(p, out)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected instruction immediate"); +} + +static void wat_parse_instr_imm(WatParser *p, WasmFunc *f, WasmInsnKind kind, + int64_t *out) { + switch (kind) { + case WASM_INSN_CALL: + wat_parse_func_index(p, out); + break; + case WASM_INSN_LOCAL_GET: + case WASM_INSN_LOCAL_SET: + case WASM_INSN_LOCAL_TEE: + wat_parse_local_index(p, f, out); + break; + default: + if (!wat_parse_i64(p, out)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected instruction immediate"); + break; + } +} + +static void wat_parse_instr(WatParser *p, WasmFunc *f); + +static void wat_parse_instr_list(WatParser *p, WasmFunc *f) { + WasmInsnKind kind; + int has_imm; + int64_t imm = 0; + WasmTok head; + wat_expect(p, WT_LPAREN, "'('"); + head = p->tok; + if (!wat_instr_kind(head, &kind, &has_imm)) + wasm_error(p->c, wasm_loc(head.line, head.col), + "wasm wat: unsupported instruction"); + wat_next(p); + if (has_imm) { + wat_parse_instr_imm(p, f, kind, &imm); + wat_next(p); + } + while (p->tok.kind == WT_LPAREN) + wat_parse_instr(p, f); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_expect(p, WT_RPAREN, "')'"); +} + +static void wat_parse_instr(WatParser *p, WasmFunc *f) { + WasmInsnKind kind; + int has_imm; + if (p->tok.kind == WT_LPAREN) { + wat_parse_instr_list(p, 
f); + return; + } + if (!wat_instr_kind(p->tok, &kind, &has_imm)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: unsupported instruction"); + wat_next(p); + if (has_imm) { + int64_t imm; + wat_parse_instr_imm(p, f, kind, &imm); + wasm_func_add_insn(p->c, p->module, f, kind, imm); + wat_next(p); + } else { + wasm_func_add_insn(p->c, p->module, f, kind, 0); + } +} + +static void wat_parse_func(WatParser *p) { + WasmFunc *f = wasm_add_func(p->c, p->module); + wat_expect(p, WT_LPAREN, "'('"); + if (!tok_is(p->tok, "func")) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected func"); + wat_next(p); + if (p->tok.kind == WT_ATOM && p->tok.len > 0 && p->tok.p[0] == '$') { + f->name = wasm_strdup(p->module->heap, p->tok.p, p->tok.len); + if (!f->name) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm: out of memory"); + wat_next(p); + } + while (p->tok.kind != WT_RPAREN && p->tok.kind != WT_EOF) { + if (p->tok.kind == WT_LPAREN) { + wat_next(p); + if (tok_is(p->tok, "export")) { + wat_next(p); + if (p->tok.kind != WT_STRING) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected export string"); + f->export_name = wasm_strdup(p->module->heap, p->tok.p, p->tok.len); + if (!f->export_name) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm: out of memory"); + wat_next(p); + wat_expect(p, WT_RPAREN, "')'"); + } else if (tok_is(p->tok, "param")) { + WasmTok pending_name; + int have_name = 0; + memset(&pending_name, 0, sizeof pending_name); + wat_next(p); + while (p->tok.kind != WT_RPAREN && p->tok.kind != WT_EOF) { + WasmValType vt; + if (p->tok.kind == WT_ATOM && p->tok.len && p->tok.p[0] == '$') { + pending_name = p->tok; + have_name = 1; + wat_next(p); + continue; + } + if (!wat_val_type(p->tok, &vt)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected parameter type"); + if (f->nparams >= 16u) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: too 
many parameters"); + if (have_name) { + f->local_names[f->nparams] = + wasm_strdup(p->module->heap, pending_name.p, pending_name.len); + if (!f->local_names[f->nparams]) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm: out of memory"); + have_name = 0; + } + f->params[f->nparams++] = vt; + wat_next(p); + } + wat_expect(p, WT_RPAREN, "')'"); + } else if (tok_is(p->tok, "result")) { + WasmValType vt; + wat_next(p); + if (!wat_val_type(p->tok, &vt)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected result type"); + f->results[0] = vt; + f->nresults = 1; + wat_next(p); + wat_expect(p, WT_RPAREN, "')'"); + } else if (tok_is(p->tok, "local")) { + WasmTok pending_name; + int have_name = 0; + memset(&pending_name, 0, sizeof pending_name); + wat_next(p); + while (p->tok.kind != WT_RPAREN && p->tok.kind != WT_EOF) { + WasmValType vt; + if (p->tok.kind == WT_ATOM && p->tok.len && p->tok.p[0] == '$') { + pending_name = p->tok; + have_name = 1; + wat_next(p); + continue; + } + if (!wat_val_type(p->tok, &vt)) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: expected local type"); + if (f->nlocals >= 32u) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm wat: too many locals"); + if (have_name) { + uint32_t index = f->nparams + f->nlocals; + f->local_names[index] = + wasm_strdup(p->module->heap, pending_name.p, pending_name.len); + if (!f->local_names[index]) + wasm_error(p->c, wasm_loc(p->tok.line, p->tok.col), + "wasm: out of memory"); + have_name = 0; + } + f->locals[f->nlocals++] = vt; + wat_next(p); + } + wat_expect(p, WT_RPAREN, "')'"); + } else { + p->pos = (size_t)(p->tok.p - p->src); + p->line = p->tok.line; + p->col = p->tok.col; + p->tok.kind = WT_LPAREN; + p->tok.p = p->src + p->pos - 1u; + p->tok.len = 1; + p->tok.line = p->line; + p->tok.col = p->col - 1u; + wat_parse_instr(p, f); + } + } else { + wat_parse_instr(p, f); + } + } + wat_expect(p, WT_RPAREN, "')'"); +} + +static void 
wat_parse_module(CfreeCompiler *c, const CfreeBytesInput *input, + WasmModule *out) { + WatParser p; + memset(&p, 0, sizeof p); + p.c = c; + p.name = input->name; + p.src = (const char *)input->data; + p.len = input->len; + p.line = 1; + p.col = 1; + p.module = out; + wat_next(&p); + wat_expect(&p, WT_LPAREN, "'('"); + if (!tok_is(p.tok, "module")) + wasm_error(c, wasm_loc(p.tok.line, p.tok.col), "wasm wat: expected module"); + wat_next(&p); + while (p.tok.kind != WT_RPAREN && p.tok.kind != WT_EOF) { + if (p.tok.kind != WT_LPAREN) + wasm_error(c, wasm_loc(p.tok.line, p.tok.col), + "wasm wat: expected module field"); + wat_next(&p); + if (tok_is(p.tok, "func")) { + p.pos = (size_t)(p.tok.p - p.src); + p.line = p.tok.line; + p.col = p.tok.col; + p.tok.kind = WT_LPAREN; + p.tok.p = p.src + p.pos - 1u; + p.tok.len = 1; + p.tok.line = p.line; + p.tok.col = p.col - 1u; + wat_parse_func(&p); + } else { + wat_skip_list(&p); + } + } + wat_expect(&p, WT_RPAREN, "')'"); + if (p.tok.kind != WT_EOF) + wasm_error(c, wasm_loc(p.tok.line, p.tok.col), + "wasm wat: trailing tokens after module"); +} + +static uint8_t bin_u8(BinReader *r) { + if (r->pos >= r->len) + wasm_error(r->c, wasm_loc(0, 0), "wasm: unexpected end of file"); + return r->data[r->pos++]; +} + +static uint32_t bin_uleb(BinReader *r) { + uint32_t result = 0, shift = 0; + for (;;) { + uint8_t b = bin_u8(r); + if (shift >= 35u) + wasm_error(r->c, wasm_loc(0, 0), "wasm: invalid uleb128"); + result |= (uint32_t)(b & 0x7fu) << shift; + if (!(b & 0x80u)) + return result; + shift += 7u; + } +} + +static int64_t bin_sleb(BinReader *r, uint32_t bits) { + int64_t result = 0; + uint32_t shift = 0; + uint8_t b; + do { + b = bin_u8(r); + result |= (int64_t)(b & 0x7fu) << shift; + shift += 7u; + } while (b & 0x80u); + if (shift < bits && (b & 0x40u)) + result |= -((int64_t)1 << shift); + return result; +} + +static void bin_need(BinReader *r, size_t n) { + if (n > r->len || r->pos > r->len - n) + wasm_error(r->c, wasm_loc(0, 0), 
"wasm: section length out of bounds"); +} + +typedef struct BinType { + WasmValType params[16]; + uint32_t nparams; + WasmValType results[1]; + uint32_t nresults; +} BinType; + +static void wasm_decode_binary(CfreeCompiler *c, const CfreeBytesInput *input, + WasmModule *out) { + BinReader r; + BinType types[64]; + uint32_t ntypes = 0; + uint32_t nfunc_types = 0; + uint8_t last_id = 0; + memset(&r, 0, sizeof r); + r.c = c; + r.data = input->data; + r.len = input->len; + r.module = out; + if (r.len < 8 || memcmp(r.data, "\0asm\1\0\0\0", 8) != 0) + wasm_error(c, wasm_loc(0, 0), "wasm: bad magic or version"); + r.pos = 8; + while (r.pos < r.len) { + uint8_t id = bin_u8(&r); + uint32_t size = bin_uleb(&r); + size_t end; + bin_need(&r, size); + end = r.pos + size; + if (id != 0 && id <= last_id) + wasm_error(c, wasm_loc(0, 0), "wasm: sections out of order"); + if (id != 0) + last_id = id; + if (id == 0) { + uint32_t name_len = bin_uleb(&r); + bin_need(&r, name_len); + if (name_len == 7u && memcmp(r.data + r.pos, "linking", 7) == 0) + wasm_error(c, wasm_loc(0, 0), + "wasm: relocatable object metadata is not frontend input"); + r.pos = end; + continue; + } + if (id == 1) { + uint32_t i, count = bin_uleb(&r); + if (count > 64u) + wasm_error(c, wasm_loc(0, 0), "wasm: too many types"); + for (i = 0; i < count; ++i) { + uint32_t j, nparam, nresult; + if (bin_u8(&r) != 0x60u) + wasm_error(c, wasm_loc(0, 0), "wasm: expected function type"); + nparam = bin_uleb(&r); + if (nparam > 16u) + wasm_error(c, wasm_loc(0, 0), "wasm: too many parameters"); + types[ntypes].nparams = nparam; + for (j = 0; j < nparam; ++j) + types[ntypes].params[j] = (WasmValType)bin_u8(&r); + nresult = bin_uleb(&r); + if (nresult > 1u) + wasm_error(c, wasm_loc(0, 0), "wasm: multi-result unsupported"); + types[ntypes].nresults = nresult; + for (j = 0; j < nresult; ++j) + types[ntypes].results[j] = (WasmValType)bin_u8(&r); + ntypes++; + } + } else if (id == 3) { + uint32_t i, count = bin_uleb(&r); + if (count 
> 64u) + wasm_error(c, wasm_loc(0, 0), "wasm: too many functions"); + for (i = 0; i < count; ++i) { + uint32_t typeidx = bin_uleb(&r); + WasmFunc *f; + if (typeidx >= ntypes) + wasm_error(c, wasm_loc(0, 0), "wasm: bad function type index"); + f = wasm_add_func(c, out); + f->nparams = types[typeidx].nparams; + memcpy(f->params, types[typeidx].params, + sizeof(f->params[0]) * f->nparams); + f->nresults = types[typeidx].nresults; + memcpy(f->results, types[typeidx].results, + sizeof(f->results[0]) * f->nresults); + nfunc_types++; + } + } else if (id == 7) { + uint32_t i, count = bin_uleb(&r); + for (i = 0; i < count; ++i) { + uint32_t n = bin_uleb(&r); + const uint8_t *name; + uint8_t kind; + uint32_t idx; + bin_need(&r, n); + name = r.data + r.pos; + r.pos += n; + kind = bin_u8(&r); + idx = bin_uleb(&r); + if (kind == 0 && idx < out->nfuncs) { + WasmFunc *f = &out->funcs[idx]; + if (f->export_name) + out->heap->free(out->heap, f->export_name, + strlen(f->export_name) + 1u); + f->export_name = wasm_strdup(out->heap, (const char *)name, n); + if (!f->export_name) + wasm_error(c, wasm_loc(0, 0), "wasm: out of memory"); + } + } + } else if (id == 10) { + uint32_t i, count = bin_uleb(&r); + if (count != nfunc_types) + wasm_error(c, wasm_loc(0, 0), "wasm: function/code count mismatch"); + for (i = 0; i < count; ++i) { + uint32_t body_size = bin_uleb(&r); + size_t body_end; + uint32_t local_groups, j; + WasmFunc *f = &out->funcs[i]; + bin_need(&r, body_size); + body_end = r.pos + body_size; + local_groups = bin_uleb(&r); + for (j = 0; j < local_groups; ++j) { + uint32_t k, nlocals = bin_uleb(&r); + WasmValType vt = (WasmValType)bin_u8(&r); + if (nlocals > 32u || f->nlocals > 32u - nlocals) + wasm_error(c, wasm_loc(0, 0), "wasm: too many locals"); + for (k = 0; k < nlocals; ++k) + f->locals[f->nlocals++] = vt; + } + while (r.pos < body_end) { + uint8_t op = bin_u8(&r); + if (op == 0x0bu) + break; + switch (op) { + case 0x0f: + wasm_func_add_insn(c, out, f, WASM_INSN_RETURN, 
0); + break; + case 0x1a: + wasm_func_add_insn(c, out, f, WASM_INSN_DROP, 0); + break; + case 0x10: + wasm_func_add_insn(c, out, f, WASM_INSN_CALL, bin_uleb(&r)); + break; + case 0x20: + wasm_func_add_insn(c, out, f, WASM_INSN_LOCAL_GET, bin_uleb(&r)); + break; + case 0x21: + wasm_func_add_insn(c, out, f, WASM_INSN_LOCAL_SET, bin_uleb(&r)); + break; + case 0x22: + wasm_func_add_insn(c, out, f, WASM_INSN_LOCAL_TEE, bin_uleb(&r)); + break; + case 0x41: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_CONST, + bin_sleb(&r, 32)); + break; + case 0x42: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_CONST, + bin_sleb(&r, 64)); + break; + case 0x45: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_EQZ, 0); + break; + case 0x46: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_EQ, 0); + break; + case 0x47: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_NE, 0); + break; + case 0x48: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_LT_S, 0); + break; + case 0x49: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_LT_U, 0); + break; + case 0x4a: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_GT_S, 0); + break; + case 0x4b: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_GT_U, 0); + break; + case 0x4c: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_LE_S, 0); + break; + case 0x4d: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_LE_U, 0); + break; + case 0x4e: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_GE_S, 0); + break; + case 0x4f: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_GE_U, 0); + break; + case 0x50: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_EQZ, 0); + break; + case 0x51: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_EQ, 0); + break; + case 0x52: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_NE, 0); + break; + case 0x53: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_LT_S, 0); + break; + case 0x54: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_LT_U, 0); + break; + case 0x55: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_GT_S, 0); + break; + case 0x56: + wasm_func_add_insn(c, out, f, 
WASM_INSN_I64_GT_U, 0); + break; + case 0x57: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_LE_S, 0); + break; + case 0x58: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_LE_U, 0); + break; + case 0x59: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_GE_S, 0); + break; + case 0x5a: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_GE_U, 0); + break; + case 0x6a: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_ADD, 0); + break; + case 0x6b: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_SUB, 0); + break; + case 0x6c: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_MUL, 0); + break; + case 0x6d: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_DIV_S, 0); + break; + case 0x6e: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_DIV_U, 0); + break; + case 0x6f: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_REM_S, 0); + break; + case 0x70: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_REM_U, 0); + break; + case 0x71: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_AND, 0); + break; + case 0x72: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_OR, 0); + break; + case 0x73: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_XOR, 0); + break; + case 0x74: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_SHL, 0); + break; + case 0x75: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_SHR_S, 0); + break; + case 0x76: + wasm_func_add_insn(c, out, f, WASM_INSN_I32_SHR_U, 0); + break; + case 0x7c: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_ADD, 0); + break; + case 0x7d: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_SUB, 0); + break; + case 0x7e: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_MUL, 0); + break; + case 0x83: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_AND, 0); + break; + case 0x84: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_OR, 0); + break; + case 0x85: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_XOR, 0); + break; + case 0x86: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_SHL, 0); + break; + case 0x87: + wasm_func_add_insn(c, out, f, WASM_INSN_I64_SHR_S, 0); + break; + case 0x88: + 
wasm_func_add_insn(c, out, f, WASM_INSN_I64_SHR_U, 0); + break; + default: + wasm_error(c, wasm_loc(0, 0), "wasm: unsupported opcode 0x%02x", + op); + } + } + r.pos = body_end; + } + } else { + r.pos = end; + } + if (r.pos != end) + wasm_error(c, wasm_loc(0, 0), "wasm: malformed section length"); + } +} + +static int wasm_is_binary(const CfreeBytesInput *input) { + return input->len >= 4u && input->data[0] == 0x00 && input->data[1] == 0x61 && + input->data[2] == 0x73 && input->data[3] == 0x6d; +} + +static CfreeCgTypeId wasm_cg_type(CfreeCompiler *c, CfreeCgBuiltinTypes b, + WasmValType vt) { + switch (vt) { + case WASM_VAL_I32: + return b.id[CFREE_CG_BUILTIN_I32]; + case WASM_VAL_I64: + return b.id[CFREE_CG_BUILTIN_I64]; + case WASM_VAL_F32: + return b.id[CFREE_CG_BUILTIN_F32]; + case WASM_VAL_F64: + return b.id[CFREE_CG_BUILTIN_F64]; + } + wasm_error(c, wasm_loc(0, 0), "wasm: unsupported value type"); +} + +static WasmValType wasm_func_local_type(const WasmFunc *f, uint32_t index) { + if (index < f->nparams) + return f->params[index]; + return f->locals[index - f->nparams]; +} + +static CfreeCgMemAccess wasm_cg_mem(CfreeCompiler *c, CfreeCgBuiltinTypes b, + WasmValType vt) { + CfreeCgMemAccess mem; + memset(&mem, 0, sizeof mem); + mem.type = wasm_cg_type(c, b, vt); + return mem; +} + +static int wasm_int_cmp_op(uint8_t kind, CfreeCgIntCmpOp *out) { + switch (kind) { + case WASM_INSN_I32_EQ: + case WASM_INSN_I64_EQ: + *out = CFREE_CG_INT_EQ; + return 1; + case WASM_INSN_I32_NE: + case WASM_INSN_I64_NE: + *out = CFREE_CG_INT_NE; + return 1; + case WASM_INSN_I32_LT_S: + case WASM_INSN_I64_LT_S: + *out = CFREE_CG_INT_LT_S; + return 1; + case WASM_INSN_I32_LT_U: + case WASM_INSN_I64_LT_U: + *out = CFREE_CG_INT_LT_U; + return 1; + case WASM_INSN_I32_GT_S: + case WASM_INSN_I64_GT_S: + *out = CFREE_CG_INT_GT_S; + return 1; + case WASM_INSN_I32_GT_U: + case WASM_INSN_I64_GT_U: + *out = CFREE_CG_INT_GT_U; + return 1; + case WASM_INSN_I32_LE_S: + case WASM_INSN_I64_LE_S: + 
*out = CFREE_CG_INT_LE_S; + return 1; + case WASM_INSN_I32_LE_U: + case WASM_INSN_I64_LE_U: + *out = CFREE_CG_INT_LE_U; + return 1; + case WASM_INSN_I32_GE_S: + case WASM_INSN_I64_GE_S: + *out = CFREE_CG_INT_GE_S; + return 1; + case WASM_INSN_I32_GE_U: + case WASM_INSN_I64_GE_U: + *out = CFREE_CG_INT_GE_U; + return 1; + default: + return 0; + } +} + +static void wasm_validate(const WasmModule *m, CfreeCompiler *c) { + uint32_t i, j; + for (i = 0; i < m->nfuncs; ++i) { + const WasmFunc *f = &m->funcs[i]; + int32_t depth = 0; + if (f->nresults > 1u) + wasm_error(c, wasm_loc(0, 0), "wasm: multi-result unsupported"); + for (j = 0; j < f->nparams; ++j) { + if (f->params[j] != WASM_VAL_I32 && f->params[j] != WASM_VAL_I64) + wasm_error(c, wasm_loc(0, 0), + "wasm: only integer parameters are supported"); + } + for (j = 0; j < f->nlocals; ++j) { + if (f->locals[j] != WASM_VAL_I32 && f->locals[j] != WASM_VAL_I64) + wasm_error(c, wasm_loc(0, 0), + "wasm: only integer locals are supported"); + } + for (j = 0; j < f->ninsns; ++j) { + WasmInsn in = f->insns[j]; + switch (in.kind) { + case WASM_INSN_I32_CONST: + case WASM_INSN_I64_CONST: + case WASM_INSN_LOCAL_GET: + if (in.kind == WASM_INSN_LOCAL_GET && + (uint64_t)in.imm >= (uint64_t)f->nparams + f->nlocals) + wasm_error(c, wasm_loc(0, 0), "wasm: local index out of range"); + depth++; + break; + case WASM_INSN_LOCAL_SET: + if (in.imm < 0 || (uint64_t)in.imm >= + (uint64_t)f->nparams + f->nlocals) + wasm_error(c, wasm_loc(0, 0), "wasm: local index out of range"); + if (depth < 1) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + depth--; + break; + case WASM_INSN_LOCAL_TEE: + if (in.imm < 0 || (uint64_t)in.imm >= + (uint64_t)f->nparams + f->nlocals) + wasm_error(c, wasm_loc(0, 0), "wasm: local index out of range"); + if (depth < 1) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + break; + case WASM_INSN_CALL: + if (in.imm < 0 || (uint64_t)in.imm >= m->nfuncs) + wasm_error(c, wasm_loc(0, 0), 
"wasm: call index out of range"); + if (depth < (int32_t)m->funcs[in.imm].nparams) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + depth -= (int32_t)m->funcs[in.imm].nparams; + if (m->funcs[in.imm].nresults) + depth++; + break; + case WASM_INSN_RETURN: + if (depth < (f->nresults ? 1 : 0)) + wasm_error(c, wasm_loc(0, 0), "wasm: return stack underflow"); + break; + case WASM_INSN_DROP: + if (depth < 1) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + depth--; + break; + case WASM_INSN_I32_EQZ: + case WASM_INSN_I64_EQZ: + if (depth < 1) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + break; + default: + if (depth < 2) + wasm_error(c, wasm_loc(0, 0), "wasm: operand stack underflow"); + depth--; + break; + } + } + if (f->nresults && depth < 1) + wasm_error(c, wasm_loc(0, 0), "wasm: missing function result"); + } +} + +static void wasm_emit_cg(CfreeCompiler *c, const CfreeCompileOptions *opts, + CfreeObjBuilder *out, const WasmModule *m) { + CfreeCg *cg = cfree_cg_new(c, out, opts); + CfreeCgBuiltinTypes b = cfree_cg_builtin_types(c); + CfreeCgSym syms[64]; + CfreeCgTypeId func_types[64]; + CfreeCgLocal locals[48]; + uint32_t i, j; + if (!cg) + wasm_error(c, wasm_loc(0, 0), "wasm: failed to initialize codegen"); + if (m->nfuncs > 64u) + wasm_error(c, wasm_loc(0, 0), "wasm: too many functions"); + memset(syms, 0, sizeof syms); + memset(func_types, 0, sizeof func_types); + for (i = 0; i < m->nfuncs; ++i) { + const WasmFunc *f = &m->funcs[i]; + CfreeCgFuncParam cg_params[16]; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + char local_name[40]; + CfreeSym source_name; + for (j = 0; j < f->nparams; ++j) { + memset(&cg_params[j], 0, sizeof cg_params[j]); + cg_params[j].type = wasm_cg_type(c, b, f->params[j]); + } + memset(&sig, 0, sizeof sig); + sig.ret = f->nresults ? 
wasm_cg_type(c, b, f->results[0]) + : b.id[CFREE_CG_BUILTIN_VOID]; + sig.params = cg_params; + sig.nparams = f->nparams; + sig.call_conv = CFREE_CG_CC_TARGET_C; + func_types[i] = cfree_cg_type_func(c, sig); + if (!func_types[i]) + wasm_error(c, wasm_loc(0, 0), "wasm: failed to create function type"); + if (f->export_name) { + source_name = cfree_sym_intern(c, f->export_name); + } else { + size_t pos = 0; + const char prefix[] = "__cfree_wasm_func_"; + uint32_t n = i, div = 1000000000u; + memcpy(local_name, prefix, sizeof(prefix) - 1u); + pos = sizeof(prefix) - 1u; + while (div > 1u && n / div == 0) + div /= 10u; + while (div) { + local_name[pos++] = (char)('0' + (n / div) % 10u); + div /= 10u; + } + local_name[pos] = '\0'; + source_name = cfree_sym_intern(c, local_name); + } + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_cg_c_linkage_name(c, source_name); + decl.display_name = source_name; + decl.type = func_types[i]; + decl.sym.bind = f->export_name ? 
CFREE_SB_GLOBAL : CFREE_SB_LOCAL; + syms[i] = cfree_cg_decl(cg, decl); + if (!syms[i]) + wasm_error(c, wasm_loc(0, 0), "wasm: failed to declare function"); + } + for (i = 0; i < m->nfuncs; ++i) { + const WasmFunc *f = &m->funcs[i]; + cfree_cg_func_begin(cg, syms[i]); + for (j = 0; j < f->nparams; ++j) { + CfreeCgLocalAttrs attrs; + memset(&attrs, 0, sizeof attrs); + locals[j] = cfree_cg_param(cg, j, wasm_cg_type(c, b, f->params[j]), + attrs); + } + for (j = 0; j < f->nlocals; ++j) { + CfreeCgLocalAttrs attrs; + memset(&attrs, 0, sizeof attrs); + attrs.flags = CFREE_CG_LOCAL_COMPILER_TEMP; + locals[f->nparams + j] = + cfree_cg_local(cg, wasm_cg_type(c, b, f->locals[j]), attrs); + } + for (j = 0; j < f->ninsns; ++j) { + WasmInsn in = f->insns[j]; + switch (in.kind) { + case WASM_INSN_I32_CONST: + cfree_cg_push_int(cg, (uint64_t)(uint32_t)in.imm, + b.id[CFREE_CG_BUILTIN_I32]); + break; + case WASM_INSN_I64_CONST: + cfree_cg_push_int(cg, (uint64_t)in.imm, b.id[CFREE_CG_BUILTIN_I64]); + break; + case WASM_INSN_LOCAL_GET: { + uint32_t index = (uint32_t)in.imm; + cfree_cg_push_local(cg, locals[index]); + cfree_cg_load(cg, + wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + break; + } + case WASM_INSN_LOCAL_SET: { + uint32_t index = (uint32_t)in.imm; + cfree_cg_push_local(cg, locals[index]); + cfree_cg_swap(cg); + cfree_cg_store(cg, + wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + break; + } + case WASM_INSN_LOCAL_TEE: { + uint32_t index = (uint32_t)in.imm; + cfree_cg_dup(cg); + cfree_cg_push_local(cg, locals[index]); + cfree_cg_swap(cg); + cfree_cg_store(cg, + wasm_cg_mem(c, b, wasm_func_local_type(f, index))); + break; + } + case WASM_INSN_CALL: + cfree_cg_call_symbol(cg, syms[in.imm], m->funcs[in.imm].nparams, + (CfreeCgCallAttrs){0}); + break; + case WASM_INSN_RETURN: + if (f->nresults) + cfree_cg_ret(cg); + else + cfree_cg_ret_void(cg); + break; + case WASM_INSN_DROP: + cfree_cg_drop(cg); + break; + case WASM_INSN_I32_ADD: + case WASM_INSN_I64_ADD: + 
cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + break; + case WASM_INSN_I32_SUB: + case WASM_INSN_I64_SUB: + cfree_cg_int_binop(cg, CFREE_CG_INT_SUB, 0); + break; + case WASM_INSN_I32_MUL: + case WASM_INSN_I64_MUL: + cfree_cg_int_binop(cg, CFREE_CG_INT_MUL, 0); + break; + case WASM_INSN_I32_DIV_S: + cfree_cg_int_binop(cg, CFREE_CG_INT_SDIV, 0); + break; + case WASM_INSN_I32_DIV_U: + cfree_cg_int_binop(cg, CFREE_CG_INT_UDIV, 0); + break; + case WASM_INSN_I32_REM_S: + cfree_cg_int_binop(cg, CFREE_CG_INT_SREM, 0); + break; + case WASM_INSN_I32_REM_U: + cfree_cg_int_binop(cg, CFREE_CG_INT_UREM, 0); + break; + case WASM_INSN_I32_AND: + case WASM_INSN_I64_AND: + cfree_cg_int_binop(cg, CFREE_CG_INT_AND, 0); + break; + case WASM_INSN_I32_OR: + case WASM_INSN_I64_OR: + cfree_cg_int_binop(cg, CFREE_CG_INT_OR, 0); + break; + case WASM_INSN_I32_XOR: + case WASM_INSN_I64_XOR: + cfree_cg_int_binop(cg, CFREE_CG_INT_XOR, 0); + break; + case WASM_INSN_I32_SHL: + case WASM_INSN_I64_SHL: + cfree_cg_int_binop(cg, CFREE_CG_INT_SHL, 0); + break; + case WASM_INSN_I32_SHR_S: + case WASM_INSN_I64_SHR_S: + cfree_cg_int_binop(cg, CFREE_CG_INT_ASHR, 0); + break; + case WASM_INSN_I32_SHR_U: + case WASM_INSN_I64_SHR_U: + cfree_cg_int_binop(cg, CFREE_CG_INT_LSHR, 0); + break; + case WASM_INSN_I32_EQZ: + cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I32]); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I32]); + break; + case WASM_INSN_I64_EQZ: + cfree_cg_push_int(cg, 0, b.id[CFREE_CG_BUILTIN_I64]); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I32]); + break; + default: { + CfreeCgIntCmpOp cmp; + if (!wasm_int_cmp_op(in.kind, &cmp)) + wasm_error(c, wasm_loc(0, 0), "wasm: unsupported instruction"); + cfree_cg_int_cmp(cg, cmp); + cfree_cg_zext(cg, b.id[CFREE_CG_BUILTIN_I32]); + break; + } + } + } + if (f->nresults) + cfree_cg_ret(cg); + else + cfree_cg_ret_void(cg); + cfree_cg_func_end(cg); + } + cfree_cg_free(cg); +} + 
+static void write_byte(CfreeWriter *w, uint8_t b) { w->write(w, &b, 1); } + +static void write_uleb(CfreeWriter *w, uint64_t v) { + do { + uint8_t b = (uint8_t)(v & 0x7fu); + v >>= 7u; + if (v) + b |= 0x80u; + write_byte(w, b); + } while (v); +} + +static void write_sleb(CfreeWriter *w, int64_t v) { + int more = 1; + while (more) { + uint8_t b = (uint8_t)(v & 0x7f); + int sign = b & 0x40; + v >>= 7; + if ((v == 0 && !sign) || (v == -1 && sign)) + more = 0; + else + b |= 0x80u; + write_byte(w, b); + } +} + +static void write_name(CfreeWriter *w, const char *s) { + size_t n = strlen(s); + write_uleb(w, n); + w->write(w, s, n); +} + +static void encode_section(CfreeHeap *h, CfreeWriter *out, uint8_t id, + void (*fn)(CfreeWriter *, const WasmModule *), + const WasmModule *m) { + CfreeWriter *tmp = cfree_writer_mem(h); + size_t len; + const uint8_t *bytes; + if (!tmp) + return; + fn(tmp, m); + bytes = cfree_writer_mem_bytes(tmp, &len); + write_byte(out, id); + write_uleb(out, len); + out->write(out, bytes, len); + cfree_writer_close(tmp); +} + +static void enc_type(CfreeWriter *w, const WasmModule *m) { + uint32_t i, j; + write_uleb(w, m->nfuncs); + for (i = 0; i < m->nfuncs; ++i) { + const WasmFunc *f = &m->funcs[i]; + write_byte(w, 0x60); + write_uleb(w, f->nparams); + for (j = 0; j < f->nparams; ++j) + write_byte(w, (uint8_t)f->params[j]); + write_uleb(w, f->nresults); + for (j = 0; j < f->nresults; ++j) + write_byte(w, (uint8_t)f->results[j]); + } +} + +static void enc_func(CfreeWriter *w, const WasmModule *m) { + uint32_t i; + write_uleb(w, m->nfuncs); + for (i = 0; i < m->nfuncs; ++i) + write_uleb(w, i); +} + +static void enc_export(CfreeWriter *w, const WasmModule *m) { + uint32_t i, n = 0; + for (i = 0; i < m->nfuncs; ++i) + if (m->funcs[i].export_name) + n++; + write_uleb(w, n); + for (i = 0; i < m->nfuncs; ++i) { + if (!m->funcs[i].export_name) + continue; + write_name(w, m->funcs[i].export_name); + write_byte(w, 0); + write_uleb(w, i); + } +} + +static 
uint8_t wasm_opcode(uint8_t kind) { + switch (kind) { + case WASM_INSN_I32_CONST: + return 0x41; + case WASM_INSN_I64_CONST: + return 0x42; + case WASM_INSN_LOCAL_GET: + return 0x20; + case WASM_INSN_LOCAL_SET: + return 0x21; + case WASM_INSN_LOCAL_TEE: + return 0x22; + case WASM_INSN_CALL: + return 0x10; + case WASM_INSN_RETURN: + return 0x0f; + case WASM_INSN_DROP: + return 0x1a; + case WASM_INSN_I32_EQZ: + return 0x45; + case WASM_INSN_I32_EQ: + return 0x46; + case WASM_INSN_I32_NE: + return 0x47; + case WASM_INSN_I32_LT_S: + return 0x48; + case WASM_INSN_I32_LT_U: + return 0x49; + case WASM_INSN_I32_GT_S: + return 0x4a; + case WASM_INSN_I32_GT_U: + return 0x4b; + case WASM_INSN_I32_LE_S: + return 0x4c; + case WASM_INSN_I32_LE_U: + return 0x4d; + case WASM_INSN_I32_GE_S: + return 0x4e; + case WASM_INSN_I32_GE_U: + return 0x4f; + case WASM_INSN_I64_EQZ: + return 0x50; + case WASM_INSN_I64_EQ: + return 0x51; + case WASM_INSN_I64_NE: + return 0x52; + case WASM_INSN_I64_LT_S: + return 0x53; + case WASM_INSN_I64_LT_U: + return 0x54; + case WASM_INSN_I64_GT_S: + return 0x55; + case WASM_INSN_I64_GT_U: + return 0x56; + case WASM_INSN_I64_LE_S: + return 0x57; + case WASM_INSN_I64_LE_U: + return 0x58; + case WASM_INSN_I64_GE_S: + return 0x59; + case WASM_INSN_I64_GE_U: + return 0x5a; + case WASM_INSN_I32_ADD: + return 0x6a; + case WASM_INSN_I32_SUB: + return 0x6b; + case WASM_INSN_I32_MUL: + return 0x6c; + case WASM_INSN_I32_DIV_S: + return 0x6d; + case WASM_INSN_I32_DIV_U: + return 0x6e; + case WASM_INSN_I32_REM_S: + return 0x6f; + case WASM_INSN_I32_REM_U: + return 0x70; + case WASM_INSN_I32_AND: + return 0x71; + case WASM_INSN_I32_OR: + return 0x72; + case WASM_INSN_I32_XOR: + return 0x73; + case WASM_INSN_I32_SHL: + return 0x74; + case WASM_INSN_I32_SHR_S: + return 0x75; + case WASM_INSN_I32_SHR_U: + return 0x76; + case WASM_INSN_I64_ADD: + return 0x7c; + case WASM_INSN_I64_SUB: + return 0x7d; + case WASM_INSN_I64_MUL: + return 0x7e; + case WASM_INSN_I64_AND: + 
return 0x83; + case WASM_INSN_I64_OR: + return 0x84; + case WASM_INSN_I64_XOR: + return 0x85; + case WASM_INSN_I64_SHL: + return 0x86; + case WASM_INSN_I64_SHR_S: + return 0x87; + case WASM_INSN_I64_SHR_U: + return 0x88; + } + return 0; +} + +static void enc_code(CfreeWriter *w, const WasmModule *m) { + uint32_t i, j; + write_uleb(w, m->nfuncs); + for (i = 0; i < m->nfuncs; ++i) { + CfreeWriter *body = cfree_writer_mem(m->heap); + size_t len; + const uint8_t *bytes; + if (m->funcs[i].nlocals) { + uint32_t group_count = 0; + WasmValType prev = 0; + for (j = 0; j < m->funcs[i].nlocals; ++j) { + if (j == 0 || m->funcs[i].locals[j] != prev) { + group_count++; + prev = m->funcs[i].locals[j]; + } + } + write_uleb(body, group_count); + for (j = 0; j < m->funcs[i].nlocals;) { + uint32_t k = j + 1u; + while (k < m->funcs[i].nlocals && + m->funcs[i].locals[k] == m->funcs[i].locals[j]) + k++; + write_uleb(body, k - j); + write_byte(body, (uint8_t)m->funcs[i].locals[j]); + j = k; + } + } else { + write_uleb(body, 0); + } + for (j = 0; j < m->funcs[i].ninsns; ++j) { + WasmInsn in = m->funcs[i].insns[j]; + uint8_t op = wasm_opcode(in.kind); + write_byte(body, op); + if (in.kind == WASM_INSN_I32_CONST || in.kind == WASM_INSN_I64_CONST) + write_sleb(body, in.imm); + else if (in.kind == WASM_INSN_LOCAL_GET || + in.kind == WASM_INSN_LOCAL_SET || + in.kind == WASM_INSN_LOCAL_TEE || + in.kind == WASM_INSN_CALL) + write_uleb(body, (uint64_t)in.imm); + } + write_byte(body, 0x0b); + bytes = cfree_writer_mem_bytes(body, &len); + write_uleb(w, len); + w->write(w, bytes, len); + cfree_writer_close(body); + } +} + +static void wasm_encode(CfreeCompiler *c, const WasmModule *m, + CfreeWriter *out) { + static const uint8_t magic[] = {0x00, 0x61, 0x73, 0x6d, + 0x01, 0x00, 0x00, 0x00}; + CfreeHeap *h = cfree_compiler_heap(c); + out->write(out, magic, sizeof magic); + encode_section(h, out, 1, enc_type, m); + encode_section(h, out, 3, enc_func, m); + encode_section(h, out, 7, enc_export, m); + 
encode_section(h, out, 10, enc_code, m); +} + +static void wasm_parse_any(CfreeCompiler *c, const CfreeBytesInput *input, + WasmModule *m) { + if (wasm_is_binary(input)) + wasm_decode_binary(c, input, m); + else + wat_parse_module(c, input, m); + wasm_validate(m, c); +} + +int cfree_wasm_compile(CfreeCompiler *c, const CfreeCompileOptions *opts, + const CfreeBytesInput *input, CfreeObjBuilder *out) { + WasmModule m; + wasm_module_init(&m, cfree_compiler_heap(c)); + wasm_parse_any(c, input, &m); + wasm_emit_cg(c, opts, out, &m); + wasm_module_free(&m); + return 0; +} + +void cfree_wasm_register(CfreeCompiler *c) { + (void)cfree_register_frontend(c, CFREE_LANG_WASM, cfree_wasm_compile); +} + +int cfree_wasm_wat_to_wasm(CfreeCompiler *c, const CfreeBytesInput *input, + CfreeWriter *out) { + WasmModule m; + wasm_module_init(&m, cfree_compiler_heap(c)); + wat_parse_module(c, input, &m); + wasm_validate(&m, c); + wasm_encode(c, &m, out); + wasm_module_free(&m); + return 0; +} diff --git a/lang/wasm/wasm.h b/lang/wasm/wasm.h @@ -0,0 +1,15 @@ +#ifndef CFREE_LANG_WASM_H +#define CFREE_LANG_WASM_H + +#include <cfree.h> + +int cfree_wasm_compile(CfreeCompiler*, const CfreeCompileOptions*, + const CfreeBytesInput* input, CfreeObjBuilder* out); +void cfree_wasm_register(CfreeCompiler*); + +/* Internal test/developer helper: parse accepted WAT and write equivalent + * binary Wasm. This is intentionally not part of the installed public API. 
*/ +int cfree_wasm_wat_to_wasm(CfreeCompiler*, const CfreeBytesInput* input, + CfreeWriter* out); + +#endif diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -116,6 +116,11 @@ CfreeLanguage cfree_language_for_path(const char* path) { if (ext[0] == 's' && ext[1] == '\0') return CFREE_LANG_ASM; if (ext[0] == 't' && ext[1] == 'o' && ext[2] == 'y' && ext[3] == '\0') return CFREE_LANG_TOY; + if (ext[0] == 'w' && ext[1] == 'a' && ext[2] == 't' && ext[3] == '\0') + return CFREE_LANG_WASM; + if (ext[0] == 'w' && ext[1] == 'a' && ext[2] == 's' && + ext[3] == 'm' && ext[4] == '\0') + return CFREE_LANG_WASM; return CFREE_LANG_C; } } diff --git a/test/test.mk b/test/test.mk @@ -25,7 +25,7 @@ # asm_parse / cfree_disasm_iter_* are still stubs; the harness builds # and runs end-to-end so the wiring stays exercised. See doc/ASM.md. -.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 +.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-lib-deps @@ -137,6 +137,7 @@ ROUNDTRIP_BIN_MACHO = build/test/cfree-roundtrip-macho LINK_EXE_RUNNER = build/test/link-exe-runner JIT_RUNNER = build/test/jit-runner PARSE_RUNNER = build/test/parse-runner +WASM_TOOL = build/test/wasm-tool # cfree-roundtrip needs `-Isrc` for the internal obj.h surface it inspects. 
$(ROUNDTRIP_BIN): test/elf/cfree-roundtrip.c $(LIB_AR) @@ -161,6 +162,10 @@ $(PARSE_RUNNER): test/parse/harness/parse_runner.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(HARNESS_CFLAGS) test/parse/harness/parse_runner.c $(LIB_AR) -o $@ +$(WASM_TOOL): test/wasm/harness/wasm_tool.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -I. test/wasm/harness/wasm_tool.c $(LIB_AR) -o $@ + test-elf: lib bin-soft $(ROUNDTRIP_BIN) bash test/elf/run.sh @@ -190,6 +195,9 @@ test-parse-err: lib $(PARSE_RUNNER) test-asm: lib bash test/asm/run.sh +test-wasm-front: bin $(WASM_TOOL) $(LINK_EXE_RUNNER) $(JIT_RUNNER) + bash test/wasm/run.sh + # test-smoke-x64: phase-1 sanity check for the multi-arch bring-up. Builds a # tiny freestanding x86_64 ELF with clang --target=x86_64-linux-gnu and # runs it through test/lib/exec_target.sh's podman/qemu pipeline, diff --git a/test/wasm/cases/call_chain.expect b/test/wasm/cases/call_chain.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/call_chain.wat b/test/wasm/cases/call_chain.wat @@ -0,0 +1,9 @@ +(module + (func (result i32) + i32.const 7) + (func (result i32) + call 0 + i32.const 6 + i32.mul) + (func (export "test_main") (result i32) + call 1)) diff --git a/test/wasm/cases/folded.expect b/test/wasm/cases/folded.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/folded.wat b/test/wasm/cases/folded.wat @@ -0,0 +1,5 @@ +(module + (func (export "test_main") (result i32) + (local i32) + (local.set 0 (i32.const 40)) + (i32.add (local.get 0) (i32.const 2)))) diff --git a/test/wasm/cases/int_ops.expect b/test/wasm/cases/int_ops.expect @@ -0,0 +1 @@ +2 diff --git a/test/wasm/cases/int_ops.wat b/test/wasm/cases/int_ops.wat @@ -0,0 +1,15 @@ +(module + (func (export "test_main") (result i32) + (local i32) + i32.const 5 + i32.const 3 + i32.shl + i32.const 2 + i32.or + local.tee 0 + i32.const 42 + i32.eq + local.get 0 + i32.const 42 + i32.ge_u + i32.add)) diff --git a/test/wasm/cases/locals_params.expect b/test/wasm/cases/locals_params.expect @@ 
-0,0 +1 @@ +42 diff --git a/test/wasm/cases/locals_params.wat b/test/wasm/cases/locals_params.wat @@ -0,0 +1,14 @@ +(module + (func (param i32) (param i32) (result i32) + (local i32) + local.get 0 + local.get 1 + i32.add + local.set 2 + local.get 2 + i32.const 2 + i32.mul) + (func (export "test_main") (result i32) + i32.const 19 + i32.const 2 + call 0)) diff --git a/test/wasm/cases/named_refs.expect b/test/wasm/cases/named_refs.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/named_refs.wat b/test/wasm/cases/named_refs.wat @@ -0,0 +1,7 @@ +(module + (func $add2 (param $x i32) (result i32) + (local $tmp i32) + (local.set $tmp (i32.add (local.get $x) (i32.const 2))) + (local.get $tmp)) + (func (export "test_main") (result i32) + (call $add2 (i32.const 40)))) diff --git a/test/wasm/cases/return42.expect b/test/wasm/cases/return42.expect @@ -0,0 +1 @@ +42 diff --git a/test/wasm/cases/return42.wat b/test/wasm/cases/return42.wat @@ -0,0 +1,5 @@ +(module + (func (export "test_main") (result i32) + i32.const 40 + i32.const 2 + i32.add)) diff --git a/test/wasm/harness/wasm_tool.c b/test/wasm/harness/wasm_tool.c @@ -0,0 +1,137 @@ +#include <cfree.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "lang/wasm/wasm.h" +#include "lib/cfree_test_target.h" + +static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return malloc(n); +} +static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void h_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; + +static void diag_fn(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: 
", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_fn, NULL, 0, 0}; + +static int slurp(const char* path, uint8_t** out, size_t* len) { + int fd = open(path, O_RDONLY); + struct stat sb; + uint8_t* buf; + size_t got = 0; + if (fd < 0) return 1; + if (fstat(fd, &sb) != 0) { + close(fd); + return 1; + } + *len = (size_t)sb.st_size; + buf = (uint8_t*)malloc(*len ? *len : 1u); + if (!buf) { + close(fd); + return 1; + } + while (got < *len) { + ssize_t n = read(fd, buf + got, *len - got); + if (n <= 0) { + free(buf); + close(fd); + return 1; + } + got += (size_t)n; + } + close(fd); + *out = buf; + return 0; +} + +static int write_file(const char* path, const uint8_t* data, size_t len) { + int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + size_t off = 0; + if (fd < 0) return 1; + while (off < len) { + ssize_t n = write(fd, data + off, len - off); + if (n <= 0) { + close(fd); + return 1; + } + off += (size_t)n; + } + close(fd); + return 0; +} + +int main(int argc, char** argv) { + if (argc != 4 || strcmp(argv[1], "--wat2wasm") != 0) { + fprintf(stderr, "usage: wasm-tool --wat2wasm IN.wat OUT.wasm\n"); + return 2; + } + + uint8_t* src = NULL; + size_t src_len = 0; + if (slurp(argv[2], &src, &src_len)) { + fprintf(stderr, "wasm-tool: cannot read %s\n", argv[2]); + return 2; + } + + CfreeTarget target; + if (cfree_test_target_init(&target) != 0) { + free(src); + return 2; + } + CfreeEnv env; + memset(&env, 0, sizeof env); + env.heap = &g_heap; + env.diag = &g_diag; + CfreeCompiler* c = cfree_compiler_new(target, &env); + if (!c) { + free(src); + return 2; + } + + CfreeWriter* w = cfree_writer_mem(&g_heap); + if (!w) { + cfree_compiler_free(c); + free(src); + return 2; + } + CfreeBytesInput in; + memset(&in, 0, sizeof in); + in.name = argv[2]; + in.data = src; + in.len = src_len; + in.lang = CFREE_LANG_WASM; + int rc = cfree_wasm_wat_to_wasm(c, &in, w); + size_t out_len = 0; + const uint8_t* out = 
#!/usr/bin/env bash
#
# test-wasm-front driver (test/wasm/run.sh).
#
# For every test/wasm/cases/NAME.wat with a matching NAME.expect (the expected
# exit status of the exported `test_main`), runs these checks:
#   W  wasm-tool --wat2wasm converts the .wat to a .wasm
#   D  `cfree run -e test_main` on the .wat and on the .wasm
#   O  `cfree cc -c` on the .wat and on the .wasm
#   J  jit-runner on both objects (only when the host arch matches the target)
#   E  link the .wat object with start.o and execute it (ELF targets only)
#
# Environment:
#   CFREE            cfree binary to test (default: $ROOT/build/cfree)
#   CFREE_TEST_ARCH  target architecture (default: aa64)
#   CFREE_TEST_OBJ   object format, macho or elf (default: macho)
#
# Exit status is 0 when no check failed; skipped checks do not count as
# failures.
set -u

ROOT=$(cd "$(dirname "$0")/../.." && pwd)
BUILD_DIR="$ROOT/build/test/wasm"
CASES_DIR="$ROOT/test/wasm/cases"
CFREE_BIN="${CFREE:-$ROOT/build/cfree}"
WASM_TOOL="$ROOT/build/test/wasm-tool"
JIT_RUNNER="$ROOT/build/test/jit-runner"
LINK_EXE_RUNNER="$ROOT/build/test/link-exe-runner"
LINK_TEST_DIR="$ROOT/test/link"
TEST_ARCH="${CFREE_TEST_ARCH:-aa64}"
TEST_OBJ="${CFREE_TEST_OBJ:-macho}"

mkdir -p "$BUILD_DIR"

pass=0
fail=0
skip=0

# Result reporters: print one line and bump the matching counter.
note_pass() { printf '  PASS %s\n' "$1"; pass=$((pass + 1)); }
note_fail() { printf '  FAIL %s\n' "$1"; fail=$((fail + 1)); }
note_skip() { printf '  SKIP %s (%s)\n' "$1" "$2"; skip=$((skip + 1)); }

# Map the architecture aliases accepted by this script (and the spellings
# `uname -m` reports) to one canonical name; unknown names pass through.
canon_arch() {
  case "$1" in
    aa64|aarch64|arm64) printf 'aarch64\n' ;;
    x64|x86_64|amd64) printf 'x86_64\n' ;;
    rv64|riscv64) printf 'riscv64\n' ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# Succeeds when $1 is a decimal number in 0..255, i.e. a value a process exit
# status can actually take.
is_exit_code() {
  case "$1" in
    ''|*[!0-9]*) return 1 ;;
  esac
  [ "$1" -le 255 ] 2>/dev/null
}

# JIT checks execute target code in-process, so they only run when the host
# architecture is the target architecture. Only aarch64 and x86_64 are
# enabled, as before; every alias of those two is now recognised.
host_arch=$(uname -m)
host_matches=0
test_arch_canon=$(canon_arch "$TEST_ARCH")
if [ "$test_arch_canon" = "$(canon_arch "$host_arch")" ]; then
  case "$test_arch_canon" in
    aarch64|x86_64) host_matches=1 ;;
  esac
fi

# NOTE(review): macho (and any unknown object format) always selects
# aarch64-macos regardless of CFREE_TEST_ARCH - confirm that is intended.
case "$TEST_OBJ" in
  macho) target_triple="aarch64-macos" ;;
  elf)
    case "$TEST_ARCH" in
      aa64|aarch64|arm64) target_triple="aarch64-linux" ;;
      x64|x86_64|amd64) target_triple="x86_64-linux" ;;
      rv64|riscv64) target_triple="riscv64-linux" ;;
      *) target_triple="aarch64-linux" ;;
    esac
    ;;
  *) target_triple="aarch64-macos" ;;
esac

have_wasm_tool=0
if [ -x "$WASM_TOOL" ]; then
  have_wasm_tool=1
fi

have_jit_runner=0
if [ -x "$JIT_RUNNER" ] && [ "$host_matches" -eq 1 ]; then
  have_jit_runner=1
fi

have_link_runner=0
if [ -x "$LINK_EXE_RUNNER" ]; then
  have_link_runner=1
fi

# Execution-environment probes. They are not read again in this script;
# presumably test/lib/exec_target.sh consumes them - verify before removing.
have_qemu=0
have_podman=0
is_aarch64=0
QEMU_BIN="$(command -v qemu-aarch64-static 2>/dev/null || command -v qemu-aarch64 2>/dev/null || true)"
[ -n "$QEMU_BIN" ] && have_qemu=1
command -v podman >/dev/null 2>&1 && have_podman=1
arch_raw="$(uname -m 2>/dev/null || true)"
{ [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_aarch64=1
case "$TEST_ARCH" in
  aa64|aarch64|arm64) EXEC_ARCH="aarch64" ;;
  x64|x86_64|amd64) EXEC_ARCH="x64" ;;
  rv64|riscv64) EXEC_ARCH="rv64" ;;
  *) EXEC_ARCH="$TEST_ARCH" ;;
esac
EXEC_TARGET_MOUNT_ROOT="$BUILD_DIR"
if [ -r "$ROOT/test/lib/exec_target.sh" ]; then
  # shellcheck source=../lib/exec_target.sh
  source "$ROOT/test/lib/exec_target.sh"
else
  # Without the helper, E checks are skipped rather than failing with
  # "command not found".
  printf 'test-wasm-front: %s not found; E checks will be skipped\n' \
    "$ROOT/test/lib/exec_target.sh" >&2
  exec_target_supported() { return 1; }
fi

# start.o provides the entry point for linked executables; it is only built
# for ELF targets. A failed build just disables the E checks.
START_OBJ="$BUILD_DIR/start.o"
have_start_obj=0
if [ "$TEST_OBJ" = "elf" ]; then
  clang_target="$target_triple"
  if clang --target="$clang_target" -ffreestanding -fno-stack-protector \
      -fno-builtin -nostdlib -c "$LINK_TEST_DIR/harness/start.c" \
      -o "$START_OBJ" >"$BUILD_DIR/start.out" 2>"$BUILD_DIR/start.err"; then
    have_start_obj=1
  fi
fi

# run_expect_rc LABEL EXPECTED CMD...
# Runs CMD with stdout/stderr captured under $BUILD_DIR (slashes in LABEL
# become underscores) and records PASS when its exit status equals EXPECTED.
# Returns 0 on PASS, 1 on FAIL.
run_expect_rc() {
  local label=$1
  local expected=$2
  local log rc
  shift 2
  log="$BUILD_DIR/${label//\//_}"
  "$@" >"$log.out" 2>"$log.err"
  rc=$?
  if [ "$rc" -eq "$expected" ]; then
    note_pass "$label"
    return 0
  fi
  note_fail "$label expected $expected got $rc"
  return 1
}

# run_expect_zero LABEL CMD...
# Like run_expect_rc with EXPECTED=0, but the failure line carries only the
# label. Returns 0 on PASS, 1 on FAIL.
run_expect_zero() {
  local label=$1
  local log
  shift
  log="$BUILD_DIR/${label//\//_}"
  if "$@" >"$log.out" 2>"$log.err"; then
    note_pass "$label"
    return 0
  fi
  note_fail "$label"
  return 1
}

printf 'test-wasm-front target=%s obj=%s\n' "$target_triple" "$TEST_OBJ"

for wat in "$CASES_DIR"/*.wat; do
  [ -e "$wat" ] || continue
  name=$(basename "$wat" .wat)

  # A missing or malformed .expect is reported as one clear failure instead
  # of feeding a non-number to the numeric comparisons below.
  expect_file="$CASES_DIR/$name.expect"
  expected=
  if [ -r "$expect_file" ]; then
    expected=$(tr -d '[:space:]' < "$expect_file")
  fi
  if ! is_exit_code "$expected"; then
    note_fail "$name missing or invalid $name.expect"
    continue
  fi

  work="$BUILD_DIR/$name"
  mkdir -p "$work"
  wasm="$work/$name.wasm"
  wat_obj="$work/$name.wat.o"
  wasm_obj="$work/$name.wasm.o"
  exe="$work/$name.exe"

  # Drop artifacts from earlier runs so a step that fails now cannot be
  # followed by checks that pass against stale files.
  rm -f -- "$wasm" "$wat_obj" "$wasm_obj" "$exe"

  # Only the .wasm-based checks depend on wasm-tool; the .wat-based checks
  # always run.
  have_wasm=0
  if [ "$have_wasm_tool" -eq 1 ]; then
    if run_expect_zero "$name/W" "$WASM_TOOL" --wat2wasm "$wat" "$wasm"; then
      have_wasm=1
    fi
  else
    note_skip "$name/W" "no wasm-tool"
  fi

  run_expect_rc "$name/D-wat" "$expected" "$CFREE_BIN" run -e test_main "$wat"
  if [ "$have_wasm" -eq 1 ]; then
    run_expect_rc "$name/D-wasm" "$expected" "$CFREE_BIN" run -e test_main "$wasm"
  else
    note_skip "$name/D-wasm" "no $name.wasm"
  fi

  run_expect_zero "$name/O-wat" "$CFREE_BIN" cc -target "$target_triple" -c \
    "$wat" -o "$wat_obj"
  if [ "$have_wasm" -eq 1 ]; then
    run_expect_zero "$name/O-wasm" "$CFREE_BIN" cc -target "$target_triple" -c \
      "$wasm" -o "$wasm_obj"
  else
    note_skip "$name/O-wasm" "no $name.wasm"
  fi

  if [ "$have_jit_runner" -eq 1 ]; then
    run_expect_rc "$name/J-wat-obj" "$expected" env CFREE_TEST_ARCH="$TEST_ARCH" \
      CFREE_TEST_OBJ="$TEST_OBJ" "$JIT_RUNNER" "$wat_obj"
    if [ "$have_wasm" -eq 1 ]; then
      run_expect_rc "$name/J-wasm-obj" "$expected" env CFREE_TEST_ARCH="$TEST_ARCH" \
        CFREE_TEST_OBJ="$TEST_OBJ" "$JIT_RUNNER" "$wasm_obj"
    else
      note_skip "$name/J-wasm-obj" "no $name.wasm"
    fi
  else
    note_skip "$name/J" "host arch does not match target or no jit-runner"
  fi

  # E: link the .wat-derived object against start.o and execute the result.
  if [ "$TEST_OBJ" = "elf" ] && [ "$have_link_runner" -eq 1 ] &&
     [ "$have_start_obj" -eq 1 ]; then
    if "$LINK_EXE_RUNNER" -o "$exe" "$wat_obj" "$START_OBJ" \
        >"$work/link.out" 2>"$work/link.err"; then
      if exec_target_supported "$EXEC_ARCH"; then
        exec_target_run "$EXEC_ARCH" "$exe" "$work/exec.out" "$work/exec.err"
        # RUN_RC is expected to be set by exec_target_run (defined in the
        # sourced helper, not visible here).
        rc=$RUN_RC
        if [ "$rc" -eq "$expected" ]; then
          note_pass "$name/E"
        else
          note_fail "$name/E expected $expected got $rc"
        fi
      else
        note_skip "$name/E" "no execution support for $TEST_ARCH"
      fi
    else
      note_fail "$name/E link failed"
    fi
  else
    note_skip "$name/E" "requires ELF link runner and start.o"
  fi
done

printf 'test-wasm-front: pass=%d fail=%d skip=%d\n' "$pass" "$fail" "$skip"
[ "$fail" -eq 0 ]