kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit cac224526af12ea3f31b9daf0e45c087af9c536b
parent 55817243963568fc08d1668e2d8752ffdae1eb5c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 28 May 2026 12:02:33 -0700

wasm/obj: objdump container reader and WAT disassembler

A new read_wasm (src/obj/wasm/read.c) parses a .wasm binary into a
format-neutral ObjBuilder, surfacing each wasm section as an ObjBuilder
section with its payload bytes (so objdump -h/-s show the real
container), marking the code section SF_EXEC, and adding one function
symbol per defined function (so -t lists them and -d labels each body).
It is not a linkable-object reader: tool-conventions linking / reloc.*
support is still pending, so relocations are not recovered.

A new ArchDisasm wasm_disasm_new (src/arch/wasm/disasm.c) renders the
code section as WAT for objdump -d, driven by the shared instruction
decoder (called here as wasm_decode_one_insn) and wasm_insn_mnemonic
from the previous opcode-coverage commit.

The existing wasm emitter moves into its new home,
src/obj/wasm_emit.c -> src/obj/wasm/emit.c, alongside the reader.
obj/registry.c advertises read_wasm, arch/wasm/arch.c wires
wasm_disasm_new, and the Makefile compiles src/obj/wasm/*.c whenever
arch/wasm, obj/wasm, or lang/wasm is enabled.

A first end-to-end test case lives in test/objdump/wasm/cases.

Diffstat:
M Makefile | 2 ++
M src/arch/wasm/arch.c | 12 +++++++-----
A src/arch/wasm/disasm.c | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/wasm/disasm.h | 8 ++++++++
M src/obj/registry.c | 12 +++++++-----
A src/obj/wasm/emit.c | 25 +++++++++++++++++++++++++
A src/obj/wasm/read.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/obj/wasm/wasm.h | 30 ++++++++++++++++++++++++++++++
D src/obj/wasm_emit.c | 24 ------------------------
A test/objdump/wasm/cases/01-disasm-wat.expected | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/objdump/wasm/cases/01-disasm-wat.sh | 14 ++++++++++++++
A test/objdump/wasm/cases/add.wasm | 0
12 files changed, 592 insertions(+), 34 deletions(-)

diff --git a/Makefile b/Makefile @@ -145,6 +145,7 @@ endif LIB_SRCS_OBJ_ELF = $(shell find src/obj/elf -name '*.c' 2>/dev/null) LIB_SRCS_OBJ_MACHO = $(shell find src/obj/macho -name '*.c' 2>/dev/null) LIB_SRCS_OBJ_COFF = $(shell find src/obj/coff -name '*.c' 2>/dev/null) +LIB_SRCS_OBJ_WASM = $(shell find src/obj/wasm -name '*.c' 2>/dev/null) ifneq ($(CFREE_LINK_ENABLED),1) LIB_SRCS_OBJ_ELF := $(filter-out %/link.c %/link_dyn.c,$(LIB_SRCS_OBJ_ELF)) LIB_SRCS_OBJ_MACHO := $(filter-out %/link.c,$(LIB_SRCS_OBJ_MACHO)) @@ -222,6 +223,7 @@ LIB_SRCS += $(LIB_SRCS_ARCH_C_TARGET) endif ifneq ($(filter 1,$(CFREE_ARCH_WASM_ENABLED) $(CFREE_OBJ_WASM_ENABLED) $(CFREE_LANG_WASM_ENABLED)),) LIB_SRCS += $(LIB_SRCS_WASM_CORE) +LIB_SRCS += $(LIB_SRCS_OBJ_WASM) endif ifeq ($(CFREE_OBJ_ELF_ENABLED),1) LIB_SRCS += $(LIB_SRCS_OBJ_ELF) diff --git a/src/arch/wasm/arch.c b/src/arch/wasm/arch.c @@ -1,12 +1,14 @@ /* arch_impl_wasm: Wasm target descriptor. * - * Native machine-code emitters (ELF/Mach-O/COFF), the assembler, and the - * disassembler are intentionally NULL for v1. The Wasm target produces a - * WasmModule attached to the ObjBuilder; emit_wasm flushes it via - * wasm_encode. There is no assembly form for wasm32 in the toolchain. */ + * Native machine-code emitters (ELF/Mach-O/COFF) and the assembler are + * intentionally NULL for v1: the Wasm target produces a WasmModule attached to + * the ObjBuilder, which emit_wasm flushes via wasm_encode, and there is no + * assembly form for wasm32 in the toolchain. The disassembler (wasm_disasm_new) + * renders the code section of a .wasm module as WAT for objdump. */ #include "arch/arch.h" +#include "arch/wasm/disasm.h" #include "arch/wasm/internal.h" /* Predefined macros mirroring clang/llvm conventions. 
*/ @@ -33,7 +35,7 @@ const ArchImpl arch_impl_wasm = { .name = "wasm", .cgtarget_new = wasm_cgtarget_new, .asm_new = NULL, - .disasm_new = NULL, + .disasm_new = wasm_disasm_new, .apply_label_fixup = NULL, .link = NULL, .predefined_macros = wasm_predefined_macros, diff --git a/src/arch/wasm/disasm.c b/src/arch/wasm/disasm.c @@ -0,0 +1,243 @@ +/* Wasm disassembler. + * + * Renders the code section of a .wasm module (as exposed by read_wasm) into + * WAT instruction text. The code section payload is framed — a function count, + * then per function a body size, a locals declaration, an instruction stream, + * and a terminating `end` — so unlike the flat ISAs this decoder is stateful: + * it walks the framing across successive decode calls, emits one ".locals" + * line per function body (at the offset read_wasm records as that function's + * symbol value, so objdump labels each body), then decodes the body's + * instructions one per call. Instruction decoding reuses the shared + * wasm_decode_one_insn so the opcode mapping has a single source of truth. */ + +#include "arch/wasm/disasm.h" + +#include <stdio.h> +#include <string.h> + +#include "core/heap.h" +#include "core/strbuf.h" +#include "wasm/wasm.h" + +#define WASM_DASM_MNEM_CAP 32u +#define WASM_DASM_OPS_CAP 192u + +typedef struct WasmDisasm { + ArchDisasm base; + Compiler* c; + Heap* heap; + WasmModule scratch; /* reusable decode buffer for wasm_decode_one_insn */ + int inited; /* read the function count yet? */ + u32 funcs_left; /* function bodies not yet started */ + int in_body; /* currently emitting a body's instructions */ + u32 depth; /* block/loop/if nesting, for indentation */ + char mnem_buf[WASM_DASM_MNEM_CAP]; + char ops_buf[WASM_DASM_OPS_CAP]; + StrBuf mnem; + StrBuf ops; +} WasmDisasm; + +/* Bounds-checked uleb over [*p, end); leaves *p past end and returns 0 on + * overrun (caller treats a 0-length decode as truncated). 
*/ +static u32 dis_uleb(const u8** p, const u8* end) { + u32 result = 0, shift = 0; + while (*p < end) { + u8 b = *(*p)++; + result |= (u32)(b & 0x7fu) << shift; + if (!(b & 0x80u)) return result; + shift += 7u; + if (shift >= 32u) break; + } + return result; +} + +static const char* valtype_name(i64 b) { + switch ((u8)b) { + case 0x7f: return "i32"; + case 0x7e: return "i64"; + case 0x7d: return "f32"; + case 0x7c: return "f64"; + case 0x70: return "funcref"; + case 0x6f: return "externref"; + default: return "?"; + } +} + +/* Render an instruction's immediate operands into d->ops. */ +static void render_operands(WasmDisasm* d, const WasmInsn* in) { + switch ((WasmInsnKind)in->kind) { + case WASM_INSN_I32_CONST: + case WASM_INSN_I64_CONST: + strbuf_put_i64(&d->ops, in->imm); + break; + case WASM_INSN_F32_CONST: + case WASM_INSN_F64_CONST: { + char buf[40]; + (void)snprintf(buf, sizeof buf, "%g", in->fp); + strbuf_puts(&d->ops, buf); + break; + } + case WASM_INSN_LOCAL_GET: + case WASM_INSN_LOCAL_SET: + case WASM_INSN_LOCAL_TEE: + case WASM_INSN_GLOBAL_GET: + case WASM_INSN_GLOBAL_SET: + case WASM_INSN_BR: + case WASM_INSN_BR_IF: + case WASM_INSN_CALL: + case WASM_INSN_RETURN_CALL: + case WASM_INSN_REF_FUNC: + case WASM_INSN_CALL_REF: + case WASM_INSN_RETURN_CALL_REF: + strbuf_put_u64(&d->ops, (u64)in->imm); + break; + case WASM_INSN_CALL_INDIRECT: + case WASM_INSN_RETURN_CALL_INDIRECT: + strbuf_put_u64(&d->ops, (u64)in->imm); + strbuf_puts(&d->ops, " "); + strbuf_put_u64(&d->ops, (u64)in->aux_idx); + break; + case WASM_INSN_BR_TABLE: { + u32 i; + for (i = 0; i < in->ntargets; ++i) { + if (i) strbuf_putc(&d->ops, ' '); + strbuf_put_u64(&d->ops, (u64)in->targets[i]); + } + break; + } + case WASM_INSN_REF_NULL: + strbuf_puts(&d->ops, valtype_name(in->imm)); + break; + default: + if (wasm_insn_is_mem((WasmInsnKind)in->kind)) { + int wrote = 0; + if (in->offset64) { + strbuf_puts(&d->ops, "offset="); + strbuf_put_u64(&d->ops, in->offset64); + wrote = 1; + } + if 
(in->align) { + if (wrote) strbuf_putc(&d->ops, ' '); + strbuf_puts(&d->ops, "align="); + strbuf_put_u64(&d->ops, (u64)(1u << in->align)); + } + } + break; + } +} + +/* Indentation prefix: two spaces per nesting level, baked into the mnemonic so + * objdump's column layout reads as nested WAT. */ +static void put_indent(WasmDisasm* d, u32 depth) { + u32 i; + for (i = 0; i < depth; ++i) strbuf_puts(&d->mnem, " "); +} + +static u32 wasm_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, + CfreeInsn* out) { + WasmDisasm* d = (WasmDisasm*)base; + const u8* p = bytes; + const u8* end = bytes + len; + + strbuf_reset(&d->mnem); + strbuf_reset(&d->ops); + out->annotation = SLICE_LIT(""); + + if (len == 0) return 0; + + if (!d->inited) { + d->inited = 1; + d->funcs_left = dis_uleb(&p, end); + d->in_body = 0; + } + + if (!d->in_body) { + /* Start of a function body: consume the body size LEB and the locals + * vector, emitting one ".locals" line whose address is the locals-vector + * start (matching read_wasm's function symbol value). */ + size_t header; + u64 body_vaddr; + u32 ngroups, g; + if (d->funcs_left == 0) return 0; + (void)dis_uleb(&p, end); /* body size; body end tracked via depth */ + header = (size_t)(p - bytes); + body_vaddr = vaddr + header; + ngroups = dis_uleb(&p, end); + strbuf_puts(&d->mnem, ".locals"); + for (g = 0; g < ngroups && p < end; ++g) { + u32 n = dis_uleb(&p, end); + u8 vt = (p < end) ? *p++ : 0; + u32 k; + for (k = 0; k < n; ++k) { + strbuf_putc(&d->ops, ' '); + strbuf_puts(&d->ops, valtype_name(vt)); + } + } + d->in_body = 1; + d->depth = 0; + d->funcs_left--; + out->vaddr = body_vaddr; + out->bytes = bytes + header; + out->nbytes = (u32)((size_t)(p - bytes) - header); + out->mnemonic = strbuf_slice(&d->mnem); + out->operands = strbuf_slice(&d->ops); + return (u32)(p - bytes); + } + + /* Inside a body: decode one instruction. 
*/ + { + WasmInsn insn; + size_t n = wasm_decode_one_insn(d->c, &d->scratch, bytes, len, 0, &insn); + WasmInsnKind k; + if (n == 0) return 0; + k = (WasmInsnKind)insn.kind; + /* Dedent for the closing/middle keywords before printing them. */ + if (k == WASM_INSN_ELSE && d->depth) { + put_indent(d, d->depth - 1u); + } else if (k == WASM_INSN_END && d->depth) { + put_indent(d, d->depth - 1u); + } else { + put_indent(d, d->depth); + } + strbuf_puts(&d->mnem, wasm_insn_mnemonic(k)); + render_operands(d, &insn); + + if (k == WASM_INSN_BLOCK || k == WASM_INSN_LOOP || k == WASM_INSN_IF) { + d->depth++; + } else if (k == WASM_INSN_END) { + if (d->depth == 0) + d->in_body = 0; /* body-terminating end */ + else + d->depth--; + } + + out->vaddr = vaddr; + out->bytes = bytes; + out->nbytes = (u32)n; + out->mnemonic = strbuf_slice(&d->mnem); + out->operands = strbuf_slice(&d->ops); + return (u32)n; + } +} + +static void wasm_disasm_destroy(ArchDisasm* base) { + WasmDisasm* d = (WasmDisasm*)base; + Heap* h = d->heap; + wasm_module_free(&d->scratch); + h->free(h, d, sizeof *d); +} + +ArchDisasm* wasm_disasm_new(Compiler* c) { + Heap* h = (Heap*)c->ctx->heap; + WasmDisasm* d = (WasmDisasm*)h->alloc(h, sizeof *d, _Alignof(WasmDisasm)); + if (!d) return NULL; + memset(d, 0, sizeof *d); + d->c = c; + d->heap = h; + d->base.decode = wasm_decode; + d->base.destroy = wasm_disasm_destroy; + wasm_module_init(&d->scratch, h); + strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); + strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); + return &d->base; +} diff --git a/src/arch/wasm/disasm.h b/src/arch/wasm/disasm.h @@ -0,0 +1,8 @@ +#ifndef CFREE_ARCH_WASM_DISASM_H +#define CFREE_ARCH_WASM_DISASM_H + +#include "arch/arch.h" + +ArchDisasm* wasm_disasm_new(Compiler*); + +#endif diff --git a/src/obj/registry.c b/src/obj/registry.c @@ -7,6 +7,7 @@ #include "obj/format.h" #include "obj/macho/macho.h" #include "obj/obj.h" +#include "obj/wasm/wasm.h" #if CFREE_LINK_ENABLED void 
link_emit_elf(LinkImage*, Writer*); @@ -239,13 +240,14 @@ static const ObjFormatImpl obj_format_impl_wasm = { .kind = CFREE_OBJ_WASM, .bin_fmt = CFREE_BIN_WASM, .name = "wasm", - /* Wasm core modules are frontend inputs. A linkable Wasm object reader - * needs tool-conventions `linking` / `reloc.*` support and is still - * pending, so do not advertise a generic object reader here. */ - .read_name = NULL, + /* read_wasm parses a core module into sections + function symbols for + * inspection (objdump -f/-h/-t/-s/-d). It is not a linkable-object reader: + * tool-conventions `linking` / `reloc.*` support is still pending, so + * relocations are not recovered. */ + .read_name = "read_wasm", .read_dso_name = NULL, .emit = emit_wasm, - .read = NULL, + .read = read_wasm, .read_dso = NULL, .link_emit = NULL, }; diff --git a/src/obj/wasm/emit.c b/src/obj/wasm/emit.c @@ -0,0 +1,25 @@ +/* emit_wasm: Wasm module/object emitter. + * + * For v1 the wasm CGTarget builds a WasmModule incrementally and attaches it + * to the ObjBuilder under OBJ_EXT_WASM. emit_wasm flushes that module via + * the existing wasm_encode writer. When no module is attached (e.g. an + * ObjBuilder produced by an .o reader or an empty TU), an empty module + * header is written so downstream tools see a syntactically valid file. */ + +#include "core/core.h" +#include "obj/obj.h" +#include "obj/wasm/wasm.h" +#include "wasm/wasm.h" + +void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) { + WasmModule* m = (WasmModule*)obj_ext_get(o, OBJ_EXT_WASM); + if (m) { + wasm_encode(c, m, w); + return; + } + /* Empty module: magic + version. wasm_encode emits the same header even + * for an empty WasmModule, but we avoid allocating one just to drop it. */ + static const uint8_t magic[] = {0x00, 0x61, 0x73, 0x6d, + 0x01, 0x00, 0x00, 0x00}; + w->write(w, magic, sizeof magic); +} diff --git a/src/obj/wasm/read.c b/src/obj/wasm/read.c @@ -0,0 +1,204 @@ +/* read_wasm: parse a .wasm binary into a format-neutral ObjBuilder. 
+ * + * A wasm module is a container, much like ELF: the type/import/function/ + * table/memory/global/export/elem/data and custom sections are object-format + * metadata, and the code section holds the function bodies. We expose each + * wasm section as an ObjBuilder section carrying its original payload bytes + * (so objdump -h/-s show the real container), mark the code section SF_EXEC + * (so objdump -d disassembles it via the wasm ArchDisasm), and add one + * function symbol per defined function so -t lists them and -d labels each + * body. Container metadata beyond raw bytes is decoded only to recover + * function names; see read_wasm's contract in obj/wasm/wasm.h. */ + +#include <stdio.h> +#include <string.h> + +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "obj/format.h" +#include "obj/obj.h" +#include "obj/wasm/wasm.h" +#include "wasm/wasm.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* Minimal bounds-checked cursor over the raw module bytes. */ +typedef struct WasmCur { + Compiler* c; + const u8* data; + size_t len; + size_t pos; +} WasmCur; + +static u8 cur_u8(WasmCur* w) { + if (w->pos >= w->len) + compiler_panic(w->c, no_loc(), "read_wasm: unexpected end of file"); + return w->data[w->pos++]; +} + +static u32 cur_uleb(WasmCur* w) { + u32 result = 0, shift = 0, nbytes = 0; + for (;;) { + u8 b = cur_u8(w); + if (nbytes++ >= 5u || (shift == 28u && (b & 0xf0u))) + compiler_panic(w->c, no_loc(), "read_wasm: invalid uleb128"); + result |= (u32)(b & 0x7fu) << shift; + if (!(b & 0x80u)) return result; + shift += 7u; + } +} + +/* Canonical section name for the standard section ids (WebAssembly spec). 
*/ +static const char* wasm_section_name(u8 id) { + switch (id) { + case 1: return "type"; + case 2: return "import"; + case 3: return "function"; + case 4: return "table"; + case 5: return "memory"; + case 6: return "global"; + case 7: return "export"; + case 8: return "start"; + case 9: return "elem"; + case 10: return "code"; + case 11: return "data"; + case 12: return "datacount"; + default: return "section"; + } +} + +static Sym intern_cstr(Compiler* c, const char* s) { + return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)}); +} + +/* Best name for the j-th defined function: explicit (name section), else the + * export name, else a synthesized func[index] placeholder. */ +static Sym func_symbol_name(Compiler* c, const WasmFunc* f, u32 func_index) { + char buf[32]; + if (f && f->name && f->name[0]) return intern_cstr(c, f->name); + if (f && f->export_name && f->export_name[0]) + return intern_cstr(c, f->export_name); + (void)snprintf(buf, sizeof buf, "func[%u]", func_index); + return intern_cstr(c, buf); +} + +/* Add one function symbol per defined function, with value = byte offset of the + * body's size-prefixed content (locals vector start) within the code-section + * payload. `payload` points at the code section content (the function-count + * LEB); `size` is its length. */ +static void add_code_symbols(Compiler* c, ObjBuilder* ob, ObjSecId code_sec, + const WasmModule* mod, const u8* payload, + size_t size) { + WasmCur w = {c, payload, size, 0}; + u32 count = cur_uleb(&w); + u32 fi = 0; /* index into mod->funcs (wasm function index space) */ + u32 k; + for (k = 0; k < count; ++k) { + u32 body_size = cur_uleb(&w); + u64 body_off = (u64)w.pos; /* offset of locals vector within payload */ + const WasmFunc* f = NULL; + Sym nm; + SymBind bind; + /* Skip imported functions: only defined funcs have code bodies. 
*/ + while (fi < mod->nfuncs && mod->funcs[fi].is_import) fi++; + if (fi < mod->nfuncs) f = &mod->funcs[fi]; + nm = func_symbol_name(c, f, fi); + bind = (f && f->export_name && f->export_name[0]) ? SB_GLOBAL : SB_LOCAL; + { + ObjSymId sid = obj_symbol_ex(ob, nm, bind, SV_DEFAULT, SK_FUNC, code_sec, + body_off, body_size, 0); + obj_sym_mark_referenced(ob, sid); + } + fi++; + /* Advance past the body to the next size LEB. */ + if (body_size > w.len || w.pos > w.len - body_size) + compiler_panic(c, no_loc(), "read_wasm: code body out of bounds"); + w.pos += body_size; + } +} + +ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data, + size_t len) { + CfreeSlice input; + WasmModule mod; + ObjBuilder* ob; + WasmCur w; + (void)name; + + input.data = data; + input.len = len; + if (!wasm_is_binary(&input)) + compiler_panic(c, no_loc(), "read_wasm: not a wasm binary"); + + ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_wasm: obj_new failed"); + + /* Decode the module model so we can recover function names. Names are + * interned into c->global before the module is freed, so the module's own + * storage need not outlive this function. */ + wasm_module_init(&mod, c->ctx->heap); + wasm_decode_binary(c, &input, &mod); + + /* Walk the raw section framing and mirror each section into the builder with + * its original payload bytes. */ + w.c = c; + w.data = data; + w.len = len; + w.pos = 8; /* past magic + version */ + while (w.pos < w.len) { + u8 id = cur_u8(&w); + u32 size = cur_uleb(&w); + size_t payload = w.pos; + size_t payload_end; + Sym sname; + SecKind kind; + SecSem sem; + u16 flags; + ObjSecId sid; + if (size > w.len || w.pos > w.len - size) + compiler_panic(c, no_loc(), "read_wasm: section out of bounds"); + payload_end = payload + size; + + if (id == 0) { + /* Custom section: name it after its embedded name field. 
*/ + u32 nlen = cur_uleb(&w); + const char* nm = (const char*)(w.data + w.pos); + if (nlen > w.len || w.pos > w.len - nlen) + compiler_panic(c, no_loc(), "read_wasm: custom name out of bounds"); + sname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + kind = SEC_OTHER; + sem = SSEM_WASM_CUSTOM; + flags = 0; + } else { + sname = intern_cstr(c, wasm_section_name(id)); + sem = SSEM_PROGBITS; + if (id == 10) { + kind = SEC_TEXT; + flags = (u16)(SF_EXEC | SF_ALLOC); + } else if (id == 11) { + kind = SEC_DATA; + flags = SF_ALLOC; + } else { + kind = SEC_OTHER; + flags = 0; + } + } + + sid = obj_section_ex(ob, sname, kind, sem, flags, 1, 0, OBJ_SEC_NONE, 0); + if (sid == OBJ_SEC_NONE) + compiler_panic(c, no_loc(), "read_wasm: obj_section_ex failed"); + if (size) obj_write(ob, sid, data + payload, size); + + if (id == 10) + add_code_symbols(c, ob, sid, &mod, data + payload, size); + + w.pos = payload_end; + } + + wasm_module_free(&mod); + return ob; +} diff --git a/src/obj/wasm/wasm.h b/src/obj/wasm/wasm.h @@ -0,0 +1,30 @@ +/* Wasm object-format read/emit entry points, mirroring obj/elf/elf.h and + * friends. The public ObjBuilder/Linker surface is format-neutral + * (obj/obj.h); this header is the private wasm spelling of the read/emit + * hooks plugged into the ObjFormatImpl table in obj/registry.c. + * + * Note: distinct from the core module model in src/wasm/wasm.h (the in-memory + * WasmModule and binary decode/encode). This header only declares the obj-layer + * glue and is always included by its full path (obj/wasm/wasm.h). */ + +#ifndef CFREE_OBJ_WASM_H +#define CFREE_OBJ_WASM_H + +#include "core/core.h" +#include "obj/obj.h" + +/* Flush the WasmModule attached to the builder (OBJ_EXT_WASM) as a .wasm + * binary, or an empty module header when none is attached. 
*/ +void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w); + +/* Parse a .wasm binary into a format-neutral ObjBuilder: one section per wasm + * module section (code marked SF_EXEC so objdump -d disassembles it) plus a + * function symbol per defined function. The code section's symbol values are + * byte offsets, within the code-section payload, of each function body's + * size-prefixed content (i.e. the first byte after the body's size LEB, where + * the locals vector begins) — the wasm disassembler reports that same offset + * for the body's first emitted line so the labels line up. */ +ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data, + size_t len); + +#endif diff --git a/src/obj/wasm_emit.c b/src/obj/wasm_emit.c @@ -1,24 +0,0 @@ -/* emit_wasm: Wasm module/object emitter. - * - * For v1 the wasm CGTarget builds a WasmModule incrementally and attaches it - * to the ObjBuilder under OBJ_EXT_WASM. emit_wasm flushes that module via - * the existing wasm_encode writer. When no module is attached (e.g. an - * ObjBuilder produced by an .o reader or an empty TU), an empty module - * header is written so downstream tools see a syntactically valid file. */ - -#include "core/core.h" -#include "obj/obj.h" -#include "wasm/wasm.h" - -void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) { - WasmModule* m = (WasmModule*)obj_ext_get(o, OBJ_EXT_WASM); - if (m) { - wasm_encode(c, m, w); - return; - } - /* Empty module: magic + version. wasm_encode emits the same header even - * for an empty WasmModule, but we avoid allocating one just to drop it. 
*/ - static const uint8_t magic[] = {0x00, 0x61, 0x73, 0x6d, - 0x01, 0x00, 0x00, 0x00}; - w->write(w, magic, sizeof magic); -} diff --git a/test/objdump/wasm/cases/01-disasm-wat.expected b/test/objdump/wasm/cases/01-disasm-wat.expected @@ -0,0 +1,52 @@ +== sections == +add.wasm: file format wasm-wasm32 + +Sections: +Idx Name Size Align Flags + 0 type 0000000c 2**0 CONTENTS + 1 function 00000003 2**0 CONTENTS + 2 memory 00000003 2**0 CONTENTS + 3 export 00000018 2**0 CONTENTS + 4 code 00000030 2**0 CONTENTS,ALLOC,LOAD,READONLY,CODE + +== symbols == +add.wasm: file format wasm-wasm32 + +SYMBOL TABLE: +0000000000000002 g F code 000000000000000f add1 +0000000000000012 g F code 000000000000001e madd + +== disasm == +add.wasm: file format wasm-wasm32 + +Disassembly of section code: + +0000000000000002 <add1>: + 2: 01 02 7f .locals i32 i32 + 5: 20 00 local.get 0 + 7: 41 01 i32.const 1 + 9: 6a i32.add + a: 21 01 local.set 1 + c: 20 01 local.get 1 + e: 0f return + f: 00 unreachable + 10: 0b end +0000000000000012 <madd>: + 12: 01 05 7f .locals i32 i32 i32 i32 i32 + 15: 20 00 local.get 0 + 17: 20 01 local.get 1 + 19: 6c i32.mul + 1a: 21 03 local.set 3 + 1c: 20 03 local.get 3 + 1e: 21 04 local.set 4 + 20: 20 04 local.get 4 + 22: 21 02 local.set 2 + 24: 20 02 local.get 2 + 26: 20 00 local.get 0 + 28: 6a i32.add + 29: 21 05 local.set 5 + 2b: 20 05 local.get 5 + 2d: 0f return + 2e: 00 unreachable + 2f: 0b end + diff --git a/test/objdump/wasm/cases/01-disasm-wat.sh b/test/objdump/wasm/cases/01-disasm-wat.sh @@ -0,0 +1,14 @@ +# Golden: objdump inspection of a wasm core module. The fixture add.wasm is a +# committed cfree-produced module (two exported functions, add1/madd) so the +# goldens are stable against codegen drift. Exercises the wasm object reader +# (-h section list, -t function symbols) and the wasm disassembler (-d, which +# renders the code section as WAT with per-function labels and locals). 
+# Copy the fixture into the sandbox cwd and inspect it by bare name so the +# golden's "file format" banner path is stable regardless of repo location. +cp "$(dirname "$0")/add.wasm" add.wasm +echo "== sections ==" +"$CFREE" objdump -h add.wasm +echo "== symbols ==" +"$CFREE" objdump -t add.wasm +echo "== disasm ==" +"$CFREE" objdump -d add.wasm diff --git a/test/objdump/wasm/cases/add.wasm b/test/objdump/wasm/cases/add.wasm Binary files differ.