commit cac224526af12ea3f31b9daf0e45c087af9c536b
parent 55817243963568fc08d1668e2d8752ffdae1eb5c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 28 May 2026 12:02:33 -0700
wasm/obj: objdump container reader and WAT disassembler
A new read_wasm (src/obj/wasm/read.c) parses a .wasm binary into a
format-neutral ObjBuilder, surfacing each wasm section as an ObjBuilder
section with its payload bytes (so objdump -h/-s show the real
container), marking the code section SF_EXEC, and adding one function
symbol per defined function (so -t lists them and -d labels each body).
It is not a linkable-object reader: tool-conventions linking / reloc.*
support is still pending, so relocations are not recovered.
A new ArchDisasm wasm_disasm_new (src/arch/wasm/disasm.c) renders the
code section as WAT for objdump -d, driven by the shared
decode_body_insn and wasm_insn_mnemonic introduced in the previous
opcode-coverage commit.
The existing wasm emitter moves into its new home,
src/obj/wasm_emit.c -> src/obj/wasm/emit.c, alongside the reader.
obj/registry.c advertises read_wasm, arch/wasm/arch.c wires
wasm_disasm_new, and the Makefile compiles src/obj/wasm/*.c whenever
arch/wasm, obj/wasm, or lang/wasm is enabled.
A first end-to-end test case lives in test/objdump/wasm/cases.
Diffstat:
12 files changed, 592 insertions(+), 34 deletions(-)
diff --git a/Makefile b/Makefile
@@ -145,6 +145,7 @@ endif
LIB_SRCS_OBJ_ELF = $(shell find src/obj/elf -name '*.c' 2>/dev/null)
LIB_SRCS_OBJ_MACHO = $(shell find src/obj/macho -name '*.c' 2>/dev/null)
LIB_SRCS_OBJ_COFF = $(shell find src/obj/coff -name '*.c' 2>/dev/null)
+LIB_SRCS_OBJ_WASM = $(shell find src/obj/wasm -name '*.c' 2>/dev/null)
ifneq ($(CFREE_LINK_ENABLED),1)
LIB_SRCS_OBJ_ELF := $(filter-out %/link.c %/link_dyn.c,$(LIB_SRCS_OBJ_ELF))
LIB_SRCS_OBJ_MACHO := $(filter-out %/link.c,$(LIB_SRCS_OBJ_MACHO))
@@ -222,6 +223,7 @@ LIB_SRCS += $(LIB_SRCS_ARCH_C_TARGET)
endif
ifneq ($(filter 1,$(CFREE_ARCH_WASM_ENABLED) $(CFREE_OBJ_WASM_ENABLED) $(CFREE_LANG_WASM_ENABLED)),)
LIB_SRCS += $(LIB_SRCS_WASM_CORE)
+LIB_SRCS += $(LIB_SRCS_OBJ_WASM)
endif
ifeq ($(CFREE_OBJ_ELF_ENABLED),1)
LIB_SRCS += $(LIB_SRCS_OBJ_ELF)
diff --git a/src/arch/wasm/arch.c b/src/arch/wasm/arch.c
@@ -1,12 +1,14 @@
/* arch_impl_wasm: Wasm target descriptor.
*
- * Native machine-code emitters (ELF/Mach-O/COFF), the assembler, and the
- * disassembler are intentionally NULL for v1. The Wasm target produces a
- * WasmModule attached to the ObjBuilder; emit_wasm flushes it via
- * wasm_encode. There is no assembly form for wasm32 in the toolchain. */
+ * Native machine-code emitters (ELF/Mach-O/COFF) and the assembler are
+ * intentionally NULL for v1: the Wasm target produces a WasmModule attached to
+ * the ObjBuilder, which emit_wasm flushes via wasm_encode, and there is no
+ * assembly form for wasm32 in the toolchain. The disassembler (wasm_disasm_new)
+ * renders the code section of a .wasm module as WAT for objdump. */
#include "arch/arch.h"
+#include "arch/wasm/disasm.h"
#include "arch/wasm/internal.h"
/* Predefined macros mirroring clang/llvm conventions. */
@@ -33,7 +35,7 @@ const ArchImpl arch_impl_wasm = {
.name = "wasm",
.cgtarget_new = wasm_cgtarget_new,
.asm_new = NULL,
- .disasm_new = NULL,
+ .disasm_new = wasm_disasm_new,
.apply_label_fixup = NULL,
.link = NULL,
.predefined_macros = wasm_predefined_macros,
diff --git a/src/arch/wasm/disasm.c b/src/arch/wasm/disasm.c
@@ -0,0 +1,243 @@
+/* Wasm disassembler.
+ *
+ * Renders the code section of a .wasm module (as exposed by read_wasm) into
+ * WAT instruction text. The code section payload is framed — a function count,
+ * then per function a body size, a locals declaration, an instruction stream,
+ * and a terminating `end` — so unlike the flat ISAs this decoder is stateful:
+ * it walks the framing across successive decode calls, emits one ".locals"
+ * line per function body (at the offset read_wasm records as that function's
+ * symbol value, so objdump labels each body), then decodes the body's
+ * instructions one per call. Instruction decoding reuses the shared
+ * wasm_decode_one_insn so the opcode mapping has a single source of truth. */
+
+#include "arch/wasm/disasm.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/strbuf.h"
+#include "wasm/wasm.h"
+
+#define WASM_DASM_MNEM_CAP 32u
+#define WASM_DASM_OPS_CAP 192u
+
+typedef struct WasmDisasm {
+  ArchDisasm base;    /* first member: wasm_decode casts ArchDisasm* back */
+  Compiler* c;        /* passed through to wasm_decode_one_insn */
+  Heap* heap;         /* allocator this object came from; used by destroy */
+  WasmModule scratch; /* reusable decode buffer for wasm_decode_one_insn */
+  int inited;         /* read the function count yet? */
+  u32 funcs_left;     /* function bodies not yet started */
+  int in_body;        /* currently emitting a body's instructions */
+  u32 depth;          /* block/loop/if nesting, for indentation */
+  char mnem_buf[WASM_DASM_MNEM_CAP]; /* backing storage for mnem */
+  char ops_buf[WASM_DASM_OPS_CAP];   /* backing storage for ops */
+  StrBuf mnem;        /* mnemonic text of the line being decoded */
+  StrBuf ops;         /* operand text of the line being decoded */
+} WasmDisasm;
+
+/* Bounds-checked uleb over [*p, end). Stops at end, or after five bytes with
+ * no terminator, and returns the bits gathered so far (not an error value). */
+static u32 dis_uleb(const u8** p, const u8* end) {
+  u32 result = 0, shift = 0;
+  while (*p < end) {
+    u8 b = *(*p)++;
+    result |= (u32)(b & 0x7fu) << shift;
+    if (!(b & 0x80u)) return result;
+    shift += 7u;
+    if (shift >= 32u) break;
+  }
+  return result;
+}
+
+static const char* valtype_name(i64 b) {
+  switch ((u8)b) { /* only the low byte of b is examined */
+    case 0x7f: return "i32";
+    case 0x7e: return "i64";
+    case 0x7d: return "f32";
+    case 0x7c: return "f64";
+    case 0x70: return "funcref";
+    case 0x6f: return "externref";
+    default: return "?"; /* unrecognized type byte */
+  }
+}
+
+/* Append the WAT text of in's immediate operands, if it has any, to d->ops. */
+static void render_operands(WasmDisasm* d, const WasmInsn* in) {
+  switch ((WasmInsnKind)in->kind) {
+    case WASM_INSN_I32_CONST:
+    case WASM_INSN_I64_CONST:
+      strbuf_put_i64(&d->ops, in->imm); /* signed immediate */
+      break;
+    case WASM_INSN_F32_CONST:
+    case WASM_INSN_F64_CONST: {
+      char buf[40];
+      (void)snprintf(buf, sizeof buf, "%g", in->fp);
+      strbuf_puts(&d->ops, buf);
+      break;
+    }
+    case WASM_INSN_LOCAL_GET:
+    case WASM_INSN_LOCAL_SET:
+    case WASM_INSN_LOCAL_TEE:
+    case WASM_INSN_GLOBAL_GET:
+    case WASM_INSN_GLOBAL_SET:
+    case WASM_INSN_BR:
+    case WASM_INSN_BR_IF:
+    case WASM_INSN_CALL:
+    case WASM_INSN_RETURN_CALL:
+    case WASM_INSN_REF_FUNC:
+    case WASM_INSN_CALL_REF:
+    case WASM_INSN_RETURN_CALL_REF:
+      strbuf_put_u64(&d->ops, (u64)in->imm); /* single unsigned immediate */
+      break;
+    case WASM_INSN_CALL_INDIRECT:
+    case WASM_INSN_RETURN_CALL_INDIRECT:
+      strbuf_put_u64(&d->ops, (u64)in->imm); /* imm first, then aux_idx */
+      strbuf_puts(&d->ops, " ");
+      strbuf_put_u64(&d->ops, (u64)in->aux_idx);
+      break;
+    case WASM_INSN_BR_TABLE: {
+      u32 i;
+      for (i = 0; i < in->ntargets; ++i) {
+        if (i) strbuf_putc(&d->ops, ' '); /* separator between targets */
+        strbuf_put_u64(&d->ops, (u64)in->targets[i]);
+      }
+      break;
+    }
+    case WASM_INSN_REF_NULL:
+      strbuf_puts(&d->ops, valtype_name(in->imm));
+      break;
+    default:
+      if (wasm_insn_is_mem((WasmInsnKind)in->kind)) {
+        if (in->offset64) {
+          strbuf_puts(&d->ops, "offset=");
+          strbuf_put_u64(&d->ops, in->offset64);
+        }
+        if (in->align) {
+          /* align is a decoded log2 exponent: shift in 64 bits, and print 0
+           * instead of shifting by 64 or more (undefined behaviour). */
+          if (in->offset64) strbuf_putc(&d->ops, ' ');
+          strbuf_puts(&d->ops, "align=");
+          strbuf_put_u64(&d->ops, in->align < 64u ? (u64)1 << in->align : 0u);
+        }
+      }
+      break;
+  }
+}
+
+/* Indentation prefix: two spaces per nesting level, baked into the mnemonic so
+ * objdump's column layout reads as nested WAT. */
+static void put_indent(WasmDisasm* d, u32 depth) {
+  u32 i; /* NOTE(review): confirm the literal below really holds two spaces */
+  for (i = 0; i < depth; ++i) strbuf_puts(&d->mnem, " ");
+}
+
+/* ArchDisasm decode hook, one output line per call: between bodies it consumes
+ * the framing and reports a ".locals" line, inside a body it decodes a single
+ * instruction. Returns bytes consumed, or 0 when nothing can be decoded. */
+static u32 wasm_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
+                       CfreeInsn* out) {
+  WasmDisasm* d = (WasmDisasm*)base;
+  const u8* p = bytes;
+  const u8* end = bytes + len;
+
+  strbuf_reset(&d->mnem);
+  strbuf_reset(&d->ops);
+  out->annotation = SLICE_LIT("");
+  if (len == 0) return 0;
+
+  if (!d->inited) {
+    d->inited = 1;
+    d->funcs_left = dis_uleb(&p, end);
+    d->in_body = 0;
+  }
+
+  if (!d->in_body) {
+    /* Start of a function body: consume the body size LEB and the locals
+     * vector, emitting one ".locals" line whose address is the locals-vector
+     * start (matching read_wasm's function symbol value). */
+    size_t header;
+    u64 body_vaddr;
+    u32 ngroups, g;
+    if (d->funcs_left == 0) return 0;
+    (void)dis_uleb(&p, end); /* body size; body end tracked via depth */
+    header = (size_t)(p - bytes);
+    body_vaddr = vaddr + header;
+    ngroups = dis_uleb(&p, end);
+    strbuf_puts(&d->mnem, ".locals");
+    for (g = 0; g < ngroups && p < end; ++g) {
+      u32 n = dis_uleb(&p, end);
+      u8 vt = (p < end) ? *p++ : 0;
+      u32 k;
+      /* n is read from the input and may be huge. Each entry needs >= 2 of the
+       * byte ops_buf, so extra rounds add nothing if StrBuf is fixed-size. */
+      for (k = 0; k < n && k < WASM_DASM_OPS_CAP; ++k) {
+        strbuf_putc(&d->ops, ' ');
+        strbuf_puts(&d->ops, valtype_name(vt));
+      }
+    }
+    d->in_body = 1;
+    d->depth = 0;
+    d->funcs_left--;
+    out->vaddr = body_vaddr;
+    out->bytes = bytes + header;
+    out->nbytes = (u32)((size_t)(p - bytes) - header);
+    out->mnemonic = strbuf_slice(&d->mnem);
+    out->operands = strbuf_slice(&d->ops);
+    return (u32)(p - bytes);
+  }
+
+  /* Inside a body: decode one instruction. */
+  {
+    WasmInsn insn;
+    size_t n = wasm_decode_one_insn(d->c, &d->scratch, bytes, len, 0, &insn);
+    WasmInsnKind k;
+    if (n == 0) return 0;
+    k = (WasmInsnKind)insn.kind;
+    /* Dedent for the closing/middle keywords before printing them. */
+    if ((k == WASM_INSN_ELSE || k == WASM_INSN_END) && d->depth) {
+      put_indent(d, d->depth - 1u);
+    } else {
+      put_indent(d, d->depth);
+    }
+    strbuf_puts(&d->mnem, wasm_insn_mnemonic(k));
+    render_operands(d, &insn);
+    if (k == WASM_INSN_BLOCK || k == WASM_INSN_LOOP || k == WASM_INSN_IF) {
+      d->depth++;
+    } else if (k == WASM_INSN_END) {
+      if (d->depth == 0)
+        d->in_body = 0; /* body-terminating end */
+      else
+        d->depth--;
+    }
+    out->vaddr = vaddr;
+    out->bytes = bytes;
+    out->nbytes = (u32)n;
+    out->mnemonic = strbuf_slice(&d->mnem);
+    out->operands = strbuf_slice(&d->ops);
+    return (u32)n;
+  }
+}
+
+static void wasm_disasm_destroy(ArchDisasm* base) {
+  WasmDisasm* d = (WasmDisasm*)base;
+  Heap* h = d->heap; /* fetched before d is released below */
+  wasm_module_free(&d->scratch);
+  h->free(h, d, sizeof *d);
+}
+
+ArchDisasm* wasm_disasm_new(Compiler* c) {
+  Heap* h = (Heap*)c->ctx->heap;
+  WasmDisasm* d = (WasmDisasm*)h->alloc(h, sizeof *d, _Alignof(WasmDisasm));
+  if (!d) return NULL; /* allocation failed */
+  memset(d, 0, sizeof *d); /* inited, in_body, depth start at 0 */
+  d->c = c;
+  d->heap = h;
+  d->base.decode = wasm_decode;
+  d->base.destroy = wasm_disasm_destroy;
+  wasm_module_init(&d->scratch, h);
+  strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf);
+  strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf);
+  return &d->base; /* the embedded base is what gets handed out */
+}
diff --git a/src/arch/wasm/disasm.h b/src/arch/wasm/disasm.h
@@ -0,0 +1,8 @@
+#ifndef CFREE_ARCH_WASM_DISASM_H
+#define CFREE_ARCH_WASM_DISASM_H
+
+#include "arch/arch.h"
+
+ArchDisasm* wasm_disasm_new(Compiler*);
+
+#endif
diff --git a/src/obj/registry.c b/src/obj/registry.c
@@ -7,6 +7,7 @@
#include "obj/format.h"
#include "obj/macho/macho.h"
#include "obj/obj.h"
+#include "obj/wasm/wasm.h"
#if CFREE_LINK_ENABLED
void link_emit_elf(LinkImage*, Writer*);
@@ -239,13 +240,14 @@ static const ObjFormatImpl obj_format_impl_wasm = {
.kind = CFREE_OBJ_WASM,
.bin_fmt = CFREE_BIN_WASM,
.name = "wasm",
- /* Wasm core modules are frontend inputs. A linkable Wasm object reader
- * needs tool-conventions `linking` / `reloc.*` support and is still
- * pending, so do not advertise a generic object reader here. */
- .read_name = NULL,
+ /* read_wasm parses a core module into sections + function symbols for
+ * inspection (objdump -f/-h/-t/-s/-d). It is not a linkable-object reader:
+ * tool-conventions `linking` / `reloc.*` support is still pending, so
+ * relocations are not recovered. */
+ .read_name = "read_wasm",
.read_dso_name = NULL,
.emit = emit_wasm,
- .read = NULL,
+ .read = read_wasm,
.read_dso = NULL,
.link_emit = NULL,
};
diff --git a/src/obj/wasm/emit.c b/src/obj/wasm/emit.c
@@ -0,0 +1,25 @@
+/* emit_wasm: Wasm module/object emitter.
+ *
+ * For v1 the wasm CGTarget builds a WasmModule incrementally and attaches it
+ * to the ObjBuilder under OBJ_EXT_WASM. emit_wasm flushes that module via
+ * the existing wasm_encode writer. When no module is attached (e.g. an
+ * ObjBuilder produced by an .o reader or an empty TU), an empty module
+ * header is written so downstream tools see a syntactically valid file. */
+
+#include "core/core.h"
+#include "obj/obj.h"
+#include "obj/wasm/wasm.h"
+#include "wasm/wasm.h"
+
+void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) {
+  WasmModule* m = (WasmModule*)obj_ext_get(o, OBJ_EXT_WASM);
+  if (m) { /* a module is attached: encode it and stop */
+    wasm_encode(c, m, w);
+    return;
+  }
+  /* Empty module: magic + version. wasm_encode emits the same header even
+   * for an empty WasmModule, but we avoid allocating one just to drop it. */
+  static const uint8_t magic[] = {0x00, 0x61, 0x73, 0x6d,  /* "\0asm" */
+                                  0x01, 0x00, 0x00, 0x00}; /* version bytes */
+  w->write(w, magic, sizeof magic);
+}
diff --git a/src/obj/wasm/read.c b/src/obj/wasm/read.c
@@ -0,0 +1,204 @@
+/* read_wasm: parse a .wasm binary into a format-neutral ObjBuilder.
+ *
+ * A wasm module is a container, much like ELF: the type/import/function/
+ * table/memory/global/export/elem/data and custom sections are object-format
+ * metadata, and the code section holds the function bodies. We expose each
+ * wasm section as an ObjBuilder section carrying its original payload bytes
+ * (so objdump -h/-s show the real container), mark the code section SF_EXEC
+ * (so objdump -d disassembles it via the wasm ArchDisasm), and add one
+ * function symbol per defined function so -t lists them and -d labels each
+ * body. Container metadata beyond raw bytes is decoded only to recover
+ * function names; see read_wasm's contract in obj/wasm/wasm.h. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "obj/format.h"
+#include "obj/obj.h"
+#include "obj/wasm/wasm.h"
+#include "wasm/wasm.h"
+
+static SrcLoc no_loc(void) {
+  SrcLoc l = {0, 0, 0}; /* all-zero location, passed to compiler_panic here */
+  return l;
+}
+
+/* Minimal bounds-checked cursor over the raw module bytes. */
+typedef struct WasmCur {
+  Compiler* c;    /* for compiler_panic on malformed input */
+  const u8* data; /* start of the bytes being read */
+  size_t len;     /* number of readable bytes at data */
+  size_t pos;     /* index of the next byte to read */
+} WasmCur;
+
+static u8 cur_u8(WasmCur* w) {
+  if (w->pos >= w->len) /* nothing left to read */
+    compiler_panic(w->c, no_loc(), "read_wasm: unexpected end of file");
+  return w->data[w->pos++]; /* fetch one byte and advance */
+}
+
+static u32 cur_uleb(WasmCur* w) {
+  u32 result = 0, shift = 0, nbytes = 0;
+  for (;;) { /* a u32 takes <= 5 bytes; the 5th may set only bits 28..31 */
+    u8 b = cur_u8(w);
+    if (nbytes++ >= 5u || (shift == 28u && (b & 0xf0u)))
+      compiler_panic(w->c, no_loc(), "read_wasm: invalid uleb128");
+    result |= (u32)(b & 0x7fu) << shift;
+    if (!(b & 0x80u)) return result; /* continuation bit clear: last byte */
+    shift += 7u;
+  }
+}
+
+/* Canonical section name for the standard section ids (WebAssembly spec). */
+static const char* wasm_section_name(u8 id) {
+  switch (id) {
+    case 1: return "type";
+    case 2: return "import";
+    case 3: return "function";
+    case 4: return "table";
+    case 5: return "memory";
+    case 6: return "global";
+    case 7: return "export";
+    case 8: return "start";
+    case 9: return "elem";
+    case 10: return "code";
+    case 11: return "data";
+    case 12: return "datacount";
+    default: return "section"; /* any id not listed above */
+  }
+}
+
+static Sym intern_cstr(Compiler* c, const char* s) { /* s is NUL-terminated */
+  return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)});
+}
+
+/* Best name for the function at func_index: f's explicit name (name section)
+ * if set, else its export name, else a synthesized func[index] placeholder. */
+static Sym func_symbol_name(Compiler* c, const WasmFunc* f, u32 func_index) {
+  char buf[32];
+  if (f && f->name && f->name[0]) return intern_cstr(c, f->name);
+  if (f && f->export_name && f->export_name[0])
+    return intern_cstr(c, f->export_name);
+  (void)snprintf(buf, sizeof buf, "func[%u]", func_index);
+  return intern_cstr(c, buf);
+}
+
+/* Add one function symbol per defined function, with value = byte offset of the
+ * body's size-prefixed content (locals vector start) within the code-section
+ * payload. `payload` points at the code section content (the function-count
+ * LEB); `size` is its length. */
+static void add_code_symbols(Compiler* c, ObjBuilder* ob, ObjSecId code_sec,
+                             const WasmModule* mod, const u8* payload,
+                             size_t size) {
+  WasmCur w = {c, payload, size, 0};
+  u32 count = cur_uleb(&w); /* number of function bodies that follow */
+  u32 fi = 0; /* index into mod->funcs (wasm function index space) */
+  u32 k;
+  for (k = 0; k < count; ++k) {
+    u32 body_size = cur_uleb(&w);
+    u64 body_off = (u64)w.pos; /* offset of locals vector within payload */
+    const WasmFunc* f = NULL; /* stays NULL once mod->funcs is exhausted */
+    Sym nm;
+    SymBind bind;
+    /* Skip imported functions: only defined funcs have code bodies. */
+    while (fi < mod->nfuncs && mod->funcs[fi].is_import) fi++;
+    if (fi < mod->nfuncs) f = &mod->funcs[fi];
+    nm = func_symbol_name(c, f, fi);
+    bind = (f && f->export_name && f->export_name[0]) ? SB_GLOBAL : SB_LOCAL;
+    {
+      ObjSymId sid = obj_symbol_ex(ob, nm, bind, SV_DEFAULT, SK_FUNC, code_sec,
+                                   body_off, body_size, 0);
+      obj_sym_mark_referenced(ob, sid);
+    }
+    fi++; /* the next body pairs with the next non-import entry */
+    /* Advance past the body to the next size LEB. */
+    if (body_size > w.len || w.pos > w.len - body_size)
+      compiler_panic(c, no_loc(), "read_wasm: code body out of bounds");
+    w.pos += body_size;
+  }
+}
+
+/* Mirror every section of the module in data[0, len) into a new ObjBuilder,
+ * raw payload bytes included, and add function symbols for the code section.
+ * `name` is unused. Framing errors detected here end in compiler_panic. */
+ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data,
+                      size_t len) {
+  CfreeSlice input;
+  WasmModule mod;
+  ObjBuilder* ob;
+  WasmCur w = {c, data, len, 8}; /* cursor starts past magic + version */
+  (void)name;
+
+  input.data = data;
+  input.len = len;
+  if (!wasm_is_binary(&input))
+    compiler_panic(c, no_loc(), "read_wasm: not a wasm binary");
+
+  ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_wasm: obj_new failed");
+
+  /* Decode the module model so we can recover function names. Names are
+   * interned into c->global before the module is freed, so the module's own
+   * storage need not outlive this function. */
+  wasm_module_init(&mod, c->ctx->heap);
+  wasm_decode_binary(c, &input, &mod);
+
+  /* Walk the raw section framing and mirror each section into the builder with
+   * its original payload bytes. */
+  while (w.pos < w.len) {
+    u8 id = cur_u8(&w);
+    u32 size = cur_uleb(&w);
+    size_t payload = w.pos;
+    size_t payload_end;
+    Sym sname;
+    SecKind kind;
+    SecSem sem;
+    u16 flags;
+    ObjSecId sid;
+    if (size > w.len || w.pos > w.len - size)
+      compiler_panic(c, no_loc(), "read_wasm: section out of bounds");
+    payload_end = payload + size;
+
+    if (id == 0) {
+      /* Custom section: name it after its embedded name field, which must lie
+       * inside this section's payload rather than merely inside the file. */
+      u32 nlen = cur_uleb(&w);
+      const char* nm = (const char*)(w.data + w.pos);
+      if (w.pos > payload_end || nlen > payload_end - w.pos)
+        compiler_panic(c, no_loc(), "read_wasm: custom name out of bounds");
+      sname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+      kind = SEC_OTHER;
+      sem = SSEM_WASM_CUSTOM;
+      flags = 0;
+    } else {
+      sname = intern_cstr(c, wasm_section_name(id));
+      sem = SSEM_PROGBITS;
+      if (id == 10) { /* code section */
+        kind = SEC_TEXT;
+        flags = (u16)(SF_EXEC | SF_ALLOC);
+      } else if (id == 11) { /* data section */
+        kind = SEC_DATA;
+        flags = SF_ALLOC;
+      } else {
+        kind = SEC_OTHER;
+        flags = 0;
+      }
+    }
+
+    sid = obj_section_ex(ob, sname, kind, sem, flags, 1, 0, OBJ_SEC_NONE, 0);
+    if (sid == OBJ_SEC_NONE)
+      compiler_panic(c, no_loc(), "read_wasm: obj_section_ex failed");
+    if (size) obj_write(ob, sid, data + payload, size);
+
+    if (id == 10)
+      add_code_symbols(c, ob, sid, &mod, data + payload, size);
+
+    w.pos = payload_end; /* resume at the next section header */
+  }
+
+  wasm_module_free(&mod);
+  return ob;
+}
diff --git a/src/obj/wasm/wasm.h b/src/obj/wasm/wasm.h
@@ -0,0 +1,30 @@
+/* Wasm object-format read/emit entry points, mirroring obj/elf/elf.h and
+ * friends. The public ObjBuilder/Linker surface is format-neutral
+ * (obj/obj.h); this header is the private wasm spelling of the read/emit
+ * hooks plugged into the ObjFormatImpl table in obj/registry.c.
+ *
+ * Note: distinct from the core module model in src/wasm/wasm.h (the in-memory
+ * WasmModule and binary decode/encode). This header only declares the obj-layer
+ * glue and is always included by its full path (obj/wasm/wasm.h). */
+
+#ifndef CFREE_OBJ_WASM_H
+#define CFREE_OBJ_WASM_H
+
+#include "core/core.h"
+#include "obj/obj.h"
+
+/* Flush the WasmModule attached to the builder (OBJ_EXT_WASM) as a .wasm
+ * binary, or an empty module header when none is attached. */
+void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w);
+
+/* Parse a .wasm binary into a format-neutral ObjBuilder: one section per wasm
+ * module section (code marked SF_EXEC so objdump -d disassembles it) plus a
+ * function symbol per defined function. The code section's symbol values are
+ * byte offsets, within the code-section payload, of each function body's
+ * size-prefixed content (i.e. the first byte after the body's size LEB, where
+ * the locals vector begins) — the wasm disassembler reports that same offset
+ * for the body's first emitted line so the labels line up. */
+ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data,
+ size_t len);
+
+#endif
diff --git a/src/obj/wasm_emit.c b/src/obj/wasm_emit.c
@@ -1,24 +0,0 @@
-/* emit_wasm: Wasm module/object emitter.
- *
- * For v1 the wasm CGTarget builds a WasmModule incrementally and attaches it
- * to the ObjBuilder under OBJ_EXT_WASM. emit_wasm flushes that module via
- * the existing wasm_encode writer. When no module is attached (e.g. an
- * ObjBuilder produced by an .o reader or an empty TU), an empty module
- * header is written so downstream tools see a syntactically valid file. */
-
-#include "core/core.h"
-#include "obj/obj.h"
-#include "wasm/wasm.h"
-
-void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) {
- WasmModule* m = (WasmModule*)obj_ext_get(o, OBJ_EXT_WASM);
- if (m) {
- wasm_encode(c, m, w);
- return;
- }
- /* Empty module: magic + version. wasm_encode emits the same header even
- * for an empty WasmModule, but we avoid allocating one just to drop it. */
- static const uint8_t magic[] = {0x00, 0x61, 0x73, 0x6d,
- 0x01, 0x00, 0x00, 0x00};
- w->write(w, magic, sizeof magic);
-}
diff --git a/test/objdump/wasm/cases/01-disasm-wat.expected b/test/objdump/wasm/cases/01-disasm-wat.expected
@@ -0,0 +1,52 @@
+== sections ==
+add.wasm: file format wasm-wasm32
+
+Sections:
+Idx Name Size Align Flags
+ 0 type 0000000c 2**0 CONTENTS
+ 1 function 00000003 2**0 CONTENTS
+ 2 memory 00000003 2**0 CONTENTS
+ 3 export 00000018 2**0 CONTENTS
+ 4 code 00000030 2**0 CONTENTS,ALLOC,LOAD,READONLY,CODE
+
+== symbols ==
+add.wasm: file format wasm-wasm32
+
+SYMBOL TABLE:
+0000000000000002 g F code 000000000000000f add1
+0000000000000012 g F code 000000000000001e madd
+
+== disasm ==
+add.wasm: file format wasm-wasm32
+
+Disassembly of section code:
+
+0000000000000002 <add1>:
+ 2: 01 02 7f .locals i32 i32
+ 5: 20 00 local.get 0
+ 7: 41 01 i32.const 1
+ 9: 6a i32.add
+ a: 21 01 local.set 1
+ c: 20 01 local.get 1
+ e: 0f return
+ f: 00 unreachable
+ 10: 0b end
+0000000000000012 <madd>:
+ 12: 01 05 7f .locals i32 i32 i32 i32 i32
+ 15: 20 00 local.get 0
+ 17: 20 01 local.get 1
+ 19: 6c i32.mul
+ 1a: 21 03 local.set 3
+ 1c: 20 03 local.get 3
+ 1e: 21 04 local.set 4
+ 20: 20 04 local.get 4
+ 22: 21 02 local.set 2
+ 24: 20 02 local.get 2
+ 26: 20 00 local.get 0
+ 28: 6a i32.add
+ 29: 21 05 local.set 5
+ 2b: 20 05 local.get 5
+ 2d: 0f return
+ 2e: 00 unreachable
+ 2f: 0b end
+
diff --git a/test/objdump/wasm/cases/01-disasm-wat.sh b/test/objdump/wasm/cases/01-disasm-wat.sh
@@ -0,0 +1,14 @@
+# Golden: objdump inspection of a wasm core module. The fixture add.wasm is a
+# committed cfree-produced module (two exported functions, add1/madd) so the
+# goldens are stable against codegen drift. Exercises the wasm object reader
+# (-h section list, -t function symbols) and the wasm disassembler (-d, which
+# renders the code section as WAT with per-function labels and locals).
+# Copy the fixture into the sandbox cwd and inspect it by bare name so the
+# golden's "file format" banner path is stable regardless of repo location.
+cp "$(dirname "$0")/add.wasm" add.wasm
+echo "== sections =="
+"$CFREE" objdump -h add.wasm
+echo "== symbols =="
+"$CFREE" objdump -t add.wasm
+echo "== disasm =="
+"$CFREE" objdump -d add.wasm
diff --git a/test/objdump/wasm/cases/add.wasm b/test/objdump/wasm/cases/add.wasm
Binary files differ.