disasm.c (7264B)
1 /* Wasm disassembler. 2 * 3 * Renders the code section of a .wasm module (as exposed by read_wasm) into 4 * WAT instruction text. The code section payload is framed — a function count, 5 * then per function a body size, a locals declaration, an instruction stream, 6 * and a terminating `end` — so unlike the flat ISAs this decoder is stateful: 7 * it walks the framing across successive decode calls, emits one ".locals" 8 * line per function body (at the offset read_wasm records as that function's 9 * symbol value, so objdump labels each body), then decodes the body's 10 * instructions one per call. Instruction decoding reuses the shared 11 * wasm_decode_one_insn so the opcode mapping has a single source of truth. */ 12 13 #include "arch/wasm/disasm.h" 14 15 #include <stdio.h> 16 #include <string.h> 17 18 #include "core/heap.h" 19 #include "core/strbuf.h" 20 #include "wasm/wasm.h" 21 #include "wasm/wasm_insn_table.h" 22 23 #define WASM_DASM_MNEM_CAP 32u 24 #define WASM_DASM_OPS_CAP 192u 25 26 typedef struct WasmDisasm { 27 ArchDisasm base; 28 Compiler* c; 29 Heap* heap; 30 WasmModule scratch; /* reusable decode buffer for wasm_decode_one_insn */ 31 int inited; /* read the function count yet? */ 32 u32 funcs_left; /* function bodies not yet started */ 33 int in_body; /* currently emitting a body's instructions */ 34 u32 depth; /* block/loop/if nesting, for indentation */ 35 char mnem_buf[WASM_DASM_MNEM_CAP]; 36 char ops_buf[WASM_DASM_OPS_CAP]; 37 StrBuf mnem; 38 StrBuf ops; 39 } WasmDisasm; 40 41 /* Bounds-checked uleb over [*p, end); leaves *p past end and returns 0 on 42 * overrun (caller treats a 0-length decode as truncated). */ 43 static u32 dis_uleb(const u8** p, const u8* end) { 44 u32 result = 0, shift = 0; 45 while (*p < end) { 46 u8 b = *(*p)++; 47 result |= (u32)(b & 0x7fu) << shift; 48 if (!(b & 0x80u)) return result; 49 shift += 7u; 50 if (shift >= 32u) break; 51 } 52 return result; 53 } 54 55 static const char* valtype_name(i64 b) { 56 switch ((u8)b) { 57 case 0x7f: 58 return "i32"; 59 case 0x7e: 60 return "i64"; 61 case 0x7d: 62 return "f32"; 63 case 0x7c: 64 return "f64"; 65 case 0x70: 66 return "funcref"; 67 case 0x6f: 68 return "externref"; 69 default: 70 return "?"; 71 } 72 } 73 74 /* Render an instruction's immediate operands into d->ops, dispatched on the 75 * shared operand class from WASM_INSN_TABLE rather than re-listing kinds. */ 76 static void render_operands(WasmDisasm* d, const WasmInsn* in) { 77 const WasmInsnInfo* info = wasm_insn_info((WasmInsnKind)in->kind); 78 WasmOperandClass oc = info ? (WasmOperandClass)info->operand_class : WASM_OC_NONE; 79 switch (oc) { 80 case WASM_OC_SLEB: 81 strbuf_put_i64(&d->ops, in->imm); 82 break; 83 case WASM_OC_FP: { 84 char buf[40]; 85 (void)snprintf(buf, sizeof buf, "%g", in->fp); 86 strbuf_puts(&d->ops, buf); 87 break; 88 } 89 case WASM_OC_IDX: 90 case WASM_OC_TYPED_REF: 91 strbuf_put_u64(&d->ops, (u64)in->imm); 92 break; 93 case WASM_OC_CALL_INDIRECT: 94 strbuf_put_u64(&d->ops, (u64)in->imm); 95 strbuf_puts(&d->ops, " "); 96 strbuf_put_u64(&d->ops, (u64)in->aux_idx); 97 break; 98 case WASM_OC_BR_TABLE: { 99 u32 i; 100 for (i = 0; i < in->ntargets; ++i) { 101 if (i) strbuf_putc(&d->ops, ' '); 102 strbuf_put_u64(&d->ops, (u64)in->targets[i]); 103 } 104 break; 105 } 106 case WASM_OC_REF_NULL: 107 strbuf_puts(&d->ops, valtype_name(in->imm)); 108 break; 109 case WASM_OC_MEMARG: { 110 int wrote = 0; 111 if (in->offset64) { 112 strbuf_puts(&d->ops, "offset="); 113 strbuf_put_u64(&d->ops, in->offset64); 114 wrote = 1; 115 } 116 if (in->align) { 117 if (wrote) strbuf_putc(&d->ops, ' '); 118 strbuf_puts(&d->ops, "align="); 119 strbuf_put_u64(&d->ops, (u64)(1u << in->align)); 120 } 121 break; 122 } 123 default: 124 break; 125 } 126 } 127 128 /* Indentation prefix: two spaces per nesting level, baked into the mnemonic so 129 * objdump's column layout reads as nested WAT. */ 130 static void put_indent(WasmDisasm* d, u32 depth) { 131 u32 i; 132 for (i = 0; i < depth; ++i) strbuf_puts(&d->mnem, " "); 133 } 134 135 static u32 wasm_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, 136 KitInsn* out) { 137 WasmDisasm* d = (WasmDisasm*)base; 138 const u8* p = bytes; 139 const u8* end = bytes + len; 140 141 strbuf_reset(&d->mnem); 142 strbuf_reset(&d->ops); 143 out->annotation = SLICE_LIT(""); 144 145 if (len == 0) return 0; 146 147 if (!d->inited) { 148 d->inited = 1; 149 d->funcs_left = dis_uleb(&p, end); 150 d->in_body = 0; 151 } 152 153 if (!d->in_body) { 154 /* Start of a function body: consume the body size LEB and the locals 155 * vector, emitting one ".locals" line whose address is the locals-vector 156 * start (matching read_wasm's function symbol value). */ 157 size_t header; 158 u64 body_vaddr; 159 u32 ngroups, g; 160 if (d->funcs_left == 0) return 0; 161 (void)dis_uleb(&p, end); /* body size; body end tracked via depth */ 162 header = (size_t)(p - bytes); 163 body_vaddr = vaddr + header; 164 ngroups = dis_uleb(&p, end); 165 strbuf_puts(&d->mnem, ".locals"); 166 for (g = 0; g < ngroups && p < end; ++g) { 167 u32 n = dis_uleb(&p, end); 168 u8 vt = (p < end) ? *p++ : 0; 169 u32 k; 170 for (k = 0; k < n; ++k) { 171 strbuf_putc(&d->ops, ' '); 172 strbuf_puts(&d->ops, valtype_name(vt)); 173 } 174 } 175 d->in_body = 1; 176 d->depth = 0; 177 d->funcs_left--; 178 out->vaddr = body_vaddr; 179 out->bytes = bytes + header; 180 out->nbytes = (u32)((size_t)(p - bytes) - header); 181 out->mnemonic = strbuf_slice(&d->mnem); 182 out->operands = strbuf_slice(&d->ops); 183 return (u32)(p - bytes); 184 } 185 186 /* Inside a body: decode one instruction. */ 187 { 188 WasmInsn insn; 189 size_t n = wasm_decode_one_insn(d->c, &d->scratch, bytes, len, 0, &insn); 190 WasmInsnKind k; 191 if (n == 0) return 0; 192 k = (WasmInsnKind)insn.kind; 193 /* Dedent for the closing/middle keywords before printing them. */ 194 if (k == WASM_INSN_ELSE && d->depth) { 195 put_indent(d, d->depth - 1u); 196 } else if (k == WASM_INSN_END && d->depth) { 197 put_indent(d, d->depth - 1u); 198 } else { 199 put_indent(d, d->depth); 200 } 201 strbuf_puts(&d->mnem, wasm_insn_mnemonic(k)); 202 render_operands(d, &insn); 203 204 if (k == WASM_INSN_BLOCK || k == WASM_INSN_LOOP || k == WASM_INSN_IF) { 205 d->depth++; 206 } else if (k == WASM_INSN_END) { 207 if (d->depth == 0) 208 d->in_body = 0; /* body-terminating end */ 209 else 210 d->depth--; 211 } 212 213 out->vaddr = vaddr; 214 out->bytes = bytes; 215 out->nbytes = (u32)n; 216 out->mnemonic = strbuf_slice(&d->mnem); 217 out->operands = strbuf_slice(&d->ops); 218 return (u32)n; 219 } 220 } 221 222 static void wasm_disasm_destroy(ArchDisasm* base) { 223 WasmDisasm* d = (WasmDisasm*)base; 224 Heap* h = d->heap; 225 wasm_module_free(&d->scratch); 226 h->free(h, d, sizeof *d); 227 } 228 229 ArchDisasm* wasm_disasm_new(Compiler* c) { 230 Heap* h = (Heap*)c->ctx->heap; 231 WasmDisasm* d = (WasmDisasm*)h->alloc(h, sizeof *d, _Alignof(WasmDisasm)); 232 if (!d) return NULL; 233 memset(d, 0, sizeof *d); 234 d->c = c; 235 d->heap = h; 236 d->base.decode = wasm_decode; 237 d->base.destroy = wasm_disasm_destroy; 238 wasm_module_init(&d->scratch, h); 239 strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); 240 strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); 241 return &d->base; 242 }