kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

disasm.c (7264B)


      1 /* Wasm disassembler.
      2  *
      3  * Renders the code section of a .wasm module (as exposed by read_wasm) into
      4  * WAT instruction text. The code section payload is framed — a function count,
      5  * then per function a body size, a locals declaration, an instruction stream,
      6  * and a terminating `end` — so unlike the flat ISAs this decoder is stateful:
      7  * it walks the framing across successive decode calls, emits one ".locals"
      8  * line per function body (at the offset read_wasm records as that function's
      9  * symbol value, so objdump labels each body), then decodes the body's
     10  * instructions one per call. Instruction decoding reuses the shared
     11  * wasm_decode_one_insn so the opcode mapping has a single source of truth. */
     12 
     13 #include "arch/wasm/disasm.h"
     14 
     15 #include <stdio.h>
     16 #include <string.h>
     17 
     18 #include "core/heap.h"
     19 #include "core/strbuf.h"
     20 #include "wasm/wasm.h"
     21 #include "wasm/wasm_insn_table.h"
     22 
/* Byte capacities of the fixed buffers backing the mnemonic and operand
 * string builders (see mnem_buf / ops_buf below). */
#define WASM_DASM_MNEM_CAP 32u
#define WASM_DASM_OPS_CAP 192u

/* Per-disassembler state. The decode callback is invoked repeatedly over the
 * code section payload, so the position within the section framing (function
 * count read? inside a body? how deeply nested?) is carried here between
 * calls. */
typedef struct WasmDisasm {
  /* Generic interface; kept as the first member because the callbacks cast
   * the ArchDisasm* they receive straight back to WasmDisasm*. */
  ArchDisasm base;
  Compiler* c; /* passed through to wasm_decode_one_insn */
  Heap* heap;  /* allocator this object came from; used again to free it */
  WasmModule scratch; /* reusable decode buffer for wasm_decode_one_insn */
  int inited;         /* read the function count yet? */
  u32 funcs_left;     /* function bodies not yet started */
  int in_body;        /* currently emitting a body's instructions */
  u32 depth;          /* block/loop/if nesting, for indentation */
  /* Backing storage for the two string builders below.
   * NOTE(review): indentation (two spaces per nesting level) is written into
   * the same buffer as the mnemonic text; confirm how StrBuf behaves when
   * deep nesting plus a long mnemonic exceeds WASM_DASM_MNEM_CAP. */
  char mnem_buf[WASM_DASM_MNEM_CAP];
  char ops_buf[WASM_DASM_OPS_CAP];
  StrBuf mnem; /* mnemonic (with indentation) of the line being built */
  StrBuf ops;  /* operand text of the line being built */
} WasmDisasm;
     40 
     41 /* Bounds-checked uleb over [*p, end); leaves *p past end and returns 0 on
     42  * overrun (caller treats a 0-length decode as truncated). */
     43 static u32 dis_uleb(const u8** p, const u8* end) {
     44   u32 result = 0, shift = 0;
     45   while (*p < end) {
     46     u8 b = *(*p)++;
     47     result |= (u32)(b & 0x7fu) << shift;
     48     if (!(b & 0x80u)) return result;
     49     shift += 7u;
     50     if (shift >= 32u) break;
     51   }
     52   return result;
     53 }
     54 
     55 static const char* valtype_name(i64 b) {
     56   switch ((u8)b) {
     57     case 0x7f:
     58       return "i32";
     59     case 0x7e:
     60       return "i64";
     61     case 0x7d:
     62       return "f32";
     63     case 0x7c:
     64       return "f64";
     65     case 0x70:
     66       return "funcref";
     67     case 0x6f:
     68       return "externref";
     69     default:
     70       return "?";
     71   }
     72 }
     73 
     74 /* Render an instruction's immediate operands into d->ops, dispatched on the
     75  * shared operand class from WASM_INSN_TABLE rather than re-listing kinds. */
     76 static void render_operands(WasmDisasm* d, const WasmInsn* in) {
     77   const WasmInsnInfo* info = wasm_insn_info((WasmInsnKind)in->kind);
     78   WasmOperandClass oc = info ? (WasmOperandClass)info->operand_class : WASM_OC_NONE;
     79   switch (oc) {
     80     case WASM_OC_SLEB:
     81       strbuf_put_i64(&d->ops, in->imm);
     82       break;
     83     case WASM_OC_FP: {
     84       char buf[40];
     85       (void)snprintf(buf, sizeof buf, "%g", in->fp);
     86       strbuf_puts(&d->ops, buf);
     87       break;
     88     }
     89     case WASM_OC_IDX:
     90     case WASM_OC_TYPED_REF:
     91       strbuf_put_u64(&d->ops, (u64)in->imm);
     92       break;
     93     case WASM_OC_CALL_INDIRECT:
     94       strbuf_put_u64(&d->ops, (u64)in->imm);
     95       strbuf_puts(&d->ops, " ");
     96       strbuf_put_u64(&d->ops, (u64)in->aux_idx);
     97       break;
     98     case WASM_OC_BR_TABLE: {
     99       u32 i;
    100       for (i = 0; i < in->ntargets; ++i) {
    101         if (i) strbuf_putc(&d->ops, ' ');
    102         strbuf_put_u64(&d->ops, (u64)in->targets[i]);
    103       }
    104       break;
    105     }
    106     case WASM_OC_REF_NULL:
    107       strbuf_puts(&d->ops, valtype_name(in->imm));
    108       break;
    109     case WASM_OC_MEMARG: {
    110       int wrote = 0;
    111       if (in->offset64) {
    112         strbuf_puts(&d->ops, "offset=");
    113         strbuf_put_u64(&d->ops, in->offset64);
    114         wrote = 1;
    115       }
    116       if (in->align) {
    117         if (wrote) strbuf_putc(&d->ops, ' ');
    118         strbuf_puts(&d->ops, "align=");
    119         strbuf_put_u64(&d->ops, (u64)(1u << in->align));
    120       }
    121       break;
    122     }
    123     default:
    124       break;
    125   }
    126 }
    127 
    128 /* Indentation prefix: two spaces per nesting level, baked into the mnemonic so
    129  * objdump's column layout reads as nested WAT. */
    130 static void put_indent(WasmDisasm* d, u32 depth) {
    131   u32 i;
    132   for (i = 0; i < depth; ++i) strbuf_puts(&d->mnem, "  ");
    133 }
    134 
    135 static u32 wasm_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
    136                        KitInsn* out) {
    137   WasmDisasm* d = (WasmDisasm*)base;
    138   const u8* p = bytes;
    139   const u8* end = bytes + len;
    140 
    141   strbuf_reset(&d->mnem);
    142   strbuf_reset(&d->ops);
    143   out->annotation = SLICE_LIT("");
    144 
    145   if (len == 0) return 0;
    146 
    147   if (!d->inited) {
    148     d->inited = 1;
    149     d->funcs_left = dis_uleb(&p, end);
    150     d->in_body = 0;
    151   }
    152 
    153   if (!d->in_body) {
    154     /* Start of a function body: consume the body size LEB and the locals
    155      * vector, emitting one ".locals" line whose address is the locals-vector
    156      * start (matching read_wasm's function symbol value). */
    157     size_t header;
    158     u64 body_vaddr;
    159     u32 ngroups, g;
    160     if (d->funcs_left == 0) return 0;
    161     (void)dis_uleb(&p, end); /* body size; body end tracked via depth */
    162     header = (size_t)(p - bytes);
    163     body_vaddr = vaddr + header;
    164     ngroups = dis_uleb(&p, end);
    165     strbuf_puts(&d->mnem, ".locals");
    166     for (g = 0; g < ngroups && p < end; ++g) {
    167       u32 n = dis_uleb(&p, end);
    168       u8 vt = (p < end) ? *p++ : 0;
    169       u32 k;
    170       for (k = 0; k < n; ++k) {
    171         strbuf_putc(&d->ops, ' ');
    172         strbuf_puts(&d->ops, valtype_name(vt));
    173       }
    174     }
    175     d->in_body = 1;
    176     d->depth = 0;
    177     d->funcs_left--;
    178     out->vaddr = body_vaddr;
    179     out->bytes = bytes + header;
    180     out->nbytes = (u32)((size_t)(p - bytes) - header);
    181     out->mnemonic = strbuf_slice(&d->mnem);
    182     out->operands = strbuf_slice(&d->ops);
    183     return (u32)(p - bytes);
    184   }
    185 
    186   /* Inside a body: decode one instruction. */
    187   {
    188     WasmInsn insn;
    189     size_t n = wasm_decode_one_insn(d->c, &d->scratch, bytes, len, 0, &insn);
    190     WasmInsnKind k;
    191     if (n == 0) return 0;
    192     k = (WasmInsnKind)insn.kind;
    193     /* Dedent for the closing/middle keywords before printing them. */
    194     if (k == WASM_INSN_ELSE && d->depth) {
    195       put_indent(d, d->depth - 1u);
    196     } else if (k == WASM_INSN_END && d->depth) {
    197       put_indent(d, d->depth - 1u);
    198     } else {
    199       put_indent(d, d->depth);
    200     }
    201     strbuf_puts(&d->mnem, wasm_insn_mnemonic(k));
    202     render_operands(d, &insn);
    203 
    204     if (k == WASM_INSN_BLOCK || k == WASM_INSN_LOOP || k == WASM_INSN_IF) {
    205       d->depth++;
    206     } else if (k == WASM_INSN_END) {
    207       if (d->depth == 0)
    208         d->in_body = 0; /* body-terminating end */
    209       else
    210         d->depth--;
    211     }
    212 
    213     out->vaddr = vaddr;
    214     out->bytes = bytes;
    215     out->nbytes = (u32)n;
    216     out->mnemonic = strbuf_slice(&d->mnem);
    217     out->operands = strbuf_slice(&d->ops);
    218     return (u32)n;
    219   }
    220 }
    221 
    222 static void wasm_disasm_destroy(ArchDisasm* base) {
    223   WasmDisasm* d = (WasmDisasm*)base;
    224   Heap* h = d->heap;
    225   wasm_module_free(&d->scratch);
    226   h->free(h, d, sizeof *d);
    227 }
    228 
    229 ArchDisasm* wasm_disasm_new(Compiler* c) {
    230   Heap* h = (Heap*)c->ctx->heap;
    231   WasmDisasm* d = (WasmDisasm*)h->alloc(h, sizeof *d, _Alignof(WasmDisasm));
    232   if (!d) return NULL;
    233   memset(d, 0, sizeof *d);
    234   d->c = c;
    235   d->heap = h;
    236   d->base.decode = wasm_decode;
    237   d->base.destroy = wasm_disasm_destroy;
    238   wasm_module_init(&d->scratch, h);
    239   strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf);
    240   strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf);
    241   return &d->base;
    242 }