kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

read.c (6789B)


      1 /* read_wasm: parse a .wasm binary into a format-neutral ObjBuilder.
      2  *
      3  * A wasm module is a container, much like ELF: the type/import/function/
      4  * table/memory/global/export/elem/data and custom sections are object-format
      5  * metadata, and the code section holds the function bodies. We expose each
      6  * wasm section as an ObjBuilder section carrying its original payload bytes
      7  * (so objdump -h/-s show the real container), mark the code section SF_EXEC
      8  * (so objdump -d disassembles it via the wasm ArchDisasm), and add one
      9  * function symbol per defined function so -t lists them and -d labels each
     10  * body. Container metadata beyond raw bytes is decoded only to recover
     11  * function names; see read_wasm's contract in obj/wasm/wasm.h. */
     12 
     13 #include <stdio.h>
     14 #include <string.h>
     15 
     16 #include "core/heap.h"
     17 #include "core/pool.h"
     18 #include "core/slice.h"
     19 #include "obj/format.h"
     20 #include "obj/obj.h"
     21 #include "obj/wasm/wasm.h"
     22 #include "wasm/wasm.h"
     23 
     24 /* Minimal bounds-checked cursor over the raw module bytes. */
     25 typedef struct WasmCur {
     26   Compiler* c;
     27   const u8* data;
     28   size_t len;
     29   size_t pos;
     30 } WasmCur;
     31 
     32 static u8 cur_u8(WasmCur* w) {
     33   if (w->pos >= w->len)
     34     compiler_panic(w->c, SRCLOC_NONE, "read_wasm: unexpected end of file");
     35   return w->data[w->pos++];
     36 }
     37 
     38 static u32 cur_uleb(WasmCur* w) {
     39   u32 result = 0, shift = 0, nbytes = 0;
     40   for (;;) {
     41     u8 b = cur_u8(w);
     42     if (nbytes++ >= 5u || (shift == 28u && (b & 0xf0u)))
     43       compiler_panic(w->c, SRCLOC_NONE, "read_wasm: invalid uleb128");
     44     result |= (u32)(b & 0x7fu) << shift;
     45     if (!(b & 0x80u)) return result;
     46     shift += 7u;
     47   }
     48 }
     49 
     50 /* Canonical section name for the standard section ids (WebAssembly spec). */
     51 static const char* wasm_section_name(u8 id) {
     52   switch (id) {
     53     case 1:
     54       return "type";
     55     case 2:
     56       return "import";
     57     case 3:
     58       return "function";
     59     case 4:
     60       return "table";
     61     case 5:
     62       return "memory";
     63     case 6:
     64       return "global";
     65     case 7:
     66       return "export";
     67     case 8:
     68       return "start";
     69     case 9:
     70       return "elem";
     71     case 10:
     72       return "code";
     73     case 11:
     74       return "data";
     75     case 12:
     76       return "datacount";
     77     default:
     78       return "section";
     79   }
     80 }
     81 
     82 static Sym intern_cstr(Compiler* c, const char* s) {
     83   return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)});
     84 }
     85 
     86 /* Best name for the j-th defined function: explicit (name section), else the
     87  * export name, else a synthesized func[index] placeholder. */
     88 static Sym func_symbol_name(Compiler* c, const WasmFunc* f, u32 func_index) {
     89   char buf[32];
     90   if (f && f->name && f->name[0]) return intern_cstr(c, f->name);
     91   if (f && f->export_name && f->export_name[0])
     92     return intern_cstr(c, f->export_name);
     93   (void)snprintf(buf, sizeof buf, "func[%u]", func_index);
     94   return intern_cstr(c, buf);
     95 }
     96 
     97 /* Add one function symbol per defined function, with value = byte offset of the
     98  * body's size-prefixed content (locals vector start) within the code-section
     99  * payload. `payload` points at the code section content (the function-count
    100  * LEB); `size` is its length. */
    101 static void add_code_symbols(Compiler* c, ObjBuilder* ob, ObjSecId code_sec,
    102                              const WasmModule* mod, const u8* payload,
    103                              size_t size) {
    104   WasmCur w = {c, payload, size, 0};
    105   u32 count = cur_uleb(&w);
    106   u32 fi = 0; /* index into mod->funcs (wasm function index space) */
    107   u32 k;
    108   for (k = 0; k < count; ++k) {
    109     u32 body_size = cur_uleb(&w);
    110     u64 body_off = (u64)w.pos; /* offset of locals vector within payload */
    111     const WasmFunc* f = NULL;
    112     Sym nm;
    113     SymBind bind;
    114     /* Skip imported functions: only defined funcs have code bodies. */
    115     while (fi < mod->nfuncs && mod->funcs[fi].is_import) fi++;
    116     if (fi < mod->nfuncs) f = &mod->funcs[fi];
    117     nm = func_symbol_name(c, f, fi);
    118     bind = (f && f->export_name && f->export_name[0]) ? SB_GLOBAL : SB_LOCAL;
    119     {
    120       ObjSymId sid = obj_symbol_ex(ob, nm, bind, SV_DEFAULT, SK_FUNC, code_sec,
    121                                    body_off, body_size, 0);
    122       obj_sym_mark_referenced(ob, sid);
    123     }
    124     fi++;
    125     /* Advance past the body to the next size LEB. */
    126     if (body_size > w.len || w.pos > w.len - body_size)
    127       compiler_panic(c, SRCLOC_NONE, "read_wasm: code body out of bounds");
    128     w.pos += body_size;
    129   }
    130 }
    131 
    132 ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data,
    133                       size_t len) {
    134   KitSlice input;
    135   WasmModule mod;
    136   ObjBuilder* ob;
    137   WasmCur w;
    138   (void)name;
    139 
    140   input.data = data;
    141   input.len = len;
    142   if (!wasm_is_binary(&input))
    143     compiler_panic(c, SRCLOC_NONE, "read_wasm: not a wasm binary");
    144 
    145   ob = obj_new(c);
    146   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_wasm: obj_new failed");
    147 
    148   /* Decode the module model so we can recover function names. Names are
    149    * interned into c->global before the module is freed, so the module's own
    150    * storage need not outlive this function. */
    151   wasm_module_init(&mod, c->ctx->heap);
    152   wasm_decode_binary(c, &input, &mod);
    153 
    154   /* Walk the raw section framing and mirror each section into the builder with
    155    * its original payload bytes. */
    156   w.c = c;
    157   w.data = data;
    158   w.len = len;
    159   w.pos = 8; /* past magic + version */
    160   while (w.pos < w.len) {
    161     u8 id = cur_u8(&w);
    162     u32 size = cur_uleb(&w);
    163     size_t payload = w.pos;
    164     size_t payload_end;
    165     Sym sname;
    166     SecKind kind;
    167     SecSem sem;
    168     u16 flags;
    169     ObjSecId sid;
    170     if (size > w.len || w.pos > w.len - size)
    171       compiler_panic(c, SRCLOC_NONE, "read_wasm: section out of bounds");
    172     payload_end = payload + size;
    173 
    174     if (id == 0) {
    175       /* Custom section: name it after its embedded name field. */
    176       u32 nlen = cur_uleb(&w);
    177       const char* nm = (const char*)(w.data + w.pos);
    178       if (nlen > w.len || w.pos > w.len - nlen)
    179         compiler_panic(c, SRCLOC_NONE, "read_wasm: custom name out of bounds");
    180       sname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    181       kind = SEC_OTHER;
    182       sem = SSEM_WASM_CUSTOM;
    183       flags = 0;
    184     } else {
    185       sname = intern_cstr(c, wasm_section_name(id));
    186       sem = SSEM_PROGBITS;
    187       if (id == 10) {
    188         kind = SEC_TEXT;
    189         flags = (u16)(SF_EXEC | SF_ALLOC);
    190       } else if (id == 11) {
    191         kind = SEC_DATA;
    192         flags = SF_ALLOC;
    193       } else {
    194         kind = SEC_OTHER;
    195         flags = 0;
    196       }
    197     }
    198 
    199     sid = obj_section_ex(ob, sname, kind, sem, flags, 1, 0, OBJ_SEC_NONE, 0);
    200     if (sid == OBJ_SEC_NONE)
    201       compiler_panic(c, SRCLOC_NONE, "read_wasm: obj_section_ex failed");
    202     if (size) obj_write(ob, sid, data + payload, size);
    203 
    204     if (id == 10) add_code_symbols(c, ob, sid, &mod, data + payload, size);
    205 
    206     w.pos = payload_end;
    207   }
    208 
    209   wasm_module_free(&mod);
    210   return ob;
    211 }