read.c (6789B)
1 /* read_wasm: parse a .wasm binary into a format-neutral ObjBuilder. 2 * 3 * A wasm module is a container, much like ELF: the type/import/function/ 4 * table/memory/global/export/elem/data and custom sections are object-format 5 * metadata, and the code section holds the function bodies. We expose each 6 * wasm section as an ObjBuilder section carrying its original payload bytes 7 * (so objdump -h/-s show the real container), mark the code section SF_EXEC 8 * (so objdump -d disassembles it via the wasm ArchDisasm), and add one 9 * function symbol per defined function so -t lists them and -d labels each 10 * body. Container metadata beyond raw bytes is decoded only to recover 11 * function names; see read_wasm's contract in obj/wasm/wasm.h. */ 12 13 #include <stdio.h> 14 #include <string.h> 15 16 #include "core/heap.h" 17 #include "core/pool.h" 18 #include "core/slice.h" 19 #include "obj/format.h" 20 #include "obj/obj.h" 21 #include "obj/wasm/wasm.h" 22 #include "wasm/wasm.h" 23 24 /* Minimal bounds-checked cursor over the raw module bytes. */ 25 typedef struct WasmCur { 26 Compiler* c; 27 const u8* data; 28 size_t len; 29 size_t pos; 30 } WasmCur; 31 32 static u8 cur_u8(WasmCur* w) { 33 if (w->pos >= w->len) 34 compiler_panic(w->c, SRCLOC_NONE, "read_wasm: unexpected end of file"); 35 return w->data[w->pos++]; 36 } 37 38 static u32 cur_uleb(WasmCur* w) { 39 u32 result = 0, shift = 0, nbytes = 0; 40 for (;;) { 41 u8 b = cur_u8(w); 42 if (nbytes++ >= 5u || (shift == 28u && (b & 0xf0u))) 43 compiler_panic(w->c, SRCLOC_NONE, "read_wasm: invalid uleb128"); 44 result |= (u32)(b & 0x7fu) << shift; 45 if (!(b & 0x80u)) return result; 46 shift += 7u; 47 } 48 } 49 50 /* Canonical section name for the standard section ids (WebAssembly spec). */ 51 static const char* wasm_section_name(u8 id) { 52 switch (id) { 53 case 1: 54 return "type"; 55 case 2: 56 return "import"; 57 case 3: 58 return "function"; 59 case 4: 60 return "table"; 61 case 5: 62 return "memory"; 63 case 6: 64 return "global"; 65 case 7: 66 return "export"; 67 case 8: 68 return "start"; 69 case 9: 70 return "elem"; 71 case 10: 72 return "code"; 73 case 11: 74 return "data"; 75 case 12: 76 return "datacount"; 77 default: 78 return "section"; 79 } 80 } 81 82 static Sym intern_cstr(Compiler* c, const char* s) { 83 return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)}); 84 } 85 86 /* Best name for the j-th defined function: explicit (name section), else the 87 * export name, else a synthesized func[index] placeholder. */ 88 static Sym func_symbol_name(Compiler* c, const WasmFunc* f, u32 func_index) { 89 char buf[32]; 90 if (f && f->name && f->name[0]) return intern_cstr(c, f->name); 91 if (f && f->export_name && f->export_name[0]) 92 return intern_cstr(c, f->export_name); 93 (void)snprintf(buf, sizeof buf, "func[%u]", func_index); 94 return intern_cstr(c, buf); 95 } 96 97 /* Add one function symbol per defined function, with value = byte offset of the 98 * body's size-prefixed content (locals vector start) within the code-section 99 * payload. `payload` points at the code section content (the function-count 100 * LEB); `size` is its length. */ 101 static void add_code_symbols(Compiler* c, ObjBuilder* ob, ObjSecId code_sec, 102 const WasmModule* mod, const u8* payload, 103 size_t size) { 104 WasmCur w = {c, payload, size, 0}; 105 u32 count = cur_uleb(&w); 106 u32 fi = 0; /* index into mod->funcs (wasm function index space) */ 107 u32 k; 108 for (k = 0; k < count; ++k) { 109 u32 body_size = cur_uleb(&w); 110 u64 body_off = (u64)w.pos; /* offset of locals vector within payload */ 111 const WasmFunc* f = NULL; 112 Sym nm; 113 SymBind bind; 114 /* Skip imported functions: only defined funcs have code bodies. */ 115 while (fi < mod->nfuncs && mod->funcs[fi].is_import) fi++; 116 if (fi < mod->nfuncs) f = &mod->funcs[fi]; 117 nm = func_symbol_name(c, f, fi); 118 bind = (f && f->export_name && f->export_name[0]) ? SB_GLOBAL : SB_LOCAL; 119 { 120 ObjSymId sid = obj_symbol_ex(ob, nm, bind, SV_DEFAULT, SK_FUNC, code_sec, 121 body_off, body_size, 0); 122 obj_sym_mark_referenced(ob, sid); 123 } 124 fi++; 125 /* Advance past the body to the next size LEB. */ 126 if (body_size > w.len || w.pos > w.len - body_size) 127 compiler_panic(c, SRCLOC_NONE, "read_wasm: code body out of bounds"); 128 w.pos += body_size; 129 } 130 } 131 132 ObjBuilder* read_wasm(Compiler* c, const char* name, const u8* data, 133 size_t len) { 134 KitSlice input; 135 WasmModule mod; 136 ObjBuilder* ob; 137 WasmCur w; 138 (void)name; 139 140 input.data = data; 141 input.len = len; 142 if (!wasm_is_binary(&input)) 143 compiler_panic(c, SRCLOC_NONE, "read_wasm: not a wasm binary"); 144 145 ob = obj_new(c); 146 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_wasm: obj_new failed"); 147 148 /* Decode the module model so we can recover function names. Names are 149 * interned into c->global before the module is freed, so the module's own 150 * storage need not outlive this function. */ 151 wasm_module_init(&mod, c->ctx->heap); 152 wasm_decode_binary(c, &input, &mod); 153 154 /* Walk the raw section framing and mirror each section into the builder with 155 * its original payload bytes. */ 156 w.c = c; 157 w.data = data; 158 w.len = len; 159 w.pos = 8; /* past magic + version */ 160 while (w.pos < w.len) { 161 u8 id = cur_u8(&w); 162 u32 size = cur_uleb(&w); 163 size_t payload = w.pos; 164 size_t payload_end; 165 Sym sname; 166 SecKind kind; 167 SecSem sem; 168 u16 flags; 169 ObjSecId sid; 170 if (size > w.len || w.pos > w.len - size) 171 compiler_panic(c, SRCLOC_NONE, "read_wasm: section out of bounds"); 172 payload_end = payload + size; 173 174 if (id == 0) { 175 /* Custom section: name it after its embedded name field. */ 176 u32 nlen = cur_uleb(&w); 177 const char* nm = (const char*)(w.data + w.pos); 178 if (nlen > w.len || w.pos > w.len - nlen) 179 compiler_panic(c, SRCLOC_NONE, "read_wasm: custom name out of bounds"); 180 sname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 181 kind = SEC_OTHER; 182 sem = SSEM_WASM_CUSTOM; 183 flags = 0; 184 } else { 185 sname = intern_cstr(c, wasm_section_name(id)); 186 sem = SSEM_PROGBITS; 187 if (id == 10) { 188 kind = SEC_TEXT; 189 flags = (u16)(SF_EXEC | SF_ALLOC); 190 } else if (id == 11) { 191 kind = SEC_DATA; 192 flags = SF_ALLOC; 193 } else { 194 kind = SEC_OTHER; 195 flags = 0; 196 } 197 } 198 199 sid = obj_section_ex(ob, sname, kind, sem, flags, 1, 0, OBJ_SEC_NONE, 0); 200 if (sid == OBJ_SEC_NONE) 201 compiler_panic(c, SRCLOC_NONE, "read_wasm: obj_section_ex failed"); 202 if (size) obj_write(ob, sid, data + payload, size); 203 204 if (id == 10) add_code_symbols(c, ob, sid, &mod, data + payload, size); 205 206 w.pos = payload_end; 207 } 208 209 wasm_module_free(&mod); 210 return ob; 211 }