parser_core.c (17399B)
1 #include <stdarg.h> 2 #include <stdio.h> 3 #include <string.h> 4 5 #include "internal.h" 6 7 KitCgTypeId toy_builtin_type(ToyParser* p, KitCgBuiltinType ty) { 8 return p->types.id[ty]; 9 } 10 11 KitCgTypeId toy_cg_func_ret(ToyParser* p, KitCgTypeId fn_ty) { 12 KitCgTypeId rt = kit_cg_type_func_result(p->c, fn_ty).type; 13 return rt ? rt : toy_builtin_type(p, KIT_CG_BUILTIN_VOID); 14 } 15 16 void* toy_parser_zalloc(ToyParser* p, size_t count, size_t elem_size, 17 const char* what) { 18 KitHeap* h = kit_compiler_context(p->c)->heap; 19 size_t size = count * elem_size; 20 void* items; 21 if (count != 0 && size / count != elem_size) { 22 toy_error(p, p->cur.loc, "out of memory growing %.*s", 23 KIT_SLICE_ARG(kit_slice_cstr(what))); 24 return NULL; 25 } 26 items = h->alloc(h, size ? size : 1u, 1); 27 if (!items) { 28 toy_error(p, p->cur.loc, "out of memory growing %.*s", 29 KIT_SLICE_ARG(kit_slice_cstr(what))); 30 return NULL; 31 } 32 memset(items, 0, size ? size : 1u); 33 return items; 34 } 35 36 void toy_parser_free_mem(ToyParser* p, void* items, size_t size) { 37 KitHeap* h; 38 if (!items) return; 39 h = kit_compiler_context(p->c)->heap; 40 h->free(h, items, size ? size : 1u); 41 } 42 43 static uint32_t toy_source_file_id(KitCompiler* c, const char* name) { 44 uint32_t file_id = 0; 45 if (name && *name) 46 (void)kit_source_add_memory(c, kit_slice_cstr(name), &file_id); 47 return file_id; 48 } 49 50 void toy_parser_init(ToyParser* p, KitCompiler* c, KitCg* cg, ToyModule* module, 51 const uint8_t* data, size_t len, const char* input_name) { 52 memset(p, 0, sizeof *p); 53 p->module = module; 54 p->file_id = toy_source_file_id(c, input_name); 55 p->input_name = input_name; 56 toy_lexer_init(&p->lex, data, len, p->file_id); 57 p->cur = toy_lexer_next(&p->lex); 58 p->c = c; 59 p->cg = cg; 60 p->types = kit_cg_builtin_types(c); 61 p->target = kit_compiler_target_spec(c); 62 p->int_type = toy_builtin_type(p, KIT_CG_BUILTIN_I64); 63 p->size_type = toy_builtin_type( 64 p, p->target.ptr_size <= 4u ? KIT_CG_BUILTIN_I32 : KIT_CG_BUILTIN_I64); 65 p->int_ptr_type = kit_cg_type_ptr(c, p->int_type, 0); 66 p->va_list_type = toy_builtin_type(p, KIT_CG_BUILTIN_VARARG_STATE); 67 p->nvars = 0; 68 p->cap_vars = 0; 69 p->vars = NULL; 70 /* The durable module is zero-initialized by the frontend; register the 71 * builtin types into it exactly once, on this first compile. */ 72 toy_type_register_builtins(p); 73 p->nscopes = 0; 74 p->cap_scopes = 0; 75 p->scopes = NULL; 76 p->nlabels = 0; 77 p->cap_labels = 0; 78 p->labels = NULL; 79 p->goto_targets = NULL; 80 p->cap_goto_targets = 0; 81 p->cur_fn_ret = toy_builtin_type(p, KIT_CG_BUILTIN_VOID); 82 p->cur_fn_ret_toy = toy_type_from_cg(p, p->cur_fn_ret); 83 p->diag = kit_compiler_context(c)->diag; 84 p->input_name = input_name; 85 p->has_error = 0; 86 p->expr_island_mask = 0; 87 p->last_type = TOY_TYPE_NONE; 88 p->allow_tail_call_expr = 0; 89 p->tail_call_expr = 0; 90 p->tail_call_ret_toy = TOY_TYPE_NONE; 91 p->input_kind = KIT_FRONTEND_INPUT_TRANSLATION_UNIT; 92 } 93 94 void toy_parser_reinit(ToyParser* p, KitCompiler* c, KitCg* cg, 95 const uint8_t* data, size_t len, const char* input_name, 96 KitFrontendInputKind input_kind) { 97 p->file_id = toy_source_file_id(c, input_name); 98 p->input_name = input_name; 99 toy_lexer_init(&p->lex, data, len, p->file_id); 100 p->cur = toy_lexer_next(&p->lex); 101 p->c = c; 102 p->cg = cg; 103 p->types = kit_cg_builtin_types(c); 104 p->target = kit_compiler_target_spec(c); 105 p->int_type = toy_builtin_type(p, KIT_CG_BUILTIN_I64); 106 p->size_type = toy_builtin_type( 107 p, p->target.ptr_size <= 4u ? KIT_CG_BUILTIN_I32 : KIT_CG_BUILTIN_I64); 108 p->int_ptr_type = kit_cg_type_ptr(c, p->int_type, 0); 109 p->va_list_type = toy_builtin_type(p, KIT_CG_BUILTIN_VARARG_STATE); 110 p->nvars = 0; 111 p->nscopes = 0; 112 p->nlabels = 0; 113 p->cur_fn_ret = toy_builtin_type(p, KIT_CG_BUILTIN_VOID); 114 p->cur_fn_ret_toy = toy_type_from_cg(p, p->cur_fn_ret); 115 p->diag = kit_compiler_context(c)->diag; 116 p->input_name = input_name; 117 p->has_error = 0; 118 p->expr_island_mask = 0; 119 p->last_type = TOY_TYPE_NONE; 120 p->allow_tail_call_expr = 0; 121 p->tail_call_expr = 0; 122 p->tail_call_ret_toy = TOY_TYPE_NONE; 123 p->input_kind = input_kind; 124 } 125 126 /* Frees one heap block sized for the module owner. Mirrors 127 * toy_parser_free_mem but does not need a live parser, so the durable module 128 * can be torn down independently of any compile. */ 129 static void toy_mem_free(KitCompiler* c, void* items, size_t size) { 130 KitHeap* h; 131 if (!items) return; 132 h = kit_compiler_context(c)->heap; 133 h->free(h, items, size ? size : 1u); 134 } 135 136 void toy_parser_dispose(ToyParser* p) { 137 /* Per-compile scratch only; the durable arrays belong to the module and 138 * are freed by toy_module_dispose. */ 139 toy_parser_free_mem(p, p->vars, p->cap_vars * sizeof *p->vars); 140 toy_parser_free_mem(p, p->scopes, p->cap_scopes * sizeof *p->scopes); 141 toy_parser_free_mem(p, p->labels, p->cap_labels * sizeof *p->labels); 142 toy_parser_free_mem(p, p->goto_targets, 143 p->cap_goto_targets * sizeof *p->goto_targets); 144 toy_parser_free_mem(p, p->fn_syms, p->cap_fn_syms * sizeof *p->fn_syms); 145 toy_parser_free_mem(p, p->global_syms, 146 p->cap_global_syms * sizeof *p->global_syms); 147 toy_parser_free_mem(p, p->txn.undo, p->txn.cap_undo * sizeof *p->txn.undo); 148 p->txn.undo = NULL; 149 p->txn.cap_undo = 0; 150 p->txn.nundo = 0; 151 p->txn.open = 0; 152 p->vars = NULL; 153 p->scopes = NULL; 154 p->labels = NULL; 155 p->goto_targets = NULL; 156 p->fn_syms = NULL; 157 p->global_syms = NULL; 158 p->cap_fn_syms = 0; 159 p->cap_global_syms = 0; 160 p->nvars = 0; 161 p->nscopes = p->nlabels = 0; 162 p->cap_vars = 0; 163 p->cap_scopes = p->cap_labels = 0; 164 p->cap_goto_targets = 0; 165 p->last_type = TOY_TYPE_NONE; 166 } 167 168 void toy_module_dispose(ToyModule* m, KitCompiler* c) { 169 size_t i; 170 ToyTypeTable* tt = &m->type_table; 171 /* Only free per-element pointer arrays when their companion count is 172 * non-zero — allocation only happens in that case, so a zero count means 173 * the pointer field was never written by an allocation. This guards 174 * against rare paths that leave a pointer slot uninitialized while 175 * leaving the count at 0; without it, freeing a garbage pointer aborts 176 * libc malloc with "pointer being freed was not allocated". */ 177 for (i = 0; i < m->nfns; ++i) { 178 if (m->fns[i].nparams) { 179 toy_mem_free(c, m->fns[i].params, 180 m->fns[i].nparams * sizeof *m->fns[i].params); 181 toy_mem_free(c, m->fns[i].toy_params, 182 m->fns[i].nparams * sizeof *m->fns[i].toy_params); 183 } 184 } 185 toy_mem_free(c, m->fns, m->cap_fns * sizeof *m->fns); 186 toy_mem_free(c, m->globals, m->cap_globals * sizeof *m->globals); 187 for (i = 0; i < tt->ntypes; ++i) { 188 if (tt->types[i].nparams) { 189 toy_mem_free(c, tt->types[i].params, 190 tt->types[i].nparams * sizeof *tt->types[i].params); 191 } 192 } 193 toy_mem_free(c, tt->types, tt->cap_types * sizeof *tt->types); 194 for (i = 0; i < tt->count; ++i) { 195 if (tt->named[i].cap_enum_values) { 196 toy_mem_free( 197 c, tt->named[i].enum_values, 198 tt->named[i].cap_enum_values * sizeof *tt->named[i].enum_values); 199 } 200 if (tt->named[i].cap_fields) { 201 toy_mem_free(c, tt->named[i].fields, 202 tt->named[i].cap_fields * sizeof *tt->named[i].fields); 203 } 204 } 205 toy_mem_free(c, tt->named, tt->cap * sizeof *tt->named); 206 m->fns = NULL; 207 m->globals = NULL; 208 tt->types = NULL; 209 tt->named = NULL; 210 m->nfns = m->nglobals = 0; 211 tt->count = tt->ntypes = 0; 212 m->cap_fns = m->cap_globals = 0; 213 tt->cap = tt->cap_types = 0; 214 } 215 216 void toy_txn_begin(ToyParser* p) { 217 ToyModule* m = p->module; 218 p->txn.open = 1; 219 p->txn.fns = m->nfns; 220 p->txn.globals = m->nglobals; 221 p->txn.types = m->type_table.ntypes; 222 p->txn.named = m->type_table.count; 223 p->txn.nundo = 0; /* reuse the journal buffer; cap_undo persists */ 224 } 225 226 void toy_txn_commit(ToyParser* p) { 227 if (!p->txn.open) return; 228 /* Staged appends are already in the module; just drop the journal. */ 229 p->txn.open = 0; 230 p->txn.nundo = 0; 231 } 232 233 int toy_txn_record_named(ToyParser* p, size_t index) { 234 size_t i; 235 if (!p->txn.open || index >= p->txn.named) return 1; /* staged or no txn */ 236 for (i = 0; i < p->txn.nundo; ++i) { 237 if (p->txn.undo[i].kind == TOY_UNDO_NAMED && p->txn.undo[i].index == index) 238 return 1; /* keep the earliest before-image */ 239 } 240 if (!toy_parser_reserve(p, (void**)&p->txn.undo, &p->txn.cap_undo, 241 p->txn.nundo + 1u, sizeof *p->txn.undo, 242 "undo journal")) { 243 return 0; 244 } 245 p->txn.undo[p->txn.nundo].kind = TOY_UNDO_NAMED; 246 p->txn.undo[p->txn.nundo].index = index; 247 p->txn.undo[p->txn.nundo].saved.named = p->module->type_table.named[index]; 248 p->txn.nundo++; 249 return 1; 250 } 251 252 int toy_txn_record_type(ToyParser* p, size_t index) { 253 size_t i; 254 if (!p->txn.open || index >= p->txn.types) return 1; /* staged or no txn */ 255 for (i = 0; i < p->txn.nundo; ++i) { 256 if (p->txn.undo[i].kind == TOY_UNDO_TYPE && p->txn.undo[i].index == index) 257 return 1; 258 } 259 if (!toy_parser_reserve(p, (void**)&p->txn.undo, &p->txn.cap_undo, 260 p->txn.nundo + 1u, sizeof *p->txn.undo, 261 "undo journal")) { 262 return 0; 263 } 264 p->txn.undo[p->txn.nundo].kind = TOY_UNDO_TYPE; 265 p->txn.undo[p->txn.nundo].index = index; 266 p->txn.undo[p->txn.nundo].saved.type = p->module->type_table.types[index]; 267 p->txn.nundo++; 268 return 1; 269 } 270 271 void toy_txn_abort(ToyParser* p) { 272 ToyModule* m = p->module; 273 ToyTypeTable* tt = &m->type_table; 274 size_t i; 275 if (!p->txn.open) return; 276 p->txn.open = 0; 277 /* 1. Restore committed entries that were mutated in place, newest first. 278 * The current entry may have allocated a fields/enum/params array since the 279 * snapshot; free it before restoring the (typically NULL) saved pointer. A 280 * committed entry is only mutated when completing a forward declaration, so 281 * the saved array is NULL and there is no realloc-of-saved hazard. */ 282 while (p->txn.nundo > 0) { 283 ToyUndo* u = &p->txn.undo[--p->txn.nundo]; 284 if (u->kind == TOY_UNDO_NAMED) { 285 ToyNamedType* cur = &tt->named[u->index]; 286 if (cur->fields != u->saved.named.fields) { 287 toy_mem_free(p->c, cur->fields, cur->cap_fields * sizeof *cur->fields); 288 } 289 if (cur->enum_values != u->saved.named.enum_values) { 290 toy_mem_free(p->c, cur->enum_values, 291 cur->cap_enum_values * sizeof *cur->enum_values); 292 } 293 *cur = u->saved.named; 294 } else { 295 ToyType* cur = &tt->types[u->index]; 296 if (cur->params != u->saved.type.params) { 297 toy_mem_free(p->c, cur->params, cur->nparams * sizeof *cur->params); 298 } 299 *cur = u->saved.type; 300 } 301 } 302 /* 2. Truncate staged appends, freeing each dropped entry's sub-arrays. */ 303 for (i = p->txn.fns; i < m->nfns; ++i) { 304 if (m->fns[i].nparams) { 305 toy_mem_free(p->c, m->fns[i].params, 306 m->fns[i].nparams * sizeof *m->fns[i].params); 307 toy_mem_free(p->c, m->fns[i].toy_params, 308 m->fns[i].nparams * sizeof *m->fns[i].toy_params); 309 } 310 } 311 m->nfns = p->txn.fns; 312 m->nglobals = p->txn.globals; /* globals own no sub-arrays */ 313 for (i = p->txn.types; i < tt->ntypes; ++i) { 314 if (tt->types[i].nparams) { 315 toy_mem_free(p->c, tt->types[i].params, 316 tt->types[i].nparams * sizeof *tt->types[i].params); 317 } 318 } 319 tt->ntypes = p->txn.types; 320 for (i = p->txn.named; i < tt->count; ++i) { 321 if (tt->named[i].cap_fields) { 322 toy_mem_free(p->c, tt->named[i].fields, 323 tt->named[i].cap_fields * sizeof *tt->named[i].fields); 324 } 325 if (tt->named[i].cap_enum_values) { 326 toy_mem_free( 327 p->c, tt->named[i].enum_values, 328 tt->named[i].cap_enum_values * sizeof *tt->named[i].enum_values); 329 } 330 } 331 tt->count = p->txn.named; 332 } 333 334 int toy_parser_reserve(ToyParser* p, void** items, size_t* cap, size_t want, 335 size_t elem_size, const char* what) { 336 KitHeap* h; 337 size_t new_cap; 338 size_t old_size; 339 size_t new_size; 340 size_t old_cap; 341 void* new_items; 342 if (want <= *cap) return 1; 343 old_cap = *cap; 344 new_cap = old_cap ? old_cap * 2u : 8u; 345 while (new_cap < want) new_cap *= 2u; 346 old_size = old_cap * elem_size; 347 new_size = new_cap * elem_size; 348 if (new_cap != 0 && new_size / new_cap != elem_size) { 349 toy_error(p, p->cur.loc, "out of memory growing %.*s", 350 KIT_SLICE_ARG(kit_slice_cstr(what))); 351 return 0; 352 } 353 h = kit_compiler_context(p->c)->heap; 354 /* Allocate fresh and copy old contents over (rather than realloc'ing in 355 * place and only zeroing the tail). Zeroing the whole buffer guarantees 356 * any slot the writer doesn't fully initialize has NULL pointers, so the 357 * dispose path never sees stale bytes — a previous realloc-in-place 358 * variant left under-initialized slots holding the realloc'd pages' 359 * prior contents, which manifested as a "pointer being freed was not 360 * allocated" abort in lib c malloc when the dispose loop walked the 361 * type table. */ 362 new_items = h->alloc(h, new_size ? new_size : 1u, 1); 363 if (!new_items) { 364 toy_error(p, p->cur.loc, "out of memory growing %.*s", 365 KIT_SLICE_ARG(kit_slice_cstr(what))); 366 return 0; 367 } 368 memset(new_items, 0, new_size ? new_size : 1u); 369 if (*items && old_size) memcpy(new_items, *items, old_size); 370 if (*items) h->free(h, *items, old_size ? old_size : 1u); 371 *items = new_items; 372 *cap = new_cap; 373 return 1; 374 } 375 376 void toy_parser_advance(ToyParser* p) { p->cur = toy_lexer_next(&p->lex); } 377 378 int toy_parser_match(ToyParser* p, ToyTokenKind kind) { 379 if (p->cur.kind == kind) { 380 toy_parser_advance(p); 381 return 1; 382 } 383 return 0; 384 } 385 386 int toy_parser_expect(ToyParser* p, ToyTokenKind kind) { 387 if (p->cur.kind == kind) { 388 toy_parser_advance(p); 389 return 1; 390 } 391 return 0; 392 } 393 394 void toy_error(ToyParser* p, KitSrcLoc loc, const char* fmt, ...) { 395 va_list ap; 396 p->has_error = 1; 397 if (!p->diag) return; 398 va_start(ap, fmt); 399 p->diag->emit(p->diag, KIT_DIAG_ERROR, loc, fmt, ap); 400 va_end(ap); 401 } 402 403 void toy_set_loc(ToyParser* p) { 404 if (p->cg) kit_cg_set_loc(p->cg, p->cur.loc); 405 } 406 407 KitSym toy_tok_sym(ToyParser* p, ToyToken tok) { 408 char buf[64]; 409 if (tok.text_len >= sizeof(buf)) { 410 toy_error(p, tok.loc, "identifier too long"); 411 return 0; 412 } 413 memcpy(buf, tok.text, tok.text_len); 414 buf[tok.text_len] = '\0'; 415 return kit_sym_intern(p->c, (KitSlice){.s = buf, .len = tok.text_len}); 416 } 417 418 int toy_sym_is(ToyParser* p, KitSym sym, const char* name) { 419 return sym == kit_sym_intern(p->c, kit_slice_cstr(name)); 420 } 421 422 int toy_lookup_const(ToyParser* p, KitSym tok, const ToyConstRow* rows, size_t n, 423 const char* what, uint64_t* out) { 424 size_t i; 425 for (i = 0; i < n; ++i) { 426 if (tok == kit_sym_intern(p->c, kit_slice_cstr(rows[i].name))) { 427 *out = rows[i].value; 428 return 1; 429 } 430 } 431 toy_error(p, p->cur.loc, "unknown %s", what); 432 return 0; 433 } 434 435 int toy_parse_dot_const(ToyParser* p, const ToyConstRow* rows, size_t n, 436 int strict_ident, const char* expected, 437 const char* unknown, uint64_t* out) { 438 KitSym tok; 439 if (strict_ident) { 440 if (!toy_parser_expect(p, TOK_DOT) || p->cur.kind != TOK_IDENT) { 441 toy_error(p, p->cur.loc, "expected %s", expected); 442 return 0; 443 } 444 tok = toy_tok_sym(p, p->cur); 445 toy_parser_advance(p); 446 } else { 447 (void)expected; 448 if (!toy_parse_attr_dot_name(p, &tok)) return 0; 449 } 450 return toy_lookup_const(p, tok, rows, n, unknown, out); 451 } 452 453 int toy_parse_flag_set(ToyParser* p, const ToyConstRow* rows, size_t n, 454 const char* unknown, int comma_prefixed, uint32_t* mask) { 455 for (;;) { 456 KitSym name; 457 uint64_t value; 458 if (comma_prefixed) { 459 if (!toy_parser_match(p, TOK_COMMA)) break; 460 } else if (p->cur.kind == TOK_RPAREN || p->cur.kind == TOK_EOF) { 461 break; 462 } 463 if (!toy_parse_attr_dot_name(p, &name)) return 0; 464 if (!toy_lookup_const(p, name, rows, n, unknown, &value)) return 0; 465 *mask |= (uint32_t)value; 466 if (!comma_prefixed && !toy_parser_match(p, TOK_COMMA)) break; 467 } 468 return 1; 469 } 470 471 int toy_skip_attr_list_ex(ToyParser* p, int* has_static) { 472 int bracket_depth = 0; 473 int paren_depth = 0; 474 if (has_static) *has_static = 0; 475 if (!toy_parser_match(p, TOK_AT)) return 1; 476 if (!toy_parser_expect(p, TOK_LBRACKET)) { 477 toy_error(p, p->cur.loc, "expected '[' after '@'"); 478 return 0; 479 } 480 bracket_depth = 1; 481 while (p->cur.kind != TOK_EOF && bracket_depth > 0) { 482 if (p->cur.kind == TOK_DOT) { 483 toy_parser_advance(p); 484 if (p->cur.kind == TOK_IDENT && has_static && p->cur.text_len == 6 && 485 memcmp(p->cur.text, "static", 6) == 0) { 486 *has_static = 1; 487 } 488 continue; 489 } 490 if (p->cur.kind == TOK_LBRACKET && paren_depth == 0) 491 bracket_depth++; 492 else if (p->cur.kind == TOK_RBRACKET && paren_depth == 0) 493 bracket_depth--; 494 else if (p->cur.kind == TOK_LPAREN) 495 paren_depth++; 496 else if (p->cur.kind == TOK_RPAREN && paren_depth > 0) 497 paren_depth--; 498 toy_parser_advance(p); 499 } 500 if (bracket_depth != 0) { 501 toy_error(p, p->cur.loc, "unterminated attribute list"); 502 return 0; 503 } 504 return 1; 505 } 506 507 int toy_skip_attr_list(ToyParser* p) { return toy_skip_attr_list_ex(p, NULL); }