kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

parser_core.c (17399B)


      1 #include <stdarg.h>
      2 #include <stdio.h>
      3 #include <string.h>
      4 
      5 #include "internal.h"
      6 
      7 KitCgTypeId toy_builtin_type(ToyParser* p, KitCgBuiltinType ty) {
      8   return p->types.id[ty];
      9 }
     10 
     11 KitCgTypeId toy_cg_func_ret(ToyParser* p, KitCgTypeId fn_ty) {
     12   KitCgTypeId rt = kit_cg_type_func_result(p->c, fn_ty).type;
     13   return rt ? rt : toy_builtin_type(p, KIT_CG_BUILTIN_VOID);
     14 }
     15 
     16 void* toy_parser_zalloc(ToyParser* p, size_t count, size_t elem_size,
     17                         const char* what) {
     18   KitHeap* h = kit_compiler_context(p->c)->heap;
     19   size_t size = count * elem_size;
     20   void* items;
     21   if (count != 0 && size / count != elem_size) {
     22     toy_error(p, p->cur.loc, "out of memory growing %.*s",
     23               KIT_SLICE_ARG(kit_slice_cstr(what)));
     24     return NULL;
     25   }
     26   items = h->alloc(h, size ? size : 1u, 1);
     27   if (!items) {
     28     toy_error(p, p->cur.loc, "out of memory growing %.*s",
     29               KIT_SLICE_ARG(kit_slice_cstr(what)));
     30     return NULL;
     31   }
     32   memset(items, 0, size ? size : 1u);
     33   return items;
     34 }
     35 
     36 void toy_parser_free_mem(ToyParser* p, void* items, size_t size) {
     37   KitHeap* h;
     38   if (!items) return;
     39   h = kit_compiler_context(p->c)->heap;
     40   h->free(h, items, size ? size : 1u);
     41 }
     42 
     43 static uint32_t toy_source_file_id(KitCompiler* c, const char* name) {
     44   uint32_t file_id = 0;
     45   if (name && *name)
     46     (void)kit_source_add_memory(c, kit_slice_cstr(name), &file_id);
     47   return file_id;
     48 }
     49 
     50 void toy_parser_init(ToyParser* p, KitCompiler* c, KitCg* cg, ToyModule* module,
     51                      const uint8_t* data, size_t len, const char* input_name) {
     52   memset(p, 0, sizeof *p);
     53   p->module = module;
     54   p->file_id = toy_source_file_id(c, input_name);
     55   p->input_name = input_name;
     56   toy_lexer_init(&p->lex, data, len, p->file_id);
     57   p->cur = toy_lexer_next(&p->lex);
     58   p->c = c;
     59   p->cg = cg;
     60   p->types = kit_cg_builtin_types(c);
     61   p->target = kit_compiler_target_spec(c);
     62   p->int_type = toy_builtin_type(p, KIT_CG_BUILTIN_I64);
     63   p->size_type = toy_builtin_type(
     64       p, p->target.ptr_size <= 4u ? KIT_CG_BUILTIN_I32 : KIT_CG_BUILTIN_I64);
     65   p->int_ptr_type = kit_cg_type_ptr(c, p->int_type, 0);
     66   p->va_list_type = toy_builtin_type(p, KIT_CG_BUILTIN_VARARG_STATE);
     67   p->nvars = 0;
     68   p->cap_vars = 0;
     69   p->vars = NULL;
     70   /* The durable module is zero-initialized by the frontend; register the
     71    * builtin types into it exactly once, on this first compile. */
     72   toy_type_register_builtins(p);
     73   p->nscopes = 0;
     74   p->cap_scopes = 0;
     75   p->scopes = NULL;
     76   p->nlabels = 0;
     77   p->cap_labels = 0;
     78   p->labels = NULL;
     79   p->goto_targets = NULL;
     80   p->cap_goto_targets = 0;
     81   p->cur_fn_ret = toy_builtin_type(p, KIT_CG_BUILTIN_VOID);
     82   p->cur_fn_ret_toy = toy_type_from_cg(p, p->cur_fn_ret);
     83   p->diag = kit_compiler_context(c)->diag;
     84   p->input_name = input_name;
     85   p->has_error = 0;
     86   p->expr_island_mask = 0;
     87   p->last_type = TOY_TYPE_NONE;
     88   p->allow_tail_call_expr = 0;
     89   p->tail_call_expr = 0;
     90   p->tail_call_ret_toy = TOY_TYPE_NONE;
     91   p->input_kind = KIT_FRONTEND_INPUT_TRANSLATION_UNIT;
     92 }
     93 
     94 void toy_parser_reinit(ToyParser* p, KitCompiler* c, KitCg* cg,
     95                        const uint8_t* data, size_t len, const char* input_name,
     96                        KitFrontendInputKind input_kind) {
     97   p->file_id = toy_source_file_id(c, input_name);
     98   p->input_name = input_name;
     99   toy_lexer_init(&p->lex, data, len, p->file_id);
    100   p->cur = toy_lexer_next(&p->lex);
    101   p->c = c;
    102   p->cg = cg;
    103   p->types = kit_cg_builtin_types(c);
    104   p->target = kit_compiler_target_spec(c);
    105   p->int_type = toy_builtin_type(p, KIT_CG_BUILTIN_I64);
    106   p->size_type = toy_builtin_type(
    107       p, p->target.ptr_size <= 4u ? KIT_CG_BUILTIN_I32 : KIT_CG_BUILTIN_I64);
    108   p->int_ptr_type = kit_cg_type_ptr(c, p->int_type, 0);
    109   p->va_list_type = toy_builtin_type(p, KIT_CG_BUILTIN_VARARG_STATE);
    110   p->nvars = 0;
    111   p->nscopes = 0;
    112   p->nlabels = 0;
    113   p->cur_fn_ret = toy_builtin_type(p, KIT_CG_BUILTIN_VOID);
    114   p->cur_fn_ret_toy = toy_type_from_cg(p, p->cur_fn_ret);
    115   p->diag = kit_compiler_context(c)->diag;
    116   p->input_name = input_name;
    117   p->has_error = 0;
    118   p->expr_island_mask = 0;
    119   p->last_type = TOY_TYPE_NONE;
    120   p->allow_tail_call_expr = 0;
    121   p->tail_call_expr = 0;
    122   p->tail_call_ret_toy = TOY_TYPE_NONE;
    123   p->input_kind = input_kind;
    124 }
    125 
    126 /* Frees one heap block sized for the module owner. Mirrors
    127  * toy_parser_free_mem but does not need a live parser, so the durable module
    128  * can be torn down independently of any compile. */
    129 static void toy_mem_free(KitCompiler* c, void* items, size_t size) {
    130   KitHeap* h;
    131   if (!items) return;
    132   h = kit_compiler_context(c)->heap;
    133   h->free(h, items, size ? size : 1u);
    134 }
    135 
    136 void toy_parser_dispose(ToyParser* p) {
    137   /* Per-compile scratch only; the durable arrays belong to the module and
    138    * are freed by toy_module_dispose. */
    139   toy_parser_free_mem(p, p->vars, p->cap_vars * sizeof *p->vars);
    140   toy_parser_free_mem(p, p->scopes, p->cap_scopes * sizeof *p->scopes);
    141   toy_parser_free_mem(p, p->labels, p->cap_labels * sizeof *p->labels);
    142   toy_parser_free_mem(p, p->goto_targets,
    143                       p->cap_goto_targets * sizeof *p->goto_targets);
    144   toy_parser_free_mem(p, p->fn_syms, p->cap_fn_syms * sizeof *p->fn_syms);
    145   toy_parser_free_mem(p, p->global_syms,
    146                       p->cap_global_syms * sizeof *p->global_syms);
    147   toy_parser_free_mem(p, p->txn.undo, p->txn.cap_undo * sizeof *p->txn.undo);
    148   p->txn.undo = NULL;
    149   p->txn.cap_undo = 0;
    150   p->txn.nundo = 0;
    151   p->txn.open = 0;
    152   p->vars = NULL;
    153   p->scopes = NULL;
    154   p->labels = NULL;
    155   p->goto_targets = NULL;
    156   p->fn_syms = NULL;
    157   p->global_syms = NULL;
    158   p->cap_fn_syms = 0;
    159   p->cap_global_syms = 0;
    160   p->nvars = 0;
    161   p->nscopes = p->nlabels = 0;
    162   p->cap_vars = 0;
    163   p->cap_scopes = p->cap_labels = 0;
    164   p->cap_goto_targets = 0;
    165   p->last_type = TOY_TYPE_NONE;
    166 }
    167 
    168 void toy_module_dispose(ToyModule* m, KitCompiler* c) {
    169   size_t i;
    170   ToyTypeTable* tt = &m->type_table;
    171   /* Only free per-element pointer arrays when their companion count is
    172    * non-zero — allocation only happens in that case, so a zero count means
    173    * the pointer field was never written by an allocation. This guards
    174    * against rare paths that leave a pointer slot uninitialized while
    175    * leaving the count at 0; without it, freeing a garbage pointer aborts
    176    * libc malloc with "pointer being freed was not allocated". */
    177   for (i = 0; i < m->nfns; ++i) {
    178     if (m->fns[i].nparams) {
    179       toy_mem_free(c, m->fns[i].params,
    180                    m->fns[i].nparams * sizeof *m->fns[i].params);
    181       toy_mem_free(c, m->fns[i].toy_params,
    182                    m->fns[i].nparams * sizeof *m->fns[i].toy_params);
    183     }
    184   }
    185   toy_mem_free(c, m->fns, m->cap_fns * sizeof *m->fns);
    186   toy_mem_free(c, m->globals, m->cap_globals * sizeof *m->globals);
    187   for (i = 0; i < tt->ntypes; ++i) {
    188     if (tt->types[i].nparams) {
    189       toy_mem_free(c, tt->types[i].params,
    190                    tt->types[i].nparams * sizeof *tt->types[i].params);
    191     }
    192   }
    193   toy_mem_free(c, tt->types, tt->cap_types * sizeof *tt->types);
    194   for (i = 0; i < tt->count; ++i) {
    195     if (tt->named[i].cap_enum_values) {
    196       toy_mem_free(
    197           c, tt->named[i].enum_values,
    198           tt->named[i].cap_enum_values * sizeof *tt->named[i].enum_values);
    199     }
    200     if (tt->named[i].cap_fields) {
    201       toy_mem_free(c, tt->named[i].fields,
    202                    tt->named[i].cap_fields * sizeof *tt->named[i].fields);
    203     }
    204   }
    205   toy_mem_free(c, tt->named, tt->cap * sizeof *tt->named);
    206   m->fns = NULL;
    207   m->globals = NULL;
    208   tt->types = NULL;
    209   tt->named = NULL;
    210   m->nfns = m->nglobals = 0;
    211   tt->count = tt->ntypes = 0;
    212   m->cap_fns = m->cap_globals = 0;
    213   tt->cap = tt->cap_types = 0;
    214 }
    215 
    216 void toy_txn_begin(ToyParser* p) {
    217   ToyModule* m = p->module;
    218   p->txn.open = 1;
    219   p->txn.fns = m->nfns;
    220   p->txn.globals = m->nglobals;
    221   p->txn.types = m->type_table.ntypes;
    222   p->txn.named = m->type_table.count;
    223   p->txn.nundo = 0; /* reuse the journal buffer; cap_undo persists */
    224 }
    225 
    226 void toy_txn_commit(ToyParser* p) {
    227   if (!p->txn.open) return;
    228   /* Staged appends are already in the module; just drop the journal. */
    229   p->txn.open = 0;
    230   p->txn.nundo = 0;
    231 }
    232 
    233 int toy_txn_record_named(ToyParser* p, size_t index) {
    234   size_t i;
    235   if (!p->txn.open || index >= p->txn.named) return 1; /* staged or no txn */
    236   for (i = 0; i < p->txn.nundo; ++i) {
    237     if (p->txn.undo[i].kind == TOY_UNDO_NAMED && p->txn.undo[i].index == index)
    238       return 1; /* keep the earliest before-image */
    239   }
    240   if (!toy_parser_reserve(p, (void**)&p->txn.undo, &p->txn.cap_undo,
    241                           p->txn.nundo + 1u, sizeof *p->txn.undo,
    242                           "undo journal")) {
    243     return 0;
    244   }
    245   p->txn.undo[p->txn.nundo].kind = TOY_UNDO_NAMED;
    246   p->txn.undo[p->txn.nundo].index = index;
    247   p->txn.undo[p->txn.nundo].saved.named = p->module->type_table.named[index];
    248   p->txn.nundo++;
    249   return 1;
    250 }
    251 
    252 int toy_txn_record_type(ToyParser* p, size_t index) {
    253   size_t i;
    254   if (!p->txn.open || index >= p->txn.types) return 1; /* staged or no txn */
    255   for (i = 0; i < p->txn.nundo; ++i) {
    256     if (p->txn.undo[i].kind == TOY_UNDO_TYPE && p->txn.undo[i].index == index)
    257       return 1;
    258   }
    259   if (!toy_parser_reserve(p, (void**)&p->txn.undo, &p->txn.cap_undo,
    260                           p->txn.nundo + 1u, sizeof *p->txn.undo,
    261                           "undo journal")) {
    262     return 0;
    263   }
    264   p->txn.undo[p->txn.nundo].kind = TOY_UNDO_TYPE;
    265   p->txn.undo[p->txn.nundo].index = index;
    266   p->txn.undo[p->txn.nundo].saved.type = p->module->type_table.types[index];
    267   p->txn.nundo++;
    268   return 1;
    269 }
    270 
    271 void toy_txn_abort(ToyParser* p) {
    272   ToyModule* m = p->module;
    273   ToyTypeTable* tt = &m->type_table;
    274   size_t i;
    275   if (!p->txn.open) return;
    276   p->txn.open = 0;
    277   /* 1. Restore committed entries that were mutated in place, newest first.
    278    * The current entry may have allocated a fields/enum/params array since the
    279    * snapshot; free it before restoring the (typically NULL) saved pointer. A
    280    * committed entry is only mutated when completing a forward declaration, so
    281    * the saved array is NULL and there is no realloc-of-saved hazard. */
    282   while (p->txn.nundo > 0) {
    283     ToyUndo* u = &p->txn.undo[--p->txn.nundo];
    284     if (u->kind == TOY_UNDO_NAMED) {
    285       ToyNamedType* cur = &tt->named[u->index];
    286       if (cur->fields != u->saved.named.fields) {
    287         toy_mem_free(p->c, cur->fields, cur->cap_fields * sizeof *cur->fields);
    288       }
    289       if (cur->enum_values != u->saved.named.enum_values) {
    290         toy_mem_free(p->c, cur->enum_values,
    291                      cur->cap_enum_values * sizeof *cur->enum_values);
    292       }
    293       *cur = u->saved.named;
    294     } else {
    295       ToyType* cur = &tt->types[u->index];
    296       if (cur->params != u->saved.type.params) {
    297         toy_mem_free(p->c, cur->params, cur->nparams * sizeof *cur->params);
    298       }
    299       *cur = u->saved.type;
    300     }
    301   }
    302   /* 2. Truncate staged appends, freeing each dropped entry's sub-arrays. */
    303   for (i = p->txn.fns; i < m->nfns; ++i) {
    304     if (m->fns[i].nparams) {
    305       toy_mem_free(p->c, m->fns[i].params,
    306                    m->fns[i].nparams * sizeof *m->fns[i].params);
    307       toy_mem_free(p->c, m->fns[i].toy_params,
    308                    m->fns[i].nparams * sizeof *m->fns[i].toy_params);
    309     }
    310   }
    311   m->nfns = p->txn.fns;
    312   m->nglobals = p->txn.globals; /* globals own no sub-arrays */
    313   for (i = p->txn.types; i < tt->ntypes; ++i) {
    314     if (tt->types[i].nparams) {
    315       toy_mem_free(p->c, tt->types[i].params,
    316                    tt->types[i].nparams * sizeof *tt->types[i].params);
    317     }
    318   }
    319   tt->ntypes = p->txn.types;
    320   for (i = p->txn.named; i < tt->count; ++i) {
    321     if (tt->named[i].cap_fields) {
    322       toy_mem_free(p->c, tt->named[i].fields,
    323                    tt->named[i].cap_fields * sizeof *tt->named[i].fields);
    324     }
    325     if (tt->named[i].cap_enum_values) {
    326       toy_mem_free(
    327           p->c, tt->named[i].enum_values,
    328           tt->named[i].cap_enum_values * sizeof *tt->named[i].enum_values);
    329     }
    330   }
    331   tt->count = p->txn.named;
    332 }
    333 
    334 int toy_parser_reserve(ToyParser* p, void** items, size_t* cap, size_t want,
    335                        size_t elem_size, const char* what) {
    336   KitHeap* h;
    337   size_t new_cap;
    338   size_t old_size;
    339   size_t new_size;
    340   size_t old_cap;
    341   void* new_items;
    342   if (want <= *cap) return 1;
    343   old_cap = *cap;
    344   new_cap = old_cap ? old_cap * 2u : 8u;
    345   while (new_cap < want) new_cap *= 2u;
    346   old_size = old_cap * elem_size;
    347   new_size = new_cap * elem_size;
    348   if (new_cap != 0 && new_size / new_cap != elem_size) {
    349     toy_error(p, p->cur.loc, "out of memory growing %.*s",
    350               KIT_SLICE_ARG(kit_slice_cstr(what)));
    351     return 0;
    352   }
    353   h = kit_compiler_context(p->c)->heap;
    354   /* Allocate fresh and copy old contents over (rather than realloc'ing in
    355    * place and only zeroing the tail). Zeroing the whole buffer guarantees
    356    * any slot the writer doesn't fully initialize has NULL pointers, so the
    357    * dispose path never sees stale bytes — a previous realloc-in-place
    358    * variant left under-initialized slots holding the realloc'd pages'
    359    * prior contents, which manifested as a "pointer being freed was not
    360    * allocated" abort in lib c malloc when the dispose loop walked the
    361    * type table. */
    362   new_items = h->alloc(h, new_size ? new_size : 1u, 1);
    363   if (!new_items) {
    364     toy_error(p, p->cur.loc, "out of memory growing %.*s",
    365               KIT_SLICE_ARG(kit_slice_cstr(what)));
    366     return 0;
    367   }
    368   memset(new_items, 0, new_size ? new_size : 1u);
    369   if (*items && old_size) memcpy(new_items, *items, old_size);
    370   if (*items) h->free(h, *items, old_size ? old_size : 1u);
    371   *items = new_items;
    372   *cap = new_cap;
    373   return 1;
    374 }
    375 
    376 void toy_parser_advance(ToyParser* p) { p->cur = toy_lexer_next(&p->lex); }
    377 
    378 int toy_parser_match(ToyParser* p, ToyTokenKind kind) {
    379   if (p->cur.kind == kind) {
    380     toy_parser_advance(p);
    381     return 1;
    382   }
    383   return 0;
    384 }
    385 
    386 int toy_parser_expect(ToyParser* p, ToyTokenKind kind) {
    387   if (p->cur.kind == kind) {
    388     toy_parser_advance(p);
    389     return 1;
    390   }
    391   return 0;
    392 }
    393 
    394 void toy_error(ToyParser* p, KitSrcLoc loc, const char* fmt, ...) {
    395   va_list ap;
    396   p->has_error = 1;
    397   if (!p->diag) return;
    398   va_start(ap, fmt);
    399   p->diag->emit(p->diag, KIT_DIAG_ERROR, loc, fmt, ap);
    400   va_end(ap);
    401 }
    402 
    403 void toy_set_loc(ToyParser* p) {
    404   if (p->cg) kit_cg_set_loc(p->cg, p->cur.loc);
    405 }
    406 
    407 KitSym toy_tok_sym(ToyParser* p, ToyToken tok) {
    408   char buf[64];
    409   if (tok.text_len >= sizeof(buf)) {
    410     toy_error(p, tok.loc, "identifier too long");
    411     return 0;
    412   }
    413   memcpy(buf, tok.text, tok.text_len);
    414   buf[tok.text_len] = '\0';
    415   return kit_sym_intern(p->c, (KitSlice){.s = buf, .len = tok.text_len});
    416 }
    417 
    418 int toy_sym_is(ToyParser* p, KitSym sym, const char* name) {
    419   return sym == kit_sym_intern(p->c, kit_slice_cstr(name));
    420 }
    421 
    422 int toy_lookup_const(ToyParser* p, KitSym tok, const ToyConstRow* rows, size_t n,
    423                      const char* what, uint64_t* out) {
    424   size_t i;
    425   for (i = 0; i < n; ++i) {
    426     if (tok == kit_sym_intern(p->c, kit_slice_cstr(rows[i].name))) {
    427       *out = rows[i].value;
    428       return 1;
    429     }
    430   }
    431   toy_error(p, p->cur.loc, "unknown %s", what);
    432   return 0;
    433 }
    434 
    435 int toy_parse_dot_const(ToyParser* p, const ToyConstRow* rows, size_t n,
    436                         int strict_ident, const char* expected,
    437                         const char* unknown, uint64_t* out) {
    438   KitSym tok;
    439   if (strict_ident) {
    440     if (!toy_parser_expect(p, TOK_DOT) || p->cur.kind != TOK_IDENT) {
    441       toy_error(p, p->cur.loc, "expected %s", expected);
    442       return 0;
    443     }
    444     tok = toy_tok_sym(p, p->cur);
    445     toy_parser_advance(p);
    446   } else {
    447     (void)expected;
    448     if (!toy_parse_attr_dot_name(p, &tok)) return 0;
    449   }
    450   return toy_lookup_const(p, tok, rows, n, unknown, out);
    451 }
    452 
    453 int toy_parse_flag_set(ToyParser* p, const ToyConstRow* rows, size_t n,
    454                        const char* unknown, int comma_prefixed, uint32_t* mask) {
    455   for (;;) {
    456     KitSym name;
    457     uint64_t value;
    458     if (comma_prefixed) {
    459       if (!toy_parser_match(p, TOK_COMMA)) break;
    460     } else if (p->cur.kind == TOK_RPAREN || p->cur.kind == TOK_EOF) {
    461       break;
    462     }
    463     if (!toy_parse_attr_dot_name(p, &name)) return 0;
    464     if (!toy_lookup_const(p, name, rows, n, unknown, &value)) return 0;
    465     *mask |= (uint32_t)value;
    466     if (!comma_prefixed && !toy_parser_match(p, TOK_COMMA)) break;
    467   }
    468   return 1;
    469 }
    470 
    471 int toy_skip_attr_list_ex(ToyParser* p, int* has_static) {
    472   int bracket_depth = 0;
    473   int paren_depth = 0;
    474   if (has_static) *has_static = 0;
    475   if (!toy_parser_match(p, TOK_AT)) return 1;
    476   if (!toy_parser_expect(p, TOK_LBRACKET)) {
    477     toy_error(p, p->cur.loc, "expected '[' after '@'");
    478     return 0;
    479   }
    480   bracket_depth = 1;
    481   while (p->cur.kind != TOK_EOF && bracket_depth > 0) {
    482     if (p->cur.kind == TOK_DOT) {
    483       toy_parser_advance(p);
    484       if (p->cur.kind == TOK_IDENT && has_static && p->cur.text_len == 6 &&
    485           memcmp(p->cur.text, "static", 6) == 0) {
    486         *has_static = 1;
    487       }
    488       continue;
    489     }
    490     if (p->cur.kind == TOK_LBRACKET && paren_depth == 0)
    491       bracket_depth++;
    492     else if (p->cur.kind == TOK_RBRACKET && paren_depth == 0)
    493       bracket_depth--;
    494     else if (p->cur.kind == TOK_LPAREN)
    495       paren_depth++;
    496     else if (p->cur.kind == TOK_RPAREN && paren_depth > 0)
    497       paren_depth--;
    498     toy_parser_advance(p);
    499   }
    500   if (bracket_depth != 0) {
    501     toy_error(p, p->cur.loc, "unterminated attribute list");
    502     return 0;
    503   }
    504   return 1;
    505 }
    506 
    507 int toy_skip_attr_list(ToyParser* p) { return toy_skip_attr_list_ex(p, NULL); }