kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

pp_directive.c (41401B)


      1 /* pp_directive.c — if-stack, PP expression evaluator, #include search/open,
      2  * #line, #pragma, #error, #embed, and directive dispatch. */
      3 
      4 #include "pp/pp_priv.h"
      5 
      6 static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap,
      7                         size_t* out_len);
      8 
      9 /* ============================================================
     10  * If-stack
     11  * ============================================================ */
     12 
     13 static void if_push(Pp* pp, IfFrame f) {
     14   if (pp->ifstk_n == pp->ifstk_cap) {
     15     u32 nc = pp->ifstk_cap ? pp->ifstk_cap * 2 : 4;
     16     pp->ifstk = pp_xrealloc(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap,
     17                             sizeof(IfFrame) * nc, _Alignof(IfFrame));
     18     pp->ifstk_cap = nc;
     19   }
     20   pp->ifstk[pp->ifstk_n++] = f;
     21 }
     22 
     23 static IfFrame* if_top(Pp* pp) {
     24   return pp->ifstk_n ? &pp->ifstk[pp->ifstk_n - 1] : NULL;
     25 }
     26 
     27 static void if_pop(Pp* pp) {
     28   if (pp->ifstk_n) --pp->ifstk_n;
     29 }
     30 
     31 /* ============================================================
     32  * Directive line reader
     33  * ============================================================ */
     34 
     35 /* Read tokens up through (and including) the next TOK_NEWLINE / TOK_EOF.
     36  * Drops the newline; collected tokens are arena-allocated and returned via
     37  * *out_toks/out_n. */
     38 void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n) {
     39   Tok* buf = NULL;
     40   u32 cap = 0, n = 0;
     41   Tok t;
     42   HidesetId hs;
     43   for (;;) {
     44     t = src_next_raw(pp, &hs, NULL);
     45     if (t.kind == TOK_NEWLINE || t.kind == TOK_EOF) break;
     46     if (n == cap) {
     47       u32 nc = cap ? cap * 2 : 8;
     48       Tok* nb = (Tok*)arena_alloc(pp->arena, sizeof(Tok) * nc, _Alignof(Tok));
     49       if (cap) memcpy(nb, buf, sizeof(Tok) * cap);
     50       buf = nb;
     51       cap = nc;
     52     }
     53     buf[n++] = t;
     54   }
     55   *out_toks = buf;
     56   *out_n = n;
     57 }
     58 
     59 /* ============================================================
     60  * PP expression evaluator (§6.10.1)
     61  * ============================================================ */
     62 
     63 /* Parse a C integer constant from a pp-number's spelling. Suffixes (u, l,
     64  * etc.) are ignored. Recognizes decimal, hex (0x...), and octal (0...). */
     65 static i64 parse_pp_int(const char* s, size_t n) {
     66   int base = 10;
     67   size_t i = 0;
     68   u64 val = 0; /* unsigned: #if arithmetic wraps on overflow, signed would be UB */
     69   if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
     70     base = 16;
     71     i = 2;
     72   } else if (n >= 1 && s[0] == '0') {
     73     base = 8;
     74     i = 1;
     75   }
     76   for (; i < n; ++i) {
     77     char c = s[i];
     78     int d;
     79     if (c >= '0' && c <= '9')
     80       d = c - '0';
     81     else if (base == 16 && c >= 'a' && c <= 'f')
     82       d = c - 'a' + 10;
     83     else if (base == 16 && c >= 'A' && c <= 'F')
     84       d = c - 'A' + 10;
     85     else
     86       break;
     87     if (d >= base) break;
     88     val = val * (u64)base + (u64)d;
     89   }
     90   return (i64)val;
     91 }
     92 
     93 /* Pre-pass: replace `defined X` / `defined ( X )` with a 0/1 pp-number,
     94  * preserving the rest of the token sequence. The operand of `defined` is
     95  * NOT macro-expanded. Output is a fresh TokVec. */
     96 static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
     97   u32 i;
     98   for (i = 0; i < nin; ++i) {
     99     if (in[i].kind == TOK_IDENT && in[i].v.ident == pp->sym_defined) {
    100       int has_paren = 0;
    101       Sym ident = 0;
    102       u32 j = i + 1;
    103       if (j < nin && in[j].kind == TOK_PUNCT && in[j].v.punct == '(') {
    104         has_paren = 1;
    105         ++j;
    106       }
    107       if (j >= nin || in[j].kind != TOK_IDENT) {
    108         compiler_panic(pp->c, in[i].loc,
    109                        "operand of 'defined' must be an identifier");
    110       }
    111       ident = in[j].v.ident;
    112       ++j;
    113       if (has_paren) {
    114         if (j >= nin || in[j].kind != TOK_PUNCT || in[j].v.punct != ')') {
    115           compiler_panic(pp->c, in[i].loc,
    116                          "expected ')' after 'defined' operand");
    117         }
    118         ++j;
    119       }
    120       {
    121         Tok t;
    122         memset(&t, 0, sizeof(t));
    123         t.kind = TOK_NUM;
    124         t.flags = in[i].flags & (TF_AT_BOL | TF_HAS_SPACE);
    125         t.loc = in[i].loc;
    126         t.spelling =
    127             kit_sym_intern(pp->pool->c, mt_get(pp, ident) ? KIT_SLICE_LIT("1")
    128                                                           : KIT_SLICE_LIT("0"));
    129         tv_push(pp, out, t);
    130       }
    131       i = j - 1;
    132     } else {
    133       tv_push(pp, out, in[i]);
    134     }
    135   }
    136 }
    137 
    138 /* Macro-expand a sequence of pre-#if tokens to completion.
    139  *
    140  * Sets pp->in_if_expansion for the duration so pp_next_raw can keep
    141  * `defined`-operator operands raw even when they ride out of a macro
    142  * body via the ## operator. Without this flag a macro body like
    143  *   #define G(x) (!defined(__G_DEFINED_ ## x))
    144  * would have the pasted operand expanded if it happens to name an
    145  * already-defined macro, leaving the second prepass to choke on
    146  * `defined()`. */
    147 static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
    148   Tok* slice;
    149   u8 saved;
    150   if (nin == 0) return;
    151   slice = arena_array(pp->arena, Tok, nin);
    152   memcpy(slice, in, sizeof(Tok) * nin);
    153   saved = pp->in_if_expansion;
    154   pp->in_if_expansion = 1;
    155   pp->defined_skip = 0;
    156   expand_arg_to_eof(pp, slice, NULL, nin, out);
    157   pp->in_if_expansion = saved;
    158   pp->defined_skip = 0;
    159 }
    160 
    161 /* Replace remaining identifiers with `0` per §6.10.1 ¶4, after `defined`
    162  * has been handled. */
    163 static void replace_remaining_if_identifiers(Pp* pp, TokVec* toks) {
    164   u32 i;
    165   Sym zero = kit_sym_intern(pp->pool->c, KIT_SLICE_LIT("0"));
    166   for (i = 0; i < toks->n; ++i) {
    167     if (toks->data[i].kind == TOK_IDENT) {
    168       toks->data[i].kind = TOK_NUM;
    169       toks->data[i].spelling = zero;
    170     }
    171   }
    172 }
    173 
    174 /* Recursive-descent expression evaluator over an expanded token list.
         * eval_if_expr fills one EE per expression and every ee_* parsing
         * function receives a pointer to it. */
    175 typedef struct EE {
    176   Pp* pp; /* preprocessor state; pp->c is the context passed to panics */
    177   const Tok* toks; /* token array being parsed */
    178   u32 n; /* number of tokens in `toks` */
    179   u32 pos; /* index of the next unconsumed token; pos == n means end */
    180   SrcLoc loc; /* fallback panic location, set from eval_if_expr's `loc` */
    181   SrcLoc op_loc; /* loc of the binary operator being applied (for panics) */
    182 } EE;
    183 
    184 static i64 ee_ternary(EE* e);
    185 
    186 static const Tok* ee_peek(EE* e) {
    187   return e->pos < e->n ? &e->toks[e->pos] : NULL;
    188 }
    189 
    190 static int ee_match_punct(EE* e, u32 p) {
    191   const Tok* t = ee_peek(e);
    192   if (t && t->kind == TOK_PUNCT && t->v.punct == p) {
    193     ++e->pos;
    194     return 1;
    195   }
    196   return 0;
    197 }
    198 
    199 static i64 ee_primary(EE* e) {
    200   const Tok* t = ee_peek(e);
    201   if (!t) compiler_panic(e->pp->c, e->loc, "#if: missing operand");
    202   if (t->kind == TOK_NUM) {
    203     KitSlice s = kit_sym_str(e->pp->pool->c, t->spelling);
    204     ++e->pos;
    205     return parse_pp_int(s.s, s.len);
    206   }
    207   if (t->kind == TOK_CHR) {
    208     /* Treat as the codepoint of the first character (post-decoding
    209      * not implemented; cover the common case of a single ASCII
    210      * char). */
    211     KitSlice s = kit_sym_str(e->pp->pool->c, t->spelling);
    212     ++e->pos;
    213     if (s.len >= 3 && s.s[0] == '\'') return (unsigned char)s.s[1];
    214     return 0;
    215   }
    216   if (t->kind == TOK_PUNCT && t->v.punct == '(') {
    217     i64 v;
    218     ++e->pos;
    219     v = ee_ternary(e);
    220     if (!ee_match_punct(e, ')')) {
    221       compiler_panic(e->pp->c, t->loc, "#if: expected ')'");
    222     }
    223     return v;
    224   }
    225   compiler_panic(e->pp->c, t->loc, "#if: unexpected token in expression");
    226   return 0;
    227 }
    228 
    229 static i64 ee_unary(EE* e) {
    230   const Tok* t = ee_peek(e);
    231   if (t && t->kind == TOK_PUNCT) {
    232     u32 p = t->v.punct;
    233     if (p == '!' || p == '-' || p == '+' || p == '~') {
    234       i64 v;
    235       ++e->pos;
    236       v = ee_unary(e);
    237       switch (p) {
    238         case '!':
    239           return v ? 0 : 1;
    240         case '-':
    241           return -v;
    242         case '+':
    243           return v;
    244         case '~':
    245           return ~v;
    246       }
    247     }
    248   }
    249   return ee_primary(e);
    250 }
    251 
    252 /* One row per binary operator, highest `prec` binds tightest. All listed
    253  * operators are left-associative; ternary (right-assoc) stays special-cased
    254  * in ee_ternary. `apply` folds (lhs OP rhs) and owns the div/mod-by-zero
    255  * panic (it needs the operator loc, threaded via EE::op_loc).
         *
         * EeApply is the signature of such a fold: `a` is the left operand, `b`
         * the right operand, and the return value is the result of `a OP b`.
         * ee_binary stores the operator's location in e->op_loc before the call. */
    256 typedef i64 (*EeApply)(EE* e, i64 a, i64 b);
    257 
    258 static i64 eb_mul(EE* e, i64 a, i64 b) {
    259   (void)e;
    260   return a * b;
    261 }
    262 static i64 eb_div(EE* e, i64 a, i64 b) {
    263   if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: division by zero");
    264   return a / b;
    265 }
    266 static i64 eb_mod(EE* e, i64 a, i64 b) {
    267   if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: modulo by zero");
    268   return a % b;
    269 }
    270 static i64 eb_add(EE* e, i64 a, i64 b) {
    271   (void)e;
    272   return a + b;
    273 }
    274 static i64 eb_sub(EE* e, i64 a, i64 b) {
    275   (void)e;
    276   return a - b;
    277 }
    278 static i64 eb_shl(EE* e, i64 a, i64 b) {
    279   (void)e;
    280   return a << b;
    281 }
    282 static i64 eb_shr(EE* e, i64 a, i64 b) {
    283   (void)e;
    284   return a >> b;
    285 }
    286 static i64 eb_lt(EE* e, i64 a, i64 b) {
    287   (void)e;
    288   return a < b;
    289 }
    290 static i64 eb_gt(EE* e, i64 a, i64 b) {
    291   (void)e;
    292   return a > b;
    293 }
    294 static i64 eb_le(EE* e, i64 a, i64 b) {
    295   (void)e;
    296   return a <= b;
    297 }
    298 static i64 eb_ge(EE* e, i64 a, i64 b) {
    299   (void)e;
    300   return a >= b;
    301 }
    302 static i64 eb_eq(EE* e, i64 a, i64 b) {
    303   (void)e;
    304   return a == b;
    305 }
    306 static i64 eb_ne(EE* e, i64 a, i64 b) {
    307   (void)e;
    308   return a != b;
    309 }
    310 static i64 eb_band(EE* e, i64 a, i64 b) {
    311   (void)e;
    312   return a & b;
    313 }
    314 static i64 eb_bxor(EE* e, i64 a, i64 b) {
    315   (void)e;
    316   return a ^ b;
    317 }
    318 static i64 eb_bor(EE* e, i64 a, i64 b) {
    319   (void)e;
    320   return a | b;
    321 }
    322 static i64 eb_logand(EE* e, i64 a, i64 b) {
    323   (void)e;
    324   return a && b;
    325 }
    326 static i64 eb_logor(EE* e, i64 a, i64 b) {
    327   (void)e;
    328   return a || b;
    329 }
    330 
    331 typedef struct EeOp { /* one row of the ee_ops binary-operator table */
    332   u32 punct; /* P_* / ASCII codepoint of the operator token */
    333   u8 prec;   /* higher binds tighter */
    334   EeApply apply; /* folds (lhs OP rhs); called by ee_binary */
    335 } EeOp;
    336 
    337 static const EeOp ee_ops[] = {
    338     {'*', 10, eb_mul},   {'/', 10, eb_div},     {'%', 10, eb_mod},
    339     {'+', 9, eb_add},    {'-', 9, eb_sub},      {P_SHL, 8, eb_shl},
    340     {P_SHR, 8, eb_shr},  {'<', 7, eb_lt},       {'>', 7, eb_gt},
    341     {P_LE, 7, eb_le},    {P_GE, 7, eb_ge},      {P_EQ, 6, eb_eq},
    342     {P_NE, 6, eb_ne},    {'&', 5, eb_band},     {'^', 4, eb_bxor},
    343     {'|', 3, eb_bor},    {P_AND, 2, eb_logand}, {P_OR, 1, eb_logor},
    344 };
    345 
    346 static const EeOp* ee_lookup_op(const Tok* t) {
    347   size_t i;
    348   if (!t || t->kind != TOK_PUNCT) return NULL;
    349   for (i = 0; i < sizeof(ee_ops) / sizeof(ee_ops[0]); ++i) {
    350     if (ee_ops[i].punct == t->v.punct) return &ee_ops[i];
    351   }
    352   return NULL;
    353 }
    354 
    355 /* Precedence-climbing fold of all left-associative binary operators. */
    356 static i64 ee_binary(EE* e, int min_prec) {
    357   i64 v = ee_unary(e);
    358   for (;;) {
    359     const Tok* t = ee_peek(e);
    360     const EeOp* op = ee_lookup_op(t);
    361     SrcLoc op_loc;
    362     i64 rhs;
    363     if (!op || op->prec < min_prec) break;
    364     op_loc = t->loc;
    365     ++e->pos;
    366     /* Left-associative: parse the RHS with strictly higher precedence so
    367      * same-prec operators fold left-to-right. */
    368     rhs = ee_binary(e, op->prec + 1);
    369     e->op_loc = op_loc;
    370     v = op->apply(e, v, rhs);
    371   }
    372   return v;
    373 }
    374 
    375 static i64 ee_ternary(EE* e) {
    376   i64 c = ee_binary(e, 1);
    377   if (ee_match_punct(e, '?')) {
    378     i64 a = ee_ternary(e);
    379     i64 b;
    380     if (!ee_match_punct(e, ':')) {
    381       compiler_panic(e->pp->c, e->loc, "#if: ':' expected in ternary");
    382     }
    383     b = ee_ternary(e);
    384     return c ? a : b;
    385   }
    386   return c;
    387 }
    388 
    389 i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
    390   TokVec defs = {0};
    391   TokVec exp = {0};
    392   TokVec defs2 = {0};
    393   EE e;
    394   i64 v;
    395 
    396   prepass_defined(pp, line, n, &defs);
    397   expand_for_if(pp, defs.data, defs.n, &exp);
    398   prepass_defined(pp, exp.data, exp.n, &defs2);
    399   replace_remaining_if_identifiers(pp, &defs2);
    400 
    401   e.pp = pp;
    402   e.toks = defs2.data;
    403   e.n = defs2.n;
    404   e.pos = 0;
    405   e.loc = loc;
    406   v = ee_ternary(&e);
    407   if (e.pos != e.n) {
    408     compiler_panic(pp->c, e.loc,
    409                    "#if: unexpected trailing tokens in expression");
    410   }
    411   return v;
    412 }
    413 
    414 /* ============================================================
    415  * Conditional inclusion helpers
    416  * ============================================================ */
    417 
    418 static void consume_to_newline(Pp* pp) {
    419   Tok t;
    420   do {
    421     t = src_next_raw(pp, NULL, NULL);
    422   } while (t.kind != TOK_NEWLINE && t.kind != TOK_EOF);
    423 }
    424 
    425 /* Drive the source forward consuming tokens until we either:
    426  *   - reach a balancing #endif (pops the frame, returns), or
    427  *   - reach a #elif / #else that flips the top frame to IF_INCLUDE
    428  *     (returns with that frame active).
    429  * Nested #if directives inside the skipped group are tracked via
    430  * `local_depth`. Unrecognised directives in skipped groups are tolerated
    431  * (§6.10 ¶4, covered by `8c_skipped_relaxed_syntax`).
         *
         * Returns at once when the if-stack is empty or when the top frame is
         * already IF_INCLUDE. Reaching end of input first panics with
         * "unterminated #if / #ifdef" at the location stored in the top frame.
         * A #elif expression is evaluated only while the frame is still
         * IF_SEEK_TRUE; in every other case its line is discarded unevaluated. */
    432 static void skip_until_active(Pp* pp) {
    433   int local_depth = 0;
    434   while (pp->ifstk_n > 0) {
    435     IfFrame* top = if_top(pp);
    436     Tok t;
    437     if (top->state == IF_INCLUDE && local_depth == 0) return;
    438     t = src_next_raw(pp, NULL, NULL);
    439     if (t.kind == TOK_EOF) {
    440       compiler_panic(pp->c, top->loc, "unterminated #if / #ifdef");
    441     }
            /* Only a '#' at the beginning of a line starts a directive; every
             * other token of the skipped group is dropped. */
    442     if (t.kind != TOK_PP_HASH || (t.flags & TF_AT_BOL) == 0) continue;
    443 
    444     /* Read directive name (or null directive). */
    445     {
    446       Tok nt = src_next_raw(pp, NULL, NULL);
    447       Sym name;
    448       if (nt.kind == TOK_NEWLINE || nt.kind == TOK_EOF) continue;
    449       if (nt.kind != TOK_IDENT) {
    450         consume_to_newline(pp);
    451         continue;
    452       }
    453       name = nt.v.ident;
              /* A conditional nested in the skipped group is only counted; its
               * condition is never evaluated. */
    454       if (name == pp->sym_if || name == pp->sym_ifdef ||
    455           name == pp->sym_ifndef) {
    456         ++local_depth;
    457         consume_to_newline(pp);
    458         continue;
    459       }
    460       if (name == pp->sym_endif) {
    461         consume_to_newline(pp);
                /* Closes a nested conditional first, the tracked frame last. */
    462         if (local_depth > 0) {
    463           --local_depth;
    464           continue;
    465         }
    466         if_pop(pp);
    467         return;
    468       }
    469       if (name == pp->sym_else) {
    470         consume_to_newline(pp);
    471         if (local_depth > 0) continue;
    472         if (top->has_else) {
    473           compiler_panic(pp->c, t.loc, "duplicate #else");
    474         }
    475         top->has_else = 1;
                /* Still seeking means no earlier group was taken, so the #else
                 * group becomes the active one. */
    476         if (top->state == IF_SEEK_TRUE) {
    477           top->state = IF_INCLUDE;
    478           return;
    479         }
    480         top->state = IF_DONE;
    481         continue;
    482       }
    483       if (name == pp->sym_elif) {
                /* Nested, after #else, or after a taken group: the expression
                 * is skipped without being evaluated. */
    484         if (local_depth > 0 || top->has_else || top->state == IF_DONE) {
    485           consume_to_newline(pp);
    486           continue;
    487         }
    488         if (top->state == IF_SEEK_TRUE) {
    489           Tok* line;
    490           u32 ln;
    491           i64 v;
    492           read_directive_line(pp, &line, &ln);
    493           v = eval_if_expr(pp, line, ln, t.loc);
    494           if (v != 0) {
    495             top->state = IF_INCLUDE;
    496             return;
    497           }
    498           continue;
    499         }
    500         /* Was IF_INCLUDE; #elif means we're done. (Should already
    501          * have been transitioned to DONE before entering this
    502          * skip — defensive.) */
    503         top->state = IF_DONE;
    504         consume_to_newline(pp);
    505         continue;
    506       }
    507       /* Other directive — relaxed: skip silently. */
    508       consume_to_newline(pp);
    509       continue;
    510     }
    511   }
    512 }
    513 
    514 /* ============================================================
    515  * Predefined macro name guard
    516  * ============================================================ */
    517 
    518 static int is_predefined_macro_name(Pp* pp, Sym name) {
    519   return name == pp->sym_va_args || name == pp->sym_line__ ||
    520          name == pp->sym_file__ || name == pp->sym_date__ ||
    521          name == pp->sym_time__;
    522   /* __STDC__/__STDC_HOSTED__/__STDC_VERSION__ are registered as real
    523    * macros, so the macro-table lookup catches them. */
    524 }
    525 
    526 /* ============================================================
    527  * #ifdef / #if / #elif / #else / #endif
    528  * ============================================================ */
    529 
    530 static void do_ifdef(Pp* pp, const Tok* line, u32 n, int negate, SrcLoc loc) {
    531   int defined;
    532   IfFrame f;
    533   if (n < 1 || line[0].kind != TOK_IDENT) {
    534     compiler_panic(pp->c, loc,
    535                    negate ? "#ifndef: expected identifier"
    536                           : "#ifdef: expected identifier");
    537   }
    538   defined = (mt_get(pp, line[0].v.ident) != NULL) ||
    539             is_predefined_macro_name(pp, line[0].v.ident);
    540   if (negate) defined = !defined;
    541   memset(&f, 0, sizeof(f));
    542   f.state = defined ? IF_INCLUDE : IF_SEEK_TRUE;
    543   f.loc = loc;
    544   if_push(pp, f);
    545   if (!defined) skip_until_active(pp);
    546 }
    547 
    548 static void do_if_directive(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
    549   i64 v = eval_if_expr(pp, line, n, loc);
    550   IfFrame f;
    551   memset(&f, 0, sizeof(f));
    552   f.state = v ? IF_INCLUDE : IF_SEEK_TRUE;
    553   f.loc = loc;
    554   if_push(pp, f);
    555   if (!v) skip_until_active(pp);
    556 }
    557 
    558 static void do_elif(Pp* pp, SrcLoc loc) {
    559   /* We only reach do_elif from the active branch — meaning the
    560    * preceding group emitted code. So we must skip the rest. */
    561   IfFrame* top = if_top(pp);
    562   if (!top) compiler_panic(pp->c, loc, "stray #elif");
    563   if (top->has_else) compiler_panic(pp->c, loc, "#elif after #else");
    564   top->state = IF_DONE;
    565   skip_until_active(pp);
    566 }
    567 
    568 static void do_else(Pp* pp, SrcLoc loc) {
    569   IfFrame* top = if_top(pp);
    570   if (!top) compiler_panic(pp->c, loc, "stray #else");
    571   if (top->has_else) compiler_panic(pp->c, loc, "duplicate #else");
    572   top->has_else = 1;
    573   top->state = IF_DONE;
    574   skip_until_active(pp);
    575 }
    576 
    577 static void do_endif(Pp* pp, SrcLoc loc) {
    578   if (!if_top(pp)) compiler_panic(pp->c, loc, "stray #endif");
    579   if_pop(pp);
    580 }
    581 
    582 /* ============================================================
    583  * #include (§6.10.2)
    584  * ============================================================ */
    585 
    586 /* Read `path` via the host's file_io and copy its bytes into the pp
    587  * arena so they outlive io->release. Returns 1 on success. */
    588 static int try_open_include(Pp* pp, const char* path, const u8** data_out,
    589                             size_t* size_out) {
    590   KitFileData fd;
    591   const KitFileIO* io;
    592   u8* buf;
    593 
    594   memset(&fd, 0, sizeof(fd));
    595   io = kit_compiler_context(pp->c)->file_io;
    596   if (!io || !io->read_all) {
    597     compiler_panic(pp->c, (SrcLoc){0, 0, 0},
    598                    "#include: env.file_io is not configured");
    599   }
    600   if (io->read_all(io->user, path, &fd) != KIT_OK) return 0;
    601   {
    602     size_t sz = fd.size;
    603     buf = (u8*)arena_alloc(pp->arena, sz ? sz : 1, 1);
    604     if (sz && fd.data) memcpy(buf, fd.data, sz);
    605     if (io->release) io->release(io->user, &fd); /* zeros fd */
    606     *data_out = buf;
    607     *size_out = sz;
    608   }
    609   return 1;
    610 }
    611 
    612 /* Return the includer's directory for resolving a quoted include, or "."
    613  * for in-memory/builtin sources (where CWD is the natural fallback, like
    614  * gcc treats stdin). `dir_out` must point to a buffer of size >= cap. */
    615 static int includer_dir(Pp* pp, SrcLoc loc, char* dir_out, size_t cap) {
    616   KitSourceFile sf;
    617   const char* p = NULL;
    618   size_t plen = 0;
    619   const char* slash;
    620   size_t dlen;
    621   memset(&sf, 0, sizeof(sf));
    622   if (kit_source_file(pp->c, loc.file_id, &sf) == 0 && sf.name) {
    623     KitSlice s = kit_sym_str(pp->pool->c, sf.name);
    624     p = s.s;
    625     plen = s.len;
    626   }
    627   if (!p || plen == 0 || p[0] == '<') {
    628     if (cap < 2) return 0;
    629     dir_out[0] = '.';
    630     dir_out[1] = 0;
    631     return 1;
    632   }
    633   slash = NULL;
    634   {
    635     size_t i;
    636     for (i = plen; i > 0; --i) {
    637       if (p[i - 1] == '/') {
    638         slash = p + i - 1;
    639         break;
    640       }
    641     }
    642   }
    643   if (!slash) {
    644     if (cap < 2) return 0;
    645     dir_out[0] = '.';
    646     dir_out[1] = 0;
    647     return 1;
    648   }
    649   dlen = (size_t)(slash - p);
    650   if (dlen == 0) dlen = 1; /* path was "/x" — dir is "/" */
    651   if (dlen + 1 > cap) return 0;
    652   memcpy(dir_out, p, dlen);
    653   dir_out[dlen] = 0;
    654   return 1;
    655 }
    656 
    657 /* Search for a header. Absolute paths are opened verbatim. Quoted form
    658  * ("...") additionally searches the directory of the file containing the
    659  * #include first (per C §6.10.2); bracket form (<...>) skips that step.
    660  * Both forms then walk the configured -I / -isystem dirs in order. */
    661 static int find_and_open_include(Pp* pp, const char* path, int system,
    662                                  SrcLoc loc, const u8** data, size_t* size,
    663                                  char* resolved, size_t resolved_cap) {
    664   char buf[4096];
    665   u32 i;
    666   size_t plen = kit_slice_cstr(path).len;
    667 
    668   if (plen > 0 && path[0] == '/') {
    669     if (try_open_include(pp, path, data, size)) {
    670       if (plen + 1 > resolved_cap) return 0;
    671       memcpy(resolved, path, plen + 1);
    672       return 1;
    673     }
    674     return 0;
    675   }
    676 
    677   if (!system) {
    678     char dir[4096];
    679     if (includer_dir(pp, loc, dir, sizeof(dir))) {
    680       size_t dlen = kit_slice_cstr(dir).len;
    681       if (dlen + 1 + plen + 1 <= sizeof(buf)) {
    682         memcpy(buf, dir, dlen);
    683         buf[dlen] = '/';
    684         memcpy(buf + dlen + 1, path, plen);
    685         buf[dlen + 1 + plen] = 0;
    686         if (try_open_include(pp, buf, data, size)) {
    687           if (dlen + 1 + plen + 1 > resolved_cap) return 0;
    688           memcpy(resolved, buf, dlen + 1 + plen + 1);
    689           return 1;
    690         }
    691       }
    692     }
    693   }
    694   for (i = 0; i < pp->ninc_dirs; ++i) {
    695     const char* d = pp->inc_dirs[i].path;
    696     size_t dlen = kit_slice_cstr(d).len;
    697     if (dlen + 1 + plen + 1 > sizeof(buf)) continue;
    698     memcpy(buf, d, dlen);
    699     buf[dlen] = '/';
    700     memcpy(buf + dlen + 1, path, plen);
    701     buf[dlen + 1 + plen] = 0;
    702     if (try_open_include(pp, buf, data, size)) {
    703       if (dlen + 1 + plen + 1 > resolved_cap) return 0;
    704       memcpy(resolved, buf, dlen + 1 + plen + 1);
    705       return 1;
    706     }
    707   }
    708   return 0;
    709 }
    710 
    711 /* Parse the directive arguments into (path, system_flag). Handles:
    712  *   - directly-lexed TOK_HEADER: < ... > or " ... "
    713  *   - macro-replaced form: line is macro-expanded, then expected to
    714  *     produce either a TOK_STR ("...") or a < ... > sequence. */
    715 static void parse_include_path(Pp* pp, const Tok* line, u32 n, SrcLoc loc,
    716                                char* path_out, size_t cap, int* system_out) {
    717   if (n == 0) compiler_panic(pp->c, loc, "#include: missing path");
    718 
    719   if (line[0].kind == TOK_HEADER) {
    720     KitSlice sl = kit_sym_str(pp->pool->c, line[0].spelling);
    721     const char* s = sl.s;
    722     size_t slen = sl.len;
    723     if (slen < 2) compiler_panic(pp->c, loc, "#include: malformed header name");
    724     if (s[0] == '<' && s[slen - 1] == '>')
    725       *system_out = 1;
    726     else if (s[0] == '"' && s[slen - 1] == '"')
    727       *system_out = 0;
    728     else
    729       compiler_panic(pp->c, loc, "#include: malformed header name");
    730     if (slen - 2 + 1 > cap)
    731       compiler_panic(pp->c, loc, "#include: path too long");
    732     memcpy(path_out, s + 1, slen - 2);
    733     path_out[slen - 2] = 0;
    734     return;
    735   }
    736 
    737   /* Macro-replaced form. */
    738   {
    739     TokVec exp = {0};
    740     Tok* slice = arena_array(pp->arena, Tok, n);
    741     memcpy(slice, line, sizeof(Tok) * n);
    742     expand_arg_to_eof(pp, slice, NULL, n, &exp);
    743 
    744     if (exp.n == 0) {
    745       compiler_panic(pp->c, loc, "#include: empty after macro replacement");
    746     }
    747     if (exp.data[0].kind == TOK_STR) {
    748       KitSlice sl = kit_sym_str(pp->pool->c, exp.data[0].spelling);
    749       const char* s = sl.s;
    750       size_t slen = sl.len;
    751       if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
    752         compiler_panic(pp->c, loc, "#include: malformed string");
    753       }
    754       if (slen - 2 + 1 > cap) {
    755         compiler_panic(pp->c, loc, "#include: path too long");
    756       }
    757       memcpy(path_out, s + 1, slen - 2);
    758       path_out[slen - 2] = 0;
    759       *system_out = 0;
    760       return;
    761     }
    762     if (exp.data[0].kind == TOK_PUNCT && exp.data[0].v.punct == '<') {
    763       size_t pos = 0;
    764       u32 i;
    765       for (i = 1; i < exp.n; ++i) {
    766         size_t slen = 0;
    767         const char* s = NULL;
    768         if (exp.data[i].kind == TOK_PUNCT && exp.data[i].v.punct == '>') {
    769           break;
    770         }
    771         if (exp.data[i].spelling) {
    772           KitSlice sl = kit_sym_str(pp->pool->c, exp.data[i].spelling);
    773           s = sl.s;
    774           slen = sl.len;
    775         }
    776         if (s && pos + slen + 1 <= cap) {
    777           memcpy(path_out + pos, s, slen);
    778           pos += slen;
    779         }
    780       }
    781       path_out[pos] = 0;
    782       *system_out = 1;
    783       return;
    784     }
    785     compiler_panic(pp->c, loc,
    786                    "#include: expected \"...\" or <...> after expansion");
    787   }
    788 }
    789 
    790 static void do_include(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
    791   char path[4096];
    792   char resolved[4096];
    793   int system_form = 0;
    794   const u8* data;
    795   size_t size;
    796   Lexer* lex;
    797   u32 includer_id = 0;
    798   u32 included_id;
    799   u32 i;
    800   TokSrc s;
    801 
    802   parse_include_path(pp, line, n, loc, path, sizeof(path), &system_form);
    803 
    804   if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
    805                              sizeof(resolved))) {
    806     compiler_panic(pp->c, loc, "#include: file not found: %.*s",
    807                    KIT_SLICE_ARG(kit_slice_cstr(path)));
    808   }
    809 
    810   /* Walk the source stack to find the current includer's file_id. */
    811   for (i = pp->nsources; i > 0; --i) {
    812     TokSrc* tp = &pp->sources[i - 1];
    813     if (tp->kind == SRC_LEX && tp->lex) {
    814       includer_id = lex_file_id(tp->lex);
    815       break;
    816     }
    817   }
    818 
    819   lex = lex_open_mem(pp->c, resolved, (const char*)data, size);
    820   included_id = lex_file_id(lex);
    821 
    822   memset(&s, 0, sizeof(s));
    823   s.kind = SRC_LEX;
    824   s.lex = lex;
    825   src_push(pp, s);
    826 
    827   kit_source_add_include(pp->c, includer_id, included_id, loc, system_form);
    828 }
    829 
    830 /* ============================================================
    831  * #line (§6.10.4)
    832  * ============================================================ */
    833 
    834 /* Find the topmost SRC_LEX source on the stack — that's the "current
    835  * file" whose line/file should track #line directives. */
    836 TokSrc* current_lex_src(Pp* pp) {
    837   u32 i;
    838   for (i = pp->nsources; i > 0; --i) {
    839     TokSrc* s = &pp->sources[i - 1];
    840     if (s->kind == SRC_LEX) return s;
    841   }
    842   return NULL;
    843 }
    844 
    845 static void do_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
    846   /* Macro-replace arguments first (a2). */
    847   TokVec exp = {0};
    848   Tok* slice;
    849   TokSrc* lex_src;
    850   i64 target_line;
    851   Sym target_file = 0;
    852 
    853   if (n == 0) compiler_panic(pp->c, loc, "#line: missing arguments");
    854   slice = arena_array(pp->arena, Tok, n);
    855   memcpy(slice, line, sizeof(Tok) * n);
    856   expand_arg_to_eof(pp, slice, NULL, n, &exp);
    857 
    858   if (exp.n == 0 || exp.data[0].kind != TOK_NUM) {
    859     compiler_panic(pp->c, loc, "#line: expected line number");
    860   }
    861   {
    862     KitSlice s = kit_sym_str(pp->pool->c, exp.data[0].spelling);
    863     target_line = parse_pp_int(s.s, s.len);
    864   }
    865   if (exp.n >= 2) {
    866     if (exp.data[1].kind != TOK_STR) {
    867       compiler_panic(pp->c, loc, "#line: file argument must be a string");
    868     }
    869     {
    870       KitSlice s = kit_sym_str(pp->pool->c, exp.data[1].spelling);
    871       if (s.len >= 2 && s.s[0] == '"' && s.s[s.len - 1] == '"') {
    872         /* Destringize to logical bytes (undo \" and \\): file_override is
    873          * stored unescaped, like a real source path, and __FILE__ re-escapes
    874          * it uniformly when expanded. */
    875         char* fbuf = (char*)arena_alloc(pp->arena, s.len, 1);
    876         size_t flen = 0;
    877         destringize(pp, &exp.data[1], fbuf, s.len, &flen);
    878         target_file =
    879             kit_sym_intern(pp->pool->c, (KitSlice){.s = fbuf, .len = flen});
    880       }
    881     }
    882   }
    883 
    884   lex_src = current_lex_src(pp);
    885   if (!lex_src) compiler_panic(pp->c, loc, "#line outside any file");
    886   {
    887     /* The next token (post-directive-NL) currently has lex.line ==
    888      * <lex's line counter>. Set delta so its user-visible line ==
    889      * target_line. */
    890     SrcLoc here = lex_loc(lex_src->lex);
    891     lex_src->line_delta = (i32)target_line - (i32)here.line;
    892     if (target_file) lex_src->file_override = target_file;
    893   }
    894 }
    895 
    896 /* ============================================================
    897  * #pragma + _Pragma (§6.10.6, §6.10.9)
    898  * ============================================================ */
    899 
    900 /* Push the unmodified directive line back onto the source stack as a
    901  * buffer, so pp_emit_text writes it as-is. SRC_BUF gates directive
    902  * recognition off, so this won't recurse. */
    903 void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
    904   TokVec out = {0};
    905   HidesetId* hids;
    906   u32 i;
    907   Tok hash, ident, nl;
    908 
    909   memset(&hash, 0, sizeof(hash));
    910   hash.kind = TOK_PP_HASH;
    911   hash.flags = TF_AT_BOL;
    912   hash.loc = loc;
    913   hash.spelling = kit_sym_intern(pp->pool->c, KIT_SLICE_LIT("#"));
    914   tv_push(pp, &out, hash);
    915 
    916   memset(&ident, 0, sizeof(ident));
    917   ident.kind = TOK_IDENT;
    918   ident.flags = 0;
    919   ident.loc = loc;
    920   ident.spelling = pp->sym_pragma_kw;
    921   ident.v.ident = pp->sym_pragma_kw;
    922   tv_push(pp, &out, ident);
    923 
    924   for (i = 0; i < n; ++i) {
    925     Tok t = line[i];
    926     /* Force a leading space between tokens. */
    927     t.flags |= TF_HAS_SPACE;
    928     if (i == 0) {
    929       /* Space between "pragma" and the first arg. */
    930     }
    931     tv_push(pp, &out, t);
    932   }
    933 
    934   memset(&nl, 0, sizeof(nl));
    935   nl.kind = TOK_NEWLINE;
    936   nl.loc = loc;
    937   tv_push(pp, &out, nl);
    938 
    939   hids = arena_array(pp->arena, HidesetId, out.n ? out.n : 1);
    940   for (i = 0; i < out.n; ++i) hids[i] = HS_EMPTY;
    941   push_buf(pp, out.data, hids, out.n);
    942 }
    943 
    944 static int pragma_num_u32(Pp* pp, const Tok* t, u32* out) {
    945   const char* s;
    946   size_t len;
    947   u32 v = 0;
    948   KitSlice sl;
    949   if (!t || t->kind != TOK_NUM || !out) return 0;
    950   sl = kit_sym_str(pp->pool->c, t->spelling);
    951   s = sl.s;
    952   len = sl.len;
    953   if (!s || len == 0) return 0;
    954   for (size_t i = 0; i < len; ++i) {
    955     if (s[i] < '0' || s[i] > '9') break;
    956     v = v * 10u + (u32)(s[i] - '0');
    957   }
    958   *out = v;
    959   return 1;
    960 }
    961 
    962 static void handle_pragma_pack(Pp* pp, const Tok* line, u32 n) {
    963   u32 i = 0;
    964   if (n < 3 || line[0].kind != TOK_IDENT) return;
    965   {
    966     KitSlice sl = kit_sym_str(pp->pool->c, line[0].v.ident);
    967     const char* s = sl.s;
    968     size_t len = sl.len;
    969     if (!s || len != 4 || memcmp(s, "pack", 4) != 0) return;
    970   }
    971   if (line[1].kind != TOK_PUNCT || line[1].v.punct != '(') return;
    972   i = 2;
    973   if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ')') {
    974     pp->pack_align = 0;
    975     return;
    976   }
    977   if (i < n && line[i].kind == TOK_IDENT) {
    978     KitSlice sl = kit_sym_str(pp->pool->c, line[i].v.ident);
    979     const char* s = sl.s;
    980     size_t len = sl.len;
    981     if (s && len == 4 && memcmp(s, "push", 4) == 0) {
    982       if (pp->pack_stack_n <
    983           (u32)(sizeof pp->pack_stack / sizeof pp->pack_stack[0])) {
    984         pp->pack_stack[pp->pack_stack_n++] = pp->pack_align;
    985       }
    986       ++i;
    987       if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ',') {
    988         u32 v = 0;
    989         ++i;
    990         if (i < n && pragma_num_u32(pp, &line[i], &v)) pp->pack_align = v;
    991       }
    992       return;
    993     }
    994     if (s && len == 3 && memcmp(s, "pop", 3) == 0) {
    995       if (pp->pack_stack_n) pp->pack_align = pp->pack_stack[--pp->pack_stack_n];
    996       return;
    997     }
    998   }
    999   {
   1000     u32 v = 0;
   1001     if (pragma_num_u32(pp, &line[i], &v)) pp->pack_align = v;
   1002   }
   1003 }
   1004 
   1005 static void do_pragma(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
   1006   /* Forward unrecognised pragmas to the output. STDC pragmas pass
   1007    * through too; we don't act on them yet. */
   1008   handle_pragma_pack(pp, line, n);
   1009   emit_pragma_line(pp, line, n, loc);
   1010 }
   1011 
   1012 /* Destringize a string literal token's content: strip surrounding quotes
   1013  * and undo the `\"` and `\\` escapes. Other escape sequences pass
   1014  * through verbatim — the result is fed back through the lexer, which
   1015  * does its own escape handling for any string literals nested inside. */
   1016 static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap,
   1017                         size_t* out_len) {
   1018   KitSlice sl = kit_sym_str(pp->pool->c, str_tok->spelling);
   1019   const char* s = sl.s;
   1020   size_t slen = sl.len;
   1021   size_t i, w = 0;
   1022   if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
   1023     compiler_panic(pp->c, str_tok->loc,
   1024                    "_Pragma: argument must be a string literal");
   1025   }
   1026   for (i = 1; i + 1 < slen; ++i) {
   1027     char c = s[i];
   1028     if (c == '\\' && i + 2 < slen && (s[i + 1] == '\\' || s[i + 1] == '"')) {
   1029       ++i;
   1030       c = s[i];
   1031     }
   1032     if (w + 1 >= cap)
   1033       compiler_panic(pp->c, str_tok->loc, "_Pragma: payload too long");
   1034     out[w++] = c;
   1035   }
   1036   out[w] = 0;
   1037   *out_len = w;
   1038 }
   1039 
   1040 /* Handle a `_Pragma("...")` invocation. Caller has consumed the
   1041  * `_Pragma` identifier. Reads `(` STR `)`, destringizes, re-lexes the
   1042  * payload, and emits a #pragma directive line. */
   1043 int try_expand_pragma_op(Pp* pp, const Tok* invoke) {
   1044   Tok lp, str, rp;
   1045   char buf[1024];
   1046   size_t buf_n = 0;
   1047   Lexer* lex;
   1048   TokVec args = {0};
   1049 
   1050   /* Peek '(' (skipping NL). Use peek_for_invoke_paren for consistency,
   1051    * but we need the saved-back behavior for a non-match. */
   1052   {
   1053     int saw_ws;
   1054     if (!peek_for_invoke_paren(pp, &saw_ws)) {
   1055       return 0; /* not an invocation; emit _Pragma as ident */
   1056     }
   1057     (void)saw_ws;
   1058   }
   1059   /* Read the string literal arg. */
   1060   {
   1061     HidesetId hs;
   1062     str = src_next_raw(pp, &hs, NULL);
   1063   }
   1064   if (str.kind != TOK_STR) {
   1065     compiler_panic(pp->c, invoke->loc, "_Pragma: expected string literal");
   1066   }
   1067   {
   1068     HidesetId hs;
   1069     rp = src_next_raw(pp, &hs, NULL);
   1070   }
   1071   if (rp.kind != TOK_PUNCT || rp.v.punct != ')') {
   1072     compiler_panic(pp->c, invoke->loc, "_Pragma: expected ')'");
   1073   }
   1074   (void)lp;
   1075 
   1076   destringize(pp, &str, buf, sizeof(buf) - 2, &buf_n);
   1077   /* Append a NL so the lexer terminates cleanly. */
   1078   buf[buf_n++] = '\n';
   1079   buf[buf_n] = 0;
   1080 
   1081   /* Re-lex into args. Bytes need to live until lex_close; copy into
   1082    * arena. */
   1083   {
   1084     char* arena_buf = (char*)arena_alloc(pp->arena, buf_n + 1, 1);
   1085     memcpy(arena_buf, buf, buf_n + 1);
   1086     lex = lex_open_mem(pp->c, "<_Pragma>", arena_buf, buf_n);
   1087   }
   1088   for (;;) {
   1089     Tok t = lex_next(lex);
   1090     if (t.kind == TOK_EOF || t.kind == TOK_NEWLINE) break;
   1091     tv_push(pp, &args, t);
   1092   }
   1093   lex_close(lex);
   1094 
   1095   emit_pragma_line(pp, args.data, args.n, invoke->loc);
   1096   return 1;
   1097 }
   1098 
   1099 /* ============================================================
   1100  * #error / #warning
   1101  * ============================================================ */
   1102 
   1103 static void directive_message(Pp* pp, const Tok* line, u32 n, CharBuf* cb) {
   1104   u32 i;
   1105   for (i = 0; i < n; ++i) {
   1106     KitSlice slc = line[i].spelling ? kit_sym_str(pp->pool->c, line[i].spelling)
   1107                                     : KIT_SLICE_NULL;
   1108     const char* s = slc.s;
   1109     size_t sl = slc.len;
   1110     if (i > 0) cb_putc(pp, cb, ' ');
   1111     if (s && sl) cb_append(pp, cb, s, (u32)sl);
   1112   }
   1113   cb_putc(pp, cb, 0);
   1114 }
   1115 
   1116 static void pp_warn(Pp* pp, SrcLoc loc, const char* fmt, ...) {
   1117   KitDiagSink* sink = kit_compiler_context(pp->c)->diag;
   1118   va_list ap;
   1119   if (sink && sink->emit) {
   1120     va_start(ap, fmt);
   1121     sink->emit(sink, KIT_DIAG_WARN, loc, fmt, ap);
   1122     va_end(ap);
   1123   }
   1124   if (sink) sink->warnings++;
   1125 }
   1126 
   1127 static void do_error(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
   1128   CharBuf cb = {0};
   1129   directive_message(pp, line, n, &cb);
   1130   compiler_panic(pp->c, loc, "#error: %.*s",
   1131                  KIT_SLICE_ARG(kit_slice_cstr(cb.data ? cb.data : "")));
   1132 }
   1133 
   1134 static void do_warning(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
   1135   CharBuf cb = {0};
   1136   directive_message(pp, line, n, &cb);
   1137   pp_warn(pp, loc, "#warning: %.*s",
   1138           KIT_SLICE_ARG(kit_slice_cstr(cb.data ? cb.data : "")));
   1139 }
   1140 
   1141 /* ============================================================
   1142  * #embed (C23, §6.10.* per N3033)
   1143  * ============================================================ */
   1144 
   1145 static void do_embed(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
   1146   char path[4096];
   1147   char resolved[4096];
   1148   int system_form = 0;
   1149   const u8* data;
   1150   size_t size;
   1151   u32 j;
   1152   /* Optional embed parameters parsed below. */
   1153   i64 limit_n = -1;
   1154   Tok* if_empty_toks = NULL;
   1155   u32 if_empty_n = 0;
   1156   /* Header-name path: first token. */
   1157   u32 arg_start = 0;
   1158 
   1159   if (n == 0) compiler_panic(pp->c, loc, "#embed: missing path");
   1160 
   1161   if (line[0].kind == TOK_HEADER) {
   1162     KitSlice slc = kit_sym_str(pp->pool->c, line[0].spelling);
   1163     const char* s = slc.s;
   1164     size_t sl = slc.len;
   1165     if (sl < 2) compiler_panic(pp->c, loc, "#embed: malformed header name");
   1166     if (s[0] == '<' && s[sl - 1] == '>')
   1167       system_form = 1;
   1168     else if (s[0] == '"' && s[sl - 1] == '"')
   1169       system_form = 0;
   1170     else
   1171       compiler_panic(pp->c, loc, "#embed: malformed header name");
   1172     memcpy(path, s + 1, sl - 2);
   1173     path[sl - 2] = 0;
   1174     arg_start = 1;
   1175   } else {
   1176     compiler_panic(pp->c, loc, "#embed: header-name argument required");
   1177   }
   1178 
   1179   /* Parse trailing parameters: limit(N), if_empty(...). */
   1180   j = arg_start;
   1181   while (j < n) {
   1182     if (line[j].kind == TOK_IDENT) {
   1183       KitSlice slc = kit_sym_str(pp->pool->c, line[j].v.ident);
   1184       const char* s = slc.s;
   1185       size_t sl = slc.len;
   1186       if (sl == 5 && memcmp(s, "limit", 5) == 0) {
   1187         if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
   1188             line[j + 1].v.punct != '(') {
   1189           compiler_panic(pp->c, loc, "#embed: expected '(' after limit");
   1190         }
   1191         j += 2;
   1192         if (j >= n || line[j].kind != TOK_NUM) {
   1193           compiler_panic(pp->c, loc, "#embed: limit() expects an integer");
   1194         }
   1195         {
   1196           KitSlice s2 = kit_sym_str(pp->pool->c, line[j].spelling);
   1197           limit_n = parse_pp_int(s2.s, s2.len);
   1198         }
   1199         ++j;
   1200         if (j >= n || line[j].kind != TOK_PUNCT || line[j].v.punct != ')') {
   1201           compiler_panic(pp->c, loc, "#embed: expected ')' to close limit");
   1202         }
   1203         ++j;
   1204         continue;
   1205       }
   1206       if (sl == 8 && memcmp(s, "if_empty", 8) == 0) {
   1207         u32 depth = 0;
   1208         u32 start;
   1209         if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
   1210             line[j + 1].v.punct != '(') {
   1211           compiler_panic(pp->c, loc, "#embed: expected '(' after if_empty");
   1212         }
   1213         j += 2;
   1214         start = j;
   1215         while (j < n) {
   1216           if (line[j].kind == TOK_PUNCT) {
   1217             if (line[j].v.punct == '(')
   1218               ++depth;
   1219             else if (line[j].v.punct == ')') {
   1220               if (depth == 0) break;
   1221               --depth;
   1222             }
   1223           }
   1224           ++j;
   1225         }
   1226         if (j >= n) {
   1227           compiler_panic(pp->c, loc, "#embed: unterminated if_empty");
   1228         }
   1229         if_empty_toks = arena_array(pp->arena, Tok, j - start ? j - start : 1);
   1230         if_empty_n = j - start;
   1231         memcpy(if_empty_toks, line + start, sizeof(Tok) * if_empty_n);
   1232         ++j; /* skip ')' */
   1233         continue;
   1234       }
   1235     }
   1236     compiler_panic(pp->c, loc, "#embed: unexpected token in parameter list");
   1237   }
   1238 
   1239   if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
   1240                              sizeof(resolved))) {
   1241     compiler_panic(pp->c, loc, "#embed: file not found: %.*s",
   1242                    KIT_SLICE_ARG(kit_slice_cstr(path)));
   1243   }
   1244 
   1245   /* Apply limit(). */
   1246   {
   1247     size_t emit_n = size;
   1248     if (limit_n >= 0 && (u64)limit_n < emit_n) emit_n = (size_t)limit_n;
   1249     if (emit_n == 0) {
   1250       /* Empty: emit if_empty payload (or nothing). */
   1251       if (if_empty_toks && if_empty_n) {
   1252         HidesetId* hids = arena_array(pp->arena, HidesetId, if_empty_n);
   1253         u32 i;
   1254         for (i = 0; i < if_empty_n; ++i) hids[i] = HS_EMPTY;
   1255         push_buf(pp, if_empty_toks, hids, if_empty_n);
   1256       }
   1257       return;
   1258     }
   1259     /* Build a buffer of pp-numbers separated by ',' punctuators. */
   1260     {
   1261       TokVec out = {0};
   1262       HidesetId* hids;
   1263       size_t i;
   1264       for (i = 0; i < emit_n; ++i) {
   1265         char numbuf[8];
   1266         int nl = 0;
   1267         u8 v = data[i];
   1268         /* "u8 -> decimal" without sprintf. */
   1269         if (v == 0) {
   1270           numbuf[nl++] = '0';
   1271         } else {
   1272           char tmp[4];
   1273           int k = 0;
   1274           while (v) {
   1275             tmp[k++] = (char)('0' + (v % 10));
   1276             v /= 10;
   1277           }
   1278           while (k > 0) numbuf[nl++] = tmp[--k];
   1279         }
   1280         {
   1281           Tok t;
   1282           memset(&t, 0, sizeof(t));
   1283           t.kind = TOK_NUM;
   1284           t.loc = loc;
   1285           t.spelling = kit_sym_intern(
   1286               pp->pool->c, (KitSlice){.s = numbuf, .len = (size_t)nl});
   1287           if (i == 0) t.flags = TF_AT_BOL;
   1288           /* Bytes after a comma get a leading space to match
   1289            * clang's `, ` separator format. */
   1290           else
   1291             t.flags = TF_HAS_SPACE;
   1292           tv_push(pp, &out, t);
   1293         }
   1294         if (i + 1 < emit_n) {
   1295           Tok comma;
   1296           memset(&comma, 0, sizeof(comma));
   1297           comma.kind = TOK_PUNCT;
   1298           comma.v.punct = ',';
   1299           comma.loc = loc;
   1300           comma.spelling = kit_sym_intern(pp->pool->c, KIT_SLICE_LIT(","));
   1301           tv_push(pp, &out, comma);
   1302         }
   1303       }
   1304       hids = arena_array(pp->arena, HidesetId, out.n ? out.n : 1);
   1305       {
   1306         u32 k;
   1307         for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY;
   1308       }
   1309       push_buf(pp, out.data, hids, out.n);
   1310     }
   1311   }
   1312 }
   1313 
   1314 /* ============================================================
   1315  * Directive dispatch
   1316  * ============================================================ */
   1317 
   1318 void process_directive(Pp* pp, SrcLoc hash_loc) {
   1319   Tok* line;
   1320   u32 n;
   1321   Sym name;
   1322 
   1323   read_directive_line(pp, &line, &n);
   1324   if (n == 0) {
   1325     /* Null directive: '#' newline. Nothing to do. */
   1326     return;
   1327   }
   1328   if (line[0].kind != TOK_IDENT) {
   1329     compiler_panic(pp->c, line[0].loc, "expected directive name after '#'");
   1330   }
   1331   name = line[0].v.ident;
   1332   if (name == pp->sym_define)
   1333     do_define(pp, line + 1, n - 1);
   1334   else if (name == pp->sym_undef)
   1335     do_undef(pp, line + 1, n - 1);
   1336   else if (name == pp->sym_if)
   1337     do_if_directive(pp, line + 1, n - 1, hash_loc);
   1338   else if (name == pp->sym_ifdef)
   1339     do_ifdef(pp, line + 1, n - 1, 0, hash_loc);
   1340   else if (name == pp->sym_ifndef)
   1341     do_ifdef(pp, line + 1, n - 1, 1, hash_loc);
   1342   else if (name == pp->sym_elif)
   1343     do_elif(pp, hash_loc);
   1344   else if (name == pp->sym_else)
   1345     do_else(pp, hash_loc);
   1346   else if (name == pp->sym_endif)
   1347     do_endif(pp, hash_loc);
   1348   else if (name == pp->sym_include)
   1349     do_include(pp, line + 1, n - 1, hash_loc);
   1350   else if (name == pp->sym_line)
   1351     do_line(pp, line + 1, n - 1, hash_loc);
   1352   else if (name == pp->sym_pragma)
   1353     do_pragma(pp, line + 1, n - 1, hash_loc);
   1354   else if (name == pp->sym_error)
   1355     do_error(pp, line + 1, n - 1, hash_loc);
   1356   else if (name == pp->sym_warning)
   1357     do_warning(pp, line + 1, n - 1, hash_loc);
   1358   else if (name == pp->sym_embed)
   1359     do_embed(pp, line + 1, n - 1, hash_loc);
   1360   else {
   1361     compiler_panic(pp->c, line[0].loc, "unsupported directive");
   1362   }
   1363 }