kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 58cae09b4b02e5c3df711540a0572f21f7da59db
parent 84f59b9951ab1c267c3938e30897244d8e2caca6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 10:30:31 -0700

cpp: delete dead literal table; collapse #if binops into precedence loop

(1) Remove the vestigial literal-table abstraction. lex_lit always returned
NULL, the lexer never built a LitInfo nor set Tok.lit, and pp_lit just
forwarded to it; the sole writer was parse.c assigning LIT_NONE with no
reader. Delete LitId/LIT_NONE/LitKind/LitEnc/LitInfo, the Tok.lit field,
lex_lit, pp_lit, and the LIT_NONE store. (asm's AsmLit* analog is
independent and untouched; cpp_support.h keeps Sym/SrcLoc/BytesId.)

(2) Replace the ten near-identical operator-fold functions
(ee_mul/ee_add/ee_shift/ee_rel/ee_eq/ee_band/ee_bxor/ee_bor/ee_logand/
ee_logor) with one precedence-climbing ee_binary() over a static const
operator table {punct, prec, apply}. The div/mod-by-zero panics live in the
apply fns (the operator's loc is threaded via EE::op_loc). ee_unary, ee_primary,
and ee_ternary stay hand-written: unary and primary are not binary operators,
and ternary is right-assoc.

Add test/pp/cases/8f_if_binops covering every binary operator, precedence,
parens, and ternary so a transcription slip is caught.

(cherry picked from commit 4e5839b7be6806da45cc5b2ad8c3b000c47e2f6c)

Diffstat:
Mlang/c/parse/parse.c | 1-
Mlang/cpp/lex/lex.c | 5-----
Mlang/cpp/lex/lex.h | 28----------------------------
Mlang/cpp/pp/pp.c | 12------------
Mlang/cpp/pp/pp.h | 1-
Mlang/cpp/pp/pp_directive.c | 230+++++++++++++++++++++++++++++++++++++------------------------------------------
Atest/pp/cases/8f_if_binops.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/pp/cases/8f_if_binops.expected | 21+++++++++++++++++++++
8 files changed, 193 insertions(+), 168 deletions(-)

diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -157,7 +157,6 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { out = a; out.spelling = kit_sym_intern(p->pool->c, (KitSlice){.s = buf, .len = k}); out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc); - out.lit = LIT_NONE; h->free(h, buf, 0); return out; } diff --git a/lang/cpp/lex/lex.c b/lang/cpp/lex/lex.c @@ -169,11 +169,6 @@ void lex_skip_shebang(Lexer* l) { SrcLoc lex_loc(const Lexer* l) { return lex_here(l); } u32 lex_file_id(const Lexer* l) { return l->file_id; } -const LitInfo* lex_lit(const Lexer* l, LitId id) { - (void)l; - (void)id; - return NULL; -} /* Intern bytes [start, end) with line splices (\<newline>) removed, so token * spellings reflect post-phase-2 logical text. */ diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h @@ -64,38 +64,11 @@ typedef enum Punct { P_HASH_HASH, } Punct; -typedef u32 LitId; -#define LIT_NONE 0u - -typedef enum LitKind { - LIT_INT, - LIT_FLOAT, - LIT_STRING, - LIT_CHAR, -} LitKind; - -typedef enum LitEnc { - LENC_ORDINARY, - LENC_UTF8, - LENC_WIDE, - LENC_UTF16, - LENC_UTF32, -} LitEnc; - -typedef struct LitInfo { - u8 kind; /* LitKind */ - u8 enc; /* LitEnc for strings/chars */ - u16 flags; /* TokFlag suffix/encoding bits */ - Sym spelling; /* exact source spelling */ - BytesId bytes; /* decoded bytes/code units, if already decoded */ -} LitInfo; - typedef struct Tok { u16 kind; u16 flags; SrcLoc loc; Sym spelling; /* exact token spelling for diagnostics/#/## */ - LitId lit; /* literal-table handle; LIT_NONE otherwise */ union { Sym ident; Sym str; @@ -127,6 +100,5 @@ void lex_skip_shebang(Lexer*); Tok lex_next(Lexer*); SrcLoc lex_loc(const Lexer*); u32 lex_file_id(const Lexer*); -const LitInfo* lex_lit(const Lexer*, LitId); #endif diff --git a/lang/cpp/pp/pp.c b/lang/cpp/pp/pp.c @@ -827,15 +827,3 @@ void pp_add_include_edge(Pp* pp, u32 includer, u32 included, SrcLoc include_loc, int system) { kit_source_add_include(pp->c, includer, included, 
include_loc, system); } - -const LitInfo* pp_lit(const Pp* pp, LitId id) { - /* Stage 1+2 doesn't synthesize new literals; defer to the active - * lexer's table. */ - TokSrc* s; - u32 i; - for (i = pp->nsources; i > 0; --i) { - s = &((Pp*)pp)->sources[i - 1]; - if (s->kind == SRC_LEX) return lex_lit(s->lex, id); - } - return NULL; -} diff --git a/lang/cpp/pp/pp.h b/lang/cpp/pp/pp.h @@ -29,7 +29,6 @@ void pp_add_include_edge(Pp*, u32 includer_file_id, u32 included_file_id, /* Streaming. Yields preprocessed tokens (macro-expanded, directives consumed). */ Tok pp_next(Pp*); -const LitInfo* pp_lit(const Pp*, LitId); /* Drains pp_next into `out` as preprocessed C source text: token spellings * separated by single spaces where TF_HAS_SPACE is set, with newlines for diff --git a/lang/cpp/pp/pp_directive.c b/lang/cpp/pp/pp_directive.c @@ -175,6 +175,7 @@ typedef struct EE { u32 n; u32 pos; SrcLoc loc; + SrcLoc op_loc; /* loc of the binary operator being applied (for panics) */ } EE; static i64 ee_ternary(EE* e); @@ -245,144 +246,131 @@ static i64 ee_unary(EE* e) { return ee_primary(e); } -static i64 ee_mul(EE* e) { - i64 v = ee_unary(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '*') { - ++e->pos; - v = v * ee_unary(e); - } else if (t->v.punct == '/') { - i64 r; - ++e->pos; - r = ee_unary(e); - if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: division by zero"); - v = v / r; - } else if (t->v.punct == '%') { - i64 r; - ++e->pos; - r = ee_unary(e); - if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: modulo by zero"); - v = v % r; - } else - break; - } - return v; -} +/* One row per binary operator, highest `prec` binds tightest. All listed + * operators are left-associative; ternary (right-assoc) stays special-cased + * in ee_ternary. `apply` folds (lhs OP rhs) and owns the div/mod-by-zero + * panic (it needs the operator loc, threaded via EE::op_loc). 
*/ +typedef i64 (*EeApply)(EE* e, i64 a, i64 b); -static i64 ee_add(EE* e) { - i64 v = ee_mul(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '+') { - ++e->pos; - v = v + ee_mul(e); - } else if (t->v.punct == '-') { - ++e->pos; - v = v - ee_mul(e); - } else - break; - } - return v; +static i64 eb_mul(EE* e, i64 a, i64 b) { + (void)e; + return a * b; } - -static i64 ee_shift(EE* e) { - i64 v = ee_add(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == P_SHL) { - ++e->pos; - v = v << ee_add(e); - } else if (t->v.punct == P_SHR) { - ++e->pos; - v = v >> ee_add(e); - } else - break; - } - return v; +static i64 eb_div(EE* e, i64 a, i64 b) { + if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: division by zero"); + return a / b; } - -static i64 ee_rel(EE* e) { - i64 v = ee_shift(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == '<') { - ++e->pos; - v = (v < ee_shift(e)); - } else if (t->v.punct == '>') { - ++e->pos; - v = (v > ee_shift(e)); - } else if (t->v.punct == P_LE) { - ++e->pos; - v = (v <= ee_shift(e)); - } else if (t->v.punct == P_GE) { - ++e->pos; - v = (v >= ee_shift(e)); - } else - break; - } - return v; +static i64 eb_mod(EE* e, i64 a, i64 b) { + if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: modulo by zero"); + return a % b; } - -static i64 ee_eq(EE* e) { - i64 v = ee_rel(e); - for (;;) { - const Tok* t = ee_peek(e); - if (!t || t->kind != TOK_PUNCT) break; - if (t->v.punct == P_EQ) { - ++e->pos; - v = (v == ee_rel(e)); - } else if (t->v.punct == P_NE) { - ++e->pos; - v = (v != ee_rel(e)); - } else - break; - } - return v; +static i64 eb_add(EE* e, i64 a, i64 b) { + (void)e; + return a + b; } - -static i64 ee_band(EE* e) { - i64 v = ee_eq(e); - while (ee_match_punct(e, '&')) v = v & ee_eq(e); - return v; +static i64 eb_sub(EE* e, i64 a, i64 b) { + (void)e; + return a - b; } - 
-static i64 ee_bxor(EE* e) { - i64 v = ee_band(e); - while (ee_match_punct(e, '^')) v = v ^ ee_band(e); - return v; +static i64 eb_shl(EE* e, i64 a, i64 b) { + (void)e; + return a << b; } - -static i64 ee_bor(EE* e) { - i64 v = ee_bxor(e); - while (ee_match_punct(e, '|')) v = v | ee_bxor(e); - return v; +static i64 eb_shr(EE* e, i64 a, i64 b) { + (void)e; + return a >> b; +} +static i64 eb_lt(EE* e, i64 a, i64 b) { + (void)e; + return a < b; +} +static i64 eb_gt(EE* e, i64 a, i64 b) { + (void)e; + return a > b; +} +static i64 eb_le(EE* e, i64 a, i64 b) { + (void)e; + return a <= b; +} +static i64 eb_ge(EE* e, i64 a, i64 b) { + (void)e; + return a >= b; +} +static i64 eb_eq(EE* e, i64 a, i64 b) { + (void)e; + return a == b; +} +static i64 eb_ne(EE* e, i64 a, i64 b) { + (void)e; + return a != b; +} +static i64 eb_band(EE* e, i64 a, i64 b) { + (void)e; + return a & b; +} +static i64 eb_bxor(EE* e, i64 a, i64 b) { + (void)e; + return a ^ b; +} +static i64 eb_bor(EE* e, i64 a, i64 b) { + (void)e; + return a | b; +} +static i64 eb_logand(EE* e, i64 a, i64 b) { + (void)e; + return a && b; +} +static i64 eb_logor(EE* e, i64 a, i64 b) { + (void)e; + return a || b; } -static i64 ee_logand(EE* e) { - i64 v = ee_bor(e); - while (ee_match_punct(e, P_AND)) { - i64 r = ee_bor(e); - v = (v && r); +typedef struct EeOp { + u32 punct; /* P_* / ASCII codepoint of the operator token */ + u8 prec; /* higher binds tighter */ + EeApply apply; +} EeOp; + +static const EeOp ee_ops[] = { + {'*', 10, eb_mul}, {'/', 10, eb_div}, {'%', 10, eb_mod}, + {'+', 9, eb_add}, {'-', 9, eb_sub}, {P_SHL, 8, eb_shl}, + {P_SHR, 8, eb_shr}, {'<', 7, eb_lt}, {'>', 7, eb_gt}, + {P_LE, 7, eb_le}, {P_GE, 7, eb_ge}, {P_EQ, 6, eb_eq}, + {P_NE, 6, eb_ne}, {'&', 5, eb_band}, {'^', 4, eb_bxor}, + {'|', 3, eb_bor}, {P_AND, 2, eb_logand}, {P_OR, 1, eb_logor}, +}; + +static const EeOp* ee_lookup_op(const Tok* t) { + size_t i; + if (!t || t->kind != TOK_PUNCT) return NULL; + for (i = 0; i < sizeof(ee_ops) / 
sizeof(ee_ops[0]); ++i) { + if (ee_ops[i].punct == t->v.punct) return &ee_ops[i]; } - return v; + return NULL; } -static i64 ee_logor(EE* e) { - i64 v = ee_logand(e); - while (ee_match_punct(e, P_OR)) { - i64 r = ee_logand(e); - v = (v || r); +/* Precedence-climbing fold of all left-associative binary operators. */ +static i64 ee_binary(EE* e, int min_prec) { + i64 v = ee_unary(e); + for (;;) { + const Tok* t = ee_peek(e); + const EeOp* op = ee_lookup_op(t); + SrcLoc op_loc; + i64 rhs; + if (!op || op->prec < min_prec) break; + op_loc = t->loc; + ++e->pos; + /* Left-associative: parse the RHS with strictly higher precedence so + * same-prec operators fold left-to-right. */ + rhs = ee_binary(e, op->prec + 1); + e->op_loc = op_loc; + v = op->apply(e, v, rhs); } return v; } static i64 ee_ternary(EE* e) { - i64 c = ee_logor(e); + i64 c = ee_binary(e, 1); if (ee_match_punct(e, '?')) { i64 a = ee_ternary(e); i64 b; diff --git a/test/pp/cases/8f_if_binops.c b/test/pp/cases/8f_if_binops.c @@ -0,0 +1,63 @@ +#if 6 * 7 == 42 +mul_ok +#endif +#if 20 / 4 == 5 +div_ok +#endif +#if 17 % 5 == 2 +mod_ok +#endif +#if 2 + 3 == 5 +add_ok +#endif +#if 9 - 4 == 5 +sub_ok +#endif +#if 1 << 4 == 16 +shl_ok +#endif +#if 64 >> 2 == 16 +shr_ok +#endif +#if 3 < 4 +lt_ok +#endif +#if 5 > 2 +gt_ok +#endif +#if 4 <= 4 +le_ok +#endif +#if 7 >= 7 +ge_ok +#endif +#if 8 == 8 +eq_ok +#endif +#if 8 != 9 +ne_ok +#endif +#if (6 & 3) == 2 +band_ok +#endif +#if (6 ^ 3) == 5 +bxor_ok +#endif +#if (4 | 1) == 5 +bor_ok +#endif +#if 1 && 2 +logand_ok +#endif +#if 0 || 3 +logor_ok +#endif +#if 2 + 3 * 4 == 14 +prec_ok +#endif +#if (2 + 3) * 4 == 20 +paren_ok +#endif +#if 1 ? 10 : 20 +tern_ok +#endif diff --git a/test/pp/cases/8f_if_binops.expected b/test/pp/cases/8f_if_binops.expected @@ -0,0 +1,21 @@ +mul_ok +div_ok +mod_ok +add_ok +sub_ok +shl_ok +shr_ok +lt_ok +gt_ok +le_ok +ge_ok +eq_ok +ne_ok +band_ok +bxor_ok +bor_ok +logand_ok +logor_ok +prec_ok +paren_ok +tern_ok