commit 58cae09b4b02e5c3df711540a0572f21f7da59db
parent 84f59b9951ab1c267c3938e30897244d8e2caca6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 3 Jun 2026 10:30:31 -0700
cpp: delete dead literal table; collapse #if binops into precedence loop
(1) Remove the vestigial literal-table abstraction. lex_lit always returned
NULL, the lexer never built a LitInfo nor set Tok.lit, and pp_lit just
forwarded to it; the sole writer was parse.c assigning LIT_NONE with no
reader. Delete LitId/LIT_NONE/LitKind/LitEnc/LitInfo, the Tok.lit field,
lex_lit, pp_lit, and the LIT_NONE store. (asm's AsmLit* analog is
independent and untouched; cpp_support.h keeps Sym/SrcLoc/BytesId.)
(2) Replace the ten near-identical operator-fold functions
(ee_mul/ee_add/ee_shift/ee_rel/ee_eq/ee_band/ee_bxor/ee_bor/ee_logand/
ee_logor) with one precedence-climbing ee_binary() over a static const
operator table {punct, prec, apply}. div/mod-by-zero panic lives in the
apply fns (op loc threaded via EE::op_loc). ee_unary/ee_primary/ee_ternary
stay special (ternary is right-assoc).
Add test/pp/cases/8f_if_binops covering every binary operator, precedence,
parens, and ternary so a transcription slip is caught.
(cherry picked from commit 4e5839b7be6806da45cc5b2ad8c3b000c47e2f6c)
Diffstat:
8 files changed, 193 insertions(+), 168 deletions(-)
diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c
@@ -157,7 +157,6 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) {
out = a;
out.spelling = kit_sym_intern(p->pool->c, (KitSlice){.s = buf, .len = k});
out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc);
- out.lit = LIT_NONE;
h->free(h, buf, 0);
return out;
}
diff --git a/lang/cpp/lex/lex.c b/lang/cpp/lex/lex.c
@@ -169,11 +169,6 @@ void lex_skip_shebang(Lexer* l) {
SrcLoc lex_loc(const Lexer* l) { return lex_here(l); }
u32 lex_file_id(const Lexer* l) { return l->file_id; }
-const LitInfo* lex_lit(const Lexer* l, LitId id) {
- (void)l;
- (void)id;
- return NULL;
-}
/* Intern bytes [start, end) with line splices (\<newline>) removed, so token
* spellings reflect post-phase-2 logical text. */
diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h
@@ -64,38 +64,11 @@ typedef enum Punct {
P_HASH_HASH,
} Punct;
-typedef u32 LitId;
-#define LIT_NONE 0u
-
-typedef enum LitKind {
- LIT_INT,
- LIT_FLOAT,
- LIT_STRING,
- LIT_CHAR,
-} LitKind;
-
-typedef enum LitEnc {
- LENC_ORDINARY,
- LENC_UTF8,
- LENC_WIDE,
- LENC_UTF16,
- LENC_UTF32,
-} LitEnc;
-
-typedef struct LitInfo {
- u8 kind; /* LitKind */
- u8 enc; /* LitEnc for strings/chars */
- u16 flags; /* TokFlag suffix/encoding bits */
- Sym spelling; /* exact source spelling */
- BytesId bytes; /* decoded bytes/code units, if already decoded */
-} LitInfo;
-
typedef struct Tok {
u16 kind;
u16 flags;
SrcLoc loc;
Sym spelling; /* exact token spelling for diagnostics/#/## */
- LitId lit; /* literal-table handle; LIT_NONE otherwise */
union {
Sym ident;
Sym str;
@@ -127,6 +100,5 @@ void lex_skip_shebang(Lexer*);
Tok lex_next(Lexer*);
SrcLoc lex_loc(const Lexer*);
u32 lex_file_id(const Lexer*);
-const LitInfo* lex_lit(const Lexer*, LitId);
#endif
diff --git a/lang/cpp/pp/pp.c b/lang/cpp/pp/pp.c
@@ -827,15 +827,3 @@ void pp_add_include_edge(Pp* pp, u32 includer, u32 included, SrcLoc include_loc,
int system) {
kit_source_add_include(pp->c, includer, included, include_loc, system);
}
-
-const LitInfo* pp_lit(const Pp* pp, LitId id) {
- /* Stage 1+2 doesn't synthesize new literals; defer to the active
- * lexer's table. */
- TokSrc* s;
- u32 i;
- for (i = pp->nsources; i > 0; --i) {
- s = &((Pp*)pp)->sources[i - 1];
- if (s->kind == SRC_LEX) return lex_lit(s->lex, id);
- }
- return NULL;
-}
diff --git a/lang/cpp/pp/pp.h b/lang/cpp/pp/pp.h
@@ -29,7 +29,6 @@ void pp_add_include_edge(Pp*, u32 includer_file_id, u32 included_file_id,
/* Streaming. Yields preprocessed tokens (macro-expanded, directives consumed).
*/
Tok pp_next(Pp*);
-const LitInfo* pp_lit(const Pp*, LitId);
/* Drains pp_next into `out` as preprocessed C source text: token spellings
* separated by single spaces where TF_HAS_SPACE is set, with newlines for
diff --git a/lang/cpp/pp/pp_directive.c b/lang/cpp/pp/pp_directive.c
@@ -175,6 +175,7 @@ typedef struct EE {
u32 n;
u32 pos;
SrcLoc loc;
+ SrcLoc op_loc; /* loc of the binary operator being applied (for panics) */
} EE;
static i64 ee_ternary(EE* e);
@@ -245,144 +246,131 @@ static i64 ee_unary(EE* e) {
return ee_primary(e);
}
-static i64 ee_mul(EE* e) {
- i64 v = ee_unary(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '*') {
- ++e->pos;
- v = v * ee_unary(e);
- } else if (t->v.punct == '/') {
- i64 r;
- ++e->pos;
- r = ee_unary(e);
- if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: division by zero");
- v = v / r;
- } else if (t->v.punct == '%') {
- i64 r;
- ++e->pos;
- r = ee_unary(e);
- if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: modulo by zero");
- v = v % r;
- } else
- break;
- }
- return v;
-}
+/* One row per binary operator, highest `prec` binds tightest. All listed
+ * operators are left-associative; ternary (right-assoc) stays special-cased
+ * in ee_ternary. `apply` folds (lhs OP rhs) and owns the div/mod-by-zero
+ * panic (it needs the operator loc, threaded via EE::op_loc). */
+typedef i64 (*EeApply)(EE* e, i64 a, i64 b);
-static i64 ee_add(EE* e) {
- i64 v = ee_mul(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '+') {
- ++e->pos;
- v = v + ee_mul(e);
- } else if (t->v.punct == '-') {
- ++e->pos;
- v = v - ee_mul(e);
- } else
- break;
- }
- return v;
+static i64 eb_mul(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a * b;
}
-
-static i64 ee_shift(EE* e) {
- i64 v = ee_add(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == P_SHL) {
- ++e->pos;
- v = v << ee_add(e);
- } else if (t->v.punct == P_SHR) {
- ++e->pos;
- v = v >> ee_add(e);
- } else
- break;
- }
- return v;
+static i64 eb_div(EE* e, i64 a, i64 b) {
+ if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: division by zero");
+ return a / b;
}
-
-static i64 ee_rel(EE* e) {
- i64 v = ee_shift(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '<') {
- ++e->pos;
- v = (v < ee_shift(e));
- } else if (t->v.punct == '>') {
- ++e->pos;
- v = (v > ee_shift(e));
- } else if (t->v.punct == P_LE) {
- ++e->pos;
- v = (v <= ee_shift(e));
- } else if (t->v.punct == P_GE) {
- ++e->pos;
- v = (v >= ee_shift(e));
- } else
- break;
- }
- return v;
+static i64 eb_mod(EE* e, i64 a, i64 b) {
+ if (b == 0) compiler_panic(e->pp->c, e->op_loc, "#if: modulo by zero");
+ return a % b;
}
-
-static i64 ee_eq(EE* e) {
- i64 v = ee_rel(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == P_EQ) {
- ++e->pos;
- v = (v == ee_rel(e));
- } else if (t->v.punct == P_NE) {
- ++e->pos;
- v = (v != ee_rel(e));
- } else
- break;
- }
- return v;
+static i64 eb_add(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a + b;
}
-
-static i64 ee_band(EE* e) {
- i64 v = ee_eq(e);
- while (ee_match_punct(e, '&')) v = v & ee_eq(e);
- return v;
+static i64 eb_sub(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a - b;
}
-
-static i64 ee_bxor(EE* e) {
- i64 v = ee_band(e);
- while (ee_match_punct(e, '^')) v = v ^ ee_band(e);
- return v;
+static i64 eb_shl(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a << b;
}
-
-static i64 ee_bor(EE* e) {
- i64 v = ee_bxor(e);
- while (ee_match_punct(e, '|')) v = v | ee_bxor(e);
- return v;
+static i64 eb_shr(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a >> b;
+}
+static i64 eb_lt(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a < b;
+}
+static i64 eb_gt(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a > b;
+}
+static i64 eb_le(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a <= b;
+}
+static i64 eb_ge(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a >= b;
+}
+static i64 eb_eq(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a == b;
+}
+static i64 eb_ne(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a != b;
+}
+static i64 eb_band(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a & b;
+}
+static i64 eb_bxor(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a ^ b;
+}
+static i64 eb_bor(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a | b;
+}
+static i64 eb_logand(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a && b;
+}
+static i64 eb_logor(EE* e, i64 a, i64 b) {
+ (void)e;
+ return a || b;
}
-static i64 ee_logand(EE* e) {
- i64 v = ee_bor(e);
- while (ee_match_punct(e, P_AND)) {
- i64 r = ee_bor(e);
- v = (v && r);
+typedef struct EeOp {
+ u32 punct; /* P_* / ASCII codepoint of the operator token */
+ u8 prec; /* higher binds tighter */
+ EeApply apply;
+} EeOp;
+
+static const EeOp ee_ops[] = {
+ {'*', 10, eb_mul}, {'/', 10, eb_div}, {'%', 10, eb_mod},
+ {'+', 9, eb_add}, {'-', 9, eb_sub}, {P_SHL, 8, eb_shl},
+ {P_SHR, 8, eb_shr}, {'<', 7, eb_lt}, {'>', 7, eb_gt},
+ {P_LE, 7, eb_le}, {P_GE, 7, eb_ge}, {P_EQ, 6, eb_eq},
+ {P_NE, 6, eb_ne}, {'&', 5, eb_band}, {'^', 4, eb_bxor},
+ {'|', 3, eb_bor}, {P_AND, 2, eb_logand}, {P_OR, 1, eb_logor},
+};
+
+static const EeOp* ee_lookup_op(const Tok* t) {
+ size_t i;
+ if (!t || t->kind != TOK_PUNCT) return NULL;
+ for (i = 0; i < sizeof(ee_ops) / sizeof(ee_ops[0]); ++i) {
+ if (ee_ops[i].punct == t->v.punct) return &ee_ops[i];
}
- return v;
+ return NULL;
}
-static i64 ee_logor(EE* e) {
- i64 v = ee_logand(e);
- while (ee_match_punct(e, P_OR)) {
- i64 r = ee_logand(e);
- v = (v || r);
+/* Precedence-climbing fold of all left-associative binary operators. */
+static i64 ee_binary(EE* e, int min_prec) {
+ i64 v = ee_unary(e);
+ for (;;) {
+ const Tok* t = ee_peek(e);
+ const EeOp* op = ee_lookup_op(t);
+ SrcLoc op_loc;
+ i64 rhs;
+ if (!op || op->prec < min_prec) break;
+ op_loc = t->loc;
+ ++e->pos;
+ /* Left-associative: parse the RHS with strictly higher precedence so
+ * same-prec operators fold left-to-right. */
+ rhs = ee_binary(e, op->prec + 1);
+ e->op_loc = op_loc;
+ v = op->apply(e, v, rhs);
}
return v;
}
static i64 ee_ternary(EE* e) {
- i64 c = ee_logor(e);
+ i64 c = ee_binary(e, 1);
if (ee_match_punct(e, '?')) {
i64 a = ee_ternary(e);
i64 b;
diff --git a/test/pp/cases/8f_if_binops.c b/test/pp/cases/8f_if_binops.c
@@ -0,0 +1,63 @@
+#if 6 * 7 == 42
+mul_ok
+#endif
+#if 20 / 4 == 5
+div_ok
+#endif
+#if 17 % 5 == 2
+mod_ok
+#endif
+#if 2 + 3 == 5
+add_ok
+#endif
+#if 9 - 4 == 5
+sub_ok
+#endif
+#if 1 << 4 == 16
+shl_ok
+#endif
+#if 64 >> 2 == 16
+shr_ok
+#endif
+#if 3 < 4
+lt_ok
+#endif
+#if 5 > 2
+gt_ok
+#endif
+#if 4 <= 4
+le_ok
+#endif
+#if 7 >= 7
+ge_ok
+#endif
+#if 8 == 8
+eq_ok
+#endif
+#if 8 != 9
+ne_ok
+#endif
+#if (6 & 3) == 2
+band_ok
+#endif
+#if (6 ^ 3) == 5
+bxor_ok
+#endif
+#if (4 | 1) == 5
+bor_ok
+#endif
+#if 1 && 2
+logand_ok
+#endif
+#if 0 || 3
+logor_ok
+#endif
+#if 2 + 3 * 4 == 14
+prec_ok
+#endif
+#if (2 + 3) * 4 == 20
+paren_ok
+#endif
+#if 1 ? 10 : 20
+tern_ok
+#endif
diff --git a/test/pp/cases/8f_if_binops.expected b/test/pp/cases/8f_if_binops.expected
@@ -0,0 +1,21 @@
+mul_ok
+div_ok
+mod_ok
+add_ok
+sub_ok
+shl_ok
+shr_ok
+lt_ok
+gt_ok
+le_ok
+ge_ok
+eq_ok
+ne_ok
+band_ok
+bxor_ok
+bor_ok
+logand_ok
+logor_ok
+prec_ok
+paren_ok
+tern_ok