kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 13c7eb6490e2aaef2c457641287d02eadde21ed2
parent 36197435a80ec4f7368e486af514a63be5ee03ad
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 04:15:46 -0700

lex: add lexer implementation and lex/pp tests

Diffstat:
Asrc/lex/lex.c | 436+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/basic_punct.c | 3+++
Atest/lex/cases/basic_punct.expected | 16++++++++++++++++
Atest/lex/cases/comment_edges.c | 12++++++++++++
Atest/lex/cases/comment_edges.expected | 21+++++++++++++++++++++
Atest/lex/cases/comments.c | 16++++++++++++++++
Atest/lex/cases/comments.expected | 34++++++++++++++++++++++++++++++++++
Atest/lex/cases/empty.c | 0
Atest/lex/cases/empty.expected | 1+
Atest/lex/cases/float_constants.c | 34++++++++++++++++++++++++++++++++++
Atest/lex/cases/float_constants.expected | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/identifiers.c | 31+++++++++++++++++++++++++++++++
Atest/lex/cases/identifiers.expected | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/int_constants.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/int_constants.expected | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/keywords.c | 8++++++++
Atest/lex/cases/keywords.expected | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/line_splice.c | 25+++++++++++++++++++++++++
Atest/lex/cases/line_splice.expected | 25+++++++++++++++++++++++++
Atest/lex/cases/maximal_munch.c | 21+++++++++++++++++++++
Atest/lex/cases/maximal_munch.expected | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/pp_directives.c | 21+++++++++++++++++++++
Atest/lex/cases/pp_directives.expected | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/pp_numbers.c | 29+++++++++++++++++++++++++++++
Atest/lex/cases/pp_numbers.expected | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/pp_passthrough.c | 2++
Atest/lex/cases/pp_passthrough.expected | 10++++++++++
Atest/lex/cases/punctuators.c | 10++++++++++
Atest/lex/cases/punctuators.expected | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/string_escapes.c | 38++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/string_escapes.expected | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/cases/strings_chars.c | 23+++++++++++++++++++++++
Atest/lex/cases/strings_chars.expected | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/lex/run.sh | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/pp/cases/define_function.actual | 0
Atest/pp/cases/define_function.c | 2++
Atest/pp/cases/define_function.expected | 1+
Atest/pp/cases/define_object.actual | 0
Atest/pp/cases/define_object.c | 2++
Atest/pp/cases/define_object.expected | 1+
Atest/pp/cases/ifdef_basic.actual | 0
Atest/pp/cases/ifdef_basic.c | 11+++++++++++
Atest/pp/cases/ifdef_basic.expected | 2++
Atest/pp/cases/include_local.actual | 0
Atest/pp/cases/include_local.c | 2++
Atest/pp/cases/include_local.expected | 2++
Atest/pp/cases/include_local.h | 2++
Atest/pp/cases/undef.actual | 0
Atest/pp/cases/undef.c | 6++++++
Atest/pp/cases/undef.expected | 3+++
Atest/pp/run.sh | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/test.mk | 41+++++++++++++++++++++++++++++++++++++++++
52 files changed, 1812 insertions(+), 0 deletions(-)

diff --git a/src/lex/lex.c b/src/lex/lex.c @@ -0,0 +1,436 @@ +/* C11 lexer (§6.4). Streams tokens out of a borrowed source buffer. + * + * Tokens are recognized per the standard's lexical grammar: + * - identifiers (§6.4.2) — keyword bucketing happens later in parse_c + * - pp-numbers (§6.4.8), classified into TOK_NUM / TOK_FLT + * - string literals (§6.4.5) and character constants (§6.4.4.4) + * including the L/u/u8/U encoding prefixes + * - punctuators (§6.4.6), longest-match, including digraphs + * - `#` and `##` surface as TOK_PP_HASH / TOK_PP_PASTE so the + * preprocessor can recognize directives and the paste operator + * + * Comments (§6.4.9) are consumed as whitespace; physical newlines surface + * as TOK_NEWLINE so PP can implement directive-line semantics. */ + +#include "lex/lex.h" +#include "core/heap.h" +#include "core/pool.h" + +#include <string.h> + +struct Lexer { + Compiler* c; + Pool* pool; + Heap* heap; + const char* src; + size_t len; + size_t pos; + u32 file_id; + u32 line; + u32 col; + u8 at_bol; + u8 had_space; +}; + +static int peek(const Lexer* l, size_t off) +{ + if (l->pos + off >= l->len) return -1; + return (unsigned char)l->src[l->pos + off]; +} + +static int bump(Lexer* l) +{ + int ch; + if (l->pos >= l->len) return -1; + ch = (unsigned char)l->src[l->pos++]; + if (ch == '\n') { l->line++; l->col = 1; } + else { l->col++; } + return ch; +} + +static int is_digit(int c) { return c >= '0' && c <= '9'; } +static int is_hex_digit(int c) +{ + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} +/* Identifier-start byte (§6.4.2.1). Letters and underscore are ASCII; bytes + * ≥ 0x80 are accepted as the implementation-defined "other characters" + * permitted in identifiers — in practice UTF-8 lead/continuation bytes for + * extended source characters. UCNs are matched separately via ucn_len since + * they span multiple source bytes. 
*/ +static int is_alpha(int c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80; +} +static int is_alnum(int c) { return is_alpha(c) || is_digit(c); } + +/* Match a UCN at offset `off` from the current position. Returns the total + * length (6 for \uXXXX, 10 for \UXXXXXXXX), or 0 if no UCN matches. The + * range constraints from §6.4.3 (no UCN < 00A0 except $/@/`, and none in + * D800–DFFF) are not enforced here — the lexical form is matched and any + * downstream phase that cares can diagnose. */ +static int ucn_len(const Lexer* l, size_t off) +{ + int n, i; + if (peek(l, off) != '\\') return 0; + if (peek(l, off + 1) == 'u') n = 4; + else if (peek(l, off + 1) == 'U') n = 8; + else return 0; + for (i = 0; i < n; ++i) { + if (!is_hex_digit(peek(l, off + 2 + i))) return 0; + } + return 2 + n; +} + +static SrcLoc lex_here(const Lexer* l) +{ + SrcLoc loc; + loc.file_id = l->file_id; + loc.line = l->line; + loc.col = l->col; + return loc; +} + +Lexer* lex_open_mem(Compiler* c, const char* name, const char* src, size_t len) +{ + Heap* h = (Heap*)c->env->heap; + Lexer* l = (Lexer*)h->alloc(h, sizeof(*l), _Alignof(Lexer)); + if (!l) return NULL; + memset(l, 0, sizeof(*l)); + l->c = c; + l->pool = c->global; + l->heap = h; + l->src = src ? src : ""; + l->len = src ? len : 0; + l->pos = 0; + l->file_id = source_add_memory(c->sources, name); + l->line = 1; + l->col = 1; + l->at_bol = 1; + l->had_space = 0; + return l; +} + +void lex_close(Lexer* l) +{ + if (!l) return; + l->heap->free(l->heap, l, sizeof(*l)); +} + +SrcLoc lex_loc(const Lexer* l) { return lex_here(l); } +u32 lex_file_id(const Lexer* l) { return l->file_id; } +const LitInfo* lex_lit(const Lexer* l, LitId id) { (void)l; (void)id; return NULL; } + +/* Skip whitespace and comments, setting had_space whenever anything is + * consumed. Returns nothing: a line comment stops before its terminating + * '\n' so the caller can emit TOK_NEWLINE for it, while newlines inside a + * block comment are consumed as part of the comment. 
*/ +static void skip_ws_and_comments(Lexer* l) +{ + for (;;) { + int ch = peek(l, 0); + if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\v' || ch == '\f') { + bump(l); + l->had_space = 1; + continue; + } + if (ch == '/' && peek(l, 1) == '/') { + bump(l); bump(l); + while (l->pos < l->len && (unsigned char)l->src[l->pos] != '\n') bump(l); + l->had_space = 1; + continue; + } + if (ch == '/' && peek(l, 1) == '*') { + bump(l); bump(l); + while (l->pos < l->len) { + if ((unsigned char)l->src[l->pos] == '*' && + l->pos + 1 < l->len && + (unsigned char)l->src[l->pos + 1] == '/') { + bump(l); bump(l); + break; + } + bump(l); + } + l->had_space = 1; + continue; + } + break; + } +} + +/* Consume a pp-number per §6.4.8. The cursor is positioned at the leading + * digit (or `.` followed by a digit) on entry. */ +static void scan_pp_number(Lexer* l) +{ + if (peek(l, 0) == '.') bump(l); + bump(l); /* first digit */ + while (l->pos < l->len) { + int c = peek(l, 0); + int n = peek(l, 1); + if ((c == 'e' || c == 'E' || c == 'p' || c == 'P') && (n == '+' || n == '-')) { + bump(l); bump(l); + } else if (is_alnum(c) || c == '.') { + bump(l); + } else { + break; + } + } +} + +/* 1 if the pp-number text is a floating constant (§6.4.4.2): contains a + * radix `.`, a hex `p`/`P` exponent, or a decimal `e`/`E` exponent. */ +static int pp_number_is_float(const char* s, size_t n) +{ + int is_hex = 0; + size_t i = 0; + if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + is_hex = 1; + i = 2; + } + for (; i < n; ++i) { + char c = s[i]; + if (c == '.') return 1; + if (is_hex && (c == 'p' || c == 'P')) return 1; + if (!is_hex && (c == 'e' || c == 'E')) { + if (i + 1 < n) { + char nx = s[i + 1]; + if (nx == '+' || nx == '-' || (nx >= '0' && nx <= '9')) return 1; + } + } + } + return 0; +} + +/* Consume a quoted body — string ('"') or character ('\''). The cursor is + * positioned at the opening quote on entry. 
Returns 1 on an unterminated or + * newline-broken literal, 0 on a clean close. */ +static int scan_quoted(Lexer* l, int quote) +{ + bump(l); /* opening quote */ + while (l->pos < l->len) { + int ch = (unsigned char)l->src[l->pos]; + if (ch == quote) { bump(l); return 0; } + if (ch == '\n') return 1; + if (ch == '\\' && l->pos + 1 < l->len) { + bump(l); /* backslash */ + bump(l); /* the escaped char (incl. potential newline in line + * splice scenarios; we do not splice here) */ + continue; + } + bump(l); + } + return 1; +} + +Tok lex_next(Lexer* l) +{ + Tok t; + SrcLoc tloc; + size_t start; + int ch; + + memset(&t, 0, sizeof(t)); + + /* Skip whitespace and comments. A newline token is emitted before any + * subsequent content tokens for the line that follows. */ + for (;;) { + skip_ws_and_comments(l); + if (l->pos >= l->len) { + t.kind = TOK_EOF; + t.loc = lex_here(l); + return t; + } + if (peek(l, 0) == '\n') { + tloc = lex_here(l); + bump(l); + t.kind = TOK_NEWLINE; + t.loc = tloc; + l->at_bol = 1; + l->had_space = 0; + return t; + } + break; + } + + tloc = lex_here(l); + start = l->pos; + ch = peek(l, 0); + + if (l->at_bol) t.flags |= TF_AT_BOL; + if (l->had_space) t.flags |= TF_HAS_SPACE; + l->at_bol = 0; + l->had_space = 0; + t.loc = tloc; + + /* String / character literal, with optional encoding prefix. The prefix + * length and encoding flag are decoded together so the spelling we + * intern includes the prefix bytes. 
*/ + { + int sp_len = -1; + int is_char = 0; + u32 encf = 0; + + if (ch == '"') { sp_len = 0; is_char = 0; } + else if (ch == '\'') { sp_len = 0; is_char = 1; } + else if (ch == 'L' && peek(l, 1) == '"') { sp_len = 1; is_char = 0; encf = TF_STR_WIDE; } + else if (ch == 'L' && peek(l, 1) == '\'') { sp_len = 1; is_char = 1; encf = TF_STR_WIDE; } + else if (ch == 'u' && peek(l, 1) == '8' && peek(l, 2) == '"') { sp_len = 2; is_char = 0; encf = TF_STR_U8; } + else if (ch == 'u' && peek(l, 1) == '"') { sp_len = 1; is_char = 0; encf = TF_STR_U16; } + else if (ch == 'u' && peek(l, 1) == '\'') { sp_len = 1; is_char = 1; encf = TF_STR_U16; } + else if (ch == 'U' && peek(l, 1) == '"') { sp_len = 1; is_char = 0; encf = TF_STR_U32; } + else if (ch == 'U' && peek(l, 1) == '\'') { sp_len = 1; is_char = 1; encf = TF_STR_U32; } + + if (sp_len >= 0) { + int i; + for (i = 0; i < sp_len; ++i) bump(l); + if (scan_quoted(l, is_char ? '\'' : '"')) t.flags |= TF_LITERAL_BAD; + t.kind = (u16)(is_char ? TOK_CHR : TOK_STR); + t.flags |= encf; + t.spelling = pool_intern(l->pool, l->src + start, l->pos - start); + t.v.str = t.spelling; + return t; + } + } + + /* Identifier (§6.4.2). Encoding-prefix candidates above are matched + * before this since L/u/U followed by a quote is a literal, not an + * identifier. The grammar's identifier-nondigit covers letters, _, + * extended source chars (impl-defined; bytes ≥ 0x80 here), and UCNs + * (§6.4.3) — the latter span multiple source bytes so they're matched + * via ucn_len rather than the per-byte is_alpha predicate. 
*/ + { + int u = ucn_len(l, 0); + if (is_alpha(ch) || u) { + if (u) { int i; for (i = 0; i < u; ++i) bump(l); } + else bump(l); + for (;;) { + int c = peek(l, 0); + if (is_alnum(c)) { + bump(l); + } else if ((u = ucn_len(l, 0))) { + int i; for (i = 0; i < u; ++i) bump(l); + } else { + break; + } + } + t.kind = TOK_IDENT; + t.spelling = pool_intern(l->pool, l->src + start, l->pos - start); + t.v.ident = t.spelling; + return t; + } + } + + /* pp-number (§6.4.8), then classified to TOK_NUM / TOK_FLT. */ + if (is_digit(ch) || (ch == '.' && is_digit(peek(l, 1)))) { + scan_pp_number(l); + t.kind = (u16)(pp_number_is_float(l->src + start, l->pos - start) + ? TOK_FLT : TOK_NUM); + t.spelling = pool_intern(l->pool, l->src + start, l->pos - start); + return t; + } + + /* Punctuator (§6.4.6) — longest match. `#` and `##` (and their digraph + * forms `%:` and `%:%:`) become TOK_PP_HASH / TOK_PP_PASTE so PP can + * recognize directives and the paste operator. */ + { + int n0 = peek(l, 0); + int n1 = peek(l, 1); + int n2 = peek(l, 2); + int n3 = peek(l, 3); + int adv = 1; + u32 punct = P_NONE; + u16 kind = TOK_PUNCT; + int i; + + switch (n0) { + case '#': + if (n1 == '#') { adv = 2; kind = TOK_PP_PASTE; punct = P_HASH_HASH; } + else { adv = 1; kind = TOK_PP_HASH; punct = '#'; } + break; + case '.': + if (n1 == '.' 
&& n2 == '.') { adv = 3; punct = P_ELLIPSIS; } + else { adv = 1; punct = '.'; } + break; + case '-': + if (n1 == '>') { adv = 2; punct = P_ARROW; } + else if (n1 == '-') { adv = 2; punct = P_DEC; } + else if (n1 == '=') { adv = 2; punct = P_SUB_ASSIGN; } + else { adv = 1; punct = '-'; } + break; + case '+': + if (n1 == '+') { adv = 2; punct = P_INC; } + else if (n1 == '=') { adv = 2; punct = P_ADD_ASSIGN; } + else { adv = 1; punct = '+'; } + break; + case '<': + if (n1 == '<' && n2 == '=') { adv = 3; punct = P_SHL_ASSIGN; } + else if (n1 == '<') { adv = 2; punct = P_SHL; } + else if (n1 == '=') { adv = 2; punct = P_LE; } + else if (n1 == ':') { adv = 2; punct = '['; } /* digraph */ + else if (n1 == '%') { adv = 2; punct = '{'; } /* digraph */ + else { adv = 1; punct = '<'; } + break; + case '>': + if (n1 == '>' && n2 == '=') { adv = 3; punct = P_SHR_ASSIGN; } + else if (n1 == '>') { adv = 2; punct = P_SHR; } + else if (n1 == '=') { adv = 2; punct = P_GE; } + else { adv = 1; punct = '>'; } + break; + case '=': + if (n1 == '=') { adv = 2; punct = P_EQ; } + else { adv = 1; punct = '='; } + break; + case '!': + if (n1 == '=') { adv = 2; punct = P_NE; } + else { adv = 1; punct = '!'; } + break; + case '&': + if (n1 == '&') { adv = 2; punct = P_AND; } + else if (n1 == '=') { adv = 2; punct = P_AND_ASSIGN; } + else { adv = 1; punct = '&'; } + break; + case '|': + if (n1 == '|') { adv = 2; punct = P_OR; } + else if (n1 == '=') { adv = 2; punct = P_OR_ASSIGN; } + else { adv = 1; punct = '|'; } + break; + case '^': + if (n1 == '=') { adv = 2; punct = P_XOR_ASSIGN; } + else { adv = 1; punct = '^'; } + break; + case '*': + if (n1 == '=') { adv = 2; punct = P_MUL_ASSIGN; } + else { adv = 1; punct = '*'; } + break; + case '/': + if (n1 == '=') { adv = 2; punct = P_DIV_ASSIGN; } + else { adv = 1; punct = '/'; } + break; + case '%': + if (n1 == ':' && n2 == '%' && n3 == ':') { adv = 4; kind = TOK_PP_PASTE; punct = P_HASH_HASH; } + else if (n1 == ':') { adv = 2; kind = TOK_PP_HASH; 
punct = '#'; } + else if (n1 == '=') { adv = 2; punct = P_MOD_ASSIGN; } + else if (n1 == '>') { adv = 2; punct = '}'; } /* digraph */ + else { adv = 1; punct = '%'; } + break; + case ':': + if (n1 == '>') { adv = 2; punct = ']'; } /* digraph */ + else { adv = 1; punct = ':'; } + break; + case '(': case ')': case '{': case '}': case '[': case ']': + case ',': case ';': case '?': case '~': + adv = 1; punct = (u32)n0; + break; + default: + /* Unknown byte. Surface as a single-char punct so the token + * stream still progresses; PP/parse may diagnose. */ + adv = 1; punct = (u32)n0; + break; + } + + for (i = 0; i < adv; ++i) bump(l); + t.kind = kind; + t.v.punct = punct; + t.spelling = pool_intern(l->pool, l->src + start, l->pos - start); + return t; + } +} diff --git a/test/lex/cases/basic_punct.c b/test/lex/cases/basic_punct.c @@ -0,0 +1,3 @@ +int main(void) { + return x->y; +} diff --git a/test/lex/cases/basic_punct.expected b/test/lex/cases/basic_punct.expected @@ -0,0 +1,16 @@ +(ident int) +(ident main) +(punct () +(ident void) +(punct )) +(punct {) +(newline) +(ident return) +(ident x) +(punct ->) +(ident y) +(punct ;) +(newline) +(punct }) +(newline) +(eof) diff --git a/test/lex/cases/comment_edges.c b/test/lex/cases/comment_edges.c @@ -0,0 +1,12 @@ +/* /* */ x */ +/* / */ y +/* * */ z +/* foo **/ w +/* a *//* b */ c +/* /// not line comment */ k +/* "looks like string" */ m +/*****/ +/**//**/ +/* x + y + z */ n diff --git a/test/lex/cases/comment_edges.expected b/test/lex/cases/comment_edges.expected @@ -0,0 +1,21 @@ +(ident x) +(punct *) +(punct /) +(newline) +(ident y) +(newline) +(ident z) +(newline) +(ident w) +(newline) +(ident c) +(newline) +(ident k) +(newline) +(ident m) +(newline) +(newline) +(newline) +(ident n) +(newline) +(eof) diff --git a/test/lex/cases/comments.c b/test/lex/cases/comments.c @@ -0,0 +1,15 @@ +// just a line comment +int x; /* block */ int y; +/* multi + line + line */ +int z; +a /**/ b +c // tail comment +d /* with * and / inside 
*/ e +// "string-like" text and /* nested-looking */ stays comment +f /* contains // line-comment chars */ g +h/**/i +j/* one *//* two */k +/*/ slash-star-slash content */ m +// no trailing newline at EOF +\ No newline at end of file diff --git a/test/lex/cases/comments.expected b/test/lex/cases/comments.expected @@ -0,0 +1,34 @@ +(newline) +(ident int) +(ident x) +(punct ;) +(ident int) +(ident y) +(punct ;) +(newline) +(newline) +(ident int) +(ident z) +(punct ;) +(newline) +(ident a) +(ident b) +(newline) +(ident c) +(newline) +(ident d) +(ident e) +(newline) +(newline) +(ident f) +(ident g) +(newline) +(ident h) +(ident i) +(newline) +(ident j) +(ident k) +(newline) +(ident m) +(newline) +(eof) diff --git a/test/lex/cases/empty.c b/test/lex/cases/empty.c diff --git a/test/lex/cases/empty.expected b/test/lex/cases/empty.expected @@ -0,0 +1 @@ +(eof) diff --git a/test/lex/cases/float_constants.c b/test/lex/cases/float_constants.c @@ -0,0 +1,34 @@ +1.0 +1. +.1 +0.0 +3.14 +1e0 +1E10 +1e+1 +1e-1 +1.5e+2 +1.5e-2 +.5e10 +1.e10 +1.E-5 +.0 +.0L +1.0f +1.0F +1.0l +1.0L +2.5f +6.022e23L +1.0Lf +0x1p0 +0x1P0 +0X1P0 +0x1p+0 +0x1P-2 +0xFp-1 +0x1.8p+1 +0x.8p1 +0xA.Bp+3 +0x1.8 +0x1. diff --git a/test/lex/cases/float_constants.expected b/test/lex/cases/float_constants.expected @@ -0,0 +1,69 @@ +(flt 1.0) +(newline) +(flt 1.) 
+(newline) +(flt .1) +(newline) +(flt 0.0) +(newline) +(flt 3.14) +(newline) +(flt 1e0) +(newline) +(flt 1E10) +(newline) +(flt 1e+1) +(newline) +(flt 1e-1) +(newline) +(flt 1.5e+2) +(newline) +(flt 1.5e-2) +(newline) +(flt .5e10) +(newline) +(flt 1.e10) +(newline) +(flt 1.E-5) +(newline) +(flt .0) +(newline) +(flt .0L) +(newline) +(flt 1.0f) +(newline) +(flt 1.0F) +(newline) +(flt 1.0l) +(newline) +(flt 1.0L) +(newline) +(flt 2.5f) +(newline) +(flt 6.022e23L) +(newline) +(flt 1.0Lf) +(newline) +(flt 0x1p0) +(newline) +(flt 0x1P0) +(newline) +(flt 0X1P0) +(newline) +(flt 0x1p+0) +(newline) +(flt 0x1P-2) +(newline) +(flt 0xFp-1) +(newline) +(flt 0x1.8p+1) +(newline) +(flt 0x.8p1) +(newline) +(flt 0xA.Bp+3) +(newline) +(flt 0x1.8) +(newline) +(flt 0x1.) +(newline) +(eof) diff --git a/test/lex/cases/identifiers.c b/test/lex/cases/identifiers.c @@ -0,0 +1,31 @@ +foo +_bar +__baz +foo123 +A1B2C3 +_ +__ +___ +a +Z +_0 +_9 +__func__ +__LINE__ +__FILE__ +camelCase +PascalCase +SHOUTY_SNAKE +_private99 +mixed_Case_42 +_1_2_3 +abc_def_ghi +x0 +x1y2z3_ +naïve +λ_func +café +éstart +caf\u00e9 +\u00e9start +with\U0001F600paste diff --git a/test/lex/cases/identifiers.expected b/test/lex/cases/identifiers.expected @@ -0,0 +1,63 @@ +(ident foo) +(newline) +(ident _bar) +(newline) +(ident __baz) +(newline) +(ident foo123) +(newline) +(ident A1B2C3) +(newline) +(ident _) +(newline) +(ident __) +(newline) +(ident ___) +(newline) +(ident a) +(newline) +(ident Z) +(newline) +(ident _0) +(newline) +(ident _9) +(newline) +(ident __func__) +(newline) +(ident __LINE__) +(newline) +(ident __FILE__) +(newline) +(ident camelCase) +(newline) +(ident PascalCase) +(newline) +(ident SHOUTY_SNAKE) +(newline) +(ident _private99) +(newline) +(ident mixed_Case_42) +(newline) +(ident _1_2_3) +(newline) +(ident abc_def_ghi) +(newline) +(ident x0) +(newline) +(ident x1y2z3_) +(newline) +(ident naïve) +(newline) +(ident λ_func) +(newline) +(ident café) +(newline) +(ident éstart) +(newline) +(ident 
caf\u00e9) +(newline) +(ident \u00e9start) +(newline) +(ident with\U0001F600paste) +(newline) +(eof) diff --git a/test/lex/cases/int_constants.c b/test/lex/cases/int_constants.c @@ -0,0 +1,49 @@ +0 +1 +123 +2147483648 +00 +0755 +01234567 +0x0 +0X0 +0xFF +0X1AbC +0xDEADBEEF +1u +1U +1l +1L +1ll +1LL +1ul +1uL +1Ul +1UL +1lu +1Lu +1lU +1LU +1ull +1uLL +1Ull +1ULL +1llu +1LLu +1llU +1LLU +0xFFu +0755L +0xFFFFFFFFULL +1lL +1Ll +1lll +1LLL +1ufL +0b1010ULL +123abc +0xGHI +0x +0X +077u8 +0Lu8 diff --git a/test/lex/cases/int_constants.expected b/test/lex/cases/int_constants.expected @@ -0,0 +1,99 @@ +(num 0) +(newline) +(num 1) +(newline) +(num 123) +(newline) +(num 2147483648) +(newline) +(num 00) +(newline) +(num 0755) +(newline) +(num 01234567) +(newline) +(num 0x0) +(newline) +(num 0X0) +(newline) +(num 0xFF) +(newline) +(num 0X1AbC) +(newline) +(num 0xDEADBEEF) +(newline) +(num 1u) +(newline) +(num 1U) +(newline) +(num 1l) +(newline) +(num 1L) +(newline) +(num 1ll) +(newline) +(num 1LL) +(newline) +(num 1ul) +(newline) +(num 1uL) +(newline) +(num 1Ul) +(newline) +(num 1UL) +(newline) +(num 1lu) +(newline) +(num 1Lu) +(newline) +(num 1lU) +(newline) +(num 1LU) +(newline) +(num 1ull) +(newline) +(num 1uLL) +(newline) +(num 1Ull) +(newline) +(num 1ULL) +(newline) +(num 1llu) +(newline) +(num 1LLu) +(newline) +(num 1llU) +(newline) +(num 1LLU) +(newline) +(num 0xFFu) +(newline) +(num 0755L) +(newline) +(num 0xFFFFFFFFULL) +(newline) +(num 1lL) +(newline) +(num 1Ll) +(newline) +(num 1lll) +(newline) +(num 1LLL) +(newline) +(num 1ufL) +(newline) +(num 0b1010ULL) +(newline) +(num 123abc) +(newline) +(num 0xGHI) +(newline) +(num 0x) +(newline) +(num 0X) +(newline) +(num 077u8) +(newline) +(num 0Lu8) +(newline) +(eof) diff --git a/test/lex/cases/keywords.c b/test/lex/cases/keywords.c @@ -0,0 +1,8 @@ +auto break case char const continue default do double +else enum extern float for goto if inline int long +register restrict return short signed sizeof static struct +switch 
typedef union unsigned void volatile while +_Alignas _Alignof _Atomic _Bool _Complex _Generic +_Imaginary _Noreturn _Static_assert _Thread_local +INT Int iNT _alignas _alignof _atomic _bool +ints intt return0 returns ifx whilez forX gotoo diff --git a/test/lex/cases/keywords.expected b/test/lex/cases/keywords.expected @@ -0,0 +1,68 @@ +(ident auto) +(ident break) +(ident case) +(ident char) +(ident const) +(ident continue) +(ident default) +(ident do) +(ident double) +(newline) +(ident else) +(ident enum) +(ident extern) +(ident float) +(ident for) +(ident goto) +(ident if) +(ident inline) +(ident int) +(ident long) +(newline) +(ident register) +(ident restrict) +(ident return) +(ident short) +(ident signed) +(ident sizeof) +(ident static) +(ident struct) +(newline) +(ident switch) +(ident typedef) +(ident union) +(ident unsigned) +(ident void) +(ident volatile) +(ident while) +(newline) +(ident _Alignas) +(ident _Alignof) +(ident _Atomic) +(ident _Bool) +(ident _Complex) +(ident _Generic) +(newline) +(ident _Imaginary) +(ident _Noreturn) +(ident _Static_assert) +(ident _Thread_local) +(newline) +(ident INT) +(ident Int) +(ident iNT) +(ident _alignas) +(ident _alignof) +(ident _atomic) +(ident _bool) +(newline) +(ident ints) +(ident intt) +(ident return0) +(ident returns) +(ident ifx) +(ident whilez) +(ident forX) +(ident gotoo) +(newline) +(eof) diff --git a/test/lex/cases/line_splice.c b/test/lex/cases/line_splice.c @@ -0,0 +1,25 @@ +he\ +llo +"foo\ +bar" +12\ +345 +0x1\ +2.3\ +p+\ +4 +in\ +t ma\ +in +// foo\ +still +end +/* block\ +comment */ y +a<\ +<b +str\ +1 +\ +\ +trailing diff --git a/test/lex/cases/line_splice.expected b/test/lex/cases/line_splice.expected @@ -0,0 +1,25 @@ +(ident hello) +(newline) +(str "foobar") +(newline) +(num 12345) +(newline) +(flt 0x12.3p+4) +(newline) +(ident int) +(ident main) +(newline) +(newline) +(ident end) +(newline) +(ident y) +(newline) +(ident a) +(punct <<) +(ident b) +(newline) +(ident str1) +(newline) +(ident trailing) 
+(newline) +(eof) diff --git a/test/lex/cases/maximal_munch.c b/test/lex/cases/maximal_munch.c @@ -0,0 +1,21 @@ ++++a +a---b +x<<=y +x>>=y +a<<b +a>>b +p->q +i==j +i!=j +i<=j +i>=j +&&|| +...x +..x +.x +x+++++y +<::> +<::: +<%%> +%:%: +%:% diff --git a/test/lex/cases/maximal_munch.expected b/test/lex/cases/maximal_munch.expected @@ -0,0 +1,80 @@ +(punct ++) +(punct +) +(ident a) +(newline) +(ident a) +(punct --) +(punct -) +(ident b) +(newline) +(ident x) +(punct <<=) +(ident y) +(newline) +(ident x) +(punct >>=) +(ident y) +(newline) +(ident a) +(punct <<) +(ident b) +(newline) +(ident a) +(punct >>) +(ident b) +(newline) +(ident p) +(punct ->) +(ident q) +(newline) +(ident i) +(punct ==) +(ident j) +(newline) +(ident i) +(punct !=) +(ident j) +(newline) +(ident i) +(punct <=) +(ident j) +(newline) +(ident i) +(punct >=) +(ident j) +(newline) +(punct &&) +(punct ||) +(newline) +(punct ...) +(ident x) +(newline) +(punct .) +(punct .) +(ident x) +(newline) +(punct .) +(ident x) +(newline) +(ident x) +(punct ++) +(punct ++) +(punct +) +(ident y) +(newline) +(punct <:) +(punct :>) +(newline) +(punct <:) +(punct :) +(punct :) +(newline) +(punct <%) +(punct %>) +(newline) +(pp-paste) +(newline) +(pp-hash) +(punct %) +(newline) +(eof) diff --git a/test/lex/cases/pp_directives.c b/test/lex/cases/pp_directives.c @@ -0,0 +1,21 @@ +#include "bar.h" +#include <foo.h> +#define MAX 100 +#define ID(x) x +#define STR(x) #x +#define CAT(a, b) a ## b +#undef MAX +#ifdef X +#endif +#ifndef Y +#else +#endif +#if 1 + 2 +#elif 3 +#endif +#error msg +#pragma once +#line 42 "f.c" +# +%:include "x.h" +a %:%: b diff --git a/test/lex/cases/pp_directives.expected b/test/lex/cases/pp_directives.expected @@ -0,0 +1,100 @@ +(pp-hash) +(ident include) +(header "bar.h") +(newline) +(pp-hash) +(ident include) +(header <foo.h>) +(newline) +(pp-hash) +(ident define) +(ident MAX) +(num 100) +(newline) +(pp-hash) +(ident define) +(ident ID) +(punct () +(ident x) +(punct )) +(ident x) +(newline) 
+(pp-hash) +(ident define) +(ident STR) +(punct () +(ident x) +(punct )) +(pp-hash) +(ident x) +(newline) +(pp-hash) +(ident define) +(ident CAT) +(punct () +(ident a) +(punct ,) +(ident b) +(punct )) +(ident a) +(pp-paste) +(ident b) +(newline) +(pp-hash) +(ident undef) +(ident MAX) +(newline) +(pp-hash) +(ident ifdef) +(ident X) +(newline) +(pp-hash) +(ident endif) +(newline) +(pp-hash) +(ident ifndef) +(ident Y) +(newline) +(pp-hash) +(ident else) +(newline) +(pp-hash) +(ident endif) +(newline) +(pp-hash) +(ident if) +(num 1) +(punct +) +(num 2) +(newline) +(pp-hash) +(ident elif) +(num 3) +(newline) +(pp-hash) +(ident endif) +(newline) +(pp-hash) +(ident error) +(ident msg) +(newline) +(pp-hash) +(ident pragma) +(ident once) +(newline) +(pp-hash) +(ident line) +(num 42) +(str "f.c") +(newline) +(pp-hash) +(newline) +(pp-hash) +(ident include) +(header "x.h") +(newline) +(ident a) +(pp-paste) +(ident b) +(newline) +(eof) diff --git a/test/lex/cases/pp_numbers.c b/test/lex/cases/pp_numbers.c @@ -0,0 +1,29 @@ +0..1 +1...3 +.5. +.5..6 +1e+1e+1 +123abc +0xGHI +1ea +1e+x +1e+ +1.e +.5e +0xAp +0xAp+ +0xAp+x +. +.x +. 5 +.5 +1... +1.2.3.4 +0xA.Bp+3 +0xFFp+2.5 +99e +99e+ +99e9 +99E-9 +1_underscore +3.14_pi diff --git a/test/lex/cases/pp_numbers.expected b/test/lex/cases/pp_numbers.expected @@ -0,0 +1,61 @@ +(flt 0..1) +(newline) +(flt 1...3) +(newline) +(flt .5.) +(newline) +(flt .5..6) +(newline) +(flt 1e+1e+1) +(newline) +(num 123abc) +(newline) +(num 0xGHI) +(newline) +(num 1ea) +(newline) +(flt 1e+x) +(newline) +(flt 1e+) +(newline) +(flt 1.e) +(newline) +(flt .5e) +(newline) +(flt 0xAp) +(newline) +(flt 0xAp+) +(newline) +(flt 0xAp+x) +(newline) +(punct .) +(newline) +(punct .) +(ident x) +(newline) +(punct .) +(num 5) +(newline) +(flt .5) +(newline) +(flt 1...) 
+(newline) +(flt 1.2.3.4) +(newline) +(flt 0xA.Bp+3) +(newline) +(flt 0xFFp+2.5) +(newline) +(num 99e) +(newline) +(flt 99e+) +(newline) +(flt 99e9) +(newline) +(flt 99E-9) +(newline) +(num 1_underscore) +(newline) +(flt 3.14_pi) +(newline) +(eof) diff --git a/test/lex/cases/pp_passthrough.c b/test/lex/cases/pp_passthrough.c @@ -0,0 +1,2 @@ +#define X 1 +#include "foo.h" diff --git a/test/lex/cases/pp_passthrough.expected b/test/lex/cases/pp_passthrough.expected @@ -0,0 +1,10 @@ +(pp-hash) +(ident define) +(ident X) +(num 1) +(newline) +(pp-hash) +(ident include) +(header "foo.h") +(newline) +(eof) diff --git a/test/lex/cases/punctuators.c b/test/lex/cases/punctuators.c @@ -0,0 +1,10 @@ +[ ] ( ) { } . -> +++ -- & * + - ~ ! +/ % << >> < > <= >= == != ^ | && || +? : ; ... += *= /= %= += -= <<= >>= &= ^= |= +, +# ## +<: :> <% %> %: %:%: +<:a:> +<%b%> diff --git a/test/lex/cases/punctuators.expected b/test/lex/cases/punctuators.expected @@ -0,0 +1,71 @@ +(punct [) +(punct ]) +(punct () +(punct )) +(punct {) +(punct }) +(punct .) +(punct ->) +(newline) +(punct ++) +(punct --) +(punct &) +(punct *) +(punct +) +(punct -) +(punct ~) +(punct !) +(newline) +(punct /) +(punct %) +(punct <<) +(punct >>) +(punct <) +(punct >) +(punct <=) +(punct >=) +(punct ==) +(punct !=) +(punct ^) +(punct |) +(punct &&) +(punct ||) +(newline) +(punct ?) +(punct :) +(punct ;) +(punct ...) +(newline) +(punct =) +(punct *=) +(punct /=) +(punct %=) +(punct +=) +(punct -=) +(punct <<=) +(punct >>=) +(punct &=) +(punct ^=) +(punct |=) +(newline) +(punct ,) +(newline) +(pp-hash) +(pp-paste) +(newline) +(punct <:) +(punct :>) +(punct <%) +(punct %>) +(pp-hash) +(pp-paste) +(newline) +(punct <:) +(ident a) +(punct :>) +(newline) +(punct <%) +(ident b) +(punct %>) +(newline) +(eof) diff --git a/test/lex/cases/string_escapes.c b/test/lex/cases/string_escapes.c @@ -0,0 +1,38 @@ +'\'' +'\"' +'\?' 
+'\\' +'\a' +'\b' +'\f' +'\n' +'\r' +'\t' +'\v' +'\0' +'\7' +'\077' +'\377' +'\x0' +'\x41' +'\xff' +'\xfff' +'\U0001F600' +"\a\b\f\n\r\t\v\"\'\\\?" +"\0\7\077\377" +"\x0\x41\xff\xfff" +"\\" +"\"" +"a\nb" +"tab\there" +L"\n" +u8"\xff" +u"é" +U"\U0001F600" +'\18' +'\779' +"\1234" +"\xffG" +"\xabc\x12" +'é' +"é " diff --git a/test/lex/cases/string_escapes.expected b/test/lex/cases/string_escapes.expected @@ -0,0 +1,77 @@ +(chr '\'') +(newline) +(chr '\"') +(newline) +(chr '\?') +(newline) +(chr '\\') +(newline) +(chr '\a') +(newline) +(chr '\b') +(newline) +(chr '\f') +(newline) +(chr '\n') +(newline) +(chr '\r') +(newline) +(chr '\t') +(newline) +(chr '\v') +(newline) +(chr '\0') +(newline) +(chr '\7') +(newline) +(chr '\077') +(newline) +(chr '\377') +(newline) +(chr '\x0') +(newline) +(chr '\x41') +(newline) +(chr '\xff') +(newline) +(chr '\xfff') +(newline) +(chr '\U0001F600') +(newline) +(str "\a\b\f\n\r\t\v\"\'\\\?") +(newline) +(str "\0\7\077\377") +(newline) +(str "\x0\x41\xff\xfff") +(newline) +(str "\\") +(newline) +(str "\"") +(newline) +(str "a\nb") +(newline) +(str "tab\there") +(newline) +(str L"\n") +(newline) +(str u8"\xff") +(newline) +(str u"é") +(newline) +(str U"\U0001F600") +(newline) +(chr '\18') +(newline) +(chr '\779') +(newline) +(str "\1234") +(newline) +(str "\xffG") +(newline) +(str "\xabc\x12") +(newline) +(chr 'é') +(newline) +(str "é ") +(newline) +(eof) diff --git a/test/lex/cases/strings_chars.c b/test/lex/cases/strings_chars.c @@ -0,0 +1,23 @@ +"" +"hello" +'a' +'0' +' ' +'ab' +"a" "b" "c" +"L" +"u8" +"u" +"U" +L"wide" +u8"utf8" +u"u16" +U"u32" +L'w' +u'A' +U'B' +u8'a' +L "x" +L"x" +'À' +"À\U0001F600" diff --git a/test/lex/cases/strings_chars.expected b/test/lex/cases/strings_chars.expected @@ -0,0 +1,51 @@ +(str "") +(newline) +(str "hello") +(newline) +(chr 'a') +(newline) +(chr '0') +(newline) +(chr ' ') +(newline) +(chr 'ab') +(newline) +(str "a") +(str "b") +(str "c") +(newline) +(str "L") +(newline) +(str "u8") +(newline) +(str 
"u") +(newline) +(str "U") +(newline) +(str L"wide") +(newline) +(str u8"utf8") +(newline) +(str u"u16") +(newline) +(str U"u32") +(newline) +(chr L'w') +(newline) +(chr u'A') +(newline) +(chr U'B') +(newline) +(ident u8) +(chr 'a') +(newline) +(ident L) +(str "x") +(newline) +(str L"x") +(newline) +(chr 'À') +(newline) +(str "À\U0001F600") +(newline) +(eof) diff --git a/test/lex/run.sh b/test/lex/run.sh @@ -0,0 +1,66 @@ +#!/bin/sh +# Data-driven lexer test runner. +# +# For each test/lex/cases/*.c, runs `cfree cc --dump-tokens` and diffs the +# output against the matching .expected file. Leaves .actual files behind on +# failure so they can be reviewed and copied over the expected baseline once +# intentional output changes are validated. +# +# Honors $CFREE for the binary path; defaults to build/cfree relative to the +# repo root inferred from this script's location. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." && pwd) +cases_dir="$script_dir/cases" + +CFREE="${CFREE:-$repo_root/build/cfree}" + +if [ ! -x "$CFREE" ]; then + echo "lex: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +pass=0 +fail=0 +failures= + +for src in "$cases_dir"/*.c; do + [ -e "$src" ] || continue + expected="${src%.c}.expected" + actual="${src%.c}.actual" + name=$(basename "${src%.c}") + + if [ ! -e "$expected" ]; then + printf 'FAIL %s (missing %s)\n' "$name" "$(basename "$expected")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if ! 
"$CFREE" cc --dump-tokens "$src" -o "$actual" >/dev/null 2>&1; then + printf 'FAIL %s (cfree exit nonzero; see %s)\n' "$name" "$(basename "$actual")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if diff -u "$expected" "$actual" >/dev/null 2>&1; then + printf 'PASS %s\n' "$name" + rm -f "$actual" + pass=$((pass + 1)) + else + printf 'FAIL %s\n' "$name" + diff -u "$expected" "$actual" || true + fail=$((fail + 1)) + failures="$failures $name" + fi +done + +total=$((pass + fail)) +printf '\nlex: %d/%d passed\n' "$pass" "$total" +if [ "$fail" -gt 0 ]; then + printf 'lex: failures:%s\n' "$failures" + exit 1 +fi diff --git a/test/pp/cases/define_function.actual b/test/pp/cases/define_function.actual diff --git a/test/pp/cases/define_function.c b/test/pp/cases/define_function.c @@ -0,0 +1,2 @@ +#define ADD(a, b) ((a) + (b)) +ADD(1, 2 * 3) diff --git a/test/pp/cases/define_function.expected b/test/pp/cases/define_function.expected @@ -0,0 +1 @@ +((1) + (2 * 3)) diff --git a/test/pp/cases/define_object.actual b/test/pp/cases/define_object.actual diff --git a/test/pp/cases/define_object.c b/test/pp/cases/define_object.c @@ -0,0 +1,2 @@ +#define X 42 +X X X diff --git a/test/pp/cases/define_object.expected b/test/pp/cases/define_object.expected @@ -0,0 +1 @@ +42 42 42 diff --git a/test/pp/cases/ifdef_basic.actual b/test/pp/cases/ifdef_basic.actual diff --git a/test/pp/cases/ifdef_basic.c b/test/pp/cases/ifdef_basic.c @@ -0,0 +1,11 @@ +#define KEEP 1 +#ifdef KEEP +chosen +#else +skipped +#endif +#ifndef MISSING +also_chosen +#else +also_skipped +#endif diff --git a/test/pp/cases/ifdef_basic.expected b/test/pp/cases/ifdef_basic.expected @@ -0,0 +1,2 @@ +chosen +also_chosen diff --git a/test/pp/cases/include_local.actual b/test/pp/cases/include_local.actual diff --git a/test/pp/cases/include_local.c b/test/pp/cases/include_local.c @@ -0,0 +1,2 @@ +#include "include_local.h" +FROM_HEADER diff --git a/test/pp/cases/include_local.expected 
b/test/pp/cases/include_local.expected @@ -0,0 +1,2 @@ +header_token +7 diff --git a/test/pp/cases/include_local.h b/test/pp/cases/include_local.h @@ -0,0 +1,2 @@ +#define FROM_HEADER 7 +header_token diff --git a/test/pp/cases/undef.actual b/test/pp/cases/undef.actual diff --git a/test/pp/cases/undef.c b/test/pp/cases/undef.c @@ -0,0 +1,6 @@ +#define X 1 +X +#undef X +X +#define X 2 +X diff --git a/test/pp/cases/undef.expected b/test/pp/cases/undef.expected @@ -0,0 +1,3 @@ +1 +X +2 diff --git a/test/pp/run.sh b/test/pp/run.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# Data-driven preprocessor test runner. +# +# For each test/pp/cases/*.c, runs `cfree cc -E` (with -I pointing at the +# cases dir so sibling headers resolve) and diffs the output against the +# matching .expected file. Leaves .actual files behind on failure so they +# can be reviewed and copied over the expected baseline once intentional +# output changes are validated. +# +# Honors $CFREE for the binary path; defaults to build/cfree relative to the +# repo root inferred from this script's location. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." && pwd) +cases_dir="$script_dir/cases" + +CFREE="${CFREE:-$repo_root/build/cfree}" + +if [ ! -x "$CFREE" ]; then + echo "pp: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +pass=0 +fail=0 +failures= + +for src in "$cases_dir"/*.c; do + [ -e "$src" ] || continue + expected="${src%.c}.expected" + actual="${src%.c}.actual" + name=$(basename "${src%.c}") + + if [ ! -e "$expected" ]; then + printf 'FAIL %s (missing %s)\n' "$name" "$(basename "$expected")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if ! 
"$CFREE" cc -E -I "$cases_dir" "$src" -o "$actual" >/dev/null 2>&1; then + printf 'FAIL %s (cfree exit nonzero; see %s)\n' "$name" "$(basename "$actual")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if diff -u "$expected" "$actual" >/dev/null 2>&1; then + printf 'PASS %s\n' "$name" + rm -f "$actual" + pass=$((pass + 1)) + else + printf 'FAIL %s\n' "$name" + diff -u "$expected" "$actual" || true + fail=$((fail + 1)) + failures="$failures $name" + fi +done + +total=$((pass + fail)) +printf '\npp: %d/%d passed\n' "$pass" "$total" +if [ "$fail" -gt 0 ]; then + printf 'pp: failures:%s\n' "$failures" + exit 1 +fi diff --git a/test/test.mk b/test/test.mk @@ -0,0 +1,41 @@ +# Data-driven tests. Included from the top-level Makefile. +# +# - test-lex / test-pp: C frontend runners; depend on the cfree driver +# binary, which today fails to link (most of libcfree is header-only). +# - test-elf: ELF roundtrip harness in test/elf/; depends only on +# libcfree.a and compiles its own test binaries against it. Skipped +# layers are reported (set CFREE_TEST_ALLOW_SKIP=1 to allow skips). +# - test-ar: in-process ar reader/writer tests; depends only on +# libcfree.a. Set CFREE_AR_TEST_HOST=1 to also dump produced bytes +# to /tmp and run the host's `ar t` / `nm --print-armap` as a +# cross-check. + +.PHONY: test test-lex test-pp test-elf test-ar + +test: test-lex test-pp test-elf test-ar + +test-lex: bin + @CFREE=$(BIN) test/lex/run.sh + +test-pp: bin + @CFREE=$(BIN) test/pp/run.sh + +test-elf: lib bin-soft + bash test/elf/run.sh + +# Best-effort cfree binary build: Layer D needs build/cfree, but the +# binary may not link until enough libcfree symbols exist. The harness +# detects a missing binary and skips that layer; don't break test-elf +# when bin fails. 
+.PHONY: bin-soft +bin-soft: + -@$(MAKE) bin 2>/dev/null || true + +AR_TEST_BIN = build/test/ar_test + +test-ar: $(AR_TEST_BIN) + $(AR_TEST_BIN) + +$(AR_TEST_BIN): test/ar_test.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(DRIVER_CFLAGS) test/ar_test.c $(LIB_AR) -o $@