lex.h (2630B)
1 #ifndef KIT_LEX_H 2 #define KIT_LEX_H 3 4 #include "cpp_support.h" 5 6 typedef enum TokKind { 7 TOK_EOF = 0, 8 TOK_IDENT, /* v.ident */ 9 TOK_NUM, /* lit */ 10 TOK_FLT, /* lit */ 11 TOK_STR, /* lit; v.str is decoded bytes if target-independent */ 12 TOK_CHR, /* lit */ 13 TOK_PUNCT, /* v.punct */ 14 TOK_PP_HASH, /* # */ 15 TOK_PP_PASTE, /* ## */ 16 TOK_HEADER, /* header-name in #include / #embed */ 17 TOK_NEWLINE, /* visible to PP only */ 18 TOK_KW_FIRST, 19 /* C11 keywords are inserted into this range by parse_c via pool */ 20 TOK_KW_LAST = 0x1000, 21 } TokKind; 22 23 typedef enum TokFlag { 24 TF_AT_BOL = 1u << 0, 25 TF_HAS_SPACE = 1u << 1, 26 TF_NO_EXPAND = 1u << 2, 27 TF_INT_U = 1u << 3, 28 TF_INT_L = 1u << 4, 29 TF_INT_LL = 1u << 5, 30 TF_FLT_F = 1u << 6, 31 TF_FLT_L = 1u << 7, 32 TF_STR_WIDE = 1u << 8, 33 TF_STR_U8 = 1u << 9, 34 TF_STR_U16 = 1u << 10, 35 TF_STR_U32 = 1u << 11, 36 TF_LITERAL_BAD = 1u << 12, 37 } TokFlag; 38 39 typedef enum Punct { 40 P_NONE = 0, 41 /* Single-char punctuators reuse their ASCII codepoint here. */ 42 P_ARROW = 256, 43 P_INC, 44 P_DEC, 45 P_SHL, 46 P_SHR, 47 P_LE, 48 P_GE, 49 P_EQ, 50 P_NE, 51 P_AND, 52 P_OR, 53 P_ADD_ASSIGN, 54 P_SUB_ASSIGN, 55 P_MUL_ASSIGN, 56 P_DIV_ASSIGN, 57 P_MOD_ASSIGN, 58 P_AND_ASSIGN, 59 P_OR_ASSIGN, 60 P_XOR_ASSIGN, 61 P_SHL_ASSIGN, 62 P_SHR_ASSIGN, 63 P_ELLIPSIS, 64 P_HASH_HASH, 65 } Punct; 66 67 typedef struct Tok { 68 u16 kind; 69 u16 flags; 70 SrcLoc loc; 71 Sym spelling; /* exact token spelling for diagnostics/#/## */ 72 union { 73 Sym ident; 74 Sym str; 75 u32 punct; 76 } v; 77 } Tok; 78 79 typedef struct Lexer Lexer; 80 81 /* lex_open_mem borrows (src, len). The lexer does not copy source bytes; 82 * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics 83 * and the preprocessor's directive scanner read from the borrowed buffer. 84 * 85 * Ownership: a Lexer that has been handed to pp_push_input is owned by PP 86 * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call 87 * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly). 88 * 89 * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed 90 * Lexer means outliving pp_free. */ 91 Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); 92 void lex_close(Lexer*); 93 94 /* Skip a leading `#!` script-interpreter ("shebang") line so an executable 95 * C file run via `kit run` lexes cleanly. Call only on a freshly-opened 96 * primary source lexer, before any token is pulled; no-op otherwise. */ 97 void lex_skip_shebang(Lexer*); 98 99 /* Streaming. Returns TOK_EOF repeatedly at end of input. */ 100 Tok lex_next(Lexer*); 101 SrcLoc lex_loc(const Lexer*); 102 u32 lex_file_id(const Lexer*); 103 104 #endif