kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

lex.h (2630B)


      1 #ifndef KIT_LEX_H
      2 #define KIT_LEX_H
      3 
      4 #include "cpp_support.h"
      5 
      6 typedef enum TokKind {
      7   TOK_EOF = 0,
      8   TOK_IDENT,    /* v.ident */
      9   TOK_NUM,      /* lit */
     10   TOK_FLT,      /* lit */
     11   TOK_STR,      /* lit; v.str is decoded bytes if target-independent */
     12   TOK_CHR,      /* lit */
     13   TOK_PUNCT,    /* v.punct */
     14   TOK_PP_HASH,  /* # */
     15   TOK_PP_PASTE, /* ## */
     16   TOK_HEADER,   /* header-name in #include / #embed */
     17   TOK_NEWLINE,  /* visible to PP only */
     18   TOK_KW_FIRST,
     19   /* C11 keywords are inserted into this range by parse_c via pool */
     20   TOK_KW_LAST = 0x1000,
     21 } TokKind;
     22 
     23 typedef enum TokFlag {
     24   TF_AT_BOL = 1u << 0,
     25   TF_HAS_SPACE = 1u << 1,
     26   TF_NO_EXPAND = 1u << 2,
     27   TF_INT_U = 1u << 3,
     28   TF_INT_L = 1u << 4,
     29   TF_INT_LL = 1u << 5,
     30   TF_FLT_F = 1u << 6,
     31   TF_FLT_L = 1u << 7,
     32   TF_STR_WIDE = 1u << 8,
     33   TF_STR_U8 = 1u << 9,
     34   TF_STR_U16 = 1u << 10,
     35   TF_STR_U32 = 1u << 11,
     36   TF_LITERAL_BAD = 1u << 12,
     37 } TokFlag;
     38 
     39 typedef enum Punct {
     40   P_NONE = 0,
     41   /* Single-char punctuators reuse their ASCII codepoint here. */
     42   P_ARROW = 256,
     43   P_INC,
     44   P_DEC,
     45   P_SHL,
     46   P_SHR,
     47   P_LE,
     48   P_GE,
     49   P_EQ,
     50   P_NE,
     51   P_AND,
     52   P_OR,
     53   P_ADD_ASSIGN,
     54   P_SUB_ASSIGN,
     55   P_MUL_ASSIGN,
     56   P_DIV_ASSIGN,
     57   P_MOD_ASSIGN,
     58   P_AND_ASSIGN,
     59   P_OR_ASSIGN,
     60   P_XOR_ASSIGN,
     61   P_SHL_ASSIGN,
     62   P_SHR_ASSIGN,
     63   P_ELLIPSIS,
     64   P_HASH_HASH,
     65 } Punct;
     66 
/* A single token as returned by lex_next.
 *
 * Which member of `v` is meaningful depends on `kind`; the TokKind notes
 * name v.ident for TOK_IDENT, v.str for TOK_STR and v.punct for TOK_PUNCT. */
typedef struct Tok {
  u16 kind;     /* presumably a TokKind value (largest, TOK_KW_LAST = 0x1000,
                   fits in 16 bits) — TODO confirm */
  u16 flags;    /* presumably an OR of TokFlag bits (highest defined bit is
                   bit 12, so they fit in 16 bits) — TODO confirm */
  SrcLoc loc;   /* source location of the token */
  Sym spelling; /* exact token spelling for diagnostics/#/## */
  union {
    Sym ident;  /* identifier payload (see TOK_IDENT) */
    Sym str;    /* string payload (see TOK_STR) */
    u32 punct;  /* punctuator payload (see TOK_PUNCT); presumably a Punct
                   value or a single-character ASCII codepoint */
  } v;
} Tok;
     78 
     79 typedef struct Lexer Lexer;
     80 
     81 /* lex_open_mem borrows (src, len). The lexer does not copy source bytes;
     82  * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics
     83  * and the preprocessor's directive scanner read from the borrowed buffer.
     84  *
     85  * Ownership: a Lexer that has been handed to pp_push_input is owned by PP
     86  * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call
     87  * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly).
     88  *
     89  * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed
     90  * Lexer means outliving pp_free. */
     91 Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len);
     92 void lex_close(Lexer*);
     93 
     94 /* Skip a leading `#!` script-interpreter ("shebang") line so an executable
     95  * C file run via `kit run` lexes cleanly. Call only on a freshly-opened
     96  * primary source lexer, before any token is pulled; no-op otherwise. */
     97 void lex_skip_shebang(Lexer*);
     98 
     99 /* Streaming. Returns TOK_EOF repeatedly at end of input. */
    100 Tok lex_next(Lexer*);
    101 SrcLoc lex_loc(const Lexer*);
    102 u32 lex_file_id(const Lexer*);
    103 
    104 #endif