boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

data.scm (8749B)


      1 ;; cc/data.scm — record types and symbol alphabets shared across modules.
      2 ;;
      3 ;; Concrete realization of:
      4 ;;   docs/CC-INTERNALS.md §data.scm
      5 ;;   docs/CC-CONTRACTS.md  §1
      6 ;;
      7 ;; Adding a record or alphabet symbol requires updating the contract
      8 ;; doc first.
      9 
     10 ;; --------------------------------------------------------------------
     11 ;; loc — source location for diagnostics
     12 ;; --------------------------------------------------------------------
     13 (define-record-type loc    ; immutable: no field below declares a setter
     14   (%loc file line col)     ; constructor; arguments in field order
     15   loc?                     ; type predicate
     16   (file loc-file)            ; bv
     17   (line loc-line)            ; fixnum
     18   (col  loc-col))            ; fixnum
     19 
     20 ;; --------------------------------------------------------------------
     21 ;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for
     22 ;; PUNCT value symbols, §1.3 for KW value symbols.
     23 ;; --------------------------------------------------------------------
     24 (define-record-type tok    ; immutable: no field below declares a setter
     25   (%tok kind value loc hide) ; raw constructor; make-tok supplies an empty hide list
     26   tok?                       ; type predicate
     27   (kind  tok-kind)           ; symbol from §1.1
     28   (value tok-value)          ; bv | fixnum | symbol | #f
     29   (loc   tok-loc)            ; loc
     30   (hide  tok-hide))          ; list of bv (macro names already expanded)
     31 
     32 (define (make-tok kind value loc)   ; convenience constructor for a tok
     33   (%tok kind value loc '()))        ; hide starts as the empty list
     34 
     35 ;; --------------------------------------------------------------------
     36 ;; macro — preprocessor macro definition
     37 ;; --------------------------------------------------------------------
     38 (define-record-type macro  ; immutable: no field below declares a setter
     39   (%macro kind params body)  ; constructor; arguments in field order
     40   macro?                     ; type predicate
     41   (kind   macro-kind)        ; 'obj | 'fn | 'fn-vararg
     42   (params macro-params)      ; list of bv
     43   (body   macro-body))       ; list of tok
     44 
     45 ;; --------------------------------------------------------------------
     46 ;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and
     47 ;; CC-INTERNALS §data.scm for the ext payload table.
     48 ;;
     49 ;; Fields that mutate over a ctype's lifetime:
     50 ;;   size and align — set to -1/-1 on forward struct/union decl,
     51 ;;                    fixed when the type is completed.
     52 ;;   ext            — same; struct/union ext changes shape when
     53 ;;                    the body is parsed.
     54 ;; --------------------------------------------------------------------
     55 (define-record-type ctype
     56   (%ctype kind size align ext)          ; constructor; arguments in field order
     57   ctype?                                ; type predicate
     58   (kind  ctype-kind)                    ; symbol from §1.4; no setter
     59   (size  ctype-size  ctype-size-set!)   ; mutable; -1 on a forward struct/union decl
     60   (align ctype-align ctype-align-set!)  ; mutable; -1 on a forward struct/union decl
     61   (ext   ctype-ext   ctype-ext-set!))   ; mutable; payload per CC-INTERNALS §data.scm
     62 
     63 ;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?.
     64 (define %t-void  (%ctype 'void  -1 -1 #f))   ; args: kind size align ext; void has size/align -1
     65 (define %t-i8    (%ctype 'i8     1  1 #f))   ; size = align for every entry below
     66 (define %t-u8    (%ctype 'u8     1  1 #f))
     67 (define %t-i16   (%ctype 'i16    2  2 #f))
     68 (define %t-u16   (%ctype 'u16    2  2 #f))
     69 (define %t-i32   (%ctype 'i32    4  4 #f))
     70 (define %t-u32   (%ctype 'u32    4  4 #f))
     71 (define %t-i64   (%ctype 'i64    8  8 #f))
     72 (define %t-u64   (%ctype 'u64    8  8 #f))
     73 (define %t-bool  (%ctype 'bool   1  1 #f))   ; ext is #f for all primitives here
     74 
     75 ;; --------------------------------------------------------------------
     76 ;; sym — declared identifier (function, variable, typedef, …)
     77 ;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage).
     78 ;; --------------------------------------------------------------------
     79 (define-record-type sym    ; immutable: no field below declares a setter
     80   (%sym name kind storage type slot) ; constructor; arguments in field order
     81   sym?                       ; type predicate
     82   (name    sym-name)         ; bv
     83   (kind    sym-kind)         ; symbol from §1.7
     84   (storage sym-storage)      ; symbol from §1.8 or #f
     85   (type    sym-type)         ; ctype
     86   (slot    sym-slot))        ; fixnum | bv | #f, per kind
     87 
     88 ;; --------------------------------------------------------------------
     89 ;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind),
     90 ;; §1.10 (reg names).
     91 ;; --------------------------------------------------------------------
     92 (define-record-type opnd   ; immutable: no field below declares a setter
     93   (%opnd kind type ext lval?) ; constructor; arguments in field order
     94   opnd?                      ; type predicate
     95   (kind  opnd-kind)          ; symbol from §1.5
     96   (type  opnd-type)          ; presumably a ctype — TODO confirm
     97   (ext   opnd-ext)           ; payload; NOTE(review): shape not shown here, check CC-CONTRACTS
     98   (lval? opnd-lval?))        ; presumably a boolean lvalue flag — TODO confirm
     99 
    100 ;; --------------------------------------------------------------------
    101 ;; loop-ctx — entry on parser's loop/switch context stack.
    102 ;; See CC-CONTRACTS §1.9.
    103 ;; --------------------------------------------------------------------
    104 (define-record-type loop-ctx           ; immutable: no field below declares a setter
    105   (%loop-ctx kind tag has-continue?)   ; constructor; arguments in field order
    106   loop-ctx?                            ; type predicate
    107   (kind          loop-ctx-kind)        ; see CC-CONTRACTS §1.9 for the allowed values
    108   (tag           loop-ctx-tag)         ; NOTE(review): representation not shown here
    109   (has-continue? loop-ctx-has-continue?)) ; presumably a boolean — TODO confirm
    110 
    111 ;; --------------------------------------------------------------------
    112 ;; fn-ctx — current-function context inside the parser.
    113 ;; --------------------------------------------------------------------
    114 (define-record-type fn-ctx
    115   (%fn-ctx name return-type params variadic? labels) ; constructor; arguments in field order
    116   fn-ctx?                                ; type predicate
    117   (name        fn-ctx-name)              ; read-only
    118   (return-type fn-ctx-return-type)       ; read-only; presumably a ctype — TODO confirm
    119   (params      fn-ctx-params)            ; read-only
    120   (variadic?   fn-ctx-variadic?)         ; read-only
    121   (labels      fn-ctx-labels      fn-ctx-labels-set!)) ; the only mutable field
    122 
    123 ;; --------------------------------------------------------------------
    124 ;; pstate — parser state. Owned by parse.scm; read-only to cg.
    125 ;; --------------------------------------------------------------------
    126 (define-record-type pstate   ; note: accessors use the short ps- prefix, not pstate-
    127   (%pstate toks scope tags loops fn-ctx typedefs cg) ; constructor; arguments in field order
    128   pstate?                                  ; type predicate
    129   (toks     ps-toks      ps-toks-set!)     ; mutable
    130   (scope    ps-scope     ps-scope-set!)    ; mutable
    131   (tags     ps-tags      ps-tags-set!)     ; mutable
    132   (loops    ps-loops     ps-loops-set!)    ; mutable; presumably a stack of loop-ctx — TODO confirm
    133   (fn-ctx   ps-fn-ctx    ps-fn-ctx-set!)   ; mutable; presumably a fn-ctx or #f — TODO confirm
    134   (typedefs ps-typedefs  ps-typedefs-set!) ; mutable
    135   (cg       ps-cg))                        ; no setter: fixed at construction
    136 
    137 ;; --------------------------------------------------------------------
    138 ;; cg — codegen state. Owned by cg.scm.
    139 ;; --------------------------------------------------------------------
    140 (define-record-type cg
    141   (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-buf prologue-buf max-outgoing)
    142   cg?                                     ; type predicate
    143   (text         cg-text)                  ; no setter: binding fixed at construction
    144   (data         cg-data)                  ; no setter: binding fixed at construction
    145   (bss          cg-bss)                   ; no setter: binding fixed at construction
    146   (vstack       cg-vstack       cg-vstack-set!)       ; mutable; holds opnd records per the opnd header
    147   (frame-hi     cg-frame-hi     cg-frame-hi-set!)     ; mutable
    148   (label-ctr    cg-label-ctr    cg-label-ctr-set!)    ; mutable
    149   (str-pool     cg-str-pool     cg-str-pool-set!)     ; mutable
    150   (globals      cg-globals      cg-globals-set!)      ; mutable
    151   (fn-buf       cg-fn-buf       cg-fn-buf-set!)       ; mutable
    152   (prologue-buf cg-prologue-buf cg-prologue-buf-set!) ; mutable
    153   (max-outgoing cg-max-outgoing cg-max-outgoing-set!)) ; mutable
    154 
    155 ;; --------------------------------------------------------------------
    156 ;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1.
    157 ;; --------------------------------------------------------------------
    158 
    159 ;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol.
    160 (define %keyword-alist     ; quoted alist of (spelling . symbol) pairs
    161   '(;; storage
    162     ("auto" . auto) ("register" . register) ("static" . static)
    163     ("extern" . extern) ("typedef" . typedef)
    164     ;; qualifiers (parsed and discarded by parse)
    165     ("const" . const) ("volatile" . volatile) ("restrict" . restrict)
    166     ("inline" . inline)
    167     ;; type specifiers
    168     ("void" . void) ("char" . char) ("short" . short)
    169     ("int" . int) ("long" . long)
    170     ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool)
    171     ;; rejected type specifiers (KW so diagnostics are crisp)
    172     ("float" . float) ("double" . double)
    173     ;; aggregates
    174     ("struct" . struct) ("union" . union) ("enum" . enum)
    175     ;; statements
    176     ("if" . if) ("else" . else)
    177     ("while" . while) ("do" . do) ("for" . for)
    178     ("switch" . switch) ("case" . case) ("default" . default)
    179     ("break" . break) ("continue" . continue)
    180     ("return" . return) ("goto" . goto)
    181     ;; operators
    182     ("sizeof" . sizeof)
    183     ;; reserved-and-rejected (KW so diagnostics are crisp)
    184     ("_Generic" . _Generic) ("_Atomic" . _Atomic)
    185     ("_Thread_local" . _Thread_local)
    186     ("_Alignof" . _Alignof) ("_Alignas" . _Alignas)
    187     ("_Static_assert" . _Static_assert)
    188     ("_Complex" . _Complex) ("_Imaginary" . _Imaginary)))  ; every symbol above matches its spelling
    189 
    190 ;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol.
    191 ;; Listed longest-match-first; the lexer scans this list in order.
    192 ;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents.
    193 (define %punct-alist       ; quoted alist of (spelling . symbol); entry ORDER is significant
    194   '(;; 4-byte
    195     ("%:%:" . paste)         ; digraph spelling of ##, hence the same symbol
    196     ;; 3-byte
    197     ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq)
    198     ;; 2-byte
    199     ("##" . paste) ("->" . arrow)
    200     ("++" . inc) ("--" . dec)
    201     ("<<" . shl) (">>" . shr)
    202     ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne)
    203     ("&&" . land) ("||" . lor)
    204     ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq)
    205     ("/=" . slash-eq) ("%=" . pct-eq)
    206     ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq)
    207     ;; digraphs (mapped to the standard equivalent symbol)
    208     ("<:" . lbrack) (":>" . rbrack)
    209     ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash)
    210     ;; 1-byte
    211     ("[" . lbrack) ("]" . rbrack)
    212     ("(" . lparen) (")" . rparen)
    213     ("{" . lbrace) ("}" . rbrace)
    214     ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark)
    215     ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct)
    216     ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang)
    217     ("<" . lt) (">" . gt) ("=" . assign)
    218     ("#" . hash)))           ; 1-byte entries must stay last so longer spellings match first
    218     ("#" . hash)))