M1pp.c - boot2 - Playing with the bootstrap

M1pp.c (48376B)
      1 /*
      2  * Tiny single-pass M1 macro expander.
      3  *
      4  * Syntax:
      5  *   %macro NAME(a, b)
      6  *   ... body ...
      7  *   %endm
      8  *
      9  *   %struct NAME { f1 f2 ... }   fixed-layout 8-byte-field aggregate
     10  *   %enum   NAME { l1 l2 ... }   incrementing integer constants
     11  *
     12  *   %NAME(x, y)      function-like macro call
     13  *   ##               token pasting inside macro bodies
     14  *   !(expr)          evaluate an integer S-expression, emit LE 8-bit hex
     15  *   @(expr)          evaluate an integer S-expression, emit LE 16-bit hex
     16  *   %(expr)          evaluate an integer S-expression, emit LE 32-bit hex
     17  *   $(expr)          evaluate an integer S-expression, emit LE 64-bit hex
     18  *   %select(c,t,e)   evaluate condition S-expression; expand t if nonzero else e
     19  *   %str(IDENT)      stringify a single WORD token into a "..."-quoted literal
     20  *
     21  * Expression syntax is intentionally Lisp-shaped:
     22  *   atoms: decimal or 0x-prefixed integer literals
     23  *   calls: (+ a b), (- a b), (* a b), (/ a b), (% a b), (<< a b), (>> a b)
     24  *          (& a b), (| a b), (^ a b), (~ a), (= a b), (!= a b),
     25  *          (< a b), (<= a b), (> a b), (>= a b)
     26  *
     27  * Flow:
     28  *   1. lex_source(): scan input_buf into source_tokens[]. Tokens are words,
     29  *      strings, newlines, parens, braces, commas, and ## paste markers.
     30  *      Whitespace (excluding newlines) is dropped; # and ; comments are dropped.
     31  *
     32  *   2. process_tokens(): main loop driven by a stream stack (streams[]).
     33  *      The source token array is pushed as the initial stream. Each iteration
     34  *      pops a token from the top stream:
     35  *
     36  *        %macro NAME(p,...) / %endm at line-start
     37  *          -> define_macro(): consume header + body tokens into macros[] and
     38  *             macro_body_tokens[]; register name and param list.
     39  *
     40  *        !(e) / @(e) / %(e) / $(e) / %select(c,t,e)
     41  *          -> expand_builtin_call(): parse arg spans, eval S-expression(s) via
     42  *             eval_expr_range(), emit LE hex or push the chosen token span.
     43  *
     44  *        %NAME(...) matching a defined macro
     45  *          -> expand_call() -> expand_macro_tokens(): substitute arguments,
     46  *             apply ## paste via paste_pool_range(), write result into
     47  *             expand_pool[], then push that slice as a new stream (rescan).
     48  *
     49  *        Anything else
     50  *          -> emit_token() / emit_newline() directly into output_buf.
     51  *
     52  *      When a stream is exhausted it is popped; pool_used is rewound to the
     53  *      stream's pool_mark, reclaiming the expand_pool space it used.
     54  *
     55  *   3. Write output_buf to the output file.
     56  *
     57  * Notes:
     58  *   - Macros are define-before-use. There is no prescan.
     59  *   - Expansion rescans by pushing expanded tokens back through the same loop.
     60  *   - There is no cycle detection. Recursive macros will loop until a limit.
     61  *   - Only recognized %NAME(...) calls expand. Other text passes through.
     62  *   - Output formatting is normalized to tokens plus '\n', not preserved.
     63  */
     64 
     65 #include <errno.h>
     66 #include <stdio.h>
     67 #include <stdlib.h>
     68 #include <string.h>
     69 
     70 #define MAX_INPUT             262144
     71 #define MAX_OUTPUT            524288
     72 #define MAX_TEXT              524288
     73 #define MAX_TOKENS            65536
     74 #define MAX_MACROS            512
     75 #define MAX_PARAMS            16
     76 #define MAX_MACRO_BODY_TOKENS MAX_TOKENS
     77 #define MAX_EXPAND            65536
     78 #define MAX_STACK             64
     79 #define MAX_EXPR_FRAMES       256
     80 #define MAX_SCOPE_DEPTH       32
     81 
     82 enum {
     83     TOK_WORD,
     84     TOK_STRING,
     85     TOK_NEWLINE,
     86     TOK_LPAREN,
     87     TOK_RPAREN,
     88     TOK_COMMA,
     89     TOK_PASTE,
     90     TOK_LBRACE,
     91     TOK_RBRACE
     92 };
     93 
     94 enum ExprOp {
     95     EXPR_ADD,
     96     EXPR_SUB,
     97     EXPR_MUL,
     98     EXPR_DIV,
     99     EXPR_MOD,
    100     EXPR_SHL,
    101     EXPR_SHR,
    102     EXPR_AND,
    103     EXPR_OR,
    104     EXPR_XOR,
    105     EXPR_NOT,
    106     EXPR_EQ,
    107     EXPR_NE,
    108     EXPR_LT,
    109     EXPR_LE,
    110     EXPR_GT,
    111     EXPR_GE,
    112     EXPR_STRLEN,
    113     EXPR_INVALID
    114 };
    115 
    116 struct TextSpan {
    117     const char *ptr;
    118     int len;
    119 };
    120 
    121 struct Token {
    122     int kind;
    123     struct TextSpan text;
    124 };
    125 
    126 struct TokenSpan {
    127     struct Token *start;
    128     struct Token *end;
    129 };
    130 
    131 struct Macro {
    132     struct TextSpan name;
    133     int param_count;
    134     struct TextSpan params[MAX_PARAMS];
    135     struct Token *body_start;
    136     struct Token *body_end;
    137 };
    138 
    139 struct Stream {
    140     struct Token *start;
    141     struct Token *end;
    142     struct Token *pos;
    143     int line_start;
    144     int pool_mark;
    145 };
    146 
    147 struct ExprFrame {
    148     enum ExprOp op;
    149     long long args[MAX_PARAMS];
    150     int argc;
    151 };
    152 
    153 static char input_buf[MAX_INPUT + 1];
    154 static char output_buf[MAX_OUTPUT + 1];
    155 static char text_buf[MAX_TEXT];
    156 
    157 static struct Token source_tokens[MAX_TOKENS];
    158 static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS];
    159 static struct Token expand_pool[MAX_EXPAND];
    160 static struct Macro macros[MAX_MACROS];
    161 static struct Stream streams[MAX_STACK];
    162 static struct TextSpan scope_stack[MAX_SCOPE_DEPTH];
    163 
    164 static int text_used;
    165 static int source_count;
    166 static int macro_count;
    167 static int macro_body_used;
    168 static int pool_used;
    169 static int output_used;
    170 static int output_need_space;
    171 static int stream_top;
    172 static int next_expansion_id;
    173 static int scope_depth;
    174 
    175 static struct Token *arg_starts[MAX_PARAMS];
    176 static struct Token *arg_ends[MAX_PARAMS];
    177 static int arg_count;
    178 static struct Token *call_end_pos;
    179 
    180 static const char *error_msg;
    181 
    182 static int fail(const char *msg)
    183 {
    184     error_msg = msg;
    185     return 0;
    186 }
    187 
    188 static int is_space_no_nl(int c)
    189 {
    190     return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v';
    191 }
    192 
    193 static char *append_text_len(const char *s, int len)
    194 {
    195     int start;
    196 
    197     if (text_used + len + 1 > MAX_TEXT) {
    198         fail("text overflow");
    199         return NULL;
    200     }
    201     start = text_used;
    202     memcpy(text_buf + text_used, s, (size_t)len);
    203     text_used += len;
    204     text_buf[text_used++] = '\0';
    205     return text_buf + start;
    206 }
    207 
    208 static int push_token(struct Token *buf, int *count, int max_count,
    209                       int kind, struct TextSpan text)
    210 {
    211     if (*count >= max_count) {
    212         return fail("token overflow");
    213     }
    214     buf[*count].kind = kind;
    215     buf[*count].text = text;
    216     *count += 1;
    217     return 1;
    218 }
    219 
    220 static int push_pool_token(struct Token tok)
    221 {
    222     if (pool_used >= MAX_EXPAND) {
    223         return fail("expansion overflow");
    224     }
    225     expand_pool[pool_used++] = tok;
    226     return 1;
    227 }
    228 
    229 static int token_text_eq(const struct Token *tok, const char *s)
    230 {
    231     int len = (int)strlen(s);
    232 
    233     return tok->text.len == len &&
    234            memcmp(tok->text.ptr, s, (size_t)len) == 0;
    235 }
    236 
    237 static int span_eq_token(struct TextSpan span, const struct Token *tok)
    238 {
    239     return span.len == tok->text.len &&
    240            memcmp(span.ptr, tok->text.ptr, (size_t)span.len) == 0;
    241 }
    242 
    243 static int lex_source(const char *src)
    244 {
    245     int i = 0;
    246 
    247     while (src[i] != '\0') {
    248         int start;
    249         int len;
    250 
    251         if (is_space_no_nl((unsigned char)src[i])) {
    252             i++;
    253             continue;
    254         }
    255         if (src[i] == '\n') {
    256             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    257                             TOK_NEWLINE, (struct TextSpan){src + i, 1})) {
    258                 return 0;
    259             }
    260             i++;
    261             continue;
    262         }
    263         if (src[i] == '"' || src[i] == '\'') {
    264             int quote = src[i];
    265 
    266             start = i;
    267             i++;
    268             while (src[i] != '\0' && src[i] != quote) {
    269                 i++;
    270             }
    271             if (src[i] == quote) {
    272                 i++;
    273             }
    274             len = i - start;
    275             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    276                             TOK_STRING, (struct TextSpan){src + start, len})) {
    277                 return 0;
    278             }
    279             continue;
    280         }
    281         if (src[i] == '#' && src[i + 1] == '#') {
    282             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    283                             TOK_PASTE, (struct TextSpan){src + i, 2})) {
    284                 return 0;
    285             }
    286             i += 2;
    287             continue;
    288         }
    289         if (src[i] == '#' || src[i] == ';') {
    290             while (src[i] != '\0' && src[i] != '\n') {
    291                 i++;
    292             }
    293             continue;
    294         }
    295         if (src[i] == '(') {
    296             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    297                             TOK_LPAREN, (struct TextSpan){src + i, 1})) {
    298                 return 0;
    299             }
    300             i++;
    301             continue;
    302         }
    303         if (src[i] == ')') {
    304             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    305                             TOK_RPAREN, (struct TextSpan){src + i, 1})) {
    306                 return 0;
    307             }
    308             i++;
    309             continue;
    310         }
    311         if (src[i] == ',') {
    312             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    313                             TOK_COMMA, (struct TextSpan){src + i, 1})) {
    314                 return 0;
    315             }
    316             i++;
    317             continue;
    318         }
    319         if (src[i] == '{') {
    320             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    321                             TOK_LBRACE, (struct TextSpan){src + i, 1})) {
    322                 return 0;
    323             }
    324             i++;
    325             continue;
    326         }
    327         if (src[i] == '}') {
    328             if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    329                             TOK_RBRACE, (struct TextSpan){src + i, 1})) {
    330                 return 0;
    331             }
    332             i++;
    333             continue;
    334         }
    335 
    336         start = i;
    337         while (src[i] != '\0' &&
    338                !is_space_no_nl((unsigned char)src[i]) &&
    339                src[i] != '\n' &&
    340                src[i] != '#' &&
    341                src[i] != ';' &&
    342                src[i] != '(' &&
    343                src[i] != ')' &&
    344                src[i] != ',' &&
    345                src[i] != '{' &&
    346                src[i] != '}' &&
    347                !(src[i] == '#' && src[i + 1] == '#')) {
    348             i++;
    349         }
    350         len = i - start;
    351         if (!push_token(source_tokens, &source_count, MAX_TOKENS,
    352                         TOK_WORD, (struct TextSpan){src + start, len})) {
    353             return 0;
    354         }
    355     }
    356 
    357     return 1;
    358 }
    359 
    360 static const struct Macro *find_macro(const struct Token *tok)
    361 {
    362     int i;
    363 
    364     if (tok->kind != TOK_WORD || tok->text.len < 2) {
    365         return NULL;
    366     }
    367     if (tok->text.ptr[0] != '%') {
    368         return NULL;
    369     }
    370     for (i = 0; i < macro_count; i++) {
    371         if (macros[i].name.len == tok->text.len - 1 &&
    372             memcmp(tok->text.ptr + 1,
    373                    macros[i].name.ptr,
    374                    (size_t)macros[i].name.len) == 0) {
    375             return &macros[i];
    376         }
    377     }
    378     return NULL;
    379 }
    380 
    381 static int find_param(const struct Macro *m, const struct Token *tok)
    382 {
    383     int i;
    384 
    385     if (tok->kind != TOK_WORD) {
    386         return 0;
    387     }
    388     for (i = 0; i < m->param_count; i++) {
    389         if (span_eq_token(m->params[i], tok)) {
    390             return i + 1;
    391         }
    392     }
    393     return 0;
    394 }
    395 
    396 static int emit_newline(void)
    397 {
    398     if (output_used + 1 >= MAX_OUTPUT) {
    399         return fail("output overflow");
    400     }
    401     output_buf[output_used++] = '\n';
    402     output_need_space = 0;
    403     return 1;
    404 }
    405 
    406 static int emit_scoped_label(const struct Token *tok, int skip, char sigil)
    407 {
    408     /* Rewrite `::name` or `&::name` against the current scope stack.
    409      * skip is the number of leading chars to drop (`::` -> 2, `&::` -> 3);
    410      * sigil is the single-char prefix to emit (`:` for definitions, `&`
    411      * for references). With a non-empty scope stack the output is
    412      * sigil + scope1 + "__" + ... + scopeN + "__" + name; with an empty
    413      * stack it degrades to sigil + name (pass-through). */
    414     int name_len = tok->text.len - skip;
    415     int i;
    416 
    417     if (name_len <= 0) {
    418         return fail("bad scope label");
    419     }
    420 
    421     if (output_need_space) {
    422         if (output_used + 1 >= MAX_OUTPUT) {
    423             return fail("output overflow");
    424         }
    425         output_buf[output_used++] = ' ';
    426     }
    427 
    428     if (output_used + 1 >= MAX_OUTPUT) {
    429         return fail("output overflow");
    430     }
    431     output_buf[output_used++] = sigil;
    432 
    433     for (i = 0; i < scope_depth; i++) {
    434         int span_len = scope_stack[i].len;
    435         if (output_used + span_len + 2 >= MAX_OUTPUT) {
    436             return fail("output overflow");
    437         }
    438         memcpy(output_buf + output_used, scope_stack[i].ptr,
    439                (size_t)span_len);
    440         output_used += span_len;
    441         output_buf[output_used++] = '_';
    442         output_buf[output_used++] = '_';
    443     }
    444 
    445     if (output_used + name_len >= MAX_OUTPUT) {
    446         return fail("output overflow");
    447     }
    448     memcpy(output_buf + output_used, tok->text.ptr + skip, (size_t)name_len);
    449     output_used += name_len;
    450     output_need_space = 1;
    451     return 1;
    452 }
    453 
    454 static int emit_token(const struct Token *tok)
    455 {
    456     if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) {
    457         return 1;
    458     }
    459     if (tok->kind == TOK_WORD && tok->text.len >= 2 &&
    460         tok->text.ptr[0] == ':' && tok->text.ptr[1] == ':') {
    461         return emit_scoped_label(tok, 2, ':');
    462     }
    463     if (tok->kind == TOK_WORD && tok->text.len >= 3 &&
    464         tok->text.ptr[0] == '&' &&
    465         tok->text.ptr[1] == ':' && tok->text.ptr[2] == ':') {
    466         return emit_scoped_label(tok, 3, '&');
    467     }
    468     if (output_need_space) {
    469         if (output_used + 1 >= MAX_OUTPUT) {
    470             return fail("output overflow");
    471         }
    472         output_buf[output_used++] = ' ';
    473     }
    474     if (output_used + tok->text.len >= MAX_OUTPUT) {
    475         return fail("output overflow");
    476     }
    477     memcpy(output_buf + output_used, tok->text.ptr,
    478            (size_t)tok->text.len);
    479     output_used += tok->text.len;
    480     output_need_space = 1;
    481     return 1;
    482 }
    483 
    484 static int push_stream_span(struct TokenSpan span, int pool_mark)
    485 {
    486     struct Stream *s;
    487 
    488     if (stream_top >= MAX_STACK) {
    489         return fail("stream overflow");
    490     }
    491     s = &streams[stream_top++];
    492     s->start = span.start;
    493     s->end = span.end;
    494     s->pos = span.start;
    495     s->line_start = 1;
    496     s->pool_mark = pool_mark;
    497     return 1;
    498 }
    499 
    500 static struct Stream *current_stream(void)
    501 {
    502     if (stream_top <= 0) {
    503         return NULL;
    504     }
    505     return &streams[stream_top - 1];
    506 }
    507 
    508 static void pop_stream(void)
    509 {
    510     if (stream_top <= 0) {
    511         return;
    512     }
    513     stream_top--;
    514     if (streams[stream_top].pool_mark >= 0) {
    515         pool_used = streams[stream_top].pool_mark;
    516     }
    517 }
    518 
    519 static int copy_span_to_pool(struct TokenSpan span)
    520 {
    521     struct Token *tok;
    522 
    523     for (tok = span.start; tok < span.end; tok++) {
    524         if (!push_pool_token(*tok)) {
    525             return 0;
    526         }
    527     }
    528     return 1;
    529 }
    530 
    531 static int push_pool_stream_from_mark(int mark)
    532 {
    533     if (pool_used == mark) {
    534         pool_used = mark;
    535         return 1;
    536     }
    537     return push_stream_span((struct TokenSpan){expand_pool + mark, expand_pool + pool_used},
    538                             mark);
    539 }
    540 
    541 static void skip_expr_newlines(struct Token **pos, struct Token *end)
    542 {
    543     while (*pos < end && (*pos)->kind == TOK_NEWLINE) {
    544         *pos += 1;
    545     }
    546 }
    547 
    548 static int emit_decimal_text(long long value, struct TextSpan *out)
    549 {
    550     /* Render a non-negative integer as decimal into text_buf and
    551      * return the span. No snprintf; plain reverse-fill. */
    552     char digits[24];
    553     int digit_count = 0;
    554     long long v = value;
    555     int start;
    556     int i;
    557 
    558     if (v < 0) {
    559         return fail("bad directive");
    560     }
    561     if (v == 0) {
    562         digits[digit_count++] = '0';
    563     } else {
    564         while (v > 0) {
    565             digits[digit_count++] = (char)('0' + (v % 10));
    566             v /= 10;
    567         }
    568     }
    569 
    570     if (text_used + digit_count + 1 > MAX_TEXT) {
    571         return fail("text overflow");
    572     }
    573     start = text_used;
    574     for (i = digit_count - 1; i >= 0; i--) {
    575         text_buf[text_used++] = digits[i];
    576     }
    577     text_buf[text_used++] = '\0';
    578     out->ptr = text_buf + start;
    579     out->len = digit_count;
    580     return 1;
    581 }
    582 
    583 static int emit_dotted_name(struct TextSpan base, const char *suffix,
    584                             int suffix_len, struct TextSpan *out)
    585 {
    586     int total = base.len + 1 + suffix_len;
    587     int start;
    588 
    589     if (text_used + total + 1 > MAX_TEXT) {
    590         return fail("text overflow");
    591     }
    592     start = text_used;
    593     memcpy(text_buf + text_used, base.ptr, (size_t)base.len);
    594     text_used += base.len;
    595     text_buf[text_used++] = '.';
    596     memcpy(text_buf + text_used, suffix, (size_t)suffix_len);
    597     text_used += suffix_len;
    598     text_buf[text_used++] = '\0';
    599     out->ptr = text_buf + start;
    600     out->len = total;
    601     return 1;
    602 }
    603 
    604 static int define_fielded_macro(struct TextSpan base, const char *suffix,
    605                                 int suffix_len, long long value)
    606 {
    607     struct Macro *m;
    608     struct Token body_tok;
    609 
    610     if (macro_count >= MAX_MACROS) {
    611         return fail("too many macros");
    612     }
    613     if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
    614         return fail("macro body overflow");
    615     }
    616     m = &macros[macro_count];
    617     memset(m, 0, sizeof(*m));
    618     if (!emit_dotted_name(base, suffix, suffix_len, &m->name)) {
    619         return 0;
    620     }
    621     m->param_count = 0;
    622     body_tok.kind = TOK_WORD;
    623     if (!emit_decimal_text(value, &body_tok.text)) {
    624         return 0;
    625     }
    626     m->body_start = macro_body_tokens + macro_body_used;
    627     macro_body_tokens[macro_body_used++] = body_tok;
    628     m->body_end = macro_body_tokens + macro_body_used;
    629     macro_count++;
    630     return 1;
    631 }
    632 
    633 static int define_fielded(struct Stream *s, long long stride,
    634                           const char *total_name, int total_name_len)
    635 {
    636     /* Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` and
    637      * synthesizes N+1 zero-parameter macros:
    638      *   NAME.field_k  -> k * stride
    639      *   NAME.<total>  -> N * stride    (SIZE for struct, COUNT for enum) */
    640     struct TextSpan base;
    641     long long index = 0;
    642 
    643     s->pos++;
    644     if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
    645         return fail("bad directive");
    646     }
    647     base = s->pos->text;
    648     s->pos++;
    649 
    650     while (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
    651         s->pos++;
    652     }
    653     if (s->pos >= s->end || s->pos->kind != TOK_LBRACE) {
    654         return fail("bad directive");
    655     }
    656     s->pos++;
    657 
    658     for (;;) {
    659         while (s->pos < s->end &&
    660                (s->pos->kind == TOK_COMMA || s->pos->kind == TOK_NEWLINE)) {
    661             s->pos++;
    662         }
    663         if (s->pos >= s->end) {
    664             return fail("unterminated directive");
    665         }
    666         if (s->pos->kind == TOK_RBRACE) {
    667             s->pos++;
    668             break;
    669         }
    670         if (s->pos->kind != TOK_WORD) {
    671             return fail("bad directive");
    672         }
    673         if (!define_fielded_macro(base, s->pos->text.ptr, s->pos->text.len,
    674                                   index * stride)) {
    675             return 0;
    676         }
    677         s->pos++;
    678         index++;
    679     }
    680 
    681     if (!define_fielded_macro(base, total_name, total_name_len, index * stride)) {
    682         return 0;
    683     }
    684 
    685     while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
    686         s->pos++;
    687     }
    688     if (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
    689         s->pos++;
    690     }
    691     s->line_start = 1;
    692     return 1;
    693 }
    694 
    695 static int define_macro(struct Stream *s)
    696 {
    697     struct Macro *m;
    698     int line_start;
    699 
    700     if (macro_count >= MAX_MACROS) {
    701         return fail("too many macros");
    702     }
    703     if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
    704         return fail("macro body overflow");
    705     }
    706 
    707     m = &macros[macro_count];
    708     memset(m, 0, sizeof(*m));
    709     s->pos++;
    710 
    711     if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
    712         return fail("bad macro header");
    713     }
    714     m->name = s->pos->text;
    715     s->pos++;
    716 
    717     if (s->pos >= s->end || s->pos->kind != TOK_LPAREN) {
    718         return fail("bad macro header");
    719     }
    720     s->pos++;
    721 
    722     if (s->pos < s->end && s->pos->kind != TOK_RPAREN) {
    723         while (1) {
    724             if (m->param_count >= MAX_PARAMS) {
    725                 return fail("bad macro header");
    726             }
    727             if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
    728                 return fail("bad macro header");
    729             }
    730             m->params[m->param_count] = s->pos->text;
    731             m->param_count++;
    732             s->pos++;
    733             if (s->pos < s->end && s->pos->kind == TOK_COMMA) {
    734                 s->pos++;
    735                 continue;
    736             }
    737             break;
    738         }
    739     }
    740 
    741     if (s->pos >= s->end || s->pos->kind != TOK_RPAREN) {
    742         return fail("bad macro header");
    743     }
    744     s->pos++;
    745 
    746     if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) {
    747         return fail("bad macro header");
    748     }
    749     s->pos++;
    750 
    751     m->body_start = macro_body_tokens + macro_body_used;
    752     line_start = 1;
    753     while (s->pos < s->end) {
    754         if (line_start &&
    755             s->pos->kind == TOK_WORD &&
    756             token_text_eq(s->pos, "%endm")) {
    757             while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
    758                 s->pos++;
    759             }
    760             if (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
    761                 s->pos++;
    762             }
    763             m->body_end = macro_body_tokens + macro_body_used;
    764             s->line_start = 1;
    765             macro_count++;
    766             return 1;
    767         }
    768         if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
    769             return fail("macro body overflow");
    770         }
    771         macro_body_tokens[macro_body_used++] = *s->pos;
    772         line_start = (s->pos->kind == TOK_NEWLINE);
    773         s->pos++;
    774     }
    775 
    776     return fail("unterminated macro");
    777 }
    778 
    779 static int parse_args(struct Token *lparen, struct Token *limit)
    780 {
    781     struct Token *tok = lparen + 1;
    782     struct Token *arg_start = tok;
    783     int depth = 1;
    784     int brace_depth = 0;
    785     int arg_index = 0;
    786 
    787     while (tok < limit) {
    788         if (tok->kind == TOK_LPAREN) {
    789             depth++;
    790             tok++;
    791             continue;
    792         }
    793         if (tok->kind == TOK_RPAREN) {
    794             depth--;
    795             if (depth == 0) {
    796                 if (brace_depth != 0) {
    797                     return fail("unbalanced braces");
    798                 }
    799                 if (arg_start == tok && arg_index == 0) {
    800                     arg_count = 0;
    801                 } else {
    802                     if (arg_index >= MAX_PARAMS) {
    803                         return fail("too many args");
    804                     }
    805                     arg_starts[arg_index] = arg_start;
    806                     arg_ends[arg_index] = tok;
    807                     arg_count = arg_index + 1;
    808                 }
    809                 call_end_pos = tok + 1;
    810                 return 1;
    811             }
    812             tok++;
    813             continue;
    814         }
    815         if (tok->kind == TOK_LBRACE) {
    816             brace_depth++;
    817             tok++;
    818             continue;
    819         }
    820         if (tok->kind == TOK_RBRACE) {
    821             if (brace_depth <= 0) {
    822                 return fail("unbalanced braces");
    823             }
    824             brace_depth--;
    825             tok++;
    826             continue;
    827         }
    828         if (tok->kind == TOK_COMMA && depth == 1 && brace_depth == 0) {
    829             if (arg_index >= MAX_PARAMS) {
    830                 return fail("too many args");
    831             }
    832             arg_starts[arg_index] = arg_start;
    833             arg_ends[arg_index] = tok;
    834             arg_index++;
    835             arg_start = tok + 1;
    836             tok++;
    837             continue;
    838         }
    839         tok++;
    840     }
    841 
    842     return fail("unterminated macro call");
    843 }
    844 
    845 static int arg_is_braced(struct TokenSpan span)
    846 {
    847     struct Token *tok;
    848     int depth;
    849 
    850     if (span.end - span.start < 2) {
    851         return 0;
    852     }
    853     if (span.start->kind != TOK_LBRACE ||
    854         (span.end - 1)->kind != TOK_RBRACE) {
    855         return 0;
    856     }
    857     depth = 0;
    858     for (tok = span.start; tok < span.end; tok++) {
    859         if (tok->kind == TOK_LBRACE) {
    860             depth++;
    861         } else if (tok->kind == TOK_RBRACE) {
    862             depth--;
    863             if (depth == 0 && tok != span.end - 1) {
    864                 return 0;
    865             }
    866         }
    867     }
    868     return depth == 0;
    869 }
    870 
    871 static int copy_arg_tokens_to_pool(struct TokenSpan span)
    872 {
    873     if (span.start == span.end) {
    874         return fail("bad macro argument");
    875     }
    876     if (arg_is_braced(span)) {
    877         struct TokenSpan inner;
    878         inner.start = span.start + 1;
    879         inner.end = span.end - 1;
    880         if (inner.start == inner.end) {
    881             return 1;
    882         }
    883         return copy_span_to_pool(inner);
    884     }
    885     return copy_span_to_pool(span);
    886 }
    887 
    888 static int copy_paste_arg_to_pool(struct TokenSpan span)
    889 {
    890     if (arg_is_braced(span)) {
    891         return fail("bad macro argument");
    892     }
    893     if (span.end - span.start != 1) {
    894         return fail("bad macro argument");
    895     }
    896     return copy_span_to_pool(span);
    897 }
    898 
    899 static int append_pasted_token(struct Token *dst,
    900                                const struct Token *left,
    901                                const struct Token *right)
    902 {
    903     char tmp[512];
    904     char *text_ptr;
    905     int n;
    906 
    907     n = snprintf(tmp, sizeof(tmp), "%.*s%.*s",
    908                  left->text.len, left->text.ptr,
    909                  right->text.len, right->text.ptr);
    910     if (n < 0 || n >= (int)sizeof(tmp)) {
    911         return fail("bad paste");
    912     }
    913     text_ptr = append_text_len(tmp, n);
    914     if (text_ptr == NULL) {
    915         return 0;
    916     }
    917     dst->kind = TOK_WORD;
    918     dst->text.ptr = text_ptr;
    919     dst->text.len = n;
    920     return 1;
    921 }
    922 
    923 static int paste_pool_range(int mark)
    924 {
    925     struct Token *start = expand_pool + mark;
    926     struct Token *in = start;
    927     struct Token *out = start;
    928     struct Token *end = expand_pool + pool_used;
    929 
    930     while (in < end) {
    931         if (in->kind == TOK_PASTE) {
    932             if (out == start || in + 1 >= end) {
    933                 pool_used = mark;
    934                 return fail("bad paste");
    935             }
    936             if ((out - 1)->kind == TOK_NEWLINE ||
    937                 (out - 1)->kind == TOK_PASTE ||
    938                 (in + 1)->kind == TOK_NEWLINE ||
    939                 (in + 1)->kind == TOK_PASTE) {
    940                 pool_used = mark;
    941                 return fail("bad paste");
    942             }
    943             if (!append_pasted_token(out - 1, out - 1, in + 1)) {
    944                 pool_used = mark;
    945                 return 0;
    946             }
    947             in += 2;
    948             continue;
    949         }
    950         if (out != in) {
    951             *out = *in;
    952         }
    953         out++;
    954         in++;
    955     }
    956 
    957     pool_used = (int)(out - expand_pool);
    958     return 1;
    959 }
    960 
    961 static int is_local_label_token(const struct Token *tok)
    962 {
    963     if (tok->kind != TOK_WORD || tok->text.len < 3) {
    964         return 0;
    965     }
    966     if (tok->text.ptr[0] != ':' && tok->text.ptr[0] != '&') {
    967         return 0;
    968     }
    969     if (tok->text.ptr[1] != '@') {
    970         return 0;
    971     }
    972     return 1;
    973 }
    974 
    975 static int push_local_label_token(const struct Token *tok, int expansion_id)
    976 {
    977     /* Rewrite ":@name" -> ":name__NN", "&@name" -> "&name__NN".
    978      * Build the text directly in text_buf so the resulting span is stable. */
    979     char digits[16];
    980     int digit_count = 0;
    981     int unsigned_id;
    982     int start;
    983     int total;
    984     int i;
    985     struct Token out;
    986 
    987     unsigned_id = expansion_id;
    988     if (unsigned_id == 0) {
    989         digits[digit_count++] = '0';
    990     } else {
    991         while (unsigned_id > 0) {
    992             digits[digit_count++] = (char)('0' + (unsigned_id % 10));
    993             unsigned_id /= 10;
    994         }
    995     }
    996 
    997     /* Reserve: sigil(1) + tail(len-2) + "__"(2) + digits + NUL. */
    998     total = 1 + (tok->text.len - 2) + 2 + digit_count;
    999     if (text_used + total + 1 > MAX_TEXT) {
   1000         return fail("text overflow");
   1001     }
   1002     start = text_used;
   1003     text_buf[text_used++] = tok->text.ptr[0];
   1004     memcpy(text_buf + text_used, tok->text.ptr + 2, (size_t)(tok->text.len - 2));
   1005     text_used += tok->text.len - 2;
   1006     text_buf[text_used++] = '_';
   1007     text_buf[text_used++] = '_';
   1008     for (i = digit_count - 1; i >= 0; i--) {
   1009         text_buf[text_used++] = digits[i];
   1010     }
   1011     text_buf[text_used++] = '\0';
   1012 
   1013     out.kind = TOK_WORD;
   1014     out.text.ptr = text_buf + start;
   1015     out.text.len = total;
   1016     return push_pool_token(out);
   1017 }
   1018 
   1019 static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
   1020                                const struct Macro *m, struct Token **after_out,
   1021                                int *mark_out)
   1022 {
   1023     struct Token *body_tok;
   1024     struct Token *end_pos;
   1025     int mark;
   1026     int expansion_id;
   1027 
   1028     if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN) {
   1029         if (!parse_args(call_tok + 1, limit)) {
   1030             return 0;
   1031         }
   1032         if (arg_count != m->param_count) {
   1033             return fail("wrong arg count");
   1034         }
   1035         end_pos = call_end_pos;
   1036     } else if (m->param_count == 0) {
   1037         arg_count = 0;
   1038         end_pos = call_tok + 1;
   1039     } else {
   1040         return fail("bad macro call");
   1041     }
   1042 
   1043     expansion_id = ++next_expansion_id;
   1044     mark = pool_used;
   1045     for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) {
   1046         int param_idx = find_param(m, body_tok);
   1047         int pasted = 0;
   1048         int ok;
   1049 
   1050         if (param_idx != 0) {
   1051             struct TokenSpan arg = {arg_starts[param_idx - 1], arg_ends[param_idx - 1]};
   1052             pasted = (body_tok > m->body_start && (body_tok - 1)->kind == TOK_PASTE) ||
   1053                      (body_tok + 1 < m->body_end && (body_tok + 1)->kind == TOK_PASTE);
   1054             ok = pasted ? copy_paste_arg_to_pool(arg) : copy_arg_tokens_to_pool(arg);
   1055             if (!ok) {
   1056                 pool_used = mark;
   1057                 return 0;
   1058             }
   1059             continue;
   1060         }
   1061         if (is_local_label_token(body_tok)) {
   1062             if (!push_local_label_token(body_tok, expansion_id)) {
   1063                 pool_used = mark;
   1064                 return 0;
   1065             }
   1066             continue;
   1067         }
   1068         if (!push_pool_token(*body_tok)) {
   1069             pool_used = mark;
   1070             return 0;
   1071         }
   1072     }
   1073 
   1074     if (!paste_pool_range(mark)) {
   1075         return 0;
   1076     }
   1077     *after_out = end_pos;
   1078     *mark_out = mark;
   1079     return 1;
   1080 }
   1081 
   1082 static int parse_int_token(const struct Token *tok, long long *out)
   1083 {
   1084     char tmp[128];
   1085     char *end;
   1086     unsigned long long uv;
   1087     long long sv;
   1088 
   1089     if (tok->kind != TOK_WORD || tok->text.len <= 0 || tok->text.len >= (int)sizeof(tmp)) {
   1090         return fail("bad integer");
   1091     }
   1092     memcpy(tmp, tok->text.ptr, (size_t)tok->text.len);
   1093     tmp[tok->text.len] = '\0';
   1094 
   1095     errno = 0;
   1096     if (tmp[0] == '-') {
   1097         sv = strtoll(tmp, &end, 0);
   1098         if (errno != 0 || *end != '\0') {
   1099             return fail("bad integer");
   1100         }
   1101         *out = sv;
   1102         return 1;
   1103     }
   1104 
   1105     uv = strtoull(tmp, &end, 0);
   1106     if (errno != 0 || *end != '\0') {
   1107         return fail("bad integer");
   1108     }
   1109     *out = (long long)uv;
   1110     return 1;
   1111 }
   1112 
   1113 static enum ExprOp expr_op_code(const struct Token *tok)
   1114 {
   1115     if (tok->kind != TOK_WORD) {
   1116         return EXPR_INVALID;
   1117     }
   1118     if (token_text_eq(tok, "+")) {
   1119         return EXPR_ADD;
   1120     }
   1121     if (token_text_eq(tok, "-")) {
   1122         return EXPR_SUB;
   1123     }
   1124     if (token_text_eq(tok, "*")) {
   1125         return EXPR_MUL;
   1126     }
   1127     if (token_text_eq(tok, "/")) {
   1128         return EXPR_DIV;
   1129     }
   1130     if (token_text_eq(tok, "%")) {
   1131         return EXPR_MOD;
   1132     }
   1133     if (token_text_eq(tok, "<<")) {
   1134         return EXPR_SHL;
   1135     }
   1136     if (token_text_eq(tok, ">>")) {
   1137         return EXPR_SHR;
   1138     }
   1139     if (token_text_eq(tok, "&")) {
   1140         return EXPR_AND;
   1141     }
   1142     if (token_text_eq(tok, "|")) {
   1143         return EXPR_OR;
   1144     }
   1145     if (token_text_eq(tok, "^")) {
   1146         return EXPR_XOR;
   1147     }
   1148     if (token_text_eq(tok, "~")) {
   1149         return EXPR_NOT;
   1150     }
   1151     if (token_text_eq(tok, "=")) {
   1152         return EXPR_EQ;
   1153     }
   1154     if (token_text_eq(tok, "!=")) {
   1155         return EXPR_NE;
   1156     }
   1157     if (token_text_eq(tok, "<")) {
   1158         return EXPR_LT;
   1159     }
   1160     if (token_text_eq(tok, "<=")) {
   1161         return EXPR_LE;
   1162     }
   1163     if (token_text_eq(tok, ">")) {
   1164         return EXPR_GT;
   1165     }
   1166     if (token_text_eq(tok, ">=")) {
   1167         return EXPR_GE;
   1168     }
   1169     if (token_text_eq(tok, "strlen")) {
   1170         return EXPR_STRLEN;
   1171     }
   1172     return EXPR_INVALID;
   1173 }
   1174 
   1175 static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long long *out)
   1176 {
   1177     int i;
   1178 
   1179     switch (op) {
   1180     case EXPR_ADD:
   1181         if (argc < 1) {
   1182             return fail("bad expression");
   1183         }
   1184         *out = args[0];
   1185         for (i = 1; i < argc; i++) {
   1186             *out += args[i];
   1187         }
   1188         return 1;
   1189     case EXPR_SUB:
   1190         if (argc < 1) {
   1191             return fail("bad expression");
   1192         }
   1193         *out = (argc == 1) ? -args[0] : args[0];
   1194         for (i = 1; i < argc; i++) {
   1195             *out -= args[i];
   1196         }
   1197         return 1;
   1198     case EXPR_MUL:
   1199         if (argc < 1) {
   1200             return fail("bad expression");
   1201         }
   1202         *out = args[0];
   1203         for (i = 1; i < argc; i++) {
   1204             *out *= args[i];
   1205         }
   1206         return 1;
   1207     case EXPR_DIV:
   1208         if (argc != 2 || args[1] == 0) {
   1209             return fail("bad expression");
   1210         }
   1211         *out = args[0] / args[1];
   1212         return 1;
   1213     case EXPR_MOD:
   1214         if (argc != 2 || args[1] == 0) {
   1215             return fail("bad expression");
   1216         }
   1217         *out = args[0] % args[1];
   1218         return 1;
   1219     case EXPR_SHL:
   1220         if (argc != 2) {
   1221             return fail("bad expression");
   1222         }
   1223         *out = (long long)((unsigned long long)args[0] << args[1]);
   1224         return 1;
   1225     case EXPR_SHR:
   1226         if (argc != 2) {
   1227             return fail("bad expression");
   1228         }
   1229         *out = args[0] >> args[1];
   1230         return 1;
   1231     case EXPR_AND:
   1232         if (argc < 1) {
   1233             return fail("bad expression");
   1234         }
   1235         *out = args[0];
   1236         for (i = 1; i < argc; i++) {
   1237             *out &= args[i];
   1238         }
   1239         return 1;
   1240     case EXPR_OR:
   1241         if (argc < 1) {
   1242             return fail("bad expression");
   1243         }
   1244         *out = args[0];
   1245         for (i = 1; i < argc; i++) {
   1246             *out |= args[i];
   1247         }
   1248         return 1;
   1249     case EXPR_XOR:
   1250         if (argc < 1) {
   1251             return fail("bad expression");
   1252         }
   1253         *out = args[0];
   1254         for (i = 1; i < argc; i++) {
   1255             *out ^= args[i];
   1256         }
   1257         return 1;
   1258     case EXPR_NOT:
   1259         if (argc != 1) {
   1260             return fail("bad expression");
   1261         }
   1262         *out = ~args[0];
   1263         return 1;
   1264     case EXPR_EQ:
   1265         if (argc != 2) {
   1266             return fail("bad expression");
   1267         }
   1268         *out = (args[0] == args[1]);
   1269         return 1;
   1270     case EXPR_NE:
   1271         if (argc != 2) {
   1272             return fail("bad expression");
   1273         }
   1274         *out = (args[0] != args[1]);
   1275         return 1;
   1276     case EXPR_LT:
   1277         if (argc != 2) {
   1278             return fail("bad expression");
   1279         }
   1280         *out = (args[0] < args[1]);
   1281         return 1;
   1282     case EXPR_LE:
   1283         if (argc != 2) {
   1284             return fail("bad expression");
   1285         }
   1286         *out = (args[0] <= args[1]);
   1287         return 1;
   1288     case EXPR_GT:
   1289         if (argc != 2) {
   1290             return fail("bad expression");
   1291         }
   1292         *out = (args[0] > args[1]);
   1293         return 1;
   1294     case EXPR_GE:
   1295         if (argc != 2) {
   1296             return fail("bad expression");
   1297         }
   1298         *out = (args[0] >= args[1]);
   1299         return 1;
   1300     case EXPR_STRLEN:
   1301     case EXPR_INVALID:
   1302         break;
   1303     }
   1304 
   1305     return fail("bad expression");
   1306 }
   1307 
   1308 static int eval_expr_range(struct TokenSpan span, long long *out);
   1309 
   1310 static int eval_expr_atom(struct Token *tok, struct Token *limit,
   1311                           struct Token **after_out, long long *out)
   1312 {
   1313     const struct Macro *macro;
   1314     struct Token *after;
   1315     int mark;
   1316 
   1317     macro = find_macro(tok);
   1318     if (macro != NULL &&
   1319         ((tok + 1 < limit && (tok + 1)->kind == TOK_LPAREN) ||
   1320          macro->param_count == 0)) {
   1321         if (!expand_macro_tokens(tok, limit, macro, &after, &mark)) {
   1322             return 0;
   1323         }
   1324         if (pool_used == mark) {
   1325             pool_used = mark;
   1326             return fail("bad expression");
   1327         }
   1328         if (!eval_expr_range((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, out)) {
   1329             pool_used = mark;
   1330             return 0;
   1331         }
   1332         pool_used = mark;
   1333         *after_out = after;
   1334         return 1;
   1335     }
   1336 
   1337     if (!parse_int_token(tok, out)) {
   1338         return 0;
   1339     }
   1340     *after_out = tok + 1;
   1341     return 1;
   1342 }
   1343 
   1344 static int eval_expr_range(struct TokenSpan span, long long *out)
   1345 {
   1346     struct ExprFrame frames[MAX_EXPR_FRAMES];
   1347     int frame_top = 0;
   1348     struct Token *pos = span.start;
   1349     long long value = 0;
   1350     long long result = 0;
   1351     int have_value = 0;
   1352     int have_result = 0;
   1353 
   1354     for (;;) {
   1355         if (have_value) {
   1356             if (frame_top > 0) {
   1357                 struct ExprFrame *frame = &frames[frame_top - 1];
   1358 
   1359                 if (frame->argc >= MAX_PARAMS) {
   1360                     return fail("bad expression");
   1361                 }
   1362                 frame->args[frame->argc++] = value;
   1363                 have_value = 0;
   1364                 continue;
   1365             }
   1366             if (have_result) {
   1367                 return fail("bad expression");
   1368             }
   1369             result = value;
   1370             have_result = 1;
   1371             have_value = 0;
   1372             continue;
   1373         }
   1374 
   1375         skip_expr_newlines(&pos, span.end);
   1376         if (pos >= span.end) {
   1377             break;
   1378         }
   1379 
   1380         if (pos->kind == TOK_LPAREN) {
   1381             enum ExprOp op;
   1382 
   1383             pos++;
   1384             skip_expr_newlines(&pos, span.end);
   1385             if (pos >= span.end) {
   1386                 return fail("bad expression");
   1387             }
   1388             op = expr_op_code(pos);
   1389             if (op == EXPR_INVALID) {
   1390                 return fail("bad expression");
   1391             }
   1392             pos++;
   1393             if (op == EXPR_STRLEN) {
   1394                 /* strlen is degenerate: argument is a TOK_STRING atom,
   1395                  * not a recursive expression. Handle inline and yield
   1396                  * the string's raw byte count (span.len - 2). */
   1397                 skip_expr_newlines(&pos, span.end);
   1398                 if (pos >= span.end || pos->kind != TOK_STRING) {
   1399                     return fail("bad expression");
   1400                 }
   1401                 if (pos->text.len < 2 || pos->text.ptr[0] != '"') {
   1402                     return fail("bad expression");
   1403                 }
   1404                 value = (long long)(pos->text.len - 2);
   1405                 pos++;
   1406                 skip_expr_newlines(&pos, span.end);
   1407                 if (pos >= span.end || pos->kind != TOK_RPAREN) {
   1408                     return fail("bad expression");
   1409                 }
   1410                 pos++;
   1411                 have_value = 1;
   1412                 continue;
   1413             }
   1414             if (frame_top >= MAX_EXPR_FRAMES) {
   1415                 return fail("expression overflow");
   1416             }
   1417             frames[frame_top].op = op;
   1418             frames[frame_top].argc = 0;
   1419             frame_top++;
   1420             continue;
   1421         }
   1422 
   1423         if (pos->kind == TOK_RPAREN) {
   1424             if (frame_top <= 0) {
   1425                 return fail("bad expression");
   1426             }
   1427             if (!apply_expr_op(frames[frame_top - 1].op,
   1428                                frames[frame_top - 1].args,
   1429                                frames[frame_top - 1].argc,
   1430                                &value)) {
   1431                 return 0;
   1432             }
   1433             frame_top--;
   1434             pos++;
   1435             have_value = 1;
   1436             continue;
   1437         }
   1438 
   1439         if (!eval_expr_atom(pos, span.end, &pos, &value)) {
   1440             return 0;
   1441         }
   1442         have_value = 1;
   1443     }
   1444 
   1445     if (frame_top != 0 || !have_result) {
   1446         return fail("bad expression");
   1447     }
   1448     if (pos != span.end) {
   1449         return fail("bad expression");
   1450     }
   1451 
   1452     *out = result;
   1453     return 1;
   1454 }
   1455 
   1456 static int emit_hex_value(unsigned long long value, int bytes)
   1457 {
   1458     /* Wrap the hex digits in single quotes so M0 sees a STRING-literal
   1459      * hex token, not a numeric token (which it would parse as decimal). */
   1460     char tmp[19];
   1461     static const char hex[] = "0123456789ABCDEF";
   1462     struct Token tok;
   1463     int i;
   1464     char *text_ptr;
   1465     int total_len = 2 + 2 * bytes;
   1466 
   1467     tmp[0] = '\'';
   1468     for (i = 0; i < bytes; i++) {
   1469         unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF);
   1470         tmp[1 + 2 * i] = hex[b >> 4];
   1471         tmp[1 + 2 * i + 1] = hex[b & 0x0F];
   1472     }
   1473     tmp[1 + 2 * bytes] = '\'';
   1474     tmp[total_len] = '\0';
   1475 
   1476     text_ptr = append_text_len(tmp, total_len);
   1477     if (text_ptr == NULL) {
   1478         return 0;
   1479     }
   1480     tok.kind = TOK_STRING;
   1481     tok.text.ptr = text_ptr;
   1482     tok.text.len = total_len;
   1483     return emit_token(&tok);
   1484 }
   1485 
   1486 static int expand_builtin_call(struct Stream *s, const struct Token *tok)
   1487 {
   1488     long long value;
   1489 
   1490     if (tok + 1 >= s->end || (tok + 1)->kind != TOK_LPAREN) {
   1491         return fail("bad builtin");
   1492     }
   1493     if (!parse_args((struct Token *)tok + 1, s->end)) {
   1494         return 0;
   1495     }
   1496 
   1497     if (token_text_eq(tok, "!") || token_text_eq(tok, "@") ||
   1498         token_text_eq(tok, "%") || token_text_eq(tok, "$")) {
   1499         struct TokenSpan arg;
   1500         struct Token *end_pos;
   1501         int bytes;
   1502 
   1503         if (arg_count != 1) {
   1504             return fail("bad builtin");
   1505         }
   1506         arg.start = arg_starts[0];
   1507         arg.end = arg_ends[0];
   1508         end_pos = call_end_pos;
   1509         if (!eval_expr_range(arg, &value)) {
   1510             return 0;
   1511         }
   1512         s->pos = end_pos;
   1513         s->line_start = 0;
   1514         bytes = token_text_eq(tok, "!") ? 1 :
   1515                 token_text_eq(tok, "@") ? 2 :
   1516                 token_text_eq(tok, "%") ? 4 : 8;
   1517         return emit_hex_value((unsigned long long)value, bytes);
   1518     }
   1519 
   1520     if (token_text_eq(tok, "%select")) {
   1521         struct TokenSpan cond_arg, then_arg, else_arg, chosen;
   1522         struct Token *end_pos;
   1523         int mark;
   1524 
   1525         if (arg_count != 3) {
   1526             return fail("bad builtin");
   1527         }
   1528         cond_arg.start = arg_starts[0]; cond_arg.end = arg_ends[0];
   1529         then_arg.start = arg_starts[1]; then_arg.end = arg_ends[1];
   1530         else_arg.start = arg_starts[2]; else_arg.end = arg_ends[2];
   1531         end_pos = call_end_pos;
   1532         if (!eval_expr_range(cond_arg, &value)) {
   1533             return 0;
   1534         }
   1535         chosen = (value != 0) ? then_arg : else_arg;
   1536         s->pos = end_pos;
   1537         s->line_start = 0;
   1538         if (chosen.start == chosen.end) {
   1539             return 1;
   1540         }
   1541         mark = pool_used;
   1542         if (!copy_span_to_pool(chosen)) {
   1543             pool_used = mark;
   1544             return 0;
   1545         }
   1546         return push_pool_stream_from_mark(mark);
   1547     }
   1548 
   1549     if (token_text_eq(tok, "%str")) {
   1550         struct Token *arg_tok;
   1551         struct Token *end_pos;
   1552         struct Token out_tok;
   1553         char *text_ptr;
   1554         int orig_len;
   1555         int out_len;
   1556 
   1557         if (arg_count != 1) {
   1558             return fail("bad builtin");
   1559         }
   1560         if (arg_ends[0] - arg_starts[0] != 1) {
   1561             return fail("bad builtin");
   1562         }
   1563         arg_tok = arg_starts[0];
   1564         if (arg_tok->kind != TOK_WORD) {
   1565             return fail("bad builtin");
   1566         }
   1567         end_pos = call_end_pos;
   1568 
   1569         orig_len = arg_tok->text.len;
   1570         out_len = orig_len + 2;
   1571         if (text_used + out_len + 1 > MAX_TEXT) {
   1572             return fail("text overflow");
   1573         }
   1574         text_ptr = text_buf + text_used;
   1575         text_buf[text_used++] = '"';
   1576         memcpy(text_buf + text_used, arg_tok->text.ptr, (size_t)orig_len);
   1577         text_used += orig_len;
   1578         text_buf[text_used++] = '"';
   1579         text_buf[text_used++] = '\0';
   1580 
   1581         out_tok.kind = TOK_STRING;
   1582         out_tok.text.ptr = text_ptr;
   1583         out_tok.text.len = out_len;
   1584         s->pos = end_pos;
   1585         s->line_start = 0;
   1586         return emit_token(&out_tok);
   1587     }
   1588 
   1589     return fail("bad builtin");
   1590 }
   1591 
   1592 static int expand_call(struct Stream *s, const struct Macro *macro)
   1593 {
   1594     struct Token *after;
   1595     int mark;
   1596 
   1597     if (!expand_macro_tokens(s->pos, s->end, macro, &after, &mark)) {
   1598         return 0;
   1599     }
   1600     s->pos = after;
   1601     s->line_start = 0;
   1602     return push_pool_stream_from_mark(mark);
   1603 }
   1604 
   1605 static int push_scope(struct Stream *s)
   1606 {
   1607     s->pos++;
   1608     if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
   1609         return fail("bad scope header");
   1610     }
   1611     if (scope_depth >= MAX_SCOPE_DEPTH) {
   1612         return fail("scope depth overflow");
   1613     }
   1614     scope_stack[scope_depth++] = s->pos->text;
   1615     s->pos++;
   1616     if (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
   1617         return fail("bad scope header");
   1618     }
   1619     if (s->pos < s->end) {
   1620         s->pos++;
   1621     }
   1622     s->line_start = 1;
   1623     return 1;
   1624 }
   1625 
   1626 static int pop_scope(struct Stream *s)
   1627 {
   1628     s->pos++;
   1629     if (scope_depth <= 0) {
   1630         return fail("scope underflow");
   1631     }
   1632     scope_depth--;
   1633     while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
   1634         s->pos++;
   1635     }
   1636     if (s->pos < s->end) {
   1637         s->pos++;
   1638     }
   1639     s->line_start = 1;
   1640     return 1;
   1641 }
   1642 
   1643 static int process_tokens(void)
   1644 {
   1645     if (!push_stream_span((struct TokenSpan){source_tokens, source_tokens + source_count}, -1)) {
   1646         return 0;
   1647     }
   1648 
   1649     for (;;) {
   1650         struct Stream *s;
   1651         struct Token *tok;
   1652         const struct Macro *macro;
   1653 
   1654         s = current_stream();
   1655         if (s == NULL) {
   1656             break;
   1657         }
   1658         if (s->pos >= s->end) {
   1659             pop_stream();
   1660             continue;
   1661         }
   1662 
   1663         tok = s->pos;
   1664 
   1665         if (s->line_start &&
   1666             tok->kind == TOK_WORD &&
   1667             token_text_eq(tok, "%macro")) {
   1668             if (!define_macro(s)) {
   1669                 return 0;
   1670             }
   1671             continue;
   1672         }
   1673 
   1674         if (s->line_start &&
   1675             tok->kind == TOK_WORD &&
   1676             token_text_eq(tok, "%struct")) {
   1677             if (!define_fielded(s, 8, "SIZE", 4)) {
   1678                 return 0;
   1679             }
   1680             continue;
   1681         }
   1682 
   1683         if (s->line_start &&
   1684             tok->kind == TOK_WORD &&
   1685             token_text_eq(tok, "%enum")) {
   1686             if (!define_fielded(s, 1, "COUNT", 5)) {
   1687                 return 0;
   1688             }
   1689             continue;
   1690         }
   1691 
   1692         if (s->line_start &&
   1693             tok->kind == TOK_WORD &&
   1694             token_text_eq(tok, "%scope")) {
   1695             if (!push_scope(s)) {
   1696                 return 0;
   1697             }
   1698             continue;
   1699         }
   1700 
   1701         if (s->line_start &&
   1702             tok->kind == TOK_WORD &&
   1703             token_text_eq(tok, "%endscope")) {
   1704             if (!pop_scope(s)) {
   1705                 return 0;
   1706             }
   1707             continue;
   1708         }
   1709 
   1710         if (tok->kind == TOK_NEWLINE) {
   1711             s->pos++;
   1712             s->line_start = 1;
   1713             if (!emit_newline()) {
   1714                 return 0;
   1715             }
   1716             continue;
   1717         }
   1718 
   1719         if (tok->kind == TOK_WORD &&
   1720             tok + 1 < s->end &&
   1721             (tok + 1)->kind == TOK_LPAREN &&
   1722             (token_text_eq(tok, "!") ||
   1723              token_text_eq(tok, "@") ||
   1724              token_text_eq(tok, "%") ||
   1725              token_text_eq(tok, "$") ||
   1726              token_text_eq(tok, "%select") ||
   1727              token_text_eq(tok, "%str"))) {
   1728             if (!expand_builtin_call(s, tok)) {
   1729                 return 0;
   1730             }
   1731             continue;
   1732         }
   1733 
   1734         macro = find_macro(tok);
   1735         if (macro != NULL &&
   1736             ((tok + 1 < s->end && (tok + 1)->kind == TOK_LPAREN) ||
   1737              macro->param_count == 0)) {
   1738             if (!expand_call(s, macro)) {
   1739                 return 0;
   1740             }
   1741             continue;
   1742         }
   1743 
   1744         s->pos++;
   1745         s->line_start = 0;
   1746         if (!emit_token(tok)) {
   1747             return 0;
   1748         }
   1749     }
   1750 
   1751     if (scope_depth != 0) {
   1752         return fail("scope not closed");
   1753     }
   1754 
   1755     if (output_used >= MAX_OUTPUT) {
   1756         return fail("output overflow");
   1757     }
   1758     output_buf[output_used] = '\0';
   1759     return 1;
   1760 }
   1761 
   1762 int main(int argc, char **argv)
   1763 {
   1764     FILE *in;
   1765     FILE *out;
   1766     size_t nread;
   1767 
   1768     if (argc != 3) {
   1769         fprintf(stderr, "usage: %s input.M1 output.M1\n", argv[0]);
   1770         return 1;
   1771     }
   1772 
   1773     in = fopen(argv[1], "rb");
   1774     if (in == NULL) {
   1775         perror(argv[1]);
   1776         return 1;
   1777     }
   1778     nread = fread(input_buf, 1, MAX_INPUT, in);
   1779     if (ferror(in)) {
   1780         perror(argv[1]);
   1781         fclose(in);
   1782         return 1;
   1783     }
   1784     fclose(in);
   1785     if (nread >= MAX_INPUT) {
   1786         fprintf(stderr, "input too large\n");
   1787         return 1;
   1788     }
   1789     input_buf[nread] = '\0';
   1790 
   1791     if (!lex_source(input_buf) || !process_tokens()) {
   1792         fprintf(stderr, "m1macro: %s\n", error_msg != NULL ? error_msg : "failed");
   1793         return 1;
   1794     }
   1795 
   1796     out = fopen(argv[2], "wb");
   1797     if (out == NULL) {
   1798         perror(argv[2]);
   1799         return 1;
   1800     }
   1801     if (fwrite(output_buf, 1, (size_t)output_used, out) != (size_t)output_used) {
   1802         perror(argv[2]);
   1803         fclose(out);
   1804         return 1;
   1805     }
   1806     fclose(out);
   1807     return 0;
   1808 }
	boot2 Playing with the bootstrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README