M1pp.c (48376B)
/*
 * Tiny single-pass M1 macro expander.
 *
 * Syntax:
 *   %macro NAME(a, b)
 *   ... body ...
 *   %endm
 *
 *   %struct NAME { f1 f2 ... }   fixed-layout 8-byte-field aggregate
 *   %enum NAME { l1 l2 ... }     incrementing integer constants
 *
 *   %NAME(x, y)      function-like macro call
 *   ##               token pasting inside macro bodies
 *   !(expr)          evaluate an integer S-expression, emit LE 8-bit hex
 *   @(expr)          evaluate an integer S-expression, emit LE 16-bit hex
 *   %(expr)          evaluate an integer S-expression, emit LE 32-bit hex
 *   $(expr)          evaluate an integer S-expression, emit LE 64-bit hex
 *   %select(c,t,e)   evaluate condition S-expression; expand t if nonzero else e
 *   %str(IDENT)      stringify a single WORD token into a "..."-quoted literal
 *
 *   :@name / &@name     inside a macro body: rewritten per expansion to
 *                       :name__NN / &name__NN (NN = expansion counter)
 *   ::name / &::name    rewritten at emit time to :s1__..__sN__name /
 *                       &s1__..__sN__name using the current scope stack
 *
 * Expression syntax is intentionally Lisp-shaped:
 *   atoms: decimal or 0x-prefixed integer literals
 *   calls: (+ a b), (- a b), (* a b), (/ a b), (% a b), (<< a b), (>> a b)
 *          (& a b), (| a b), (^ a b), (~ a), (= a b), (!= a b),
 *          (< a b), (<= a b), (> a b), (>= a b), (strlen "text")
 *
 * Flow:
 *   1. lex_source(): scan input_buf into source_tokens[]. Tokens are words,
 *      strings, newlines, parens, braces, commas, and ## paste markers.
 *      Whitespace (excluding newlines) is dropped; # and ; comments are
 *      dropped.
 *
 *   2. process_tokens(): main loop driven by a stream stack (streams[]).
 *      The source token array is pushed as the initial stream. Each iteration
 *      pops a token from the top stream:
 *
 *        %macro NAME(p,...) / %endm at line-start
 *          -> define_macro(): consume header + body tokens into macros[] and
 *             macro_body_tokens[]; register name and param list.
 *
 *        !(e) / @(e) / %(e) / $(e) / %select(c,t,e)
 *          -> expand_builtin_call(): parse arg spans, eval S-expression(s) via
 *             eval_expr_range(), emit LE hex or push the chosen token span.
 *
 *        %NAME(...) matching a defined macro
 *          -> expand_call() -> expand_macro_tokens(): substitute arguments,
 *             apply ## paste via paste_pool_range(), write result into
 *             expand_pool[], then push that slice as a new stream (rescan).
 *
 *        Anything else
 *          -> emit_token() / emit_newline() directly into output_buf.
 *
 *      When a stream is exhausted it is popped; pool_used is rewound to the
 *      stream's pool_mark, reclaiming the expand_pool space it used.
 *
 *   3. Write output_buf to the output file.
 *
 * Notes:
 *   - Macros are define-before-use. There is no prescan.
 *   - Expansion rescans by pushing expanded tokens back through the same loop.
 *   - There is no cycle detection. Recursive macros will loop until a limit.
 *   - Only recognized %NAME(...) calls expand. Other text passes through.
 *   - Output formatting is normalized to tokens plus '\n', not preserved.
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_INPUT 262144
#define MAX_OUTPUT 524288
#define MAX_TEXT 524288
#define MAX_TOKENS 65536
#define MAX_MACROS 512
#define MAX_PARAMS 16
#define MAX_MACRO_BODY_TOKENS MAX_TOKENS
#define MAX_EXPAND 65536
#define MAX_STACK 64
#define MAX_EXPR_FRAMES 256
#define MAX_SCOPE_DEPTH 32

/* Token kinds produced by lex_source(). */
enum {
    TOK_WORD,
    TOK_STRING,
    TOK_NEWLINE,
    TOK_LPAREN,
    TOK_RPAREN,
    TOK_COMMA,
    TOK_PASTE,
    TOK_LBRACE,
    TOK_RBRACE
};

/* Operators understood inside S-expressions. */
enum ExprOp {
    EXPR_ADD,
    EXPR_SUB,
    EXPR_MUL,
    EXPR_DIV,
    EXPR_MOD,
    EXPR_SHL,
    EXPR_SHR,
    EXPR_AND,
    EXPR_OR,
    EXPR_XOR,
    EXPR_NOT,
    EXPR_EQ,
    EXPR_NE,
    EXPR_LT,
    EXPR_LE,
    EXPR_GT,
    EXPR_GE,
    EXPR_STRLEN,
    EXPR_INVALID
};

/* A (pointer, length) view of text; not necessarily NUL-terminated. */
struct TextSpan {
    const char *ptr;
    int len;
};

struct Token {
    int kind;
    struct TextSpan text;
};

/* Half-open token range [start, end). */
struct TokenSpan {
    struct Token *start;
    struct Token *end;
};

struct Macro {
    struct TextSpan name;               /* stored without the leading '%' */
    int param_count;
    struct TextSpan params[MAX_PARAMS];
    struct Token *body_start;           /* [body_start, body_end) */
    struct Token *body_end;
};

struct Stream {
    struct Token *start;
    struct Token *end;
    struct Token *pos;
    int line_start;
    int pool_mark;      /* pool_used value restored on pop; < 0 = leave as is */
};

struct ExprFrame {
    enum ExprOp op;
    long long args[MAX_PARAMS];
    int argc;
};

static char input_buf[MAX_INPUT + 1];
static char output_buf[MAX_OUTPUT + 1];
static char text_buf[MAX_TEXT];

static struct Token source_tokens[MAX_TOKENS];
static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS];
static struct Token expand_pool[MAX_EXPAND];
static struct Macro macros[MAX_MACROS];
static struct Stream streams[MAX_STACK];
static struct TextSpan scope_stack[MAX_SCOPE_DEPTH];

static int text_used;
static int source_count;
static int macro_count;
static int macro_body_used;
static int pool_used;
static int output_used;
static int output_need_space;
static int stream_top;
static int next_expansion_id;
static int scope_depth;

/* Results of the most recent parse_args() call. */
static struct Token *arg_starts[MAX_PARAMS];
static struct Token *arg_ends[MAX_PARAMS];
static int arg_count;
static struct Token *call_end_pos;

static const char *error_msg;

/* Record msg in error_msg and return 0, so callers can `return fail(...)`. */
static int fail(const char *msg)
{
    error_msg = msg;
    return 0;
}

/* Nonzero for horizontal whitespace; '\n' is deliberately excluded because
 * newlines are significant tokens. */
static int is_space_no_nl(int c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v';
}

/* Copy len bytes of s into text_buf and NUL-terminate the copy.
 * Returns a pointer to the copy, or NULL (after fail()) when len is negative
 * or text_buf has no room for len + 1 bytes. */
static char *append_text_len(const char *s, int len)
{
    int start;

    if (len < 0 || text_used + len + 1 > MAX_TEXT) {
        fail("text overflow");
        return NULL;
    }
    start = text_used;
    memcpy(text_buf + text_used, s, (size_t)len);
    text_used += len;
    text_buf[text_used++] = '\0';
    return text_buf + start;
}

/* Append a token to buf[*count], bounded by max_count.
 * Returns 1 on success, 0 (after fail()) when the buffer is full. */
static int push_token(struct Token *buf, int *count, int max_count,
                      int kind, struct TextSpan text)
{
    if (*count >= max_count) {
        return fail("token overflow");
    }
    buf[*count].kind = kind;
    buf[*count].text = text;
    *count += 1;
    return 1;
}

/* Append tok to expand_pool. Returns 1 on success, 0 when the pool is full. */
static int push_pool_token(struct Token tok)
{
    if (pool_used >= MAX_EXPAND) {
        return fail("expansion overflow");
    }
    expand_pool[pool_used++] = tok;
    return 1;
}

/* Nonzero when the token's text equals the NUL-terminated string s. */
static int token_text_eq(const struct Token *tok, const char *s)
{
    int len = (int)strlen(s);

    return tok->text.len == len &&
           memcmp(tok->text.ptr, s, (size_t)len) == 0;
}

/* Nonzero when span and the token's text are byte-for-byte equal. */
static int span_eq_token(struct TextSpan span, const struct Token *tok)
{
    return span.len == tok->text.len &&
           memcmp(span.ptr, tok->text.ptr, (size_t)span.len) == 0;
}

/* Map a single punctuation character to its token kind, or -1 if c is not
 * one of ( ) , { }. */
static int lex_punct_kind(int c)
{
    switch (c) {
    case '(':
        return TOK_LPAREN;
    case ')':
        return TOK_RPAREN;
    case ',':
        return TOK_COMMA;
    case '{':
        return TOK_LBRACE;
    case '}':
        return TOK_RBRACE;
    default:
        return -1;
    }
}

/* Nonzero when c terminates a WORD token: end of input, any whitespace,
 * a comment/paste introducer ('#' or ';'), or punctuation. Quote characters
 * do NOT end a word, so `a"b` lexes as one word. */
static int lex_ends_word(int c)
{
    return c == '\0' || c == '\n' || c == '#' || c == ';' ||
           is_space_no_nl(c) || lex_punct_kind(c) >= 0;
}

/* Tokenize the NUL-terminated text src, appending to source_tokens[].
 * Token text spans point into src, so src must outlive the tokens.
 * Returns 1 on success, 0 (after fail()) when source_tokens[] overflows. */
static int lex_source(const char *src)
{
    int i = 0;

    while (src[i] != '\0') {
        int start = i;
        int kind;

        if (is_space_no_nl((unsigned char)src[i])) {
            i++;
            continue;
        }

        if (src[i] == '\n') {
            kind = TOK_NEWLINE;
            i++;
        } else if (src[i] == '"' || src[i] == '\'') {
            /* String: runs to the matching quote (no escapes). An
             * unterminated string extends to the end of the input. */
            int quote = src[i];

            i++;
            while (src[i] != '\0' && src[i] != quote) {
                i++;
            }
            if (src[i] == quote) {
                i++;
            }
            kind = TOK_STRING;
        } else if (src[i] == '#' && src[i + 1] == '#') {
            kind = TOK_PASTE;
            i += 2;
        } else if (src[i] == '#' || src[i] == ';') {
            /* Comment: drop everything up to, not including, the newline. */
            while (src[i] != '\0' && src[i] != '\n') {
                i++;
            }
            continue;
        } else if ((kind = lex_punct_kind(src[i])) >= 0) {
            i++;
        } else {
            kind = TOK_WORD;
            while (!lex_ends_word((unsigned char)src[i])) {
                i++;
            }
        }

        if (!push_token(source_tokens, &source_count, MAX_TOKENS,
                        kind, (struct TextSpan){src + start, i - start})) {
            return 0;
        }
    }

    return 1;
}

/* Look up the macro named by a `%NAME` word token.
 * Returns the macro, or NULL when tok is not a '%'-prefixed word of length
 * >= 2 or no macro with that name is defined. The first match in definition
 * order wins. */
static const struct Macro *find_macro(const struct Token *tok)
{
    int i;

    if (tok->kind != TOK_WORD || tok->text.len < 2) {
        return NULL;
    }
    if (tok->text.ptr[0] != '%') {
        return NULL;
    }
    for (i = 0; i < macro_count; i++) {
        if (macros[i].name.len == tok->text.len - 1 &&
            memcmp(tok->text.ptr + 1,
                   macros[i].name.ptr,
                   (size_t)macros[i].name.len) == 0) {
            return &macros[i];
        }
    }
    return NULL;
}

/* Return the 1-based index of the parameter of m named by tok, or 0 when
 * tok is not a word or does not name a parameter. */
static int find_param(const struct Macro *m, const struct Token *tok)
{
    int i;

    if (tok->kind != TOK_WORD) {
        return 0;
    }
    for (i = 0; i < m->param_count; i++) {
        if (span_eq_token(m->params[i], tok)) {
            return i + 1;
        }
    }
    return 0;
}

/* Append '\n' to output_buf and cancel the pending inter-token space. */
static int emit_newline(void)
{
    if (output_used + 1 >= MAX_OUTPUT) {
        return fail("output overflow");
    }
    output_buf[output_used++] = '\n';
    output_need_space = 0;
    return 1;
}

static int emit_scoped_label(const struct Token *tok, int skip, char sigil)
{
    /* Rewrite `::name` or `&::name` against the current scope stack.
     * skip is the number of leading chars to drop (`::` -> 2, `&::` -> 3);
     * sigil is the single-char prefix to emit (`:` for definitions, `&`
     * for references). With a non-empty scope stack the output is
     * sigil + scope1 + "__" + ... + scopeN + "__" + name; with an empty
     * stack it degrades to sigil + name (pass-through). */
    int name_len = tok->text.len - skip;
    int i;

    if (name_len <= 0) {
        return fail("bad scope label");
    }

    if (output_need_space) {
        if (output_used + 1 >= MAX_OUTPUT) {
            return fail("output overflow");
        }
        output_buf[output_used++] = ' ';
    }

    if (output_used + 1 >= MAX_OUTPUT) {
        return fail("output overflow");
    }
    output_buf[output_used++] = sigil;

    for (i = 0; i < scope_depth; i++) {
        int span_len = scope_stack[i].len;

        if (output_used + span_len + 2 >= MAX_OUTPUT) {
            return fail("output overflow");
        }
        memcpy(output_buf + output_used, scope_stack[i].ptr,
               (size_t)span_len);
        output_used += span_len;
        output_buf[output_used++] = '_';
        output_buf[output_used++] = '_';
    }

    if (output_used + name_len >= MAX_OUTPUT) {
        return fail("output overflow");
    }
    memcpy(output_buf + output_used, tok->text.ptr + skip, (size_t)name_len);
    output_used += name_len;
    output_need_space = 1;
    return 1;
}

/* Write one token to output_buf, preceded by a single space when another
 * token was already emitted on the current line.
 * Brace tokens are swallowed (they only group macro arguments); `::name`
 * and `&::name` words are routed through emit_scoped_label().
 * Returns 1 on success, 0 (after fail()) on overflow or a bad scope label. */
static int emit_token(const struct Token *tok)
{
    if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) {
        return 1;
    }
    if (tok->kind == TOK_WORD && tok->text.len >= 2 &&
        tok->text.ptr[0] == ':' && tok->text.ptr[1] == ':') {
        return emit_scoped_label(tok, 2, ':');
    }
    if (tok->kind == TOK_WORD && tok->text.len >= 3 &&
        tok->text.ptr[0] == '&' &&
        tok->text.ptr[1] == ':' && tok->text.ptr[2] == ':') {
        return emit_scoped_label(tok, 3, '&');
    }
    if (output_need_space) {
        if (output_used + 1 >= MAX_OUTPUT) {
            return fail("output overflow");
        }
        output_buf[output_used++] = ' ';
    }
    if (output_used + tok->text.len >= MAX_OUTPUT) {
        return fail("output overflow");
    }
    memcpy(output_buf + output_used, tok->text.ptr,
           (size_t)tok->text.len);
    output_used += tok->text.len;
    output_need_space = 1;
    return 1;
}

/* Push span as a new top-of-stack stream positioned at its first token.
 * pool_mark is the pool_used value to restore when the stream is popped
 * (negative = do not touch pool_used). */
static int push_stream_span(struct TokenSpan span, int pool_mark)
{
    struct Stream *s;

    if (stream_top >= MAX_STACK) {
        return fail("stream overflow");
    }
    s = &streams[stream_top++];
    s->start = span.start;
    s->end = span.end;
    s->pos = span.start;
    s->line_start = 1;
    s->pool_mark = pool_mark;
    return 1;
}

/* Top-of-stack stream, or NULL when the stack is empty. */
static struct Stream *current_stream(void)
{
    if (stream_top <= 0) {
        return NULL;
    }
    return &streams[stream_top - 1];
}

/* Pop the top stream (no-op on an empty stack) and rewind pool_used to the
 * stream's pool_mark when that mark is non-negative. */
static void pop_stream(void)
{
    if (stream_top <= 0) {
        return;
    }
    stream_top--;
    if (streams[stream_top].pool_mark >= 0) {
        pool_used = streams[stream_top].pool_mark;
    }
}

/* Append every token of span to expand_pool. Returns 0 on pool overflow. */
static int copy_span_to_pool(struct TokenSpan span)
{
    struct Token *tok;

    for (tok = span.start; tok < span.end; tok++) {
        if (!push_pool_token(*tok)) {
            return 0;
        }
    }
    return 1;
}

/* Push expand_pool[mark, pool_used) as a stream that rewinds the pool to
 * mark when popped. An empty slice pushes nothing and succeeds. */
static int push_pool_stream_from_mark(int mark)
{
    if (pool_used == mark) {
        return 1;
    }
    return push_stream_span((struct TokenSpan){expand_pool + mark,
                                               expand_pool + pool_used},
                            mark);
}

/* Advance *pos past any run of newline tokens, stopping at end. */
static void skip_expr_newlines(struct Token **pos, struct Token *end)
{
    while (*pos < end && (*pos)->kind == TOK_NEWLINE) {
        *pos += 1;
    }
}

static int emit_decimal_text(long long value, struct TextSpan *out)
{
    /* Render a non-negative integer as decimal into text_buf and
     * return the span. No snprintf; plain reverse-fill. */
    char digits[24];
    int digit_count = 0;
    long long v = value;
    int start;
    int i;

    if (v < 0) {
        return fail("bad directive");
    }
    if (v == 0) {
        digits[digit_count++] = '0';
    } else {
        while (v > 0) {
            digits[digit_count++] = (char)('0' + (v % 10));
            v /= 10;
        }
    }

    if (text_used + digit_count + 1 > MAX_TEXT) {
        return fail("text overflow");
    }
    start = text_used;
    /* digits[] holds the least significant digit first. */
    for (i = digit_count - 1; i >= 0; i--) {
        text_buf[text_used++] = digits[i];
    }
    text_buf[text_used++] = '\0';
    out->ptr = text_buf + start;
    out->len = digit_count;
    return 1;
}

/* Build "<base>.<suffix>" in text_buf (NUL-terminated) and store its span
 * in *out. Returns 0 (after fail()) when text_buf is full. */
static int emit_dotted_name(struct TextSpan base, const char *suffix,
                            int suffix_len, struct TextSpan *out)
{
    int total = base.len + 1 + suffix_len;
    int start;

    if (text_used + total + 1 > MAX_TEXT) {
        return fail("text overflow");
    }
    start = text_used;
    memcpy(text_buf + text_used, base.ptr, (size_t)base.len);
    text_used += base.len;
    text_buf[text_used++] = '.';
    memcpy(text_buf + text_used, suffix, (size_t)suffix_len);
    text_used += suffix_len;
    text_buf[text_used++] = '\0';
    out->ptr = text_buf + start;
    out->len = total;
    return 1;
}

/* Register a zero-parameter macro named "<base>.<suffix>" whose body is the
 * single word token holding value in decimal.
 * Returns 1 on success; on failure returns 0 (after fail()) and leaves
 * macro_count, macro_body_used and text_used unchanged. */
static int define_fielded_macro(struct TextSpan base, const char *suffix,
                                int suffix_len, long long value)
{
    struct Macro *m;
    struct Token body_tok;
    int text_mark = text_used;

    if (macro_count >= MAX_MACROS) {
        return fail("too many macros");
    }
    if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
        return fail("macro body overflow");
    }
    m = &macros[macro_count];
    memset(m, 0, sizeof(*m));
    if (!emit_dotted_name(base, suffix, suffix_len, &m->name)) {
        return 0;
    }
    m->param_count = 0;
    body_tok.kind = TOK_WORD;
    if (!emit_decimal_text(value, &body_tok.text)) {
        /* Drop the name written above so a failed definition leaks no text. */
        text_used = text_mark;
        return 0;
    }
    m->body_start = macro_body_tokens + macro_body_used;
    macro_body_tokens[macro_body_used++] = body_tok;
    m->body_end = macro_body_tokens + macro_body_used;
    macro_count++;
    return 1;
}

static int define_fielded(struct Stream *s, long long stride,
                          const char *total_name, int total_name_len)
{
    /* Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` and
     * synthesizes N+1 zero-parameter macros:
     *   NAME.field_k -> k * stride
     *   NAME.<total> -> N * stride  (SIZE for struct, COUNT for enum)
     * On entry s->pos is the directive word; on success s->pos is just past
     * the newline that ends the directive line and s->line_start is set.
     * Commas and newlines between fields are ignored. */
    struct TextSpan base;
    long long index = 0;

    s->pos++;
    if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
        return fail("bad directive");
    }
    base = s->pos->text;
    s->pos++;

    while (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
        s->pos++;
    }
    if (s->pos >= s->end || s->pos->kind != TOK_LBRACE) {
        return fail("bad directive");
    }
    s->pos++;

    for (;;) {
        while (s->pos < s->end &&
               (s->pos->kind == TOK_COMMA || s->pos->kind == TOK_NEWLINE)) {
            s->pos++;
        }
        if (s->pos >= s->end) {
            return fail("unterminated directive");
        }
        if (s->pos->kind == TOK_RBRACE) {
            s->pos++;
            break;
        }
        if (s->pos->kind != TOK_WORD) {
            return fail("bad directive");
        }
        if (!define_fielded_macro(base, s->pos->text.ptr, s->pos->text.len,
                                  index * stride)) {
            return 0;
        }
        s->pos++;
        index++;
    }

    if (!define_fielded_macro(base, total_name, total_name_len,
                              index * stride)) {
        return 0;
    }

    /* Discard anything left on the closing line, including its newline. */
    while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
        s->pos++;
    }
    if (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
        s->pos++;
    }
    s->line_start = 1;
    return 1;
}

/* Parse `%macro NAME(p1, ...)` <newline> body... `%endm` starting with
 * s->pos on the `%macro` word, and register the macro.
 * The body is copied verbatim into macro_body_tokens[]; `%endm` is only
 * recognized as the first token of a line. On success s->pos is just past
 * the newline following `%endm`. The macro becomes visible (macro_count is
 * bumped) only once `%endm` has been seen.
 * Returns 1 on success, 0 (after fail()) on a malformed header, a missing
 * `%endm`, or a full table. */
static int define_macro(struct Stream *s)
{
    struct Macro *m;
    int line_start;

    if (macro_count >= MAX_MACROS) {
        return fail("too many macros");
    }
    if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
        return fail("macro body overflow");
    }

    m = &macros[macro_count];
    memset(m, 0, sizeof(*m));
    s->pos++;

    if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
        return fail("bad macro header");
    }
    m->name = s->pos->text;
    s->pos++;

    if (s->pos >= s->end || s->pos->kind != TOK_LPAREN) {
        return fail("bad macro header");
    }
    s->pos++;

    /* Parameter list: empty, or WORD (',' WORD)*. */
    if (s->pos < s->end && s->pos->kind != TOK_RPAREN) {
        while (1) {
            if (m->param_count >= MAX_PARAMS) {
                return fail("bad macro header");
            }
            if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
                return fail("bad macro header");
            }
            m->params[m->param_count] = s->pos->text;
            m->param_count++;
            s->pos++;
            if (s->pos < s->end && s->pos->kind == TOK_COMMA) {
                s->pos++;
                continue;
            }
            break;
        }
    }

    if (s->pos >= s->end || s->pos->kind != TOK_RPAREN) {
        return fail("bad macro header");
    }
    s->pos++;

    /* The header must be followed directly by a newline. */
    if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) {
        return fail("bad macro header");
    }
    s->pos++;

    m->body_start = macro_body_tokens + macro_body_used;
    line_start = 1;
    while (s->pos < s->end) {
        if (line_start &&
            s->pos->kind == TOK_WORD &&
            token_text_eq(s->pos, "%endm")) {
            /* Skip the rest of the %endm line, newline included. */
            while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) {
                s->pos++;
            }
            if (s->pos < s->end && s->pos->kind == TOK_NEWLINE) {
                s->pos++;
            }
            m->body_end = macro_body_tokens + macro_body_used;
            s->line_start = 1;
            macro_count++;
            return 1;
        }
        if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
            return fail("macro body overflow");
        }
        macro_body_tokens[macro_body_used++] = *s->pos;
        line_start = (s->pos->kind == TOK_NEWLINE);
        s->pos++;
    }

    return fail("unterminated macro");
}

/* Split the argument list that starts at lparen (a TOK_LPAREN) and must
 * close before limit.
 * Arguments are separated by commas at paren depth 1 and brace depth 0, so
 * nested (...) and {...} groups may contain commas. Results are left in the
 * globals: arg_starts[i]/arg_ends[i] (half-open spans), arg_count, and
 * call_end_pos (the token after the closing paren). `()` yields
 * arg_count == 0; any other empty argument is reported as an empty span.
 * Returns 1 on success, 0 (after fail()) on unbalanced braces, more than
 * MAX_PARAMS arguments, or a missing closing paren. */
static int parse_args(struct Token *lparen, struct Token *limit)
{
    struct Token *tok = lparen + 1;
    struct Token *arg_start = tok;
    int depth = 1;
    int brace_depth = 0;
    int arg_index = 0;

    while (tok < limit) {
        if (tok->kind == TOK_LPAREN) {
            depth++;
            tok++;
            continue;
        }
        if (tok->kind == TOK_RPAREN) {
            depth--;
            if (depth == 0) {
                if (brace_depth != 0) {
                    return fail("unbalanced braces");
                }
                if (arg_start == tok && arg_index == 0) {
                    arg_count = 0;
                } else {
                    if (arg_index >= MAX_PARAMS) {
                        return fail("too many args");
                    }
                    arg_starts[arg_index] = arg_start;
                    arg_ends[arg_index] = tok;
                    arg_count = arg_index + 1;
                }
                call_end_pos = tok + 1;
                return 1;
            }
            tok++;
            continue;
        }
        if (tok->kind == TOK_LBRACE) {
            brace_depth++;
            tok++;
            continue;
        }
        if (tok->kind == TOK_RBRACE) {
            if (brace_depth <= 0) {
                return fail("unbalanced braces");
            }
            brace_depth--;
            tok++;
            continue;
        }
        if (tok->kind == TOK_COMMA && depth == 1 && brace_depth == 0) {
            if (arg_index >= MAX_PARAMS) {
                return fail("too many args");
            }
            arg_starts[arg_index] = arg_start;
            arg_ends[arg_index] = tok;
            arg_index++;
            arg_start = tok + 1;
            tok++;
            continue;
        }
        tok++;
    }

    return fail("unterminated macro call");
}

/* Nonzero when span is one brace group `{ ... }`: it starts with '{', ends
 * with '}', and that first '{' is matched by the final '}' (so `{a}{b}` is
 * not considered braced). */
static int arg_is_braced(struct TokenSpan span)
{
    struct Token *tok;
    int depth;

    if (span.end - span.start < 2) {
        return 0;
    }
    if (span.start->kind != TOK_LBRACE ||
        (span.end - 1)->kind != TOK_RBRACE) {
        return 0;
    }
    depth = 0;
    for (tok = span.start; tok < span.end; tok++) {
        if (tok->kind == TOK_LBRACE) {
            depth++;
        } else if (tok->kind == TOK_RBRACE) {
            depth--;
            /* Closing the outer group before the last token means the
             * span is several groups, not one. */
            if (depth == 0 && tok != span.end - 1) {
                return 0;
            }
        }
    }
    return depth == 0;
}

static int copy_arg_tokens_to_pool(struct TokenSpan span) 872 { 873 if (span.start == span.end) { 874 return fail("bad macro argument"); 875 } 876 if (arg_is_braced(span)) { 877 struct TokenSpan inner; 878 inner.start = span.start + 1; 879 inner.end = span.end - 1; 880 if (inner.start == inner.end) { 881 return 1; 882 } 883 return copy_span_to_pool(inner); 884 } 885 return copy_span_to_pool(span); 886 } 887 888 static int copy_paste_arg_to_pool(struct TokenSpan span) 889 { 890 if (arg_is_braced(span)) { 891 return fail("bad macro argument"); 892 } 893 if (span.end - span.start != 1) { 894 return fail("bad macro argument"); 895 } 896 return copy_span_to_pool(span); 897 } 898 899 static int append_pasted_token(struct Token *dst, 900 const struct Token *left, 901 const struct Token *right) 902 { 903 char tmp[512]; 904 char *text_ptr; 905 int n; 906 907 n = snprintf(tmp, sizeof(tmp), "%.*s%.*s", 908 left->text.len, left->text.ptr, 909 right->text.len, right->text.ptr); 910 if (n < 0 || n >= (int)sizeof(tmp)) { 911 return fail("bad paste"); 912 } 913 text_ptr = append_text_len(tmp, n); 914 if (text_ptr == NULL) { 915 return 0; 916 } 917 dst->kind = TOK_WORD; 918 dst->text.ptr = text_ptr; 919 dst->text.len = n; 920 return 1; 921 } 922 923 static int paste_pool_range(int mark) 924 { 925 struct Token *start = expand_pool + mark; 926 struct Token *in = start; 927 struct Token *out = start; 928 struct Token *end = expand_pool + pool_used; 929 930 while (in < end) { 931 if (in->kind == TOK_PASTE) { 932 if (out == start || in + 1 >= end) { 933 pool_used = mark; 934 return fail("bad paste"); 935 } 936 if ((out - 1)->kind == TOK_NEWLINE || 937 (out - 1)->kind == TOK_PASTE || 938 (in + 1)->kind == TOK_NEWLINE || 939 (in + 1)->kind == TOK_PASTE) { 940 pool_used = mark; 941 return fail("bad paste"); 942 } 943 if (!append_pasted_token(out - 1, out - 1, in + 1)) { 944 pool_used = mark; 945 return 0; 946 } 947 in += 2; 948 continue; 949 } 950 if (out != in) { 951 *out = *in; 952 } 953 
out++; 954 in++; 955 } 956 957 pool_used = (int)(out - expand_pool); 958 return 1; 959 } 960 961 static int is_local_label_token(const struct Token *tok) 962 { 963 if (tok->kind != TOK_WORD || tok->text.len < 3) { 964 return 0; 965 } 966 if (tok->text.ptr[0] != ':' && tok->text.ptr[0] != '&') { 967 return 0; 968 } 969 if (tok->text.ptr[1] != '@') { 970 return 0; 971 } 972 return 1; 973 } 974 975 static int push_local_label_token(const struct Token *tok, int expansion_id) 976 { 977 /* Rewrite ":@name" -> ":name__NN", "&@name" -> "&name__NN". 978 * Build the text directly in text_buf so the resulting span is stable. */ 979 char digits[16]; 980 int digit_count = 0; 981 int unsigned_id; 982 int start; 983 int total; 984 int i; 985 struct Token out; 986 987 unsigned_id = expansion_id; 988 if (unsigned_id == 0) { 989 digits[digit_count++] = '0'; 990 } else { 991 while (unsigned_id > 0) { 992 digits[digit_count++] = (char)('0' + (unsigned_id % 10)); 993 unsigned_id /= 10; 994 } 995 } 996 997 /* Reserve: sigil(1) + tail(len-2) + "__"(2) + digits + NUL. 
*/ 998 total = 1 + (tok->text.len - 2) + 2 + digit_count; 999 if (text_used + total + 1 > MAX_TEXT) { 1000 return fail("text overflow"); 1001 } 1002 start = text_used; 1003 text_buf[text_used++] = tok->text.ptr[0]; 1004 memcpy(text_buf + text_used, tok->text.ptr + 2, (size_t)(tok->text.len - 2)); 1005 text_used += tok->text.len - 2; 1006 text_buf[text_used++] = '_'; 1007 text_buf[text_used++] = '_'; 1008 for (i = digit_count - 1; i >= 0; i--) { 1009 text_buf[text_used++] = digits[i]; 1010 } 1011 text_buf[text_used++] = '\0'; 1012 1013 out.kind = TOK_WORD; 1014 out.text.ptr = text_buf + start; 1015 out.text.len = total; 1016 return push_pool_token(out); 1017 } 1018 1019 static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, 1020 const struct Macro *m, struct Token **after_out, 1021 int *mark_out) 1022 { 1023 struct Token *body_tok; 1024 struct Token *end_pos; 1025 int mark; 1026 int expansion_id; 1027 1028 if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN) { 1029 if (!parse_args(call_tok + 1, limit)) { 1030 return 0; 1031 } 1032 if (arg_count != m->param_count) { 1033 return fail("wrong arg count"); 1034 } 1035 end_pos = call_end_pos; 1036 } else if (m->param_count == 0) { 1037 arg_count = 0; 1038 end_pos = call_tok + 1; 1039 } else { 1040 return fail("bad macro call"); 1041 } 1042 1043 expansion_id = ++next_expansion_id; 1044 mark = pool_used; 1045 for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) { 1046 int param_idx = find_param(m, body_tok); 1047 int pasted = 0; 1048 int ok; 1049 1050 if (param_idx != 0) { 1051 struct TokenSpan arg = {arg_starts[param_idx - 1], arg_ends[param_idx - 1]}; 1052 pasted = (body_tok > m->body_start && (body_tok - 1)->kind == TOK_PASTE) || 1053 (body_tok + 1 < m->body_end && (body_tok + 1)->kind == TOK_PASTE); 1054 ok = pasted ? 
copy_paste_arg_to_pool(arg) : copy_arg_tokens_to_pool(arg); 1055 if (!ok) { 1056 pool_used = mark; 1057 return 0; 1058 } 1059 continue; 1060 } 1061 if (is_local_label_token(body_tok)) { 1062 if (!push_local_label_token(body_tok, expansion_id)) { 1063 pool_used = mark; 1064 return 0; 1065 } 1066 continue; 1067 } 1068 if (!push_pool_token(*body_tok)) { 1069 pool_used = mark; 1070 return 0; 1071 } 1072 } 1073 1074 if (!paste_pool_range(mark)) { 1075 return 0; 1076 } 1077 *after_out = end_pos; 1078 *mark_out = mark; 1079 return 1; 1080 } 1081 1082 static int parse_int_token(const struct Token *tok, long long *out) 1083 { 1084 char tmp[128]; 1085 char *end; 1086 unsigned long long uv; 1087 long long sv; 1088 1089 if (tok->kind != TOK_WORD || tok->text.len <= 0 || tok->text.len >= (int)sizeof(tmp)) { 1090 return fail("bad integer"); 1091 } 1092 memcpy(tmp, tok->text.ptr, (size_t)tok->text.len); 1093 tmp[tok->text.len] = '\0'; 1094 1095 errno = 0; 1096 if (tmp[0] == '-') { 1097 sv = strtoll(tmp, &end, 0); 1098 if (errno != 0 || *end != '\0') { 1099 return fail("bad integer"); 1100 } 1101 *out = sv; 1102 return 1; 1103 } 1104 1105 uv = strtoull(tmp, &end, 0); 1106 if (errno != 0 || *end != '\0') { 1107 return fail("bad integer"); 1108 } 1109 *out = (long long)uv; 1110 return 1; 1111 } 1112 1113 static enum ExprOp expr_op_code(const struct Token *tok) 1114 { 1115 if (tok->kind != TOK_WORD) { 1116 return EXPR_INVALID; 1117 } 1118 if (token_text_eq(tok, "+")) { 1119 return EXPR_ADD; 1120 } 1121 if (token_text_eq(tok, "-")) { 1122 return EXPR_SUB; 1123 } 1124 if (token_text_eq(tok, "*")) { 1125 return EXPR_MUL; 1126 } 1127 if (token_text_eq(tok, "/")) { 1128 return EXPR_DIV; 1129 } 1130 if (token_text_eq(tok, "%")) { 1131 return EXPR_MOD; 1132 } 1133 if (token_text_eq(tok, "<<")) { 1134 return EXPR_SHL; 1135 } 1136 if (token_text_eq(tok, ">>")) { 1137 return EXPR_SHR; 1138 } 1139 if (token_text_eq(tok, "&")) { 1140 return EXPR_AND; 1141 } 1142 if (token_text_eq(tok, "|")) { 
1143 return EXPR_OR; 1144 } 1145 if (token_text_eq(tok, "^")) { 1146 return EXPR_XOR; 1147 } 1148 if (token_text_eq(tok, "~")) { 1149 return EXPR_NOT; 1150 } 1151 if (token_text_eq(tok, "=")) { 1152 return EXPR_EQ; 1153 } 1154 if (token_text_eq(tok, "!=")) { 1155 return EXPR_NE; 1156 } 1157 if (token_text_eq(tok, "<")) { 1158 return EXPR_LT; 1159 } 1160 if (token_text_eq(tok, "<=")) { 1161 return EXPR_LE; 1162 } 1163 if (token_text_eq(tok, ">")) { 1164 return EXPR_GT; 1165 } 1166 if (token_text_eq(tok, ">=")) { 1167 return EXPR_GE; 1168 } 1169 if (token_text_eq(tok, "strlen")) { 1170 return EXPR_STRLEN; 1171 } 1172 return EXPR_INVALID; 1173 } 1174 1175 static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long long *out) 1176 { 1177 int i; 1178 1179 switch (op) { 1180 case EXPR_ADD: 1181 if (argc < 1) { 1182 return fail("bad expression"); 1183 } 1184 *out = args[0]; 1185 for (i = 1; i < argc; i++) { 1186 *out += args[i]; 1187 } 1188 return 1; 1189 case EXPR_SUB: 1190 if (argc < 1) { 1191 return fail("bad expression"); 1192 } 1193 *out = (argc == 1) ? 
-args[0] : args[0]; 1194 for (i = 1; i < argc; i++) { 1195 *out -= args[i]; 1196 } 1197 return 1; 1198 case EXPR_MUL: 1199 if (argc < 1) { 1200 return fail("bad expression"); 1201 } 1202 *out = args[0]; 1203 for (i = 1; i < argc; i++) { 1204 *out *= args[i]; 1205 } 1206 return 1; 1207 case EXPR_DIV: 1208 if (argc != 2 || args[1] == 0) { 1209 return fail("bad expression"); 1210 } 1211 *out = args[0] / args[1]; 1212 return 1; 1213 case EXPR_MOD: 1214 if (argc != 2 || args[1] == 0) { 1215 return fail("bad expression"); 1216 } 1217 *out = args[0] % args[1]; 1218 return 1; 1219 case EXPR_SHL: 1220 if (argc != 2) { 1221 return fail("bad expression"); 1222 } 1223 *out = (long long)((unsigned long long)args[0] << args[1]); 1224 return 1; 1225 case EXPR_SHR: 1226 if (argc != 2) { 1227 return fail("bad expression"); 1228 } 1229 *out = args[0] >> args[1]; 1230 return 1; 1231 case EXPR_AND: 1232 if (argc < 1) { 1233 return fail("bad expression"); 1234 } 1235 *out = args[0]; 1236 for (i = 1; i < argc; i++) { 1237 *out &= args[i]; 1238 } 1239 return 1; 1240 case EXPR_OR: 1241 if (argc < 1) { 1242 return fail("bad expression"); 1243 } 1244 *out = args[0]; 1245 for (i = 1; i < argc; i++) { 1246 *out |= args[i]; 1247 } 1248 return 1; 1249 case EXPR_XOR: 1250 if (argc < 1) { 1251 return fail("bad expression"); 1252 } 1253 *out = args[0]; 1254 for (i = 1; i < argc; i++) { 1255 *out ^= args[i]; 1256 } 1257 return 1; 1258 case EXPR_NOT: 1259 if (argc != 1) { 1260 return fail("bad expression"); 1261 } 1262 *out = ~args[0]; 1263 return 1; 1264 case EXPR_EQ: 1265 if (argc != 2) { 1266 return fail("bad expression"); 1267 } 1268 *out = (args[0] == args[1]); 1269 return 1; 1270 case EXPR_NE: 1271 if (argc != 2) { 1272 return fail("bad expression"); 1273 } 1274 *out = (args[0] != args[1]); 1275 return 1; 1276 case EXPR_LT: 1277 if (argc != 2) { 1278 return fail("bad expression"); 1279 } 1280 *out = (args[0] < args[1]); 1281 return 1; 1282 case EXPR_LE: 1283 if (argc != 2) { 1284 return 
fail("bad expression"); 1285 } 1286 *out = (args[0] <= args[1]); 1287 return 1; 1288 case EXPR_GT: 1289 if (argc != 2) { 1290 return fail("bad expression"); 1291 } 1292 *out = (args[0] > args[1]); 1293 return 1; 1294 case EXPR_GE: 1295 if (argc != 2) { 1296 return fail("bad expression"); 1297 } 1298 *out = (args[0] >= args[1]); 1299 return 1; 1300 case EXPR_STRLEN: 1301 case EXPR_INVALID: 1302 break; 1303 } 1304 1305 return fail("bad expression"); 1306 } 1307 1308 static int eval_expr_range(struct TokenSpan span, long long *out); 1309 1310 static int eval_expr_atom(struct Token *tok, struct Token *limit, 1311 struct Token **after_out, long long *out) 1312 { 1313 const struct Macro *macro; 1314 struct Token *after; 1315 int mark; 1316 1317 macro = find_macro(tok); 1318 if (macro != NULL && 1319 ((tok + 1 < limit && (tok + 1)->kind == TOK_LPAREN) || 1320 macro->param_count == 0)) { 1321 if (!expand_macro_tokens(tok, limit, macro, &after, &mark)) { 1322 return 0; 1323 } 1324 if (pool_used == mark) { 1325 pool_used = mark; 1326 return fail("bad expression"); 1327 } 1328 if (!eval_expr_range((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, out)) { 1329 pool_used = mark; 1330 return 0; 1331 } 1332 pool_used = mark; 1333 *after_out = after; 1334 return 1; 1335 } 1336 1337 if (!parse_int_token(tok, out)) { 1338 return 0; 1339 } 1340 *after_out = tok + 1; 1341 return 1; 1342 } 1343 1344 static int eval_expr_range(struct TokenSpan span, long long *out) 1345 { 1346 struct ExprFrame frames[MAX_EXPR_FRAMES]; 1347 int frame_top = 0; 1348 struct Token *pos = span.start; 1349 long long value = 0; 1350 long long result = 0; 1351 int have_value = 0; 1352 int have_result = 0; 1353 1354 for (;;) { 1355 if (have_value) { 1356 if (frame_top > 0) { 1357 struct ExprFrame *frame = &frames[frame_top - 1]; 1358 1359 if (frame->argc >= MAX_PARAMS) { 1360 return fail("bad expression"); 1361 } 1362 frame->args[frame->argc++] = value; 1363 have_value = 0; 1364 continue; 1365 } 
1366 if (have_result) { 1367 return fail("bad expression"); 1368 } 1369 result = value; 1370 have_result = 1; 1371 have_value = 0; 1372 continue; 1373 } 1374 1375 skip_expr_newlines(&pos, span.end); 1376 if (pos >= span.end) { 1377 break; 1378 } 1379 1380 if (pos->kind == TOK_LPAREN) { 1381 enum ExprOp op; 1382 1383 pos++; 1384 skip_expr_newlines(&pos, span.end); 1385 if (pos >= span.end) { 1386 return fail("bad expression"); 1387 } 1388 op = expr_op_code(pos); 1389 if (op == EXPR_INVALID) { 1390 return fail("bad expression"); 1391 } 1392 pos++; 1393 if (op == EXPR_STRLEN) { 1394 /* strlen is degenerate: argument is a TOK_STRING atom, 1395 * not a recursive expression. Handle inline and yield 1396 * the string's raw byte count (span.len - 2). */ 1397 skip_expr_newlines(&pos, span.end); 1398 if (pos >= span.end || pos->kind != TOK_STRING) { 1399 return fail("bad expression"); 1400 } 1401 if (pos->text.len < 2 || pos->text.ptr[0] != '"') { 1402 return fail("bad expression"); 1403 } 1404 value = (long long)(pos->text.len - 2); 1405 pos++; 1406 skip_expr_newlines(&pos, span.end); 1407 if (pos >= span.end || pos->kind != TOK_RPAREN) { 1408 return fail("bad expression"); 1409 } 1410 pos++; 1411 have_value = 1; 1412 continue; 1413 } 1414 if (frame_top >= MAX_EXPR_FRAMES) { 1415 return fail("expression overflow"); 1416 } 1417 frames[frame_top].op = op; 1418 frames[frame_top].argc = 0; 1419 frame_top++; 1420 continue; 1421 } 1422 1423 if (pos->kind == TOK_RPAREN) { 1424 if (frame_top <= 0) { 1425 return fail("bad expression"); 1426 } 1427 if (!apply_expr_op(frames[frame_top - 1].op, 1428 frames[frame_top - 1].args, 1429 frames[frame_top - 1].argc, 1430 &value)) { 1431 return 0; 1432 } 1433 frame_top--; 1434 pos++; 1435 have_value = 1; 1436 continue; 1437 } 1438 1439 if (!eval_expr_atom(pos, span.end, &pos, &value)) { 1440 return 0; 1441 } 1442 have_value = 1; 1443 } 1444 1445 if (frame_top != 0 || !have_result) { 1446 return fail("bad expression"); 1447 } 1448 if (pos != 
span.end) { 1449 return fail("bad expression"); 1450 } 1451 1452 *out = result; 1453 return 1; 1454 } 1455 1456 static int emit_hex_value(unsigned long long value, int bytes) 1457 { 1458 /* Wrap the hex digits in single quotes so M0 sees a STRING-literal 1459 * hex token, not a numeric token (which it would parse as decimal). */ 1460 char tmp[19]; 1461 static const char hex[] = "0123456789ABCDEF"; 1462 struct Token tok; 1463 int i; 1464 char *text_ptr; 1465 int total_len = 2 + 2 * bytes; 1466 1467 tmp[0] = '\''; 1468 for (i = 0; i < bytes; i++) { 1469 unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF); 1470 tmp[1 + 2 * i] = hex[b >> 4]; 1471 tmp[1 + 2 * i + 1] = hex[b & 0x0F]; 1472 } 1473 tmp[1 + 2 * bytes] = '\''; 1474 tmp[total_len] = '\0'; 1475 1476 text_ptr = append_text_len(tmp, total_len); 1477 if (text_ptr == NULL) { 1478 return 0; 1479 } 1480 tok.kind = TOK_STRING; 1481 tok.text.ptr = text_ptr; 1482 tok.text.len = total_len; 1483 return emit_token(&tok); 1484 } 1485 1486 static int expand_builtin_call(struct Stream *s, const struct Token *tok) 1487 { 1488 long long value; 1489 1490 if (tok + 1 >= s->end || (tok + 1)->kind != TOK_LPAREN) { 1491 return fail("bad builtin"); 1492 } 1493 if (!parse_args((struct Token *)tok + 1, s->end)) { 1494 return 0; 1495 } 1496 1497 if (token_text_eq(tok, "!") || token_text_eq(tok, "@") || 1498 token_text_eq(tok, "%") || token_text_eq(tok, "$")) { 1499 struct TokenSpan arg; 1500 struct Token *end_pos; 1501 int bytes; 1502 1503 if (arg_count != 1) { 1504 return fail("bad builtin"); 1505 } 1506 arg.start = arg_starts[0]; 1507 arg.end = arg_ends[0]; 1508 end_pos = call_end_pos; 1509 if (!eval_expr_range(arg, &value)) { 1510 return 0; 1511 } 1512 s->pos = end_pos; 1513 s->line_start = 0; 1514 bytes = token_text_eq(tok, "!") ? 1 : 1515 token_text_eq(tok, "@") ? 2 : 1516 token_text_eq(tok, "%") ? 
4 : 8; 1517 return emit_hex_value((unsigned long long)value, bytes); 1518 } 1519 1520 if (token_text_eq(tok, "%select")) { 1521 struct TokenSpan cond_arg, then_arg, else_arg, chosen; 1522 struct Token *end_pos; 1523 int mark; 1524 1525 if (arg_count != 3) { 1526 return fail("bad builtin"); 1527 } 1528 cond_arg.start = arg_starts[0]; cond_arg.end = arg_ends[0]; 1529 then_arg.start = arg_starts[1]; then_arg.end = arg_ends[1]; 1530 else_arg.start = arg_starts[2]; else_arg.end = arg_ends[2]; 1531 end_pos = call_end_pos; 1532 if (!eval_expr_range(cond_arg, &value)) { 1533 return 0; 1534 } 1535 chosen = (value != 0) ? then_arg : else_arg; 1536 s->pos = end_pos; 1537 s->line_start = 0; 1538 if (chosen.start == chosen.end) { 1539 return 1; 1540 } 1541 mark = pool_used; 1542 if (!copy_span_to_pool(chosen)) { 1543 pool_used = mark; 1544 return 0; 1545 } 1546 return push_pool_stream_from_mark(mark); 1547 } 1548 1549 if (token_text_eq(tok, "%str")) { 1550 struct Token *arg_tok; 1551 struct Token *end_pos; 1552 struct Token out_tok; 1553 char *text_ptr; 1554 int orig_len; 1555 int out_len; 1556 1557 if (arg_count != 1) { 1558 return fail("bad builtin"); 1559 } 1560 if (arg_ends[0] - arg_starts[0] != 1) { 1561 return fail("bad builtin"); 1562 } 1563 arg_tok = arg_starts[0]; 1564 if (arg_tok->kind != TOK_WORD) { 1565 return fail("bad builtin"); 1566 } 1567 end_pos = call_end_pos; 1568 1569 orig_len = arg_tok->text.len; 1570 out_len = orig_len + 2; 1571 if (text_used + out_len + 1 > MAX_TEXT) { 1572 return fail("text overflow"); 1573 } 1574 text_ptr = text_buf + text_used; 1575 text_buf[text_used++] = '"'; 1576 memcpy(text_buf + text_used, arg_tok->text.ptr, (size_t)orig_len); 1577 text_used += orig_len; 1578 text_buf[text_used++] = '"'; 1579 text_buf[text_used++] = '\0'; 1580 1581 out_tok.kind = TOK_STRING; 1582 out_tok.text.ptr = text_ptr; 1583 out_tok.text.len = out_len; 1584 s->pos = end_pos; 1585 s->line_start = 0; 1586 return emit_token(&out_tok); 1587 } 1588 1589 return 
fail("bad builtin"); 1590 } 1591 1592 static int expand_call(struct Stream *s, const struct Macro *macro) 1593 { 1594 struct Token *after; 1595 int mark; 1596 1597 if (!expand_macro_tokens(s->pos, s->end, macro, &after, &mark)) { 1598 return 0; 1599 } 1600 s->pos = after; 1601 s->line_start = 0; 1602 return push_pool_stream_from_mark(mark); 1603 } 1604 1605 static int push_scope(struct Stream *s) 1606 { 1607 s->pos++; 1608 if (s->pos >= s->end || s->pos->kind != TOK_WORD) { 1609 return fail("bad scope header"); 1610 } 1611 if (scope_depth >= MAX_SCOPE_DEPTH) { 1612 return fail("scope depth overflow"); 1613 } 1614 scope_stack[scope_depth++] = s->pos->text; 1615 s->pos++; 1616 if (s->pos < s->end && s->pos->kind != TOK_NEWLINE) { 1617 return fail("bad scope header"); 1618 } 1619 if (s->pos < s->end) { 1620 s->pos++; 1621 } 1622 s->line_start = 1; 1623 return 1; 1624 } 1625 1626 static int pop_scope(struct Stream *s) 1627 { 1628 s->pos++; 1629 if (scope_depth <= 0) { 1630 return fail("scope underflow"); 1631 } 1632 scope_depth--; 1633 while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) { 1634 s->pos++; 1635 } 1636 if (s->pos < s->end) { 1637 s->pos++; 1638 } 1639 s->line_start = 1; 1640 return 1; 1641 } 1642 1643 static int process_tokens(void) 1644 { 1645 if (!push_stream_span((struct TokenSpan){source_tokens, source_tokens + source_count}, -1)) { 1646 return 0; 1647 } 1648 1649 for (;;) { 1650 struct Stream *s; 1651 struct Token *tok; 1652 const struct Macro *macro; 1653 1654 s = current_stream(); 1655 if (s == NULL) { 1656 break; 1657 } 1658 if (s->pos >= s->end) { 1659 pop_stream(); 1660 continue; 1661 } 1662 1663 tok = s->pos; 1664 1665 if (s->line_start && 1666 tok->kind == TOK_WORD && 1667 token_text_eq(tok, "%macro")) { 1668 if (!define_macro(s)) { 1669 return 0; 1670 } 1671 continue; 1672 } 1673 1674 if (s->line_start && 1675 tok->kind == TOK_WORD && 1676 token_text_eq(tok, "%struct")) { 1677 if (!define_fielded(s, 8, "SIZE", 4)) { 1678 return 0; 1679 } 
1680 continue; 1681 } 1682 1683 if (s->line_start && 1684 tok->kind == TOK_WORD && 1685 token_text_eq(tok, "%enum")) { 1686 if (!define_fielded(s, 1, "COUNT", 5)) { 1687 return 0; 1688 } 1689 continue; 1690 } 1691 1692 if (s->line_start && 1693 tok->kind == TOK_WORD && 1694 token_text_eq(tok, "%scope")) { 1695 if (!push_scope(s)) { 1696 return 0; 1697 } 1698 continue; 1699 } 1700 1701 if (s->line_start && 1702 tok->kind == TOK_WORD && 1703 token_text_eq(tok, "%endscope")) { 1704 if (!pop_scope(s)) { 1705 return 0; 1706 } 1707 continue; 1708 } 1709 1710 if (tok->kind == TOK_NEWLINE) { 1711 s->pos++; 1712 s->line_start = 1; 1713 if (!emit_newline()) { 1714 return 0; 1715 } 1716 continue; 1717 } 1718 1719 if (tok->kind == TOK_WORD && 1720 tok + 1 < s->end && 1721 (tok + 1)->kind == TOK_LPAREN && 1722 (token_text_eq(tok, "!") || 1723 token_text_eq(tok, "@") || 1724 token_text_eq(tok, "%") || 1725 token_text_eq(tok, "$") || 1726 token_text_eq(tok, "%select") || 1727 token_text_eq(tok, "%str"))) { 1728 if (!expand_builtin_call(s, tok)) { 1729 return 0; 1730 } 1731 continue; 1732 } 1733 1734 macro = find_macro(tok); 1735 if (macro != NULL && 1736 ((tok + 1 < s->end && (tok + 1)->kind == TOK_LPAREN) || 1737 macro->param_count == 0)) { 1738 if (!expand_call(s, macro)) { 1739 return 0; 1740 } 1741 continue; 1742 } 1743 1744 s->pos++; 1745 s->line_start = 0; 1746 if (!emit_token(tok)) { 1747 return 0; 1748 } 1749 } 1750 1751 if (scope_depth != 0) { 1752 return fail("scope not closed"); 1753 } 1754 1755 if (output_used >= MAX_OUTPUT) { 1756 return fail("output overflow"); 1757 } 1758 output_buf[output_used] = '\0'; 1759 return 1; 1760 } 1761 1762 int main(int argc, char **argv) 1763 { 1764 FILE *in; 1765 FILE *out; 1766 size_t nread; 1767 1768 if (argc != 3) { 1769 fprintf(stderr, "usage: %s input.M1 output.M1\n", argv[0]); 1770 return 1; 1771 } 1772 1773 in = fopen(argv[1], "rb"); 1774 if (in == NULL) { 1775 perror(argv[1]); 1776 return 1; 1777 } 1778 nread = fread(input_buf, 
1, MAX_INPUT, in); 1779 if (ferror(in)) { 1780 perror(argv[1]); 1781 fclose(in); 1782 return 1; 1783 } 1784 fclose(in); 1785 if (nread >= MAX_INPUT) { 1786 fprintf(stderr, "input too large\n"); 1787 return 1; 1788 } 1789 input_buf[nread] = '\0'; 1790 1791 if (!lex_source(input_buf) || !process_tokens()) { 1792 fprintf(stderr, "m1macro: %s\n", error_msg != NULL ? error_msg : "failed"); 1793 return 1; 1794 } 1795 1796 out = fopen(argv[2], "wb"); 1797 if (out == NULL) { 1798 perror(argv[2]); 1799 return 1; 1800 } 1801 if (fwrite(output_buf, 1, (size_t)output_used, out) != (size_t)output_used) { 1802 perror(argv[2]); 1803 fclose(out); 1804 return 1; 1805 } 1806 fclose(out); 1807 return 0; 1808 }