M1pp.c (62117B)
1 /* 2 * Tiny single-pass M1pp macro expander. Output is consumed directly by 3 * hex2pp -- there is no intermediate M0/hex2 stage. All emission is in 4 * the byte/label/directive vocabulary hex2pp accepts. 5 * 6 * Syntax: 7 * %macro NAME(a, b) 8 * ... body ... 9 * %endm 10 * 11 * %struct NAME { f1 f2 ... } fixed-layout 8-byte-field aggregate 12 * %enum NAME { l1 l2 ... } incrementing integer constants 13 * 14 * %NAME(x, y) function-like macro call 15 * ## token pasting inside macro bodies 16 * !(expr) evaluate an integer S-expression, emit LE 8-bit hex 17 * @(expr) evaluate an integer S-expression, emit LE 16-bit hex 18 * %(expr) evaluate an integer S-expression, emit LE 32-bit hex 19 * $(expr) evaluate an integer S-expression, emit LE 64-bit hex 20 * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e 21 * %str(IDENT) stringify a single WORD token into a "..."-quoted literal 22 * %bytes(STR) emit the raw bytes of STR as contiguous hex digits 23 * 24 * %frame NAME / %endframe set/clear a single-slot "current frame" 25 * %local(NAME) expand to the body of <frame>_FRAME.<NAME> 26 * 27 * Lexical scoping for control-flow labels is delegated to hex2pp's 28 * `.scope` / `.endscope` (which nest). M1pp itself only handles 29 * per-expansion macro hygiene labels (`:@name` / `&@name`). 30 * 31 * Expression syntax is intentionally Lisp-shaped: 32 * atoms: decimal or 0x-prefixed integer literals 33 * calls: (+ a b), (- a b), (* a b), (/ a b), (% a b), (<< a b), (>> a b) 34 * (& a b), (| a b), (^ a b), (~ a), (= a b), (!= a b), 35 * (< a b), (<= a b), (> a b), (>= a b) 36 * 37 * Flow: 38 * 1. lex_source(): scan input_buf into source_tokens[]. Tokens are words, 39 * strings, newlines, parens, commas, and ## paste markers. Whitespace 40 * (excluding newlines) is dropped; # and ; comments are dropped. 41 * 42 * 2. process_tokens(): main loop driven by a stream stack (streams[]). 43 * The source token array is pushed as the initial stream. Each iteration 44 * pops a token from the top stream: 45 * 46 * %macro NAME(p,...) / %endm 47 * -> define_macro(): consume header + body tokens into macros[] and 48 * macro_body_tokens[]; register name and param list. Header is 49 * whitespace-insensitive (newlines inside (...) are skipped); 50 * %endm is recognized anywhere and must be followed by NEWLINE. 51 * A directive that started at line_start consumes its trailing 52 * newline; mid-line directives leave it for the main loop. 53 * 54 * !(e) / @(e) / %(e) / $(e) / %select(c,t,e) 55 * -> expand_builtin_call(): parse arg spans, eval S-expression(s) via 56 * eval_expr_range(), emit LE hex or push the chosen token span. 57 * Only fuses when ( is tight against the name (no whitespace). 58 * 59 * %NAME(...) matching a defined macro 60 * -> expand_call() -> expand_macro_tokens(): substitute arguments, 61 * apply ## paste via paste_pool_range(), write result into 62 * expand_pool[], then push that slice as a new stream (rescan). 63 * Tight ( required for paren-form; otherwise treated as 0-arg. 64 * 65 * Anything else 66 * -> emit_token() / emit_newline() directly into output_buf. 67 * 68 * When a stream is exhausted it is popped; pool_used is rewound to the 69 * stream's pool_mark, reclaiming the expand_pool space it used. 70 * 71 * 3. Write output_buf to the output file. 72 * 73 * Notes: 74 * - Macros are define-before-use. There is no prescan. 75 * - Expansion rescans by pushing expanded tokens back through the same loop. 76 * - There is no cycle detection. Recursive macros will loop until a limit. 77 * - Only recognized %NAME(...) calls expand. Other text passes through. 78 * - Output formatting is normalized to tokens plus '\n', not preserved. 79 */ 80 81 #include <errno.h> 82 #include <stdio.h> 83 #include <stdlib.h> 84 #include <string.h> 85 86 /* Caps chosen to mirror the M1pp.P1 BSS layout, sized so the cc.scm 87 * emission of tcc.flat.c (~6.5 MB of macro-rich .P1pp) lexes cleanly. 88 * The native binary is host-side, so static globals at these sizes 89 * just live in .bss / anonymous mmap without any of the ELF-segment 90 * sizing dance the bootstrap m1pp has to do. */ 91 #define MAX_INPUT 16777216 /* 16 MiB */ 92 #define MAX_OUTPUT 134217728 /* 128 MiB */ 93 #define MAX_TEXT 67108864 /* 64 MiB: 94 * paste tokens, hex literals from 95 * %(EXPR) evaluation, and per-call 96 * @local label rewrites all live 97 * here for the run's lifetime. cc.scm 98 * triggers hundreds of thousands of 99 * each across the tcc.c expansion. */ 100 #define MAX_TOKENS 8388608 /* 8 M slots × 32 B = 256 MiB */ 101 #define MAX_MACROS 1024 102 #define MAX_PARAMS 16 103 #define MAX_MACRO_BODY_TOKENS MAX_TOKENS 104 #define MAX_EXPAND 524288 /* 512 K × 32 B = 16 MiB: 105 * cc.scm wraps each C function in 106 * %fn(... { body }), and m1pp's 107 * expand_macro_tokens copies the 108 * argument tokens into the pool — 109 * so the entire body of a long 110 * function is resident in the pool 111 * while its outer %fn is active. 112 * tcc.c's next_nomacro1 (~5900 113 * lines × ~13 m1pp tokens/line ≈ 114 * 77 K tokens, ~2.5 MiB) plus 115 * inner expansions sit comfortably 116 * under 16 MiB. */ 117 #define MAX_STACK 64 118 #define MAX_EXPR_FRAMES 256 119 120 enum { 121 TOK_WORD, 122 TOK_STRING, 123 TOK_NEWLINE, 124 TOK_LPAREN, 125 TOK_RPAREN, 126 TOK_COMMA, 127 TOK_PASTE, 128 TOK_LBRACE, 129 TOK_RBRACE 130 }; 131 132 enum ExprOp { 133 EXPR_ADD, 134 EXPR_SUB, 135 EXPR_MUL, 136 EXPR_DIV, 137 EXPR_MOD, 138 EXPR_SHL, 139 EXPR_SHR, 140 EXPR_AND, 141 EXPR_OR, 142 EXPR_XOR, 143 EXPR_NOT, 144 EXPR_EQ, 145 EXPR_NE, 146 EXPR_LT, 147 EXPR_LE, 148 EXPR_GT, 149 EXPR_GE, 150 EXPR_STRLEN, 151 EXPR_INVALID 152 }; 153 154 struct TextSpan { 155 const char *ptr; 156 int len; 157 }; 158 159 struct Token { 160 int kind; 161 int tight; 162 int line; 163 struct TextSpan text; 164 }; 165 166 struct TokenSpan { 167 struct Token *start; 168 struct Token *end; 169 }; 170 171 struct Macro { 172 struct TextSpan name; 173 int param_count; 174 int has_paste; 175 struct TextSpan params[MAX_PARAMS]; 176 struct Token *body_start; 177 struct Token *body_end; 178 }; 179 180 struct Stream { 181 struct Token *start; 182 struct Token *end; 183 struct Token *pos; 184 int line_start; 185 int pool_mark; 186 }; 187 188 struct ExprFrame { 189 enum ExprOp op; 190 long long args[MAX_PARAMS]; 191 int argc; 192 }; 193 194 static char input_buf[MAX_INPUT + 1]; 195 static char output_buf[MAX_OUTPUT + 1]; 196 static char text_buf[MAX_TEXT]; 197 198 static struct Token source_tokens[MAX_TOKENS]; 199 static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS]; 200 /* Per-body-token classification cached at %macro definition time, so 201 * expand_macro_tokens never re-runs find_param / is_local_label_token in 202 * its hot loop. param_idx: 0 = not a param, k = params[k-1]. */ 203 static unsigned char macro_body_param_idx[MAX_MACRO_BODY_TOKENS]; 204 static unsigned char macro_body_is_local_label[MAX_MACRO_BODY_TOKENS]; 205 static struct Token expand_pool[MAX_EXPAND]; 206 static struct Macro macros[MAX_MACROS]; 207 static struct Stream streams[MAX_STACK]; 208 static struct TextSpan current_frame; 209 static int frame_active; 210 211 static int text_used; 212 static int source_count; 213 static int macro_count; 214 static int macro_body_used; 215 static int pool_used; 216 static int output_used; 217 static int output_need_space; 218 static int stream_top; 219 static int next_expansion_id; 220 static int current_line; 221 static int error_line; 222 static const char *input_path; 223 224 static struct Token *arg_starts[MAX_PARAMS]; 225 static struct Token *arg_ends[MAX_PARAMS]; 226 static int arg_count; 227 static struct Token *call_end_pos; 228 static int args_have_paste; 229 230 static const char *error_msg; 231 232 static int fail(const char *msg) 233 { 234 error_msg = msg; 235 error_line = current_line; 236 return 0; 237 } 238 239 static int is_space_no_nl(int c) 240 { 241 return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'; 242 } 243 244 static char *append_text_len(const char *s, int len) 245 { 246 int start; 247 248 if (text_used + len + 1 > MAX_TEXT) { 249 fail("text overflow"); 250 return NULL; 251 } 252 start = text_used; 253 memcpy(text_buf + text_used, s, (size_t)len); 254 text_used += len; 255 text_buf[text_used++] = '\0'; 256 return text_buf + start; 257 } 258 259 static int push_token(struct Token *buf, int *count, int max_count, 260 int kind, int tight, int line, struct TextSpan text) 261 { 262 if (*count >= max_count) { 263 return fail("token overflow"); 264 } 265 buf[*count].kind = kind; 266 buf[*count].tight = tight; 267 buf[*count].line = line; 268 buf[*count].text = text; 269 *count += 1; 270 return 1; 271 } 272 273 static int push_pool_token(struct Token tok) 274 { 275 if (pool_used >= MAX_EXPAND) { 276 return fail("expansion overflow"); 277 } 278 expand_pool[pool_used++] = tok; 279 return 1; 280 } 281 282 static int token_text_eq(const struct Token *tok, const char *s) 283 { 284 int len = (int)strlen(s); 285 286 return tok->text.len == len && 287 memcmp(tok->text.ptr, s, (size_t)len) == 0; 288 } 289 290 static int span_eq_token(struct TextSpan span, const struct Token *tok) 291 { 292 return span.len == tok->text.len && 293 memcmp(span.ptr, tok->text.ptr, (size_t)span.len) == 0; 294 } 295 296 static int lex_source(const char *src) 297 { 298 /* Track whether whitespace (space, tab, comment, OR newline) precedes 299 * the next token. tight=1 means "no whitespace before me"; only 300 * LPAREN's tight bit is consulted, to decide whether %FOO(...) / 301 * !(...) etc. are paren-call forms. */ 302 int i = 0; 303 int line = 1; 304 int saw_separator = 1; 305 306 while (src[i] != '\0') { 307 int start; 308 int len; 309 int tight; 310 311 current_line = line; 312 313 if (is_space_no_nl((unsigned char)src[i])) { 314 saw_separator = 1; 315 i++; 316 continue; 317 } 318 if (src[i] == '\n') { 319 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 320 TOK_NEWLINE, 0, line, (struct TextSpan){src + i, 1})) { 321 return 0; 322 } 323 line++; 324 saw_separator = 1; 325 i++; 326 continue; 327 } 328 if (src[i] == '"' || src[i] == '\'') { 329 int quote = src[i]; 330 331 tight = !saw_separator; 332 start = i; 333 i++; 334 while (src[i] != '\0' && src[i] != quote) { 335 if (src[i] == '\\' && src[i + 1] != '\0') { 336 /* Skip backslash + next char as a unit so the 337 * close-quote test doesn't fire on `\"`, and so 338 * `\\` doesn't leave the trailing `\` to start a 339 * spurious escape. The escape's *meaning* is 340 * decoded later (e.g. by %bytes); the lexer only 341 * cares about token boundaries. */ 342 if (src[i + 1] == '\n') { 343 line++; 344 } 345 i += 2; 346 continue; 347 } 348 if (src[i] == '\n') { 349 line++; 350 } 351 i++; 352 } 353 if (src[i] == quote) { 354 i++; 355 } 356 len = i - start; 357 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 358 TOK_STRING, tight, current_line, (struct TextSpan){src + start, len})) { 359 return 0; 360 } 361 saw_separator = 0; 362 continue; 363 } 364 if (src[i] == '#' && src[i + 1] == '#') { 365 tight = !saw_separator; 366 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 367 TOK_PASTE, tight, line, (struct TextSpan){src + i, 2})) { 368 return 0; 369 } 370 i += 2; 371 saw_separator = 0; 372 continue; 373 } 374 if (src[i] == '#' || src[i] == ';') { 375 saw_separator = 1; 376 while (src[i] != '\0' && src[i] != '\n') { 377 i++; 378 } 379 continue; 380 } 381 if (src[i] == '(') { 382 tight = !saw_separator; 383 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 384 TOK_LPAREN, tight, line, (struct TextSpan){src + i, 1})) { 385 return 0; 386 } 387 i++; 388 saw_separator = 0; 389 continue; 390 } 391 if (src[i] == ')') { 392 tight = !saw_separator; 393 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 394 TOK_RPAREN, tight, line, (struct TextSpan){src + i, 1})) { 395 return 0; 396 } 397 i++; 398 saw_separator = 0; 399 continue; 400 } 401 if (src[i] == ',') { 402 tight = !saw_separator; 403 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 404 TOK_COMMA, tight, line, (struct TextSpan){src + i, 1})) { 405 return 0; 406 } 407 i++; 408 saw_separator = 0; 409 continue; 410 } 411 if (src[i] == '{') { 412 tight = !saw_separator; 413 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 414 TOK_LBRACE, tight, line, (struct TextSpan){src + i, 1})) { 415 return 0; 416 } 417 i++; 418 saw_separator = 0; 419 continue; 420 } 421 if (src[i] == '}') { 422 tight = !saw_separator; 423 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 424 TOK_RBRACE, tight, line, (struct TextSpan){src + i, 1})) { 425 return 0; 426 } 427 i++; 428 saw_separator = 0; 429 continue; 430 } 431 432 tight = !saw_separator; 433 start = i; 434 while (src[i] != '\0' && 435 !is_space_no_nl((unsigned char)src[i]) && 436 src[i] != '\n' && 437 src[i] != '#' && 438 src[i] != ';' && 439 src[i] != '(' && 440 src[i] != ')' && 441 src[i] != ',' && 442 src[i] != '{' && 443 src[i] != '}' && 444 !(src[i] == '#' && src[i + 1] == '#')) { 445 i++; 446 } 447 len = i - start; 448 if (!push_token(source_tokens, &source_count, MAX_TOKENS, 449 TOK_WORD, tight, line, (struct TextSpan){src + start, len})) { 450 return 0; 451 } 452 saw_separator = 0; 453 } 454 455 return 1; 456 } 457 458 static const struct Macro *find_macro(const struct Token *tok) 459 { 460 int i; 461 462 if (tok->kind != TOK_WORD || tok->text.len < 2) { 463 return NULL; 464 } 465 if (tok->text.ptr[0] != '%') { 466 return NULL; 467 } 468 for (i = 0; i < macro_count; i++) { 469 if (macros[i].name.len == tok->text.len - 1 && 470 memcmp(tok->text.ptr + 1, 471 macros[i].name.ptr, 472 (size_t)macros[i].name.len) == 0) { 473 return ¯os[i]; 474 } 475 } 476 return NULL; 477 } 478 479 static int find_param(const struct Macro *m, const struct Token *tok) 480 { 481 int i; 482 483 if (tok->kind != TOK_WORD) { 484 return 0; 485 } 486 for (i = 0; i < m->param_count; i++) { 487 if (span_eq_token(m->params[i], tok)) { 488 return i + 1; 489 } 490 } 491 return 0; 492 } 493 494 static int emit_newline(void) 495 { 496 if (output_used + 1 >= MAX_OUTPUT) { 497 return fail("output overflow"); 498 } 499 output_buf[output_used++] = '\n'; 500 output_need_space = 0; 501 return 1; 502 } 503 504 static int emit_string_as_bytes(const struct Token *tok); 505 static int emit_hex_value(unsigned long long value, int bytes); 506 static int is_local_label_token(const struct Token *tok); 507 508 static int emit_token(const struct Token *tok) 509 { 510 if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) { 511 return 1; 512 } 513 if (tok->kind == TOK_STRING) { 514 return emit_string_as_bytes(tok); 515 } 516 if (output_need_space) { 517 if (output_used + 1 >= MAX_OUTPUT) { 518 return fail("output overflow"); 519 } 520 output_buf[output_used++] = ' '; 521 } 522 if (output_used + tok->text.len >= MAX_OUTPUT) { 523 return fail("output overflow"); 524 } 525 memcpy(output_buf + output_used, tok->text.ptr, 526 (size_t)tok->text.len); 527 output_used += tok->text.len; 528 output_need_space = 1; 529 return 1; 530 } 531 532 /* Decode a "..." or '...' literal and emit one TOK_WORD per byte 533 * (each token's text is the two hex digits for that byte). Recognised 534 * escapes inside the literal: \n \t \r \0 \\ \" \xNN. No NUL is 535 * appended; user code writes one explicitly if needed. */ 536 static int emit_string_as_bytes(const struct Token *tok) 537 { 538 const char *src; 539 int src_len; 540 int src_i; 541 542 if (tok->text.len < 2) { 543 return fail("bad string"); 544 } 545 src = tok->text.ptr + 1; 546 src_len = tok->text.len - 2; 547 src_i = 0; 548 while (src_i < src_len) { 549 unsigned int b; 550 char c = src[src_i++]; 551 if (c == '\\') { 552 char e; 553 if (src_i >= src_len) { 554 return fail("bad escape"); 555 } 556 e = src[src_i++]; 557 if (e == 'n') b = 0x0A; 558 else if (e == 't') b = 0x09; 559 else if (e == 'r') b = 0x0D; 560 else if (e == '0') b = 0x00; 561 else if (e == '\\') b = 0x5C; 562 else if (e == '"') b = 0x22; 563 else if (e == 'x') { 564 int hi, lo, hv, lv; 565 if (src_i + 2 > src_len) { 566 return fail("bad escape"); 567 } 568 hi = (unsigned char)src[src_i++]; 569 lo = (unsigned char)src[src_i++]; 570 hv = (hi >= '0' && hi <= '9') ? hi - '0' : 571 (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 : 572 (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : -1; 573 lv = (lo >= '0' && lo <= '9') ? lo - '0' : 574 (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 : 575 (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 : -1; 576 if (hv < 0 || lv < 0) { 577 return fail("bad escape"); 578 } 579 b = (unsigned int)((hv << 4) | lv); 580 } else { 581 return fail("bad escape"); 582 } 583 } else { 584 b = (unsigned char)c; 585 } 586 if (!emit_hex_value((unsigned long long)b, 1)) { 587 return 0; 588 } 589 } 590 return 1; 591 } 592 593 static int push_stream_span(struct TokenSpan span, int pool_mark) 594 { 595 struct Stream *s; 596 597 if (stream_top >= MAX_STACK) { 598 return fail("stream overflow"); 599 } 600 s = &streams[stream_top++]; 601 s->start = span.start; 602 s->end = span.end; 603 s->pos = span.start; 604 s->line_start = 1; 605 s->pool_mark = pool_mark; 606 return 1; 607 } 608 609 static struct Stream *current_stream(void) 610 { 611 if (stream_top <= 0) { 612 return NULL; 613 } 614 return &streams[stream_top - 1]; 615 } 616 617 static void pop_stream(void) 618 { 619 if (stream_top <= 0) { 620 return; 621 } 622 stream_top--; 623 if (streams[stream_top].pool_mark >= 0) { 624 pool_used = streams[stream_top].pool_mark; 625 } 626 } 627 628 static int copy_span_to_pool(struct TokenSpan span) 629 { 630 struct Token *tok; 631 632 for (tok = span.start; tok < span.end; tok++) { 633 if (!push_pool_token(*tok)) { 634 return 0; 635 } 636 } 637 return 1; 638 } 639 640 static int push_pool_stream_from_mark(int mark) 641 { 642 if (pool_used == mark) { 643 pool_used = mark; 644 return 1; 645 } 646 return push_stream_span((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, 647 mark); 648 } 649 650 static void skip_newlines(struct Token **pos, struct Token *end) 651 { 652 while (*pos < end && (*pos)->kind == TOK_NEWLINE) { 653 *pos += 1; 654 } 655 } 656 657 static int emit_decimal_text(long long value, struct TextSpan *out) 658 { 659 /* Render a non-negative integer as decimal into text_buf and 660 * return the span. No snprintf; plain reverse-fill. */ 661 char digits[24]; 662 int digit_count = 0; 663 long long v = value; 664 int start; 665 int i; 666 667 if (v < 0) { 668 return fail("bad directive"); 669 } 670 if (v == 0) { 671 digits[digit_count++] = '0'; 672 } else { 673 while (v > 0) { 674 digits[digit_count++] = (char)('0' + (v % 10)); 675 v /= 10; 676 } 677 } 678 679 if (text_used + digit_count + 1 > MAX_TEXT) { 680 return fail("text overflow"); 681 } 682 start = text_used; 683 for (i = digit_count - 1; i >= 0; i--) { 684 text_buf[text_used++] = digits[i]; 685 } 686 text_buf[text_used++] = '\0'; 687 out->ptr = text_buf + start; 688 out->len = digit_count; 689 return 1; 690 } 691 692 static int emit_dotted_name(struct TextSpan base, const char *suffix, 693 int suffix_len, struct TextSpan *out) 694 { 695 int total = base.len + 1 + suffix_len; 696 int start; 697 698 if (text_used + total + 1 > MAX_TEXT) { 699 return fail("text overflow"); 700 } 701 start = text_used; 702 memcpy(text_buf + text_used, base.ptr, (size_t)base.len); 703 text_used += base.len; 704 text_buf[text_used++] = '.'; 705 memcpy(text_buf + text_used, suffix, (size_t)suffix_len); 706 text_used += suffix_len; 707 text_buf[text_used++] = '\0'; 708 out->ptr = text_buf + start; 709 out->len = total; 710 return 1; 711 } 712 713 static int define_fielded_macro(struct TextSpan base, const char *suffix, 714 int suffix_len, long long value) 715 { 716 struct Macro *m; 717 struct Token body_tok; 718 719 if (macro_count >= MAX_MACROS) { 720 return fail("too many macros"); 721 } 722 if (macro_body_used >= MAX_MACRO_BODY_TOKENS) { 723 return fail("macro body overflow"); 724 } 725 m = ¯os[macro_count]; 726 memset(m, 0, sizeof(*m)); 727 if (!emit_dotted_name(base, suffix, suffix_len, &m->name)) { 728 return 0; 729 } 730 m->param_count = 0; 731 body_tok.kind = TOK_WORD; 732 body_tok.tight = 0; 733 body_tok.line = current_line; 734 if (!emit_decimal_text(value, &body_tok.text)) { 735 return 0; 736 } 737 m->body_start = macro_body_tokens + macro_body_used; 738 macro_body_param_idx[macro_body_used] = 0; 739 macro_body_is_local_label[macro_body_used] = 0; 740 macro_body_tokens[macro_body_used++] = body_tok; 741 m->body_end = macro_body_tokens + macro_body_used; 742 macro_count++; 743 return 1; 744 } 745 746 static int define_fielded(struct Stream *s, long long stride, 747 const char *total_name, int total_name_len) 748 { 749 /* Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` and 750 * synthesizes N+1 zero-parameter macros: 751 * NAME.field_k -> k * stride 752 * NAME.<total> -> N * stride (SIZE for struct, COUNT for enum) 753 * The closing } must be immediately followed by TOK_NEWLINE. The 754 * newline is consumed iff the directive started at line_start. */ 755 struct TextSpan base; 756 long long index = 0; 757 int started_at_line_start = s->line_start; 758 759 s->pos++; 760 skip_newlines(&s->pos, s->end); 761 if (s->pos >= s->end || s->pos->kind != TOK_WORD) { 762 return fail("bad directive"); 763 } 764 base = s->pos->text; 765 s->pos++; 766 767 skip_newlines(&s->pos, s->end); 768 if (s->pos >= s->end || s->pos->kind != TOK_LBRACE) { 769 return fail("bad directive"); 770 } 771 s->pos++; 772 773 for (;;) { 774 while (s->pos < s->end && 775 (s->pos->kind == TOK_COMMA || s->pos->kind == TOK_NEWLINE)) { 776 s->pos++; 777 } 778 if (s->pos >= s->end) { 779 return fail("unterminated directive"); 780 } 781 if (s->pos->kind == TOK_RBRACE) { 782 s->pos++; 783 break; 784 } 785 if (s->pos->kind != TOK_WORD) { 786 return fail("bad directive"); 787 } 788 if (!define_fielded_macro(base, s->pos->text.ptr, s->pos->text.len, 789 index * stride)) { 790 return 0; 791 } 792 s->pos++; 793 index++; 794 } 795 796 if (!define_fielded_macro(base, total_name, total_name_len, index * stride)) { 797 return 0; 798 } 799 800 if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) { 801 return fail("expected newline after struct/enum"); 802 } 803 if (started_at_line_start) { 804 s->pos++; 805 s->line_start = 1; 806 } 807 return 1; 808 } 809 810 static int define_macro(struct Stream *s) 811 { 812 /* Header is whitespace-insensitive: newlines inside (...) and around 813 * the keywords are skipped. Body collection skips newlines that fall 814 * between `)` and the first body token (so `%macro N()\nbody\n%endm` 815 * has body=[WORD body, NEWLINE], same as the old required-newline form). 816 * %endm is recognized anywhere in the body; the next token must be 817 * TOK_NEWLINE. The newline is consumed only when the directive started 818 * at s->line_start — that way mid-line directives leave the user's 819 * trailing newline in the stream for the main loop to emit. */ 820 struct Macro *m; 821 int started_at_line_start = s->line_start; 822 823 if (macro_count >= MAX_MACROS) { 824 return fail("too many macros"); 825 } 826 if (macro_body_used >= MAX_MACRO_BODY_TOKENS) { 827 return fail("macro body overflow"); 828 } 829 830 m = ¯os[macro_count]; 831 memset(m, 0, sizeof(*m)); 832 s->pos++; 833 834 skip_newlines(&s->pos, s->end); 835 if (s->pos >= s->end || s->pos->kind != TOK_WORD) { 836 return fail("bad macro header"); 837 } 838 m->name = s->pos->text; 839 s->pos++; 840 841 skip_newlines(&s->pos, s->end); 842 if (s->pos >= s->end || s->pos->kind != TOK_LPAREN) { 843 return fail("bad macro header"); 844 } 845 s->pos++; 846 847 skip_newlines(&s->pos, s->end); 848 if (s->pos < s->end && s->pos->kind != TOK_RPAREN) { 849 while (1) { 850 if (m->param_count >= MAX_PARAMS) { 851 return fail("bad macro header"); 852 } 853 if (s->pos >= s->end || s->pos->kind != TOK_WORD) { 854 return fail("bad macro header"); 855 } 856 m->params[m->param_count] = s->pos->text; 857 m->param_count++; 858 s->pos++; 859 skip_newlines(&s->pos, s->end); 860 if (s->pos < s->end && s->pos->kind == TOK_COMMA) { 861 s->pos++; 862 skip_newlines(&s->pos, s->end); 863 continue; 864 } 865 break; 866 } 867 } 868 869 if (s->pos >= s->end || s->pos->kind != TOK_RPAREN) { 870 return fail("bad macro header"); 871 } 872 s->pos++; 873 skip_newlines(&s->pos, s->end); 874 875 m->body_start = macro_body_tokens + macro_body_used; 876 while (s->pos < s->end) { 877 int idx; 878 879 if (s->pos->kind == TOK_WORD && token_text_eq(s->pos, "%endm")) { 880 s->pos++; 881 if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) { 882 return fail("expected newline after %endm"); 883 } 884 if (started_at_line_start) { 885 s->pos++; 886 s->line_start = 1; 887 } 888 m->body_end = macro_body_tokens + macro_body_used; 889 macro_count++; 890 return 1; 891 } 892 if (macro_body_used >= MAX_MACRO_BODY_TOKENS) { 893 return fail("macro body overflow"); 894 } 895 idx = macro_body_used; 896 macro_body_tokens[idx] = *s->pos; 897 macro_body_param_idx[idx] = (unsigned char)find_param(m, s->pos); 898 macro_body_is_local_label[idx] = 899 is_local_label_token(s->pos) ? 1 : 0; 900 if (s->pos->kind == TOK_PASTE) { 901 m->has_paste = 1; 902 } 903 macro_body_used++; 904 s->pos++; 905 } 906 907 return fail("unterminated macro"); 908 } 909 910 static int parse_args(struct Token *lparen, struct Token *limit) 911 { 912 struct Token *tok = lparen + 1; 913 struct Token *arg_start = tok; 914 int depth = 1; 915 int brace_depth = 0; 916 int arg_index = 0; 917 918 args_have_paste = 0; 919 920 while (tok < limit) { 921 if (tok->kind == TOK_PASTE) { 922 args_have_paste = 1; 923 } 924 if (tok->kind == TOK_LPAREN) { 925 depth++; 926 tok++; 927 continue; 928 } 929 if (tok->kind == TOK_RPAREN) { 930 depth--; 931 if (depth == 0) { 932 if (brace_depth != 0) { 933 return fail("unbalanced braces"); 934 } 935 if (arg_start == tok && arg_index == 0) { 936 arg_count = 0; 937 } else { 938 if (arg_index >= MAX_PARAMS) { 939 return fail("too many args"); 940 } 941 arg_starts[arg_index] = arg_start; 942 arg_ends[arg_index] = tok; 943 arg_count = arg_index + 1; 944 } 945 call_end_pos = tok + 1; 946 return 1; 947 } 948 tok++; 949 continue; 950 } 951 if (tok->kind == TOK_LBRACE) { 952 brace_depth++; 953 tok++; 954 continue; 955 } 956 if (tok->kind == TOK_RBRACE) { 957 if (brace_depth <= 0) { 958 return fail("unbalanced braces"); 959 } 960 brace_depth--; 961 tok++; 962 continue; 963 } 964 if (tok->kind == TOK_COMMA && depth == 1 && brace_depth == 0) { 965 if (arg_index >= MAX_PARAMS) { 966 return fail("too many args"); 967 } 968 arg_starts[arg_index] = arg_start; 969 arg_ends[arg_index] = tok; 970 arg_index++; 971 arg_start = tok + 1; 972 tok++; 973 continue; 974 } 975 tok++; 976 } 977 978 return fail("unterminated macro call"); 979 } 980 981 static int arg_is_braced(struct TokenSpan span) 982 { 983 struct Token *tok; 984 int depth; 985 986 if (span.end - span.start < 2) { 987 return 0; 988 } 989 if (span.start->kind != TOK_LBRACE || 990 (span.end - 1)->kind != TOK_RBRACE) { 991 return 0; 992 } 993 depth = 0; 994 for (tok = span.start; tok < span.end; tok++) { 995 if (tok->kind == TOK_LBRACE) { 996 depth++; 997 } else if (tok->kind == TOK_RBRACE) { 998 depth--; 999 if (depth == 0 && tok != span.end - 1) { 1000 return 0; 1001 } 1002 } 1003 } 1004 return depth == 0; 1005 } 1006 1007 static int copy_arg_tokens_to_pool(struct TokenSpan span) 1008 { 1009 if (span.start == span.end) { 1010 return fail("bad macro argument"); 1011 } 1012 if (arg_is_braced(span)) { 1013 struct TokenSpan inner; 1014 inner.start = span.start + 1; 1015 inner.end = span.end - 1; 1016 if (inner.start == inner.end) { 1017 return 1; 1018 } 1019 return copy_span_to_pool(inner); 1020 } 1021 return copy_span_to_pool(span); 1022 } 1023 1024 static int copy_paste_arg_to_pool(struct TokenSpan span) 1025 { 1026 if (arg_is_braced(span)) { 1027 return fail("bad macro argument"); 1028 } 1029 if (span.end - span.start != 1) { 1030 return fail("bad macro argument"); 1031 } 1032 return copy_span_to_pool(span); 1033 } 1034 1035 static int append_pasted_token(struct Token *dst, 1036 const struct Token *left, 1037 const struct Token *right) 1038 { 1039 char tmp[512]; 1040 char *text_ptr; 1041 int n; 1042 1043 n = snprintf(tmp, sizeof(tmp), "%.*s%.*s", 1044 left->text.len, left->text.ptr, 1045 right->text.len, right->text.ptr); 1046 if (n < 0 || n >= (int)sizeof(tmp)) { 1047 return fail("bad paste"); 1048 } 1049 text_ptr = append_text_len(tmp, n); 1050 if (text_ptr == NULL) { 1051 return 0; 1052 } 1053 dst->kind = TOK_WORD; 1054 dst->tight = 0; 1055 dst->text.ptr = text_ptr; 1056 dst->text.len = n; 1057 return 1; 1058 } 1059 1060 static int paste_pool_range(int mark) 1061 { 1062 /* Skip newlines on both sides of TOK_PASTE: a body like `foo ##\n bar` 1063 * pastes to `foobar`, discarding the intervening newline. The left 1064 * operand is the rightmost non-newline already copied to `out`; the 1065 * right operand is the next non-newline past PASTE in `in`. */ 1066 struct Token *start = expand_pool + mark; 1067 struct Token *in = start; 1068 struct Token *out = start; 1069 struct Token *end = expand_pool + pool_used; 1070 1071 while (in < end) { 1072 if (in->kind == TOK_PASTE) { 1073 struct Token *left = out; 1074 struct Token *right = in + 1; 1075 1076 while (left > start && (left - 1)->kind == TOK_NEWLINE) { 1077 left--; 1078 } 1079 if (left == start) { 1080 pool_used = mark; 1081 return fail("bad paste"); 1082 } 1083 left--; 1084 if (left->kind == TOK_PASTE) { 1085 pool_used = mark; 1086 return fail("bad paste"); 1087 } 1088 while (right < end && right->kind == TOK_NEWLINE) { 1089 right++; 1090 } 1091 if (right >= end || right->kind == TOK_PASTE) { 1092 pool_used = mark; 1093 return fail("bad paste"); 1094 } 1095 if (!append_pasted_token(left, left, right)) { 1096 pool_used = mark; 1097 return 0; 1098 } 1099 out = left + 1; 1100 in = right + 1; 1101 continue; 1102 } 1103 if (out != in) { 1104 *out = *in; 1105 } 1106 out++; 1107 in++; 1108 } 1109 1110 pool_used = (int)(out - expand_pool); 1111 return 1; 1112 } 1113 1114 static int is_local_label_token(const struct Token *tok) 1115 { 1116 if (tok->kind != TOK_WORD || tok->text.len < 3) { 1117 return 0; 1118 } 1119 if (tok->text.ptr[0] != ':' && tok->text.ptr[0] != '&') { 1120 return 0; 1121 } 1122 if (tok->text.ptr[1] != '@') { 1123 return 0; 1124 } 1125 return 1; 1126 } 1127 1128 static int push_local_label_token(const struct Token *tok, int expansion_id) 1129 { 1130 /* Rewrite ":@name" -> ":name__NN", "&@name" -> "&name__NN". 1131 * Build the text directly in text_buf so the resulting span is stable. */ 1132 char digits[16]; 1133 int digit_count = 0; 1134 int unsigned_id; 1135 int start; 1136 int total; 1137 int i; 1138 struct Token out; 1139 1140 unsigned_id = expansion_id; 1141 if (unsigned_id == 0) { 1142 digits[digit_count++] = '0'; 1143 } else { 1144 while (unsigned_id > 0) { 1145 digits[digit_count++] = (char)('0' + (unsigned_id % 10)); 1146 unsigned_id /= 10; 1147 } 1148 } 1149 1150 /* Reserve: sigil(1) + tail(len-2) + "__"(2) + digits + NUL. */ 1151 total = 1 + (tok->text.len - 2) + 2 + digit_count; 1152 if (text_used + total + 1 > MAX_TEXT) { 1153 return fail("text overflow"); 1154 } 1155 start = text_used; 1156 text_buf[text_used++] = tok->text.ptr[0]; 1157 memcpy(text_buf + text_used, tok->text.ptr + 2, (size_t)(tok->text.len - 2)); 1158 text_used += tok->text.len - 2; 1159 text_buf[text_used++] = '_'; 1160 text_buf[text_used++] = '_'; 1161 for (i = digit_count - 1; i >= 0; i--) { 1162 text_buf[text_used++] = digits[i]; 1163 } 1164 text_buf[text_used++] = '\0'; 1165 1166 out.kind = TOK_WORD; 1167 out.tight = 0; 1168 out.line = current_line; 1169 out.text.ptr = text_buf + start; 1170 out.text.len = total; 1171 return push_pool_token(out); 1172 } 1173 1174 static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, 1175 const struct Macro *m, struct Token **after_out, 1176 int *mark_out) 1177 { 1178 struct Token *body_tok; 1179 struct Token *end_pos; 1180 int mark; 1181 int expansion_id; 1182 int saw_arg_paste = 0; 1183 1184 if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN && 1185 (call_tok + 1)->tight) { 1186 if (!parse_args(call_tok + 1, limit)) { 1187 return 0; 1188 } 1189 if (arg_count != m->param_count) { 1190 return fail("wrong arg count"); 1191 } 1192 end_pos = call_end_pos; 1193 saw_arg_paste = args_have_paste; 1194 } else if (m->param_count == 0) { 1195 arg_count = 0; 1196 end_pos = call_tok + 1; 1197 } else { 1198 return fail("bad macro call"); 1199 } 1200 1201 expansion_id = ++next_expansion_id; 1202 mark = pool_used; 1203 for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) { 1204 int idx = (int)(body_tok - macro_body_tokens); 1205 int param_idx = macro_body_param_idx[idx]; 1206 int pasted = 0; 1207 int ok; 1208 1209 if (param_idx != 0) { 1210 struct TokenSpan arg = {arg_starts[param_idx - 1], arg_ends[param_idx - 1]}; 1211 pasted = (body_tok > m->body_start && (body_tok - 1)->kind == TOK_PASTE) || 1212 (body_tok + 1 < m->body_end && (body_tok + 1)->kind == TOK_PASTE); 1213 ok = pasted ? copy_paste_arg_to_pool(arg) : copy_arg_tokens_to_pool(arg); 1214 if (!ok) { 1215 pool_used = mark; 1216 return 0; 1217 } 1218 continue; 1219 } 1220 if (macro_body_is_local_label[idx]) { 1221 if (!push_local_label_token(body_tok, expansion_id)) { 1222 pool_used = mark; 1223 return 0; 1224 } 1225 continue; 1226 } 1227 if (!push_pool_token(*body_tok)) { 1228 pool_used = mark; 1229 return 0; 1230 } 1231 } 1232 1233 if ((m->has_paste || saw_arg_paste) && !paste_pool_range(mark)) { 1234 return 0; 1235 } 1236 *after_out = end_pos; 1237 *mark_out = mark; 1238 return 1; 1239 } 1240 1241 static int parse_int_token(const struct Token *tok, long long *out) 1242 { 1243 char tmp[128]; 1244 char *end; 1245 unsigned long long uv; 1246 long long sv; 1247 1248 if (tok->kind != TOK_WORD || tok->text.len <= 0 || tok->text.len >= (int)sizeof(tmp)) { 1249 return fail("bad integer"); 1250 } 1251 memcpy(tmp, tok->text.ptr, (size_t)tok->text.len); 1252 tmp[tok->text.len] = '\0'; 1253 1254 errno = 0; 1255 if (tmp[0] == '-') { 1256 sv = strtoll(tmp, &end, 0); 1257 if (errno != 0 || *end != '\0') { 1258 return fail("bad integer"); 1259 } 1260 *out = sv; 1261 return 1; 1262 } 1263 1264 uv = strtoull(tmp, &end, 0); 1265 if (errno != 0 || *end != '\0') { 1266 return fail("bad integer"); 1267 } 1268 *out = (long long)uv; 1269 return 1; 1270 } 1271 1272 static enum ExprOp expr_op_code(const struct Token *tok) 1273 { 1274 if (tok->kind != TOK_WORD) { 1275 return EXPR_INVALID; 1276 } 1277 if (token_text_eq(tok, "+")) { 1278 return EXPR_ADD; 1279 } 1280 if (token_text_eq(tok, "-")) { 1281 return EXPR_SUB; 1282 } 1283 if (token_text_eq(tok, "*")) { 1284 return EXPR_MUL; 1285 } 1286 if (token_text_eq(tok, "/")) { 1287 return EXPR_DIV; 1288 } 1289 if (token_text_eq(tok, "%")) { 1290 return EXPR_MOD; 1291 } 1292 if (token_text_eq(tok, "<<")) { 1293 return EXPR_SHL; 1294 } 1295 if (token_text_eq(tok, ">>")) { 1296 return EXPR_SHR; 1297 } 1298 if (token_text_eq(tok, "&")) { 1299 return EXPR_AND; 1300 } 1301 if (token_text_eq(tok, "|")) { 1302 return EXPR_OR; 1303 } 1304 if (token_text_eq(tok, "^")) { 1305 return EXPR_XOR; 1306 } 1307 if (token_text_eq(tok, "~")) { 1308 return EXPR_NOT; 1309 } 1310 if (token_text_eq(tok, "=")) { 1311 return EXPR_EQ; 1312 } 1313 if (token_text_eq(tok, "!=")) { 1314 return EXPR_NE; 1315 } 1316 if (token_text_eq(tok, "<")) { 1317 return EXPR_LT; 1318 } 1319 if (token_text_eq(tok, "<=")) { 1320 return EXPR_LE; 1321 } 1322 if (token_text_eq(tok, ">")) { 1323 return EXPR_GT; 1324 } 1325 if (token_text_eq(tok, ">=")) { 1326 return EXPR_GE; 1327 } 1328 if (token_text_eq(tok, "strlen")) { 1329 return EXPR_STRLEN; 1330 } 1331 return EXPR_INVALID; 1332 } 1333 1334 static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long long *out) 1335 { 1336 int i; 1337 1338 switch (op) { 1339 case EXPR_ADD: 1340 if (argc < 1) { 1341 return fail("bad expression"); 1342 } 1343 *out = args[0]; 1344 for (i = 1; i < argc; i++) { 1345 *out += args[i]; 1346 } 1347 return 1; 1348 case EXPR_SUB: 1349 if (argc < 1) { 1350 return fail("bad expression"); 1351 } 1352 *out = (argc == 1) ? -args[0] : args[0]; 1353 for (i = 1; i < argc; i++) { 1354 *out -= args[i]; 1355 } 1356 return 1; 1357 case EXPR_MUL: 1358 if (argc < 1) { 1359 return fail("bad expression"); 1360 } 1361 *out = args[0]; 1362 for (i = 1; i < argc; i++) { 1363 *out *= args[i]; 1364 } 1365 return 1; 1366 case EXPR_DIV: 1367 if (argc != 2 || args[1] == 0) { 1368 return fail("bad expression"); 1369 } 1370 *out = args[0] / args[1]; 1371 return 1; 1372 case EXPR_MOD: 1373 if (argc != 2 || args[1] == 0) { 1374 return fail("bad expression"); 1375 } 1376 *out = args[0] % args[1]; 1377 return 1; 1378 case EXPR_SHL: 1379 if (argc != 2) { 1380 return fail("bad expression"); 1381 } 1382 *out = (long long)((unsigned long long)args[0] << args[1]); 1383 return 1; 1384 case EXPR_SHR: 1385 if (argc != 2) { 1386 return fail("bad expression"); 1387 } 1388 *out = args[0] >> args[1]; 1389 return 1; 1390 case EXPR_AND: 1391 if (argc < 1) { 1392 return fail("bad expression"); 1393 } 1394 *out = args[0]; 1395 for (i = 1; i < argc; i++) { 1396 *out &= args[i]; 1397 } 1398 return 1; 1399 case EXPR_OR: 1400 if (argc < 1) { 1401 return fail("bad expression"); 1402 } 1403 *out = args[0]; 1404 for (i = 1; i < argc; i++) { 1405 *out |= args[i]; 1406 } 1407 return 1; 1408 case EXPR_XOR: 1409 if (argc < 1) { 1410 return fail("bad expression"); 1411 } 1412 *out = args[0]; 1413 for (i = 1; i < argc; i++) { 1414 *out ^= args[i]; 1415 } 1416 return 1; 1417 case EXPR_NOT: 1418 if (argc != 1) { 1419 return fail("bad expression"); 1420 } 1421 *out = ~args[0]; 1422 return 1; 1423 case EXPR_EQ: 1424 if (argc != 2) { 1425 return fail("bad expression"); 1426 } 1427 *out = (args[0] == args[1]); 1428 return 1; 1429 case EXPR_NE: 1430 if (argc != 2) { 1431 return fail("bad expression"); 1432 } 1433 *out = (args[0] != args[1]); 1434 return 1; 1435 case EXPR_LT: 1436 if (argc != 2) { 1437 return fail("bad expression"); 1438 } 1439 *out = (args[0] < args[1]); 1440 return 1; 1441 case EXPR_LE: 1442 if (argc != 2) { 1443 return fail("bad expression"); 1444 } 1445 *out = (args[0] <= args[1]); 1446 return 1; 1447 case EXPR_GT: 1448 if (argc != 2) { 1449 return fail("bad expression"); 1450 } 1451 *out = (args[0] > args[1]); 1452 return 1; 1453 case EXPR_GE: 1454 if (argc != 2) { 1455 return fail("bad expression"); 1456 } 1457 *out = (args[0] >= args[1]); 1458 return 1; 1459 case EXPR_STRLEN: 1460 case EXPR_INVALID: 1461 break; 1462 } 1463 1464 return fail("bad expression"); 1465 } 1466 1467 static int eval_expr_range(struct TokenSpan span, long long *out); 1468 1469 static int expand_local_into_pool(struct Token *call_tok, struct Token *limit, 1470 struct Token **after_out, int *mark_out) 1471 { 1472 /* Resolve %local(NAME) against the current frame: build the lookup 1473 * key "<frame>_FRAME.<NAME>" and copy the matching macro's body 1474 * into the pool. NAME must be exactly one WORD token. The pool 1475 * mark and the position past the call's `)` are returned so the 1476 * caller can either push the body as a stream (process_tokens) or 1477 * recursively eval it as an expression (eval_expr_atom). */ 1478 char name[256]; 1479 int frame_len; 1480 int arg_len; 1481 int name_len; 1482 int i; 1483 const struct Macro *m = NULL; 1484 struct Token *arg_tok; 1485 int mark = pool_used; 1486 1487 if (call_tok + 1 >= limit || (call_tok + 1)->kind != TOK_LPAREN || 1488 !(call_tok + 1)->tight) { 1489 return fail("bad builtin"); 1490 } 1491 if (!parse_args(call_tok + 1, limit)) { 1492 return 0; 1493 } 1494 if (arg_count != 1) { 1495 return fail("bad builtin"); 1496 } 1497 if (arg_ends[0] - arg_starts[0] != 1) { 1498 return fail("bad builtin"); 1499 } 1500 arg_tok = arg_starts[0]; 1501 if (arg_tok->kind != TOK_WORD) { 1502 return fail("bad builtin"); 1503 } 1504 if (!frame_active) { 1505 return fail("local outside frame"); 1506 } 1507 1508 frame_len = current_frame.len; 1509 arg_len = arg_tok->text.len; 1510 name_len = frame_len + 7 /* _FRAME. */ + arg_len; 1511 if (name_len >= (int)sizeof(name)) { 1512 return fail("local name too long"); 1513 } 1514 memcpy(name, current_frame.ptr, (size_t)frame_len); 1515 memcpy(name + frame_len, "_FRAME.", 7); 1516 memcpy(name + frame_len + 7, arg_tok->text.ptr, (size_t)arg_len); 1517 1518 for (i = 0; i < macro_count; i++) { 1519 if (macros[i].name.len == name_len && 1520 memcmp(macros[i].name.ptr, name, (size_t)name_len) == 0) { 1521 m = ¯os[i]; 1522 break; 1523 } 1524 } 1525 if (m == NULL) { 1526 return fail("unknown local"); 1527 } 1528 1529 if (!copy_span_to_pool((struct TokenSpan){m->body_start, m->body_end})) { 1530 pool_used = mark; 1531 return 0; 1532 } 1533 *after_out = call_end_pos; 1534 *mark_out = mark; 1535 return 1; 1536 } 1537 1538 static int eval_expr_atom(struct Token *tok, struct Token *limit, 1539 struct Token **after_out, long long *out) 1540 { 1541 const struct Macro *macro; 1542 struct Token *after; 1543 int mark; 1544 1545 if (tok->kind == TOK_WORD && token_text_eq(tok, "%local")) { 1546 if (!expand_local_into_pool(tok, limit, &after, &mark)) { 1547 return 0; 1548 } 1549 if (pool_used == mark) { 1550 pool_used = mark; 1551 return fail("bad expression"); 1552 } 1553 if (!eval_expr_range((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, out)) { 1554 pool_used = mark; 1555 return 0; 1556 } 1557 pool_used = mark; 1558 *after_out = after; 1559 return 1; 1560 } 1561 1562 macro = find_macro(tok); 1563 if (macro != NULL && 1564 ((tok + 1 < limit && (tok + 1)->kind == TOK_LPAREN && 1565 (tok + 1)->tight) || 1566 macro->param_count == 0)) { 1567 if (!expand_macro_tokens(tok, limit, macro, &after, &mark)) { 1568 return 0; 1569 } 1570 if (pool_used == mark) { 1571 pool_used = mark; 1572 return fail("bad expression"); 1573 } 1574 if (!eval_expr_range((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, out)) { 1575 pool_used = mark; 1576 return 0; 1577 } 1578 pool_used = mark; 1579 *after_out = after; 1580 return 1; 1581 } 1582 1583 if (!parse_int_token(tok, out)) { 1584 return 0; 1585 } 1586 *after_out = tok + 1; 1587 return 1; 1588 } 1589 1590 static int eval_expr_range(struct TokenSpan span, long long *out) 1591 { 1592 struct ExprFrame frames[MAX_EXPR_FRAMES]; 1593 int frame_top = 0; 1594 struct Token *pos = span.start; 1595 long long value = 0; 1596 long long result = 0; 1597 int have_value = 0; 1598 int have_result = 0; 1599 1600 for (;;) { 1601 if (have_value) { 1602 if (frame_top > 0) { 1603 struct ExprFrame *frame = &frames[frame_top - 1]; 1604 1605 if (frame->argc >= MAX_PARAMS) { 1606 return fail("bad expression"); 1607 } 1608 frame->args[frame->argc++] = value; 1609 have_value = 0; 1610 continue; 1611 } 1612 if (have_result) { 1613 return fail("bad expression"); 1614 } 1615 result = value; 1616 have_result = 1; 1617 have_value = 0; 1618 continue; 1619 } 1620 1621 skip_newlines(&pos, span.end); 1622 if (pos >= span.end) { 1623 break; 1624 } 1625 if (pos->line > 0) { 1626 current_line = pos->line; 1627 } 1628 1629 if (pos->kind == TOK_LPAREN) { 1630 enum ExprOp op; 1631 1632 pos++; 1633 skip_newlines(&pos, span.end); 1634 if (pos >= span.end) { 1635 return fail("bad expression"); 1636 } 1637 op = expr_op_code(pos); 1638 if (op == EXPR_INVALID) { 1639 return fail("bad expression"); 1640 } 1641 pos++; 1642 if (op == EXPR_STRLEN) { 1643 /* strlen is degenerate: argument is a TOK_STRING atom, 1644 * not a recursive expression. Handle inline and yield 1645 * the string's raw byte count (span.len - 2). */ 1646 skip_newlines(&pos, span.end); 1647 if (pos >= span.end || pos->kind != TOK_STRING) { 1648 return fail("bad expression"); 1649 } 1650 if (pos->text.len < 2 || pos->text.ptr[0] != '"') { 1651 return fail("bad expression"); 1652 } 1653 value = (long long)(pos->text.len - 2); 1654 pos++; 1655 skip_newlines(&pos, span.end); 1656 if (pos >= span.end || pos->kind != TOK_RPAREN) { 1657 return fail("bad expression"); 1658 } 1659 pos++; 1660 have_value = 1; 1661 continue; 1662 } 1663 if (frame_top >= MAX_EXPR_FRAMES) { 1664 return fail("expression overflow"); 1665 } 1666 frames[frame_top].op = op; 1667 frames[frame_top].argc = 0; 1668 frame_top++; 1669 continue; 1670 } 1671 1672 if (pos->kind == TOK_RPAREN) { 1673 if (frame_top <= 0) { 1674 return fail("bad expression"); 1675 } 1676 if (!apply_expr_op(frames[frame_top - 1].op, 1677 frames[frame_top - 1].args, 1678 frames[frame_top - 1].argc, 1679 &value)) { 1680 return 0; 1681 } 1682 frame_top--; 1683 pos++; 1684 have_value = 1; 1685 continue; 1686 } 1687 1688 if (!eval_expr_atom(pos, span.end, &pos, &value)) { 1689 return 0; 1690 } 1691 have_value = 1; 1692 } 1693 1694 if (frame_top != 0 || !have_result) { 1695 return fail("bad expression"); 1696 } 1697 if (pos != span.end) { 1698 return fail("bad expression"); 1699 } 1700 1701 *out = result; 1702 return 1; 1703 } 1704 1705 static int emit_hex_value(unsigned long long value, int bytes) 1706 { 1707 /* Emit the bytes as bare little-endian hex digits. hex2pp's byte- 1708 * stream parser groups every two hex digits into one byte; no 1709 * quoting or separators are needed. */ 1710 char tmp[17]; 1711 static const char hex[] = "0123456789ABCDEF"; 1712 struct Token tok; 1713 int i; 1714 char *text_ptr; 1715 int total_len = 2 * bytes; 1716 1717 for (i = 0; i < bytes; i++) { 1718 unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF); 1719 tmp[2 * i] = hex[b >> 4]; 1720 tmp[2 * i + 1] = hex[b & 0x0F]; 1721 } 1722 tmp[total_len] = '\0'; 1723 1724 text_ptr = append_text_len(tmp, total_len); 1725 if (text_ptr == NULL) { 1726 return 0; 1727 } 1728 tok.kind = TOK_WORD; 1729 tok.tight = 0; 1730 tok.line = current_line; 1731 tok.text.ptr = text_ptr; 1732 tok.text.len = total_len; 1733 return emit_token(&tok); 1734 } 1735 1736 static int expand_builtin_call(struct Stream *s, const struct Token *tok) 1737 { 1738 long long value; 1739 1740 if (tok + 1 >= s->end || (tok + 1)->kind != TOK_LPAREN) { 1741 return fail("bad builtin"); 1742 } 1743 if (!parse_args((struct Token *)tok + 1, s->end)) { 1744 return 0; 1745 } 1746 1747 if (token_text_eq(tok, "!") || token_text_eq(tok, "@") || 1748 token_text_eq(tok, "%") || token_text_eq(tok, "$")) { 1749 struct TokenSpan arg; 1750 struct Token *end_pos; 1751 int bytes; 1752 1753 if (arg_count != 1) { 1754 return fail("bad builtin"); 1755 } 1756 arg.start = arg_starts[0]; 1757 arg.end = arg_ends[0]; 1758 end_pos = call_end_pos; 1759 if (!eval_expr_range(arg, &value)) { 1760 return 0; 1761 } 1762 s->pos = end_pos; 1763 s->line_start = 0; 1764 bytes = token_text_eq(tok, "!") ? 1 : 1765 token_text_eq(tok, "@") ? 2 : 1766 token_text_eq(tok, "%") ? 4 : 8; 1767 return emit_hex_value((unsigned long long)value, bytes); 1768 } 1769 1770 if (token_text_eq(tok, "%select")) { 1771 struct TokenSpan cond_arg, then_arg, else_arg, chosen; 1772 struct Token *end_pos; 1773 int mark; 1774 1775 if (arg_count != 3) { 1776 return fail("bad builtin"); 1777 } 1778 cond_arg.start = arg_starts[0]; cond_arg.end = arg_ends[0]; 1779 then_arg.start = arg_starts[1]; then_arg.end = arg_ends[1]; 1780 else_arg.start = arg_starts[2]; else_arg.end = arg_ends[2]; 1781 end_pos = call_end_pos; 1782 if (!eval_expr_range(cond_arg, &value)) { 1783 return 0; 1784 } 1785 chosen = (value != 0) ? then_arg : else_arg; 1786 s->pos = end_pos; 1787 s->line_start = 0; 1788 if (chosen.start == chosen.end) { 1789 return 1; 1790 } 1791 mark = pool_used; 1792 if (!copy_span_to_pool(chosen)) { 1793 pool_used = mark; 1794 return 0; 1795 } 1796 return push_pool_stream_from_mark(mark); 1797 } 1798 1799 if (token_text_eq(tok, "%local")) { 1800 struct Token *after; 1801 int mark; 1802 1803 if (!expand_local_into_pool((struct Token *)tok, s->end, &after, &mark)) { 1804 return 0; 1805 } 1806 s->pos = after; 1807 s->line_start = 0; 1808 return push_pool_stream_from_mark(mark); 1809 } 1810 1811 if (token_text_eq(tok, "%str")) { 1812 struct Token *arg_tok; 1813 struct Token *end_pos; 1814 struct Token out_tok; 1815 char *text_ptr; 1816 int orig_len; 1817 int out_len; 1818 1819 if (arg_count != 1) { 1820 return fail("bad builtin"); 1821 } 1822 if (arg_ends[0] - arg_starts[0] != 1) { 1823 return fail("bad builtin"); 1824 } 1825 arg_tok = arg_starts[0]; 1826 if (arg_tok->kind != TOK_WORD) { 1827 return fail("bad builtin"); 1828 } 1829 end_pos = call_end_pos; 1830 1831 orig_len = arg_tok->text.len; 1832 out_len = orig_len + 2; 1833 if (text_used + out_len + 1 > MAX_TEXT) { 1834 return fail("text overflow"); 1835 } 1836 text_ptr = text_buf + text_used; 1837 text_buf[text_used++] = '"'; 1838 memcpy(text_buf + text_used, arg_tok->text.ptr, (size_t)orig_len); 1839 text_used += orig_len; 1840 text_buf[text_used++] = '"'; 1841 text_buf[text_used++] = '\0'; 1842 1843 out_tok.kind = TOK_STRING; 1844 out_tok.tight = 0; 1845 out_tok.line = current_line; 1846 out_tok.text.ptr = text_ptr; 1847 out_tok.text.len = out_len; 1848 s->pos = end_pos; 1849 s->line_start = 0; 1850 return emit_token(&out_tok); 1851 } 1852 1853 return fail("bad builtin"); 1854 } 1855 1856 static int expand_call(struct Stream *s, const struct Macro *macro) 1857 { 1858 struct Token *after; 1859 int mark; 1860 1861 if (!expand_macro_tokens(s->pos, s->end, macro, &after, &mark)) { 1862 return 0; 1863 } 1864 s->pos = after; 1865 s->line_start = 0; 1866 return push_pool_stream_from_mark(mark); 1867 } 1868 1869 static int push_frame(struct Stream *s) 1870 { 1871 /* %frame NAME sets the single-slot current frame, used by %local 1872 * lookup. Frames do not nest: a second %frame before %endframe is 1873 * an error. The header behaves like %scope (newlines after the 1874 * name are absorbed when the directive appeared at line_start). */ 1875 int started_at_line_start = s->line_start; 1876 1877 s->pos++; 1878 skip_newlines(&s->pos, s->end); 1879 if (s->pos >= s->end || s->pos->kind != TOK_WORD) { 1880 return fail("bad frame header"); 1881 } 1882 if (frame_active) { 1883 return fail("frame already active"); 1884 } 1885 current_frame = s->pos->text; 1886 frame_active = 1; 1887 s->pos++; 1888 if (started_at_line_start) { 1889 skip_newlines(&s->pos, s->end); 1890 s->line_start = 1; 1891 } 1892 return 1; 1893 } 1894 1895 static int pop_frame(struct Stream *s) 1896 { 1897 /* %endframe must be immediately followed by TOK_NEWLINE; the newline 1898 * is consumed iff %endframe itself appeared at line_start. */ 1899 int started_at_line_start = s->line_start; 1900 1901 s->pos++; 1902 if (!frame_active) { 1903 return fail("frame underflow"); 1904 } 1905 frame_active = 0; 1906 if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) { 1907 return fail("expected newline after %endframe"); 1908 } 1909 if (started_at_line_start) { 1910 s->pos++; 1911 s->line_start = 1; 1912 } 1913 return 1; 1914 } 1915 1916 static int process_tokens(void) 1917 { 1918 if (!push_stream_span((struct TokenSpan){source_tokens, source_tokens + source_count}, -1)) { 1919 return 0; 1920 } 1921 1922 /* Per-token dispatch is gated on the first byte of WORD tokens. 1923 * Plain pass-through tokens (e.g. hex literals, bare identifiers) 1924 * fail the c0=='%' / c0 in {!,@,$} test in one byte compare and go 1925 * straight to emit_token. Within the c0=='%' branch we dispatch on 1926 * the second byte to pick the matching directive/builtin without 1927 * walking ~9 token_text_eq probes. */ 1928 for (;;) { 1929 struct Stream *s; 1930 struct Token *tok; 1931 1932 s = current_stream(); 1933 if (s == NULL) { 1934 break; 1935 } 1936 if (s->pos >= s->end) { 1937 pop_stream(); 1938 continue; 1939 } 1940 1941 tok = s->pos; 1942 if (tok->line > 0) { 1943 current_line = tok->line; 1944 } 1945 1946 if (tok->kind == TOK_NEWLINE) { 1947 s->pos++; 1948 s->line_start = 1; 1949 if (!emit_newline()) { 1950 return 0; 1951 } 1952 continue; 1953 } 1954 1955 if (tok->kind == TOK_WORD && tok->text.len >= 1) { 1956 const char *p = tok->text.ptr; 1957 int len = tok->text.len; 1958 char c0 = p[0]; 1959 int has_paren = (tok + 1 < s->end && 1960 (tok + 1)->kind == TOK_LPAREN && 1961 (tok + 1)->tight); 1962 1963 if (c0 == '%' && len >= 2) { 1964 char c1 = p[1]; 1965 const struct Macro *macro; 1966 int handled = 0; 1967 1968 switch (c1) { 1969 case 'm': 1970 if (len == 6 && memcmp(p + 2, "acro", 4) == 0) { 1971 if (!define_macro(s)) return 0; 1972 handled = 1; 1973 } 1974 break; 1975 case 's': 1976 if (len == 7 && memcmp(p + 2, "truct", 5) == 0) { 1977 if (!define_fielded(s, 8, "SIZE", 4)) return 0; 1978 handled = 1; 1979 } else if (has_paren && len == 7 && 1980 memcmp(p + 2, "elect", 5) == 0) { 1981 if (!expand_builtin_call(s, tok)) return 0; 1982 handled = 1; 1983 } else if (has_paren && len == 4 && 1984 memcmp(p + 2, "tr", 2) == 0) { 1985 if (!expand_builtin_call(s, tok)) return 0; 1986 handled = 1; 1987 } 1988 break; 1989 case 'e': 1990 if (len == 5 && memcmp(p + 2, "num", 3) == 0) { 1991 if (!define_fielded(s, 1, "COUNT", 5)) return 0; 1992 handled = 1; 1993 } else if (len == 9 && 1994 memcmp(p + 2, "ndframe", 7) == 0) { 1995 if (!pop_frame(s)) return 0; 1996 handled = 1; 1997 } 1998 break; 1999 case 'f': 2000 if (len == 6 && memcmp(p + 2, "rame", 4) == 0) { 2001 if (!push_frame(s)) return 0; 2002 handled = 1; 2003 } 2004 break; 2005 case 'b': 2006 if (has_paren && len == 6 && 2007 memcmp(p + 2, "ytes", 4) == 0) { 2008 if (!expand_builtin_call(s, tok)) return 0; 2009 handled = 1; 2010 } 2011 break; 2012 case 'l': 2013 if (has_paren && len == 6 && 2014 memcmp(p + 2, "ocal", 4) == 0) { 2015 if (!expand_builtin_call(s, tok)) return 0; 2016 handled = 1; 2017 } 2018 break; 2019 } 2020 2021 if (handled) { 2022 continue; 2023 } 2024 2025 macro = find_macro(tok); 2026 if (macro != NULL && 2027 (has_paren || macro->param_count == 0)) { 2028 if (!expand_call(s, macro)) return 0; 2029 continue; 2030 } 2031 } else if (len == 1 && 2032 (c0 == '!' || c0 == '@' || 2033 c0 == '$' || c0 == '%')) { 2034 if (has_paren) { 2035 if (!expand_builtin_call(s, tok)) return 0; 2036 continue; 2037 } 2038 } 2039 } 2040 2041 s->pos++; 2042 s->line_start = 0; 2043 if (!emit_token(tok)) { 2044 return 0; 2045 } 2046 } 2047 2048 if (frame_active) { 2049 return fail("frame not closed"); 2050 } 2051 2052 if (output_used >= MAX_OUTPUT) { 2053 return fail("output overflow"); 2054 } 2055 output_buf[output_used] = '\0'; 2056 return 1; 2057 } 2058 2059 int main(int argc, char **argv) 2060 { 2061 FILE *in; 2062 FILE *out; 2063 size_t nread; 2064 2065 if (argc != 3) { 2066 fprintf(stderr, "usage: %s input.M1 output.M1\n", argv[0]); 2067 return 1; 2068 } 2069 2070 input_path = argv[1]; 2071 in = fopen(argv[1], "rb"); 2072 if (in == NULL) { 2073 perror(argv[1]); 2074 return 1; 2075 } 2076 nread = fread(input_buf, 1, MAX_INPUT, in); 2077 if (ferror(in)) { 2078 perror(argv[1]); 2079 fclose(in); 2080 return 1; 2081 } 2082 fclose(in); 2083 if (nread >= MAX_INPUT) { 2084 fprintf(stderr, "input too large\n"); 2085 return 1; 2086 } 2087 input_buf[nread] = '\0'; 2088 2089 if (!lex_source(input_buf) || !process_tokens()) { 2090 fprintf(stderr, "%s:%d: m1macro: %s\n", 2091 input_path != NULL ? input_path : "?", 2092 error_line, 2093 error_msg != NULL ? error_msg : "failed"); 2094 return 1; 2095 } 2096 2097 out = fopen(argv[2], "wb"); 2098 if (out == NULL) { 2099 perror(argv[2]); 2100 return 1; 2101 } 2102 if (fwrite(output_buf, 1, (size_t)output_used, out) != (size_t)output_used) { 2103 perror(argv[2]); 2104 fclose(out); 2105 return 1; 2106 } 2107 fclose(out); 2108 fprintf(stderr, "text_used=%d output_used=%d\n", text_used, output_used); 2109 return 0; 2110 }