asm_lex.c (19900B)
1 /* Assembler lexer. Streams tokens out of a borrowed source buffer. 2 * 3 * It intentionally keeps C-like number/string spelling rules because .S 4 * sources arrive after C preprocessing and GNU as accepts those spellings 5 * in directives and expressions. It does not own macro expansion or C 6 * keyword classification. 7 * 8 * Comments are consumed as whitespace; physical newlines surface as 9 * ASM_TOK_NEWLINE so the asm driver can keep line-oriented directive and 10 * instruction parsing. */ 11 12 #include "asm/asm_lex.h" 13 14 #include <string.h> 15 16 #include "core/heap.h" 17 #include "core/pool.h" 18 #include "core/slice.h" 19 20 struct AsmLexer { 21 Compiler* c; 22 Pool* pool; 23 Heap* heap; 24 const char* src; 25 size_t len; 26 size_t pos; 27 u32 file_id; 28 u32 line; 29 u32 col; 30 u8 at_bol; 31 u8 had_space; 32 }; 33 34 /* §5.1.1.2 translation phase 2: splice physical lines joined by 35 * backslash-newline. Advance past any splice sequence at l->pos so the 36 * cursor never rests on the leading backslash of a splice. */ 37 static void skip_splices(AsmLexer* l) { 38 while (l->pos + 1 < l->len && l->src[l->pos] == '\\' && 39 l->src[l->pos + 1] == '\n') { 40 l->pos += 2; 41 l->line++; 42 l->col = 1; 43 } 44 } 45 46 /* Logical peek: returns the off-th post-splice byte starting at l->pos, 47 * or -1 at end of input. Does not mutate l->pos. */ 48 static int peek(const AsmLexer* l, size_t off) { 49 size_t pos = l->pos; 50 size_t k = 0; 51 while (pos < l->len) { 52 if (pos + 1 < l->len && l->src[pos] == '\\' && l->src[pos + 1] == '\n') { 53 pos += 2; 54 continue; 55 } 56 if (k == off) return (unsigned char)l->src[pos]; 57 ++pos; 58 ++k; 59 } 60 return -1; 61 } 62 63 static int bump(AsmLexer* l) { 64 int ch; 65 skip_splices(l); 66 if (l->pos >= l->len) return -1; 67 ch = (unsigned char)l->src[l->pos++]; 68 if (ch == '\n') { 69 l->line++; 70 l->col = 1; 71 } else { 72 l->col++; 73 } 74 return ch; 75 } 76 77 static int is_digit(int c) { return c >= '0' && c <= '9'; } 78 static int is_hex_digit(int c) { 79 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || 80 (c >= 'A' && c <= 'F'); 81 } 82 /* Identifier-start byte (§6.4.2.1). Letters and underscore are ASCII; bytes 83 * ≥ 0x80 are accepted as the implementation-defined "other characters" 84 * permitted in identifiers — in practice UTF-8 lead/continuation bytes for 85 * extended source characters. UCNs are matched separately via ucn_len since 86 * they span multiple source bytes. */ 87 static int is_alpha(int c) { 88 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || 89 c >= 0x80; 90 } 91 static int is_alnum(int c) { return is_alpha(c) || is_digit(c); } 92 93 /* Match a UCN at offset `off` from the current position. Returns the total 94 * length (6 for \uXXXX, 10 for \UXXXXXXXX), or 0 if no UCN matches. The 95 * range constraints from §6.4.3 (no UCN < 00A0 except $/@/`, and none in 96 * D800–DFFF) are not enforced here — the lexical form is matched and any 97 * downstream phase that cares can diagnose. */ 98 static int ucn_len(const AsmLexer* l, size_t off) { 99 int n, i; 100 if (peek(l, off) != '\\') return 0; 101 if (peek(l, off + 1) == 'u') 102 n = 4; 103 else if (peek(l, off + 1) == 'U') 104 n = 8; 105 else 106 return 0; 107 for (i = 0; i < n; ++i) { 108 if (!is_hex_digit(peek(l, off + 2 + i))) return 0; 109 } 110 return 2 + n; 111 } 112 113 static SrcLoc asm_lex_here(const AsmLexer* l) { 114 SrcLoc loc; 115 loc.file_id = l->file_id; 116 loc.line = l->line; 117 loc.col = l->col; 118 return loc; 119 } 120 121 AsmLexer* asm_lex_open_mem(Compiler* c, const char* name, const char* src, 122 size_t len) { 123 Heap* h = (Heap*)c->ctx->heap; 124 AsmLexer* l = (AsmLexer*)h->alloc(h, sizeof(*l), _Alignof(AsmLexer)); 125 if (!l) return NULL; 126 memset(l, 0, sizeof(*l)); 127 l->c = c; 128 l->pool = c->global; 129 l->heap = h; 130 l->src = src ? src : ""; 131 l->len = src ? len : 0; 132 l->pos = 0; 133 if (source_add_memory(c->sources, slice_from_cstr(name), &l->file_id) != 134 KIT_OK) { 135 h->free(h, l, sizeof(*l)); 136 return NULL; 137 } 138 l->line = 1; 139 l->col = 1; 140 l->at_bol = 1; 141 l->had_space = 0; 142 return l; 143 } 144 145 void asm_lex_close(AsmLexer* l) { 146 if (!l) return; 147 l->heap->free(l->heap, l, sizeof(*l)); 148 } 149 150 SrcLoc asm_lex_loc(const AsmLexer* l) { return asm_lex_here(l); } 151 u32 asm_lex_file_id(const AsmLexer* l) { return l->file_id; } 152 const AsmLitInfo* asm_lex_lit(const AsmLexer* l, AsmLitId id) { 153 (void)l; 154 (void)id; 155 return NULL; 156 } 157 158 /* Intern bytes [start, end) with line splices (\<newline>) removed, so token 159 * spellings reflect post-phase-2 logical text. */ 160 static Sym intern_spliced(AsmLexer* l, size_t start, size_t end) { 161 size_t i; 162 int has_splice = 0; 163 char* buf; 164 size_t k; 165 Sym sym; 166 167 for (i = start; i + 1 < end; ++i) { 168 if (l->src[i] == '\\' && l->src[i + 1] == '\n') { 169 has_splice = 1; 170 break; 171 } 172 } 173 if (!has_splice) 174 return pool_intern_slice(l->pool, 175 (Slice){.s = l->src + start, .len = end - start}); 176 177 buf = (char*)l->heap->alloc(l->heap, end - start, 1); 178 k = 0; 179 for (i = start; i < end;) { 180 if (i + 1 < end && l->src[i] == '\\' && l->src[i + 1] == '\n') { 181 i += 2; 182 continue; 183 } 184 buf[k++] = l->src[i++]; 185 } 186 sym = pool_intern_slice(l->pool, (Slice){.s = buf, .len = k}); 187 l->heap->free(l->heap, buf, end - start); 188 return sym; 189 } 190 191 /* Skip whitespace and comments. Returns 1 if a newline boundary was crossed 192 * via comment consumption (caller still emits the explicit newline token on 193 * an in-source '\n'). */ 194 static void skip_ws_and_comments(AsmLexer* l) { 195 for (;;) { 196 int ch = peek(l, 0); 197 if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\v' || ch == '\f') { 198 bump(l); 199 l->had_space = 1; 200 continue; 201 } 202 if (ch == '/' && peek(l, 1) == '/') { 203 bump(l); 204 bump(l); 205 while (peek(l, 0) >= 0 && peek(l, 0) != '\n') bump(l); 206 l->had_space = 1; 207 continue; 208 } 209 if (ch == '/' && peek(l, 1) == '*') { 210 bump(l); 211 bump(l); 212 while (peek(l, 0) >= 0) { 213 if (peek(l, 0) == '*' && peek(l, 1) == '/') { 214 bump(l); 215 bump(l); 216 break; 217 } 218 bump(l); 219 } 220 l->had_space = 1; 221 continue; 222 } 223 break; 224 } 225 } 226 227 /* Consume a pp-number per §6.4.8. The cursor is positioned at the leading 228 * digit (or `.` followed by a digit) on entry. */ 229 static void scan_pp_number(AsmLexer* l) { 230 if (peek(l, 0) == '.') bump(l); 231 bump(l); /* first digit */ 232 while (l->pos < l->len) { 233 int c = peek(l, 0); 234 int n = peek(l, 1); 235 if ((c == 'e' || c == 'E' || c == 'p' || c == 'P') && 236 (n == '+' || n == '-')) { 237 bump(l); 238 bump(l); 239 } else if (is_alnum(c) || c == '.') { 240 bump(l); 241 } else { 242 break; 243 } 244 } 245 } 246 247 /* 1 if the pp-number text is a floating constant (§6.4.4.2): contains a 248 * radix `.`, a hex `p`/`P` exponent, or a decimal `e`/`E` exponent. */ 249 static int pp_number_is_float(const char* s, size_t n) { 250 int is_hex = 0; 251 size_t i = 0; 252 if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { 253 is_hex = 1; 254 i = 2; 255 } 256 for (; i < n; ++i) { 257 char c = s[i]; 258 if (c == '.') return 1; 259 if (is_hex && (c == 'p' || c == 'P')) return 1; 260 if (!is_hex && (c == 'e' || c == 'E')) { 261 if (i + 1 < n) { 262 char nx = s[i + 1]; 263 if (nx == '+' || nx == '-' || (nx >= '0' && nx <= '9')) return 1; 264 } 265 } 266 } 267 return 0; 268 } 269 270 /* Consume a quoted body — string ('"') or character ('\''). The cursor is 271 * positioned at the opening quote on entry. Returns 1 on an unterminated or 272 * newline-broken literal, 0 on a clean close. */ 273 static int scan_quoted(AsmLexer* l, int quote) { 274 bump(l); /* opening quote */ 275 for (;;) { 276 int ch = peek(l, 0); 277 if (ch < 0) return 1; 278 if (ch == quote) { 279 bump(l); 280 return 0; 281 } 282 if (ch == '\n') return 1; 283 if (ch == '\\') { 284 bump(l); /* backslash */ 285 if (peek(l, 0) < 0) return 1; 286 bump(l); /* the escaped char */ 287 continue; 288 } 289 bump(l); 290 } 291 } 292 293 AsmTok asm_lex_next(AsmLexer* l) { 294 AsmTok t; 295 SrcLoc tloc; 296 size_t start; 297 int ch; 298 299 memset(&t, 0, sizeof(t)); 300 301 /* Skip whitespace and comments. A newline token is emitted before any 302 * subsequent content tokens for the line that follows. */ 303 for (;;) { 304 skip_ws_and_comments(l); 305 skip_splices(l); 306 if (l->pos >= l->len) { 307 t.kind = ASM_TOK_EOF; 308 t.loc = asm_lex_here(l); 309 return t; 310 } 311 if (peek(l, 0) == '\n') { 312 tloc = asm_lex_here(l); 313 bump(l); 314 t.kind = ASM_TOK_NEWLINE; 315 t.loc = tloc; 316 l->at_bol = 1; 317 l->had_space = 0; 318 return t; 319 } 320 break; 321 } 322 323 tloc = asm_lex_here(l); 324 start = l->pos; 325 ch = peek(l, 0); 326 327 if (l->at_bol) t.flags |= ASM_TF_AT_BOL; 328 if (l->had_space) t.flags |= ASM_TF_HAS_SPACE; 329 l->at_bol = 0; 330 l->had_space = 0; 331 t.loc = tloc; 332 333 /* String / character literal, with optional encoding prefix. The prefix 334 * length and encoding flag are decoded together so the spelling we 335 * intern includes the prefix bytes. */ 336 { 337 int sp_len = -1; 338 int is_char = 0; 339 u32 encf = 0; 340 341 if (ch == '"') { 342 sp_len = 0; 343 is_char = 0; 344 } else if (ch == '\'') { 345 sp_len = 0; 346 is_char = 1; 347 } else if (ch == 'L' && peek(l, 1) == '"') { 348 sp_len = 1; 349 is_char = 0; 350 encf = ASM_TF_STR_WIDE; 351 } else if (ch == 'L' && peek(l, 1) == '\'') { 352 sp_len = 1; 353 is_char = 1; 354 encf = ASM_TF_STR_WIDE; 355 } else if (ch == 'u' && peek(l, 1) == '8' && peek(l, 2) == '"') { 356 sp_len = 2; 357 is_char = 0; 358 encf = ASM_TF_STR_U8; 359 } else if (ch == 'u' && peek(l, 1) == '"') { 360 sp_len = 1; 361 is_char = 0; 362 encf = ASM_TF_STR_U16; 363 } else if (ch == 'u' && peek(l, 1) == '\'') { 364 sp_len = 1; 365 is_char = 1; 366 encf = ASM_TF_STR_U16; 367 } else if (ch == 'U' && peek(l, 1) == '"') { 368 sp_len = 1; 369 is_char = 0; 370 encf = ASM_TF_STR_U32; 371 } else if (ch == 'U' && peek(l, 1) == '\'') { 372 sp_len = 1; 373 is_char = 1; 374 encf = ASM_TF_STR_U32; 375 } 376 377 if (sp_len >= 0) { 378 int i; 379 for (i = 0; i < sp_len; ++i) bump(l); 380 if (scan_quoted(l, is_char ? '\'' : '"')) t.flags |= ASM_TF_LITERAL_BAD; 381 t.kind = (u16)(is_char ? ASM_TOK_CHR : ASM_TOK_STR); 382 t.flags |= encf; 383 t.spelling = intern_spliced(l, start, l->pos); 384 t.v.str = t.spelling; 385 return t; 386 } 387 } 388 389 /* Local-label identifier: a `.L`-prefixed symbol name (the universal GNU 390 * convention for assembler-local labels, e.g. `.Lkit_ro.0`, `.L.str`, 391 * `.LBB0_1`). Lexed as a single ASM_TOK_IDENT — including the leading dot 392 * and any embedded dots — so it flows through the same operand / label / 393 * `.type` paths as an ordinary identifier. This is unambiguous against 394 * directives: no assembler directive begins with `.L`, so `.text`, 395 * `.section`, `.quad` etc. still tokenize as PUNCT('.') + IDENT and reach 396 * the directive dispatcher. Embedded `.` is consumed only when followed by 397 * another symbol char, so `.Lfoo, x` and `.Lfoo+4` stop at the delimiter. */ 398 if (ch == '.' && peek(l, 1) == 'L') { 399 bump(l); /* '.' */ 400 bump(l); /* 'L' */ 401 for (;;) { 402 int c = peek(l, 0); 403 if (is_alnum(c) || c == '$') { 404 bump(l); 405 } else if (c == '.' && (is_alnum(peek(l, 1)) || peek(l, 1) == '$' || 406 peek(l, 1) == '_')) { 407 bump(l); 408 } else { 409 break; 410 } 411 } 412 t.kind = ASM_TOK_IDENT; 413 t.spelling = intern_spliced(l, start, l->pos); 414 t.v.ident = t.spelling; 415 return t; 416 } 417 418 /* Identifier (§6.4.2). Encoding-prefix candidates above are matched 419 * before this since L/u/U followed by a quote is a literal, not an 420 * identifier. The grammar's identifier-nondigit covers letters, _, 421 * extended source chars (impl-defined; bytes ≥ 0x80 here), and UCNs 422 * (§6.4.3) — the latter span multiple source bytes so they're matched 423 * via ucn_len rather than the per-byte is_alpha predicate. */ 424 { 425 int u = ucn_len(l, 0); 426 if (is_alpha(ch) || u) { 427 if (u) { 428 int i; 429 for (i = 0; i < u; ++i) bump(l); 430 } else 431 bump(l); 432 for (;;) { 433 int c = peek(l, 0); 434 if (is_alnum(c)) { 435 bump(l); 436 } else if (c == '.' && is_digit(peek(l, 1))) { 437 /* Discriminator-mangled symbol: `name.N` (static locals, lambda / 438 * block-scope renaming, e.g. `acc.1`). A `.` followed by a digit 439 * continues the identifier. Restricted to `.`+digit so it never 440 * swallows a `.`-led mnemonic suffix (`b.eq`, `fcvt.w.s`) or the 441 * `.size foo, .-foo` location-counter dot. */ 442 bump(l); 443 } else if ((u = ucn_len(l, 0))) { 444 int i; 445 for (i = 0; i < u; ++i) bump(l); 446 } else { 447 break; 448 } 449 } 450 t.kind = ASM_TOK_IDENT; 451 t.spelling = intern_spliced(l, start, l->pos); 452 t.v.ident = t.spelling; 453 return t; 454 } 455 } 456 457 /* Preprocessor-number shaped token, classified to ASM_TOK_NUM / 458 * ASM_TOK_FLT for expression diagnostics and future directive support. */ 459 if (is_digit(ch) || (ch == '.' && is_digit(peek(l, 1)))) { 460 size_t plen; 461 char* pbuf; 462 size_t i, k; 463 scan_pp_number(l); 464 /* Classify on the post-splice text (the spelling we'll intern). */ 465 plen = l->pos - start; 466 pbuf = (char*)l->heap->alloc(l->heap, plen ? plen : 1, 1); 467 k = 0; 468 for (i = start; i < l->pos;) { 469 if (i + 1 < l->pos && l->src[i] == '\\' && l->src[i + 1] == '\n') { 470 i += 2; 471 continue; 472 } 473 pbuf[k++] = l->src[i++]; 474 } 475 t.kind = (u16)(pp_number_is_float(pbuf, k) ? ASM_TOK_FLT : ASM_TOK_NUM); 476 /* Preserve common C-style integer/float suffixes in token flags. The 477 * current assembler expression evaluator ignores them, but keeping the 478 * spelling metadata makes the lexer useful for future directive work. */ 479 if (t.kind == ASM_TOK_FLT) { 480 size_t j = k; 481 while (j > 0) { 482 char c = pbuf[j - 1]; 483 if (c == 'f' || c == 'F') { 484 t.flags |= ASM_TF_FLT_F; 485 --j; 486 continue; 487 } 488 if (c == 'l' || c == 'L') { 489 t.flags |= ASM_TF_FLT_L; 490 --j; 491 continue; 492 } 493 break; 494 } 495 } else { 496 size_t j = k; 497 while (j > 0) { 498 char c = pbuf[j - 1]; 499 if (c == 'u' || c == 'U') { 500 t.flags |= ASM_TF_INT_U; 501 --j; 502 continue; 503 } 504 if (c == 'l' || c == 'L') { 505 if (j >= 2 && (pbuf[j - 2] == 'l' || pbuf[j - 2] == 'L')) { 506 t.flags |= ASM_TF_INT_LL; 507 j -= 2; 508 } else { 509 t.flags |= ASM_TF_INT_L; 510 --j; 511 } 512 continue; 513 } 514 break; 515 } 516 } 517 t.spelling = pool_intern_slice(l->pool, (Slice){.s = pbuf, .len = k}); 518 l->heap->free(l->heap, pbuf, plen ? plen : 1); 519 return t; 520 } 521 522 /* Punctuator, longest match. `#` is a distinct token because it is both 523 * an asm immediate marker and, at BOL in preprocessed assembler, a line 524 * marker introducer. */ 525 { 526 int n0 = peek(l, 0); 527 int n1 = peek(l, 1); 528 int n2 = peek(l, 2); 529 int n3 = peek(l, 3); 530 int adv = 1; 531 u32 punct = ASM_P_NONE; 532 u16 kind = ASM_TOK_PUNCT; 533 int i; 534 535 switch (n0) { 536 case '#': 537 if (n1 == '#') { 538 adv = 2; 539 kind = ASM_TOK_HASH_HASH; 540 punct = ASM_P_HASH_HASH; 541 } else { 542 adv = 1; 543 kind = ASM_TOK_HASH; 544 punct = '#'; 545 } 546 break; 547 case '.': 548 if (n1 == '.' && n2 == '.') { 549 adv = 3; 550 punct = ASM_P_ELLIPSIS; 551 } else { 552 adv = 1; 553 punct = '.'; 554 } 555 break; 556 case '-': 557 if (n1 == '>') { 558 adv = 2; 559 punct = ASM_P_ARROW; 560 } else if (n1 == '-') { 561 adv = 2; 562 punct = ASM_P_DEC; 563 } else if (n1 == '=') { 564 adv = 2; 565 punct = ASM_P_SUB_ASSIGN; 566 } else { 567 adv = 1; 568 punct = '-'; 569 } 570 break; 571 case '+': 572 if (n1 == '+') { 573 adv = 2; 574 punct = ASM_P_INC; 575 } else if (n1 == '=') { 576 adv = 2; 577 punct = ASM_P_ADD_ASSIGN; 578 } else { 579 adv = 1; 580 punct = '+'; 581 } 582 break; 583 case '<': 584 if (n1 == '<' && n2 == '=') { 585 adv = 3; 586 punct = ASM_P_SHL_ASSIGN; 587 } else if (n1 == '<') { 588 adv = 2; 589 punct = ASM_P_SHL; 590 } else if (n1 == '=') { 591 adv = 2; 592 punct = ASM_P_LE; 593 } else if (n1 == ':') { 594 adv = 2; 595 punct = '['; 596 } /* digraph */ 597 else if (n1 == '%') { 598 adv = 2; 599 punct = '{'; 600 } /* digraph */ 601 else { 602 adv = 1; 603 punct = '<'; 604 } 605 break; 606 case '>': 607 if (n1 == '>' && n2 == '=') { 608 adv = 3; 609 punct = ASM_P_SHR_ASSIGN; 610 } else if (n1 == '>') { 611 adv = 2; 612 punct = ASM_P_SHR; 613 } else if (n1 == '=') { 614 adv = 2; 615 punct = ASM_P_GE; 616 } else { 617 adv = 1; 618 punct = '>'; 619 } 620 break; 621 case '=': 622 if (n1 == '=') { 623 adv = 2; 624 punct = ASM_P_EQ; 625 } else { 626 adv = 1; 627 punct = '='; 628 } 629 break; 630 case '!': 631 if (n1 == '=') { 632 adv = 2; 633 punct = ASM_P_NE; 634 } else { 635 adv = 1; 636 punct = '!'; 637 } 638 break; 639 case '&': 640 if (n1 == '&') { 641 adv = 2; 642 punct = ASM_P_AND; 643 } else if (n1 == '=') { 644 adv = 2; 645 punct = ASM_P_AND_ASSIGN; 646 } else { 647 adv = 1; 648 punct = '&'; 649 } 650 break; 651 case '|': 652 if (n1 == '|') { 653 adv = 2; 654 punct = ASM_P_OR; 655 } else if (n1 == '=') { 656 adv = 2; 657 punct = ASM_P_OR_ASSIGN; 658 } else { 659 adv = 1; 660 punct = '|'; 661 } 662 break; 663 case '^': 664 if (n1 == '=') { 665 adv = 2; 666 punct = ASM_P_XOR_ASSIGN; 667 } else { 668 adv = 1; 669 punct = '^'; 670 } 671 break; 672 case '*': 673 if (n1 == '=') { 674 adv = 2; 675 punct = ASM_P_MUL_ASSIGN; 676 } else { 677 adv = 1; 678 punct = '*'; 679 } 680 break; 681 case '/': 682 if (n1 == '=') { 683 adv = 2; 684 punct = ASM_P_DIV_ASSIGN; 685 } else { 686 adv = 1; 687 punct = '/'; 688 } 689 break; 690 case '%': 691 if (n1 == ':' && n2 == '%' && n3 == ':') { 692 adv = 4; 693 kind = ASM_TOK_HASH_HASH; 694 punct = ASM_P_HASH_HASH; 695 } else if (n1 == ':') { 696 adv = 2; 697 kind = ASM_TOK_HASH; 698 punct = '#'; 699 } else if (n1 == '=') { 700 adv = 2; 701 punct = ASM_P_MOD_ASSIGN; 702 } else if (n1 == '>') { 703 adv = 2; 704 punct = '}'; 705 } /* digraph */ 706 else { 707 adv = 1; 708 punct = '%'; 709 } 710 break; 711 case ':': 712 if (n1 == '>') { 713 adv = 2; 714 punct = ']'; 715 } /* digraph */ 716 else { 717 adv = 1; 718 punct = ':'; 719 } 720 break; 721 case '(': 722 case ')': 723 case '{': 724 case '}': 725 case '[': 726 case ']': 727 case ',': 728 case ';': 729 case '?': 730 case '~': 731 adv = 1; 732 punct = (u32)n0; 733 break; 734 default: 735 /* Unknown byte. Surface as a single-char punct so the token 736 * stream still progresses; PP/parse may diagnose. */ 737 adv = 1; 738 punct = (u32)n0; 739 break; 740 } 741 742 for (i = 0; i < adv; ++i) bump(l); 743 t.kind = kind; 744 t.v.punct = punct; 745 t.spelling = intern_spliced(l, start, l->pos); 746 return t; 747 } 748 }