link_script.c (26703B)
1 /* Linker-script parser: a minimal GNU-ld-subset front end that produces 2 * the structured KitLinkScript form documented in <kit/link.h>. The 3 * applicator (link_layout.c) consumes the structured form; this file 4 * never speaks ELF or layout. 5 * 6 * Subset (driven by the kernel.lds at the head of doc/DESIGN.md §13): 7 * ENTRY(symbol) 8 * SECTIONS { ... } 9 * . = expr 10 * name = expr 11 * name : [ALIGN(N)] { body } 12 * /DISCARD/ : { body } 13 * body items: *(p1 p2 ...), name = expr, . = expr 14 * exprs: int literal (dec / 0x), `.`, ident, parens, 15 * + - * / & | ^ << >>, 16 * ALIGN(align) (1-arg: aligns `.`, GNU form) 17 * ALIGN(val, align) (2-arg: aligns an explicit expr) 18 * slash-star comments; whitespace insensitive. 19 * 20 * Anything else (MEMORY, PROVIDE, KEEP, AT>, > REGION, OVERLAY, INSERT, 21 * OUTPUT_FORMAT, INPUT, GROUP, MAX, MIN, line comments, quoted strings, 22 * file patterns other than the implicit `*` of `*(...)`) is a parse 23 * error: emits a diagnostic and returns 1, leaving *out unchanged. 24 * 25 * Encoding contracts the applicator relies on: 26 * - /DISCARD/ is encoded as a KitLinkOutputSection with name 27 * "/DISCARD/" (a literal sentinel, not a parsed identifier). 28 * - An output section's `: ALIGN(N)` header is encoded as the first 29 * entry in its asns[]: a dot-assignment whose expr is ALIGN(., N). 30 * - `*(p1 p2 ...)` produces one KitLinkInputMatch per pattern with 31 * file_pattern = empty slice (implicit `*`) and section_pattern set. 32 * COMMON is parsed as a literal pattern "COMMON". 33 * 34 * Allocation: every node and string is owned by the compiler's tu arena. 35 * kit_link_script_free is therefore a no-op — the arena outlives the 36 * script and is collectively freed with the compiler. During parsing we 37 * grow temporary arrays on the host heap, then arena-copy at finish. 38 * 39 * Diagnostics: SourceManager registration of a script buffer is a future 40 * cleanup; for now diagnostics carry file_id = 0 and pack the byte 41 * offset into the SrcLoc.line field (col is computed inline). */ 42 43 #include <kit/core.h> 44 #include <kit/link.h> 45 #include <stdarg.h> 46 #include <stddef.h> 47 #include <string.h> 48 49 #include "core/arena.h" 50 #include "core/core.h" 51 #include "core/diag.h" 52 #include "core/heap.h" 53 #include "core/slice.h" 54 55 /* The public KitLinkScript has no place to carry its backing-arena 56 * pointer, so we allocate a fixed-shape owner block via heap and arena- 57 * init it inline. kit_link_script_free recovers the owner by stepping 58 * back from the script field to the wrapping struct. The arena's first 59 * member must match struct Arena (defined in core/arena.h). */ 60 typedef struct ScriptOwner { 61 Arena arena; 62 KitLinkScript script; 63 } ScriptOwner; 64 65 typedef struct LSP { 66 Arena* arena; 67 Heap* heap; 68 KitDiagSink* diag; 69 const char* src; 70 size_t len; 71 size_t pos; 72 /* one-bit error sticky: any diagnostic flips this and the parser 73 * unwinds without producing partial output. */ 74 int err; 75 } LSP; 76 77 /* ---- diagnostics ---- */ 78 79 static SrcLoc lsp_loc(const LSP* p, size_t off) { 80 /* TODO: register the script buffer with SourceManager so diagnostics 81 * carry a real file_id; until then encode the byte offset as `line` 82 * and recompute a 1-based line/col on demand. */ 83 SrcLoc l; 84 size_t i, line = 1, col = 1; 85 l.file_id = 0; 86 for (i = 0; i < off && i < p->len; ++i) { 87 if (p->src[i] == '\n') { 88 ++line; 89 col = 1; 90 } else { 91 ++col; 92 } 93 } 94 l.line = (u32)line; 95 l.col = (u32)col; 96 return l; 97 } 98 99 static void lsp_errf(LSP* p, size_t off, const char* fmt, ...) { 100 va_list ap; 101 if (!p->diag) { 102 p->err = 1; 103 return; 104 } 105 va_start(ap, fmt); 106 diag_emitv(p->diag, DIAG_ERROR, lsp_loc(p, off), fmt, ap); 107 va_end(ap); 108 p->err = 1; 109 } 110 111 /* ---- arena helpers ---- */ 112 113 static char* lsp_strdup(LSP* p, const char* s, size_t n) { 114 return arena_strdup(p->arena, s, n); 115 } 116 117 /* Arena-copy a span of the script text and return a KitSlice over the 118 * copy. The copy is NUL-terminated (arena_strdup), so consumers that hit 119 * a host boundary can use .s directly. */ 120 static KitSlice lsp_slice(LSP* p, const char* s, size_t n) { 121 KitSlice out; 122 out.s = lsp_strdup(p, s, n); 123 out.len = out.s ? n : 0; 124 return out; 125 } 126 127 static KitLinkExpr* lsp_new_expr(LSP* p) { 128 return arena_znew(p->arena, KitLinkExpr); 129 } 130 131 /* ---- heap-backed temp vectors (copied to the arena at finish) ---- */ 132 133 typedef struct VecAsn { 134 KitLinkAssignment* p; 135 u32 n, cap; 136 } VecAsn; 137 typedef struct VecMatch { 138 KitLinkInputMatch* p; 139 u32 n, cap; 140 } VecMatch; 141 typedef struct VecSec { 142 KitLinkOutputSection* p; 143 u32 n, cap; 144 } VecSec; 145 146 static int vec_reserve_(LSP* p, void** ptr, u32* cap, u32 want, size_t es) { 147 u32 nc; 148 void* nb; 149 if (*cap >= want) return 0; 150 nc = *cap ? *cap * 2 : 8; 151 while (nc < want) nc *= 2; 152 nb = p->heap->realloc(p->heap, *ptr, (size_t)*cap * es, (size_t)nc * es, 153 sizeof(void*)); 154 if (!nb) return 1; 155 *ptr = nb; 156 *cap = nc; 157 return 0; 158 } 159 160 #define VEC_PUSH(p, v, val) \ 161 (vec_reserve_((p), (void**)&(v).p, &(v).cap, (v).n + 1, sizeof(*(v).p)) \ 162 ? 1 \ 163 : ((v).p[(v).n++] = (val), 0)) 164 165 static void vec_free_(LSP* p, void* ptr, u32 cap, size_t es) { 166 if (ptr) p->heap->free(p->heap, ptr, (size_t)cap * es); 167 } 168 169 /* ---- lex primitives ---- */ 170 171 static int is_id_start(int c) { 172 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || 173 c == '.'; 174 } 175 static int is_id_cont(int c) { 176 return is_id_start(c) || (c >= '0' && c <= '9') || c == '-'; 177 } 178 179 static void skip_ws(LSP* p) { 180 while (p->pos < p->len) { 181 char ch = p->src[p->pos]; 182 if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { 183 ++p->pos; 184 continue; 185 } 186 if (ch == '/' && p->pos + 1 < p->len && p->src[p->pos + 1] == '*') { 187 size_t start = p->pos; 188 p->pos += 2; 189 while (p->pos + 1 < p->len && 190 !(p->src[p->pos] == '*' && p->src[p->pos + 1] == '/')) { 191 ++p->pos; 192 } 193 if (p->pos + 1 >= p->len) { 194 lsp_errf(p, start, "unterminated /* comment"); 195 return; 196 } 197 p->pos += 2; 198 continue; 199 } 200 if (ch == '/' && p->pos + 1 < p->len && p->src[p->pos + 1] == '/') { 201 lsp_errf(p, p->pos, "// line comments not supported"); 202 return; 203 } 204 break; 205 } 206 } 207 208 static int peek_ch(LSP* p) { 209 skip_ws(p); 210 if (p->err) return -1; 211 if (p->pos >= p->len) return -1; 212 return (unsigned char)p->src[p->pos]; 213 } 214 215 static int match_ch(LSP* p, char ch) { 216 skip_ws(p); 217 if (p->err) return 0; 218 if (p->pos < p->len && p->src[p->pos] == ch) { 219 ++p->pos; 220 return 1; 221 } 222 return 0; 223 } 224 225 static int expect_ch(LSP* p, char ch) { 226 if (match_ch(p, ch)) return 0; 227 lsp_errf(p, p->pos, "expected '%c'", ch); 228 return 1; 229 } 230 231 /* Lex an identifier-or-section-name token in place: returns a pointer 232 * into p->src and length via *out_len. Section names like .text.* and 233 * /DISCARD/ are handled by the section-name-aware variant below. */ 234 static int lex_ident(LSP* p, const char** out, size_t* out_len) { 235 size_t start; 236 skip_ws(p); 237 if (p->err) return 1; 238 if (p->pos >= p->len || !is_id_start((unsigned char)p->src[p->pos])) { 239 lsp_errf(p, p->pos, "expected identifier"); 240 return 1; 241 } 242 start = p->pos; 243 while (p->pos < p->len && is_id_cont((unsigned char)p->src[p->pos])) ++p->pos; 244 *out = p->src + start; 245 *out_len = p->pos - start; 246 return 0; 247 } 248 249 /* Match a literal keyword. Caller must have already peeked. */ 250 static int match_kw(LSP* p, const char* kw) { 251 size_t klen = slice_from_cstr(kw).len; 252 size_t save; 253 skip_ws(p); 254 if (p->err) return 0; 255 save = p->pos; 256 if (p->pos + klen > p->len) return 0; 257 if (memcmp(p->src + p->pos, kw, klen) != 0) return 0; 258 /* must not glue to a following id-cont character */ 259 if (p->pos + klen < p->len && 260 is_id_cont((unsigned char)p->src[p->pos + klen])) 261 return 0; 262 p->pos += klen; 263 (void)save; 264 return 1; 265 } 266 267 /* ---- expression parser (precedence climbing) ---- 268 * 269 * Levels (low -> high): 270 * 0: | 271 * 1: ^ 272 * 2: & 273 * 3: << >> 274 * 4: + - 275 * 5: * / 276 * 6: unary (none beyond parenthesized atoms here) 277 * atom: int | . | ALIGN(e,a) | ident | (expr) 278 */ 279 280 static KitLinkExpr* parse_expr(LSP* p); 281 282 static KitLinkExpr* parse_int(LSP* p) { 283 KitLinkExpr* e; 284 size_t start = p->pos; 285 i64 v = 0; 286 if (p->pos + 1 < p->len && p->src[p->pos] == '0' && 287 (p->src[p->pos + 1] == 'x' || p->src[p->pos + 1] == 'X')) { 288 p->pos += 2; 289 if (p->pos >= p->len) { 290 lsp_errf(p, start, "malformed hex literal"); 291 return NULL; 292 } 293 while (p->pos < p->len) { 294 char ch = p->src[p->pos]; 295 int d; 296 if (ch >= '0' && ch <= '9') 297 d = ch - '0'; 298 else if (ch >= 'a' && ch <= 'f') 299 d = 10 + (ch - 'a'); 300 else if (ch >= 'A' && ch <= 'F') 301 d = 10 + (ch - 'A'); 302 else 303 break; 304 v = (v << 4) | d; 305 ++p->pos; 306 } 307 if (p->pos == start + 2) { 308 lsp_errf(p, start, "empty hex literal"); 309 return NULL; 310 } 311 } else { 312 while (p->pos < p->len && p->src[p->pos] >= '0' && p->src[p->pos] <= '9') { 313 v = v * 10 + (p->src[p->pos] - '0'); 314 ++p->pos; 315 } 316 if (p->pos == start) { 317 lsp_errf(p, start, "expected integer"); 318 return NULL; 319 } 320 } 321 e = lsp_new_expr(p); 322 if (!e) return NULL; 323 e->kind = KIT_LE_INT; 324 e->v.int_val = v; 325 return e; 326 } 327 328 static KitLinkExpr* parse_atom(LSP* p) { 329 int ch; 330 skip_ws(p); 331 if (p->err) return NULL; 332 ch = peek_ch(p); 333 if (ch < 0) { 334 lsp_errf(p, p->pos, "unexpected end of expression"); 335 return NULL; 336 } 337 if (ch == '(') { 338 KitLinkExpr* e; 339 ++p->pos; 340 e = parse_expr(p); 341 if (!e) return NULL; 342 if (expect_ch(p, ')')) return NULL; 343 return e; 344 } 345 if (ch == '.') { 346 /* `.` only — bare dot, not a dotted ident. We disambiguate by 347 * looking at the next char: a digit/letter/underscore/dot here is a 348 * lex error in this subset (no .text in expression position). */ 349 size_t off = p->pos; 350 ++p->pos; 351 if (p->pos < p->len && is_id_cont((unsigned char)p->src[p->pos])) { 352 lsp_errf(p, off, "dotted identifiers not allowed in expressions"); 353 return NULL; 354 } 355 { 356 KitLinkExpr* e = lsp_new_expr(p); 357 if (!e) return NULL; 358 e->kind = KIT_LE_DOT; 359 return e; 360 } 361 } 362 if (ch >= '0' && ch <= '9') return parse_int(p); 363 if (is_id_start(ch)) { 364 /* either ALIGN(...) or a symbol reference */ 365 if (match_kw(p, "ALIGN")) { 366 /* Two forms, matching GNU ld: 367 * ALIGN(align) — align the current location `.` (val defaults 368 * to dot); the common `. = ALIGN(N)` idiom. 369 * ALIGN(val, align) — align an explicit expression. */ 370 KitLinkExpr *val, *aln, *e; 371 if (expect_ch(p, '(')) return NULL; 372 val = parse_expr(p); 373 if (!val) return NULL; 374 skip_ws(p); 375 if (p->pos < p->len && p->src[p->pos] == ',') { 376 ++p->pos; 377 aln = parse_expr(p); 378 if (!aln) return NULL; 379 } else { 380 /* 1-arg form: the parsed expr is the alignment; val is `.`. */ 381 aln = val; 382 val = lsp_new_expr(p); 383 if (!val) return NULL; 384 val->kind = KIT_LE_DOT; 385 } 386 if (expect_ch(p, ')')) return NULL; 387 e = lsp_new_expr(p); 388 if (!e) return NULL; 389 e->kind = KIT_LE_ALIGN; 390 e->v.align.val = val; 391 e->v.align.align = aln; 392 return e; 393 } 394 if (match_kw(p, "MAX") || match_kw(p, "MIN")) { 395 lsp_errf(p, p->pos, "MAX/MIN not supported in this subset"); 396 return NULL; 397 } 398 { 399 const char* s; 400 size_t n; 401 KitLinkExpr* e; 402 if (lex_ident(p, &s, &n)) return NULL; 403 e = lsp_new_expr(p); 404 if (!e) return NULL; 405 e->kind = KIT_LE_SYM; 406 e->v.name = lsp_slice(p, s, n); 407 return e; 408 } 409 } 410 lsp_errf(p, p->pos, "unexpected '%c' in expression", (char)ch); 411 return NULL; 412 } 413 414 /* Returns >=0 binding power for a binary operator at p->pos and 415 * advances past it; -1 if no binary operator at the lookahead. */ 416 static int try_take_binop(LSP* p, KitLinkExprKind* out_kind) { 417 int ch; 418 skip_ws(p); 419 if (p->err) return -1; 420 if (p->pos >= p->len) return -1; 421 ch = (unsigned char)p->src[p->pos]; 422 switch (ch) { 423 case '|': 424 ++p->pos; 425 *out_kind = KIT_LE_OR; 426 return 0; 427 case '^': 428 ++p->pos; 429 *out_kind = KIT_LE_XOR; 430 return 1; 431 case '&': 432 ++p->pos; 433 *out_kind = KIT_LE_AND; 434 return 2; 435 case '<': 436 if (p->pos + 1 < p->len && p->src[p->pos + 1] == '<') { 437 p->pos += 2; 438 *out_kind = KIT_LE_SHL; 439 return 3; 440 } 441 return -1; 442 case '>': 443 if (p->pos + 1 < p->len && p->src[p->pos + 1] == '>') { 444 p->pos += 2; 445 *out_kind = KIT_LE_SHR; 446 return 3; 447 } 448 return -1; 449 case '+': 450 ++p->pos; 451 *out_kind = KIT_LE_ADD; 452 return 4; 453 case '-': 454 ++p->pos; 455 *out_kind = KIT_LE_SUB; 456 return 4; 457 case '*': 458 ++p->pos; 459 *out_kind = KIT_LE_MUL; 460 return 5; 461 case '/': 462 /* Division. Block-comment and /DISCARD/ openers are filtered 463 * elsewhere: skip_ws eats slash-star comments, and /DISCARD/ is 464 * recognized by the SECTIONS-body loop before expression 465 * context. */ 466 ++p->pos; 467 *out_kind = KIT_LE_DIV; 468 return 5; 469 default: 470 return -1; 471 } 472 } 473 474 static KitLinkExpr* parse_binop_rhs(LSP* p, int min_bp, KitLinkExpr* lhs) { 475 while (!p->err) { 476 size_t save; 477 KitLinkExprKind k; 478 int bp; 479 skip_ws(p); 480 if (p->err) return NULL; 481 save = p->pos; 482 bp = try_take_binop(p, &k); 483 if (bp < 0) return lhs; 484 if (bp < min_bp) { 485 p->pos = save; 486 return lhs; 487 } 488 { 489 KitLinkExpr* rhs = parse_atom(p); 490 KitLinkExpr* node; 491 if (!rhs) return NULL; 492 rhs = parse_binop_rhs(p, bp + 1, rhs); 493 if (!rhs) return NULL; 494 node = lsp_new_expr(p); 495 if (!node) return NULL; 496 node->kind = (uint8_t)k; 497 node->v.bin.lhs = lhs; 498 node->v.bin.rhs = rhs; 499 lhs = node; 500 } 501 } 502 return NULL; 503 } 504 505 static KitLinkExpr* parse_expr(LSP* p) { 506 KitLinkExpr* lhs = parse_atom(p); 507 if (!lhs) return NULL; 508 return parse_binop_rhs(p, 0, lhs); 509 } 510 511 /* ---- assignment helpers ---- */ 512 513 static int push_dot_align(LSP* p, VecAsn* asns, KitLinkExpr* align_n) { 514 KitLinkExpr* dot; 515 KitLinkExpr* aln; 516 KitLinkAssignment a; 517 dot = lsp_new_expr(p); 518 if (!dot) return 1; 519 dot->kind = KIT_LE_DOT; 520 aln = lsp_new_expr(p); 521 if (!aln) return 1; 522 aln->kind = KIT_LE_ALIGN; 523 aln->v.align.val = dot; 524 aln->v.align.align = align_n; 525 a.kind = KIT_LAS_DOT; 526 a.sym = KIT_SLICE_NULL; 527 a.expr = aln; 528 return VEC_PUSH(p, *asns, a); 529 } 530 531 /* ---- output section body ---- */ 532 533 static int parse_input_matchers(LSP* p, VecMatch* out) { 534 /* opening `*` already consumed by caller. expect `(p1 p2 ...)` */ 535 if (expect_ch(p, '(')) return 1; 536 for (;;) { 537 int ch; 538 skip_ws(p); 539 if (p->err) return 1; 540 ch = peek_ch(p); 541 if (ch == ')') { 542 ++p->pos; 543 return 0; 544 } 545 if (ch < 0) { 546 lsp_errf(p, p->pos, "unterminated `*(...)`"); 547 return 1; 548 } 549 /* a pattern is a section-name-like run: id-start chars plus '*'. */ 550 { 551 size_t start; 552 const char* s; 553 size_t n; 554 KitLinkInputMatch m; 555 start = p->pos; 556 while (p->pos < p->len) { 557 char c = p->src[p->pos]; 558 if (is_id_cont((unsigned char)c) || c == '*') 559 ++p->pos; 560 else 561 break; 562 } 563 n = p->pos - start; 564 if (n == 0) { 565 lsp_errf(p, p->pos, "expected section pattern"); 566 return 1; 567 } 568 s = p->src + start; 569 m.file_pattern = KIT_SLICE_NULL; 570 m.section_pattern = lsp_slice(p, s, n); 571 m.keep = 0; 572 if (VEC_PUSH(p, *out, m)) return 1; 573 } 574 } 575 } 576 577 static int parse_section_body(LSP* p, VecMatch* inputs, VecAsn* asns) { 578 if (expect_ch(p, '{')) return 1; 579 for (;;) { 580 int ch; 581 skip_ws(p); 582 if (p->err) return 1; 583 ch = peek_ch(p); 584 if (ch == '}') { 585 ++p->pos; 586 return 0; 587 } 588 if (ch < 0) { 589 lsp_errf(p, p->pos, "unterminated section body"); 590 return 1; 591 } 592 if (ch == '*') { 593 ++p->pos; 594 if (parse_input_matchers(p, inputs)) return 1; 595 continue; 596 } 597 if (ch == '.') { 598 /* `. = expr;` */ 599 size_t off = p->pos; 600 ++p->pos; 601 skip_ws(p); 602 if (p->err) return 1; 603 if (!match_ch(p, '=')) { 604 lsp_errf(p, off, "expected `. = expr` in section body"); 605 return 1; 606 } 607 { 608 KitLinkExpr* e = parse_expr(p); 609 KitLinkAssignment a; 610 if (!e) return 1; 611 if (!match_ch(p, ';')) { /* ; is optional but encouraged */ 612 } 613 a.kind = KIT_LAS_DOT; 614 a.sym = KIT_SLICE_NULL; 615 a.expr = e; 616 if (VEC_PUSH(p, *asns, a)) return 1; 617 } 618 continue; 619 } 620 if (is_id_start(ch)) { 621 /* sym = expr; */ 622 const char* s; 623 size_t n; 624 KitLinkExpr* e; 625 KitLinkAssignment a; 626 if (match_kw(p, "PROVIDE") || match_kw(p, "KEEP")) { 627 lsp_errf(p, p->pos, "PROVIDE/KEEP not supported in this subset"); 628 return 1; 629 } 630 if (lex_ident(p, &s, &n)) return 1; 631 skip_ws(p); 632 if (p->err) return 1; 633 if (!match_ch(p, '=')) { 634 lsp_errf(p, p->pos, "expected `=` after `%.*s`", (int)n, s); 635 return 1; 636 } 637 e = parse_expr(p); 638 if (!e) return 1; 639 (void)match_ch(p, ';'); 640 a.kind = KIT_LAS_SYM; 641 a.sym = lsp_slice(p, s, n); 642 a.expr = e; 643 if (VEC_PUSH(p, *asns, a)) return 1; 644 continue; 645 } 646 lsp_errf(p, p->pos, "unexpected '%c' in section body", (char)ch); 647 return 1; 648 } 649 } 650 651 /* ---- output section header ---- */ 652 653 static int parse_output_section(LSP* p, const char* name_buf, size_t name_len, 654 VecSec* sections) { 655 /* The `:` is the next non-ws char on entry. Header may carry 656 * `: ALIGN(N)` then `{ body }`. */ 657 KitLinkOutputSection sec; 658 VecMatch inputs = {0}; 659 VecAsn asns = {0}; 660 KitLinkExpr* align_n = NULL; 661 662 if (expect_ch(p, ':')) return 1; 663 skip_ws(p); 664 if (p->err) return 1; 665 if (match_kw(p, "ALIGN")) { 666 if (expect_ch(p, '(')) return 1; 667 align_n = parse_expr(p); 668 if (!align_n) return 1; 669 if (expect_ch(p, ')')) return 1; 670 } 671 /* Reject AT>, > REGION, >REGION before the body. */ 672 skip_ws(p); 673 if (p->err) return 1; 674 if (p->pos < p->len && 675 (p->src[p->pos] == '>' || (p->src[p->pos] == 'A' && match_kw(p, "AT")))) { 676 lsp_errf(p, p->pos, 677 "memory-region placement (>REGION / AT>) not supported"); 678 return 1; 679 } 680 681 /* Section header alignment is encoded as the first asn — applicator 682 * pulls it before processing inputs. */ 683 if (align_n) { 684 if (push_dot_align(p, &asns, align_n)) goto fail; 685 } 686 687 if (parse_section_body(p, &inputs, &asns)) goto fail; 688 689 /* Optional trailing `> REGION` / `AT> REGION` / `: NOLOAD` etc. — all 690 * unsupported. We allow an optional trailing `;` and nothing else. */ 691 (void)match_ch(p, ';'); 692 693 /* Materialize. */ 694 { 695 KitLinkInputMatch* arr_in = NULL; 696 KitLinkAssignment* arr_as = NULL; 697 if (inputs.n) { 698 arr_in = arena_array(p->arena, KitLinkInputMatch, inputs.n); 699 if (!arr_in) goto fail; 700 memcpy(arr_in, inputs.p, sizeof(*arr_in) * inputs.n); 701 } 702 if (asns.n) { 703 arr_as = arena_array(p->arena, KitLinkAssignment, asns.n); 704 if (!arr_as) goto fail; 705 memcpy(arr_as, asns.p, sizeof(*arr_as) * asns.n); 706 } 707 memset(&sec, 0, sizeof(sec)); 708 sec.name = lsp_slice(p, name_buf, name_len); 709 sec.inputs = arr_in; 710 sec.ninputs = inputs.n; 711 sec.asns = arr_as; 712 sec.nasns = asns.n; 713 } 714 715 vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p)); 716 vec_free_(p, asns.p, asns.cap, sizeof(*asns.p)); 717 718 return VEC_PUSH(p, *sections, sec); 719 720 fail: 721 vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p)); 722 vec_free_(p, asns.p, asns.cap, sizeof(*asns.p)); 723 return 1; 724 } 725 726 /* ---- SECTIONS{...} ---- */ 727 728 static int parse_sections_block(LSP* p, VecAsn* top_asns, VecSec* sections) { 729 if (expect_ch(p, '{')) return 1; 730 for (;;) { 731 int ch; 732 skip_ws(p); 733 if (p->err) return 1; 734 ch = peek_ch(p); 735 if (ch == '}') { 736 ++p->pos; 737 return 0; 738 } 739 if (ch < 0) { 740 lsp_errf(p, p->pos, "unterminated SECTIONS block"); 741 return 1; 742 } 743 /* /DISCARD/ : { body } */ 744 if (ch == '/') { 745 static const char kDiscard[] = "/DISCARD/"; 746 size_t klen = sizeof(kDiscard) - 1; 747 if (p->pos + klen <= p->len && 748 memcmp(p->src + p->pos, kDiscard, klen) == 0) { 749 p->pos += klen; 750 if (parse_output_section(p, kDiscard, klen, sections)) return 1; 751 continue; 752 } 753 lsp_errf(p, p->pos, "expected /DISCARD/ or section header"); 754 return 1; 755 } 756 /* `. = expr;` at SECTIONS top level */ 757 if (ch == '.') { 758 size_t off = p->pos; 759 /* Distinguish bare-dot (`. =`) from `.text :` head. Bare dot has 760 * no id-cont following. */ 761 if (p->pos + 1 < p->len && 762 is_id_cont((unsigned char)p->src[p->pos + 1])) { 763 /* falls through to identifier path */ 764 } else { 765 ++p->pos; 766 skip_ws(p); 767 if (p->err) return 1; 768 if (!match_ch(p, '=')) { 769 lsp_errf(p, off, "expected `. = expr`"); 770 return 1; 771 } 772 { 773 KitLinkExpr* e = parse_expr(p); 774 KitLinkAssignment a; 775 if (!e) return 1; 776 (void)match_ch(p, ';'); 777 a.kind = KIT_LAS_DOT; 778 a.sym = KIT_SLICE_NULL; 779 a.expr = e; 780 if (VEC_PUSH(p, *top_asns, a)) return 1; 781 } 782 continue; 783 } 784 } 785 if (is_id_start(ch)) { 786 /* either `name :` (output section) or `sym = expr;` */ 787 const char* s; 788 size_t n; 789 size_t name_off; 790 if (match_kw(p, "PROVIDE")) { 791 lsp_errf(p, p->pos, "PROVIDE not supported in this subset"); 792 return 1; 793 } 794 name_off = p->pos; 795 if (lex_ident(p, &s, &n)) return 1; 796 skip_ws(p); 797 if (p->err) return 1; 798 if (p->pos < p->len && p->src[p->pos] == ':') { 799 char* nm = lsp_strdup(p, s, n); 800 if (!nm) return 1; 801 if (parse_output_section(p, nm, n, sections)) return 1; 802 continue; 803 } 804 if (match_ch(p, '=')) { 805 KitLinkExpr* e = parse_expr(p); 806 KitLinkAssignment a; 807 if (!e) return 1; 808 (void)match_ch(p, ';'); 809 a.kind = KIT_LAS_SYM; 810 a.sym = lsp_slice(p, s, n); 811 a.expr = e; 812 if (VEC_PUSH(p, *top_asns, a)) return 1; 813 continue; 814 } 815 lsp_errf(p, name_off, 816 "expected `:` (output section) or `=` (assignment) after " 817 "`%.*s`", 818 (int)n, s); 819 return 1; 820 } 821 lsp_errf(p, p->pos, "unexpected '%c' in SECTIONS body", (char)ch); 822 return 1; 823 } 824 } 825 826 /* ---- top level ---- */ 827 828 static int parse_top(LSP* p, KitLinkScript* out) { 829 VecAsn top_asns = {0}; 830 VecSec sections = {0}; 831 KitSlice entry_name = KIT_SLICE_NULL; 832 int saw_sections = 0; 833 int rc = 1; 834 835 for (;;) { 836 int ch; 837 skip_ws(p); 838 if (p->err) goto done; 839 if (p->pos >= p->len) break; 840 ch = (unsigned char)p->src[p->pos]; 841 842 if (is_id_start(ch)) { 843 if (match_kw(p, "ENTRY")) { 844 const char* s; 845 size_t n; 846 if (expect_ch(p, '(')) goto done; 847 if (lex_ident(p, &s, &n)) goto done; 848 if (expect_ch(p, ')')) goto done; 849 (void)match_ch(p, ';'); 850 entry_name = lsp_slice(p, s, n); 851 if (!entry_name.s) goto done; 852 continue; 853 } 854 if (match_kw(p, "SECTIONS")) { 855 if (saw_sections) { 856 lsp_errf(p, p->pos, "duplicate SECTIONS block"); 857 goto done; 858 } 859 if (parse_sections_block(p, &top_asns, §ions)) goto done; 860 saw_sections = 1; 861 continue; 862 } 863 if (match_kw(p, "MEMORY") || match_kw(p, "OVERLAY") || 864 match_kw(p, "INSERT") || match_kw(p, "OUTPUT_FORMAT") || 865 match_kw(p, "OUTPUT_ARCH") || match_kw(p, "INPUT") || 866 match_kw(p, "GROUP") || match_kw(p, "VERSION") || 867 match_kw(p, "PROVIDE") || match_kw(p, "STARTUP") || 868 match_kw(p, "SEARCH_DIR") || match_kw(p, "TARGET")) { 869 lsp_errf(p, p->pos, 870 "directive not supported in this linker-script subset"); 871 goto done; 872 } 873 lsp_errf(p, p->pos, "unknown top-level directive"); 874 goto done; 875 } 876 lsp_errf(p, p->pos, "unexpected '%c' at top level", (char)ch); 877 goto done; 878 } 879 880 /* Materialize. */ 881 out->entry = entry_name; 882 out->regions = NULL; 883 out->nregions = 0; 884 out->top_asns = NULL; 885 out->ntop_asns = 0; 886 out->sections = NULL; 887 out->nsections = 0; 888 889 if (top_asns.n) { 890 KitLinkAssignment* a = arena_array(p->arena, KitLinkAssignment, top_asns.n); 891 if (!a) goto done; 892 memcpy(a, top_asns.p, sizeof(*a) * top_asns.n); 893 out->top_asns = a; 894 out->ntop_asns = top_asns.n; 895 } 896 if (sections.n) { 897 KitLinkOutputSection* s = 898 arena_array(p->arena, KitLinkOutputSection, sections.n); 899 if (!s) goto done; 900 memcpy(s, sections.p, sizeof(*s) * sections.n); 901 out->sections = s; 902 out->nsections = sections.n; 903 } 904 rc = 0; 905 906 done: 907 vec_free_(p, top_asns.p, top_asns.cap, sizeof(*top_asns.p)); 908 vec_free_(p, sections.p, sections.cap, sizeof(*sections.p)); 909 return rc; 910 } 911 912 /* ---- public API ---- */ 913 914 KitStatus kit_link_script_parse(const KitContext* ctx, KitSlice text, 915 KitLinkScript** out) { 916 ScriptOwner* owner; 917 LSP p; 918 int rc; 919 Heap* h; 920 921 if (!out) return KIT_INVALID; 922 *out = NULL; 923 if (!ctx || !ctx->heap || !text.s) return KIT_INVALID; 924 925 h = ctx->heap; 926 owner = (ScriptOwner*)h->alloc(h, sizeof(*owner), _Alignof(ScriptOwner)); 927 if (!owner) return KIT_NOMEM; 928 memset(owner, 0, sizeof(*owner)); 929 /* 16 KiB blocks: matches the linker's tu arena defaults and is plenty 930 * for the script subset we support. */ 931 arena_init(&owner->arena, h, 16u * 1024u); 932 933 memset(&p, 0, sizeof(p)); 934 p.arena = &owner->arena; 935 p.heap = h; 936 p.diag = ctx->diag; 937 p.src = text.s; 938 p.len = text.len; 939 940 rc = parse_top(&p, &owner->script); 941 if (rc != 0 || p.err) { 942 arena_fini(&owner->arena); 943 h->free(h, owner, sizeof(*owner)); 944 return KIT_ERR; 945 } 946 *out = &owner->script; 947 return KIT_OK; 948 } 949 950 void kit_link_script_free(const KitContext* ctx, KitLinkScript* s) { 951 ScriptOwner* owner; 952 Heap* h; 953 if (!ctx || !ctx->heap || !s) return; 954 owner = (ScriptOwner*)((char*)s - offsetof(ScriptOwner, script)); 955 h = ctx->heap; 956 arena_fini(&owner->arena); 957 h->free(h, owner, sizeof(*owner)); 958 }