kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link_script.c (26703B)


      1 /* Linker-script parser: a minimal GNU-ld-subset front end that produces
      2  * the structured KitLinkScript form documented in <kit/link.h>. The
      3  * applicator (link_layout.c) consumes the structured form; this file
      4  * never speaks ELF or layout.
      5  *
      6  * Subset (driven by the kernel.lds at the head of doc/DESIGN.md §13):
      7  *   ENTRY(symbol)
      8  *   SECTIONS { ... }
      9  *     . = expr
     10  *     name = expr
     11  *     name : [ALIGN(N)] { body }
     12  *     /DISCARD/ : { body }
     13  *   body items: *(p1 p2 ...), name = expr, . = expr
     14  *   exprs: int literal (dec / 0x), `.`, ident, parens,
     15  *          + - * / & | ^ << >>,
     16  *          ALIGN(align)        (1-arg: aligns `.`, GNU form)
     17  *          ALIGN(val, align)   (2-arg: aligns an explicit expr)
     18  *   slash-star comments; whitespace insensitive.
     19  *
     20  * Anything else (MEMORY, PROVIDE, KEEP, AT>, > REGION, OVERLAY, INSERT,
     21  * OUTPUT_FORMAT, INPUT, GROUP, MAX, MIN, line comments, quoted strings,
     22  * file patterns other than the implicit `*` of `*(...)`) is a parse
     23  * error: emits a diagnostic; the parse returns KIT_ERR with *out == NULL.
     24  *
     25  * Encoding contracts the applicator relies on:
     26  *   - /DISCARD/ is encoded as a KitLinkOutputSection with name
     27  *     "/DISCARD/" (a literal sentinel, not a parsed identifier).
     28  *   - An output section's `: ALIGN(N)` header is encoded as the first
     29  *     entry in its asns[]: a dot-assignment whose expr is ALIGN(., N).
     30  *   - `*(p1 p2 ...)` produces one KitLinkInputMatch per pattern with
     31  *     file_pattern = empty slice (implicit `*`) and section_pattern set.
     32  *     COMMON is parsed as a literal pattern "COMMON".
     33  *
     34  * Allocation: every node and string is owned by a per-script arena that
     35  * lives in a heap-allocated ScriptOwner block next to the KitLinkScript;
     36  * kit_link_script_free tears down that arena and frees the block. During
     37  * parsing we grow temporary arrays on the host heap, then arena-copy at finish.
     38  *
     39  * Diagnostics: SourceManager registration of a script buffer is a future
     40  * cleanup; for now diagnostics carry file_id = 0 and a 1-based line/col
     41  * that lsp_loc recomputes from the byte offset on every report. */
     42 
     43 #include <kit/core.h>
     44 #include <kit/link.h>
     45 #include <stdarg.h>
     46 #include <stddef.h>
     47 #include <string.h>
     48 
     49 #include "core/arena.h"
     50 #include "core/core.h"
     51 #include "core/diag.h"
     52 #include "core/heap.h"
     53 #include "core/slice.h"
     54 
     55 /* The public KitLinkScript has no place to carry its backing-arena
     56  * pointer, so we allocate a fixed-shape owner block via heap and arena-
     57  * init it inline. kit_link_script_free recovers the owner by stepping
     58  * back from the script field to the wrapping struct. The arena's first
     59  * member must match struct Arena (defined in core/arena.h). */
     60 typedef struct ScriptOwner {
     61   Arena arena;
     62   KitLinkScript script;
     63 } ScriptOwner;
     64 
     65 typedef struct LSP {
     66   Arena* arena;
     67   Heap* heap;
     68   KitDiagSink* diag;
     69   const char* src;
     70   size_t len;
     71   size_t pos;
     72   /* one-bit error sticky: any diagnostic flips this and the parser
     73    * unwinds without producing partial output. */
     74   int err;
     75 } LSP;
     76 
     77 /* ---- diagnostics ---- */
     78 
     79 static SrcLoc lsp_loc(const LSP* p, size_t off) {
     80   /* TODO: register the script buffer with SourceManager so diagnostics
     81    * carry a real file_id; until then encode the byte offset as `line`
     82    * and recompute a 1-based line/col on demand. */
     83   SrcLoc l;
     84   size_t i, line = 1, col = 1;
     85   l.file_id = 0;
     86   for (i = 0; i < off && i < p->len; ++i) {
     87     if (p->src[i] == '\n') {
     88       ++line;
     89       col = 1;
     90     } else {
     91       ++col;
     92     }
     93   }
     94   l.line = (u32)line;
     95   l.col = (u32)col;
     96   return l;
     97 }
     98 
     99 static void lsp_errf(LSP* p, size_t off, const char* fmt, ...) {
    100   va_list ap;
    101   if (!p->diag) {
    102     p->err = 1;
    103     return;
    104   }
    105   va_start(ap, fmt);
    106   diag_emitv(p->diag, DIAG_ERROR, lsp_loc(p, off), fmt, ap);
    107   va_end(ap);
    108   p->err = 1;
    109 }
    110 
    111 /* ---- arena helpers ---- */
    112 
    113 static char* lsp_strdup(LSP* p, const char* s, size_t n) {
    114   return arena_strdup(p->arena, s, n);
    115 }
    116 
    117 /* Arena-copy a span of the script text and return a KitSlice over the
    118  * copy. The copy is NUL-terminated (arena_strdup), so consumers that hit
    119  * a host boundary can use .s directly. */
    120 static KitSlice lsp_slice(LSP* p, const char* s, size_t n) {
    121   KitSlice out;
    122   out.s = lsp_strdup(p, s, n);
    123   out.len = out.s ? n : 0;
    124   return out;
    125 }
    126 
    127 static KitLinkExpr* lsp_new_expr(LSP* p) {
    128   return arena_znew(p->arena, KitLinkExpr);
    129 }
    130 
    131 /* ---- heap-backed temp vectors (copied to the arena at finish) ---- */
    132 
    133 typedef struct VecAsn {
    134   KitLinkAssignment* p;
    135   u32 n, cap;
    136 } VecAsn;
    137 typedef struct VecMatch {
    138   KitLinkInputMatch* p;
    139   u32 n, cap;
    140 } VecMatch;
    141 typedef struct VecSec {
    142   KitLinkOutputSection* p;
    143   u32 n, cap;
    144 } VecSec;
    145 
    146 static int vec_reserve_(LSP* p, void** ptr, u32* cap, u32 want, size_t es) {
    147   u32 nc;
    148   void* nb;
    149   if (*cap >= want) return 0;
    150   nc = *cap ? *cap * 2 : 8;
    151   while (nc < want) nc *= 2;
    152   nb = p->heap->realloc(p->heap, *ptr, (size_t)*cap * es, (size_t)nc * es,
    153                         sizeof(void*));
    154   if (!nb) return 1;
    155   *ptr = nb;
    156   *cap = nc;
    157   return 0;
    158 }
    159 
    160 #define VEC_PUSH(p, v, val)                                               \
    161   (vec_reserve_((p), (void**)&(v).p, &(v).cap, (v).n + 1, sizeof(*(v).p)) \
    162        ? 1                                                                \
    163        : ((v).p[(v).n++] = (val), 0))
    164 
    165 static void vec_free_(LSP* p, void* ptr, u32 cap, size_t es) {
    166   if (ptr) p->heap->free(p->heap, ptr, (size_t)cap * es);
    167 }
    168 
    169 /* ---- lex primitives ---- */
    170 
    171 static int is_id_start(int c) {
    172   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' ||
    173          c == '.';
    174 }
    175 static int is_id_cont(int c) {
    176   return is_id_start(c) || (c >= '0' && c <= '9') || c == '-';
    177 }
    178 
    179 static void skip_ws(LSP* p) {
    180   while (p->pos < p->len) {
    181     char ch = p->src[p->pos];
    182     if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
    183       ++p->pos;
    184       continue;
    185     }
    186     if (ch == '/' && p->pos + 1 < p->len && p->src[p->pos + 1] == '*') {
    187       size_t start = p->pos;
    188       p->pos += 2;
    189       while (p->pos + 1 < p->len &&
    190              !(p->src[p->pos] == '*' && p->src[p->pos + 1] == '/')) {
    191         ++p->pos;
    192       }
    193       if (p->pos + 1 >= p->len) {
    194         lsp_errf(p, start, "unterminated /* comment");
    195         return;
    196       }
    197       p->pos += 2;
    198       continue;
    199     }
    200     if (ch == '/' && p->pos + 1 < p->len && p->src[p->pos + 1] == '/') {
    201       lsp_errf(p, p->pos, "// line comments not supported");
    202       return;
    203     }
    204     break;
    205   }
    206 }
    207 
    208 static int peek_ch(LSP* p) {
    209   skip_ws(p);
    210   if (p->err) return -1;
    211   if (p->pos >= p->len) return -1;
    212   return (unsigned char)p->src[p->pos];
    213 }
    214 
    215 static int match_ch(LSP* p, char ch) {
    216   skip_ws(p);
    217   if (p->err) return 0;
    218   if (p->pos < p->len && p->src[p->pos] == ch) {
    219     ++p->pos;
    220     return 1;
    221   }
    222   return 0;
    223 }
    224 
    225 static int expect_ch(LSP* p, char ch) {
    226   if (match_ch(p, ch)) return 0;
    227   lsp_errf(p, p->pos, "expected '%c'", ch);
    228   return 1;
    229 }
    230 
    231 /* Lex an identifier-or-section-name token in place: returns a pointer
    232  * into p->src and length via *out_len. Section names like .text.* and
    233  * /DISCARD/ are handled by the section-name-aware variant below. */
    234 static int lex_ident(LSP* p, const char** out, size_t* out_len) {
    235   size_t start;
    236   skip_ws(p);
    237   if (p->err) return 1;
    238   if (p->pos >= p->len || !is_id_start((unsigned char)p->src[p->pos])) {
    239     lsp_errf(p, p->pos, "expected identifier");
    240     return 1;
    241   }
    242   start = p->pos;
    243   while (p->pos < p->len && is_id_cont((unsigned char)p->src[p->pos])) ++p->pos;
    244   *out = p->src + start;
    245   *out_len = p->pos - start;
    246   return 0;
    247 }
    248 
    249 /* Match a literal keyword. Caller must have already peeked. */
    250 static int match_kw(LSP* p, const char* kw) {
    251   size_t klen = slice_from_cstr(kw).len;
    252   size_t save;
    253   skip_ws(p);
    254   if (p->err) return 0;
    255   save = p->pos;
    256   if (p->pos + klen > p->len) return 0;
    257   if (memcmp(p->src + p->pos, kw, klen) != 0) return 0;
    258   /* must not glue to a following id-cont character */
    259   if (p->pos + klen < p->len &&
    260       is_id_cont((unsigned char)p->src[p->pos + klen]))
    261     return 0;
    262   p->pos += klen;
    263   (void)save;
    264   return 1;
    265 }
    266 
    267 /* ---- expression parser (precedence climbing) ----
    268  *
    269  * Levels (low -> high):
    270  *   0: |
    271  *   1: ^
    272  *   2: &
    273  *   3: << >>
    274  *   4: + -
    275  *   5: * /
    276  *   6: unary (none beyond parenthesized atoms here)
    277  *   atom: int | . | ALIGN(e,a) | ident | (expr)
    278  */
    279 
    280 static KitLinkExpr* parse_expr(LSP* p);
    281 
    282 static KitLinkExpr* parse_int(LSP* p) {
    283   KitLinkExpr* e;
    284   size_t start = p->pos;
    285   i64 v = 0;
    286   if (p->pos + 1 < p->len && p->src[p->pos] == '0' &&
    287       (p->src[p->pos + 1] == 'x' || p->src[p->pos + 1] == 'X')) {
    288     p->pos += 2;
    289     if (p->pos >= p->len) {
    290       lsp_errf(p, start, "malformed hex literal");
    291       return NULL;
    292     }
    293     while (p->pos < p->len) {
    294       char ch = p->src[p->pos];
    295       int d;
    296       if (ch >= '0' && ch <= '9')
    297         d = ch - '0';
    298       else if (ch >= 'a' && ch <= 'f')
    299         d = 10 + (ch - 'a');
    300       else if (ch >= 'A' && ch <= 'F')
    301         d = 10 + (ch - 'A');
    302       else
    303         break;
    304       v = (v << 4) | d;
    305       ++p->pos;
    306     }
    307     if (p->pos == start + 2) {
    308       lsp_errf(p, start, "empty hex literal");
    309       return NULL;
    310     }
    311   } else {
    312     while (p->pos < p->len && p->src[p->pos] >= '0' && p->src[p->pos] <= '9') {
    313       v = v * 10 + (p->src[p->pos] - '0');
    314       ++p->pos;
    315     }
    316     if (p->pos == start) {
    317       lsp_errf(p, start, "expected integer");
    318       return NULL;
    319     }
    320   }
    321   e = lsp_new_expr(p);
    322   if (!e) return NULL;
    323   e->kind = KIT_LE_INT;
    324   e->v.int_val = v;
    325   return e;
    326 }
    327 
    328 static KitLinkExpr* parse_atom(LSP* p) {
    329   int ch;
    330   skip_ws(p);
    331   if (p->err) return NULL;
    332   ch = peek_ch(p);
    333   if (ch < 0) {
    334     lsp_errf(p, p->pos, "unexpected end of expression");
    335     return NULL;
    336   }
    337   if (ch == '(') {
    338     KitLinkExpr* e;
    339     ++p->pos;
    340     e = parse_expr(p);
    341     if (!e) return NULL;
    342     if (expect_ch(p, ')')) return NULL;
    343     return e;
    344   }
    345   if (ch == '.') {
    346     /* `.` only — bare dot, not a dotted ident. We disambiguate by
    347      * looking at the next char: a digit/letter/underscore/dot here is a
    348      * lex error in this subset (no .text in expression position). */
    349     size_t off = p->pos;
    350     ++p->pos;
    351     if (p->pos < p->len && is_id_cont((unsigned char)p->src[p->pos])) {
    352       lsp_errf(p, off, "dotted identifiers not allowed in expressions");
    353       return NULL;
    354     }
    355     {
    356       KitLinkExpr* e = lsp_new_expr(p);
    357       if (!e) return NULL;
    358       e->kind = KIT_LE_DOT;
    359       return e;
    360     }
    361   }
    362   if (ch >= '0' && ch <= '9') return parse_int(p);
    363   if (is_id_start(ch)) {
    364     /* either ALIGN(...) or a symbol reference */
    365     if (match_kw(p, "ALIGN")) {
    366       /* Two forms, matching GNU ld:
    367        *   ALIGN(align)        — align the current location `.` (val defaults
    368        *                         to dot); the common `. = ALIGN(N)` idiom.
    369        *   ALIGN(val, align)   — align an explicit expression. */
    370       KitLinkExpr *val, *aln, *e;
    371       if (expect_ch(p, '(')) return NULL;
    372       val = parse_expr(p);
    373       if (!val) return NULL;
    374       skip_ws(p);
    375       if (p->pos < p->len && p->src[p->pos] == ',') {
    376         ++p->pos;
    377         aln = parse_expr(p);
    378         if (!aln) return NULL;
    379       } else {
    380         /* 1-arg form: the parsed expr is the alignment; val is `.`. */
    381         aln = val;
    382         val = lsp_new_expr(p);
    383         if (!val) return NULL;
    384         val->kind = KIT_LE_DOT;
    385       }
    386       if (expect_ch(p, ')')) return NULL;
    387       e = lsp_new_expr(p);
    388       if (!e) return NULL;
    389       e->kind = KIT_LE_ALIGN;
    390       e->v.align.val = val;
    391       e->v.align.align = aln;
    392       return e;
    393     }
    394     if (match_kw(p, "MAX") || match_kw(p, "MIN")) {
    395       lsp_errf(p, p->pos, "MAX/MIN not supported in this subset");
    396       return NULL;
    397     }
    398     {
    399       const char* s;
    400       size_t n;
    401       KitLinkExpr* e;
    402       if (lex_ident(p, &s, &n)) return NULL;
    403       e = lsp_new_expr(p);
    404       if (!e) return NULL;
    405       e->kind = KIT_LE_SYM;
    406       e->v.name = lsp_slice(p, s, n);
    407       return e;
    408     }
    409   }
    410   lsp_errf(p, p->pos, "unexpected '%c' in expression", (char)ch);
    411   return NULL;
    412 }
    413 
    414 /* Returns >=0 binding power for a binary operator at p->pos and
    415  * advances past it; -1 if no binary operator at the lookahead. */
    416 static int try_take_binop(LSP* p, KitLinkExprKind* out_kind) {
    417   int ch;
    418   skip_ws(p);
    419   if (p->err) return -1;
    420   if (p->pos >= p->len) return -1;
    421   ch = (unsigned char)p->src[p->pos];
    422   switch (ch) {
    423     case '|':
    424       ++p->pos;
    425       *out_kind = KIT_LE_OR;
    426       return 0;
    427     case '^':
    428       ++p->pos;
    429       *out_kind = KIT_LE_XOR;
    430       return 1;
    431     case '&':
    432       ++p->pos;
    433       *out_kind = KIT_LE_AND;
    434       return 2;
    435     case '<':
    436       if (p->pos + 1 < p->len && p->src[p->pos + 1] == '<') {
    437         p->pos += 2;
    438         *out_kind = KIT_LE_SHL;
    439         return 3;
    440       }
    441       return -1;
    442     case '>':
    443       if (p->pos + 1 < p->len && p->src[p->pos + 1] == '>') {
    444         p->pos += 2;
    445         *out_kind = KIT_LE_SHR;
    446         return 3;
    447       }
    448       return -1;
    449     case '+':
    450       ++p->pos;
    451       *out_kind = KIT_LE_ADD;
    452       return 4;
    453     case '-':
    454       ++p->pos;
    455       *out_kind = KIT_LE_SUB;
    456       return 4;
    457     case '*':
    458       ++p->pos;
    459       *out_kind = KIT_LE_MUL;
    460       return 5;
    461     case '/':
    462       /* Division. Block-comment and /DISCARD/ openers are filtered
    463        * elsewhere: skip_ws eats slash-star comments, and /DISCARD/ is
    464        * recognized by the SECTIONS-body loop before expression
    465        * context. */
    466       ++p->pos;
    467       *out_kind = KIT_LE_DIV;
    468       return 5;
    469     default:
    470       return -1;
    471   }
    472 }
    473 
    474 static KitLinkExpr* parse_binop_rhs(LSP* p, int min_bp, KitLinkExpr* lhs) {
    475   while (!p->err) {
    476     size_t save;
    477     KitLinkExprKind k;
    478     int bp;
    479     skip_ws(p);
    480     if (p->err) return NULL;
    481     save = p->pos;
    482     bp = try_take_binop(p, &k);
    483     if (bp < 0) return lhs;
    484     if (bp < min_bp) {
    485       p->pos = save;
    486       return lhs;
    487     }
    488     {
    489       KitLinkExpr* rhs = parse_atom(p);
    490       KitLinkExpr* node;
    491       if (!rhs) return NULL;
    492       rhs = parse_binop_rhs(p, bp + 1, rhs);
    493       if (!rhs) return NULL;
    494       node = lsp_new_expr(p);
    495       if (!node) return NULL;
    496       node->kind = (uint8_t)k;
    497       node->v.bin.lhs = lhs;
    498       node->v.bin.rhs = rhs;
    499       lhs = node;
    500     }
    501   }
    502   return NULL;
    503 }
    504 
    505 static KitLinkExpr* parse_expr(LSP* p) {
    506   KitLinkExpr* lhs = parse_atom(p);
    507   if (!lhs) return NULL;
    508   return parse_binop_rhs(p, 0, lhs);
    509 }
    510 
    511 /* ---- assignment helpers ---- */
    512 
    513 static int push_dot_align(LSP* p, VecAsn* asns, KitLinkExpr* align_n) {
    514   KitLinkExpr* dot;
    515   KitLinkExpr* aln;
    516   KitLinkAssignment a;
    517   dot = lsp_new_expr(p);
    518   if (!dot) return 1;
    519   dot->kind = KIT_LE_DOT;
    520   aln = lsp_new_expr(p);
    521   if (!aln) return 1;
    522   aln->kind = KIT_LE_ALIGN;
    523   aln->v.align.val = dot;
    524   aln->v.align.align = align_n;
    525   a.kind = KIT_LAS_DOT;
    526   a.sym = KIT_SLICE_NULL;
    527   a.expr = aln;
    528   return VEC_PUSH(p, *asns, a);
    529 }
    530 
    531 /* ---- output section body ---- */
    532 
    533 static int parse_input_matchers(LSP* p, VecMatch* out) {
    534   /* opening `*` already consumed by caller. expect `(p1 p2 ...)` */
    535   if (expect_ch(p, '(')) return 1;
    536   for (;;) {
    537     int ch;
    538     skip_ws(p);
    539     if (p->err) return 1;
    540     ch = peek_ch(p);
    541     if (ch == ')') {
    542       ++p->pos;
    543       return 0;
    544     }
    545     if (ch < 0) {
    546       lsp_errf(p, p->pos, "unterminated `*(...)`");
    547       return 1;
    548     }
    549     /* a pattern is a section-name-like run: id-start chars plus '*'. */
    550     {
    551       size_t start;
    552       const char* s;
    553       size_t n;
    554       KitLinkInputMatch m;
    555       start = p->pos;
    556       while (p->pos < p->len) {
    557         char c = p->src[p->pos];
    558         if (is_id_cont((unsigned char)c) || c == '*')
    559           ++p->pos;
    560         else
    561           break;
    562       }
    563       n = p->pos - start;
    564       if (n == 0) {
    565         lsp_errf(p, p->pos, "expected section pattern");
    566         return 1;
    567       }
    568       s = p->src + start;
    569       m.file_pattern = KIT_SLICE_NULL;
    570       m.section_pattern = lsp_slice(p, s, n);
    571       m.keep = 0;
    572       if (VEC_PUSH(p, *out, m)) return 1;
    573     }
    574   }
    575 }
    576 
    577 static int parse_section_body(LSP* p, VecMatch* inputs, VecAsn* asns) {
    578   if (expect_ch(p, '{')) return 1;
    579   for (;;) {
    580     int ch;
    581     skip_ws(p);
    582     if (p->err) return 1;
    583     ch = peek_ch(p);
    584     if (ch == '}') {
    585       ++p->pos;
    586       return 0;
    587     }
    588     if (ch < 0) {
    589       lsp_errf(p, p->pos, "unterminated section body");
    590       return 1;
    591     }
    592     if (ch == '*') {
    593       ++p->pos;
    594       if (parse_input_matchers(p, inputs)) return 1;
    595       continue;
    596     }
    597     if (ch == '.') {
    598       /* `. = expr;` */
    599       size_t off = p->pos;
    600       ++p->pos;
    601       skip_ws(p);
    602       if (p->err) return 1;
    603       if (!match_ch(p, '=')) {
    604         lsp_errf(p, off, "expected `. = expr` in section body");
    605         return 1;
    606       }
    607       {
    608         KitLinkExpr* e = parse_expr(p);
    609         KitLinkAssignment a;
    610         if (!e) return 1;
    611         if (!match_ch(p, ';')) { /* ; is optional but encouraged */
    612         }
    613         a.kind = KIT_LAS_DOT;
    614         a.sym = KIT_SLICE_NULL;
    615         a.expr = e;
    616         if (VEC_PUSH(p, *asns, a)) return 1;
    617       }
    618       continue;
    619     }
    620     if (is_id_start(ch)) {
    621       /* sym = expr; */
    622       const char* s;
    623       size_t n;
    624       KitLinkExpr* e;
    625       KitLinkAssignment a;
    626       if (match_kw(p, "PROVIDE") || match_kw(p, "KEEP")) {
    627         lsp_errf(p, p->pos, "PROVIDE/KEEP not supported in this subset");
    628         return 1;
    629       }
    630       if (lex_ident(p, &s, &n)) return 1;
    631       skip_ws(p);
    632       if (p->err) return 1;
    633       if (!match_ch(p, '=')) {
    634         lsp_errf(p, p->pos, "expected `=` after `%.*s`", (int)n, s);
    635         return 1;
    636       }
    637       e = parse_expr(p);
    638       if (!e) return 1;
    639       (void)match_ch(p, ';');
    640       a.kind = KIT_LAS_SYM;
    641       a.sym = lsp_slice(p, s, n);
    642       a.expr = e;
    643       if (VEC_PUSH(p, *asns, a)) return 1;
    644       continue;
    645     }
    646     lsp_errf(p, p->pos, "unexpected '%c' in section body", (char)ch);
    647     return 1;
    648   }
    649 }
    650 
    651 /* ---- output section header ---- */
    652 
    653 static int parse_output_section(LSP* p, const char* name_buf, size_t name_len,
    654                                 VecSec* sections) {
    655   /* The `:` is the next non-ws char on entry. Header may carry
    656    * `: ALIGN(N)` then `{ body }`. */
    657   KitLinkOutputSection sec;
    658   VecMatch inputs = {0};
    659   VecAsn asns = {0};
    660   KitLinkExpr* align_n = NULL;
    661 
    662   if (expect_ch(p, ':')) return 1;
    663   skip_ws(p);
    664   if (p->err) return 1;
    665   if (match_kw(p, "ALIGN")) {
    666     if (expect_ch(p, '(')) return 1;
    667     align_n = parse_expr(p);
    668     if (!align_n) return 1;
    669     if (expect_ch(p, ')')) return 1;
    670   }
    671   /* Reject AT>, > REGION, >REGION before the body. */
    672   skip_ws(p);
    673   if (p->err) return 1;
    674   if (p->pos < p->len &&
    675       (p->src[p->pos] == '>' || (p->src[p->pos] == 'A' && match_kw(p, "AT")))) {
    676     lsp_errf(p, p->pos,
    677              "memory-region placement (>REGION / AT>) not supported");
    678     return 1;
    679   }
    680 
    681   /* Section header alignment is encoded as the first asn — applicator
    682    * pulls it before processing inputs. */
    683   if (align_n) {
    684     if (push_dot_align(p, &asns, align_n)) goto fail;
    685   }
    686 
    687   if (parse_section_body(p, &inputs, &asns)) goto fail;
    688 
    689   /* Optional trailing `> REGION` / `AT> REGION` / `: NOLOAD` etc. — all
    690    * unsupported. We allow an optional trailing `;` and nothing else. */
    691   (void)match_ch(p, ';');
    692 
    693   /* Materialize. */
    694   {
    695     KitLinkInputMatch* arr_in = NULL;
    696     KitLinkAssignment* arr_as = NULL;
    697     if (inputs.n) {
    698       arr_in = arena_array(p->arena, KitLinkInputMatch, inputs.n);
    699       if (!arr_in) goto fail;
    700       memcpy(arr_in, inputs.p, sizeof(*arr_in) * inputs.n);
    701     }
    702     if (asns.n) {
    703       arr_as = arena_array(p->arena, KitLinkAssignment, asns.n);
    704       if (!arr_as) goto fail;
    705       memcpy(arr_as, asns.p, sizeof(*arr_as) * asns.n);
    706     }
    707     memset(&sec, 0, sizeof(sec));
    708     sec.name = lsp_slice(p, name_buf, name_len);
    709     sec.inputs = arr_in;
    710     sec.ninputs = inputs.n;
    711     sec.asns = arr_as;
    712     sec.nasns = asns.n;
    713   }
    714 
    715   vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p));
    716   vec_free_(p, asns.p, asns.cap, sizeof(*asns.p));
    717 
    718   return VEC_PUSH(p, *sections, sec);
    719 
    720 fail:
    721   vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p));
    722   vec_free_(p, asns.p, asns.cap, sizeof(*asns.p));
    723   return 1;
    724 }
    725 
    726 /* ---- SECTIONS{...} ---- */
    727 
    728 static int parse_sections_block(LSP* p, VecAsn* top_asns, VecSec* sections) {
    729   if (expect_ch(p, '{')) return 1;
    730   for (;;) {
    731     int ch;
    732     skip_ws(p);
    733     if (p->err) return 1;
    734     ch = peek_ch(p);
    735     if (ch == '}') {
    736       ++p->pos;
    737       return 0;
    738     }
    739     if (ch < 0) {
    740       lsp_errf(p, p->pos, "unterminated SECTIONS block");
    741       return 1;
    742     }
    743     /* /DISCARD/ : { body } */
    744     if (ch == '/') {
    745       static const char kDiscard[] = "/DISCARD/";
    746       size_t klen = sizeof(kDiscard) - 1;
    747       if (p->pos + klen <= p->len &&
    748           memcmp(p->src + p->pos, kDiscard, klen) == 0) {
    749         p->pos += klen;
    750         if (parse_output_section(p, kDiscard, klen, sections)) return 1;
    751         continue;
    752       }
    753       lsp_errf(p, p->pos, "expected /DISCARD/ or section header");
    754       return 1;
    755     }
    756     /* `. = expr;` at SECTIONS top level */
    757     if (ch == '.') {
    758       size_t off = p->pos;
    759       /* Distinguish bare-dot (`. =`) from `.text :` head. Bare dot has
    760        * no id-cont following. */
    761       if (p->pos + 1 < p->len &&
    762           is_id_cont((unsigned char)p->src[p->pos + 1])) {
    763         /* falls through to identifier path */
    764       } else {
    765         ++p->pos;
    766         skip_ws(p);
    767         if (p->err) return 1;
    768         if (!match_ch(p, '=')) {
    769           lsp_errf(p, off, "expected `. = expr`");
    770           return 1;
    771         }
    772         {
    773           KitLinkExpr* e = parse_expr(p);
    774           KitLinkAssignment a;
    775           if (!e) return 1;
    776           (void)match_ch(p, ';');
    777           a.kind = KIT_LAS_DOT;
    778           a.sym = KIT_SLICE_NULL;
    779           a.expr = e;
    780           if (VEC_PUSH(p, *top_asns, a)) return 1;
    781         }
    782         continue;
    783       }
    784     }
    785     if (is_id_start(ch)) {
    786       /* either `name :` (output section) or `sym = expr;` */
    787       const char* s;
    788       size_t n;
    789       size_t name_off;
    790       if (match_kw(p, "PROVIDE")) {
    791         lsp_errf(p, p->pos, "PROVIDE not supported in this subset");
    792         return 1;
    793       }
    794       name_off = p->pos;
    795       if (lex_ident(p, &s, &n)) return 1;
    796       skip_ws(p);
    797       if (p->err) return 1;
    798       if (p->pos < p->len && p->src[p->pos] == ':') {
    799         char* nm = lsp_strdup(p, s, n);
    800         if (!nm) return 1;
    801         if (parse_output_section(p, nm, n, sections)) return 1;
    802         continue;
    803       }
    804       if (match_ch(p, '=')) {
    805         KitLinkExpr* e = parse_expr(p);
    806         KitLinkAssignment a;
    807         if (!e) return 1;
    808         (void)match_ch(p, ';');
    809         a.kind = KIT_LAS_SYM;
    810         a.sym = lsp_slice(p, s, n);
    811         a.expr = e;
    812         if (VEC_PUSH(p, *top_asns, a)) return 1;
    813         continue;
    814       }
    815       lsp_errf(p, name_off,
    816                "expected `:` (output section) or `=` (assignment) after "
    817                "`%.*s`",
    818                (int)n, s);
    819       return 1;
    820     }
    821     lsp_errf(p, p->pos, "unexpected '%c' in SECTIONS body", (char)ch);
    822     return 1;
    823   }
    824 }
    825 
    826 /* ---- top level ---- */
    827 
    828 static int parse_top(LSP* p, KitLinkScript* out) {
    829   VecAsn top_asns = {0};
    830   VecSec sections = {0};
    831   KitSlice entry_name = KIT_SLICE_NULL;
    832   int saw_sections = 0;
    833   int rc = 1;
    834 
    835   for (;;) {
    836     int ch;
    837     skip_ws(p);
    838     if (p->err) goto done;
    839     if (p->pos >= p->len) break;
    840     ch = (unsigned char)p->src[p->pos];
    841 
    842     if (is_id_start(ch)) {
    843       if (match_kw(p, "ENTRY")) {
    844         const char* s;
    845         size_t n;
    846         if (expect_ch(p, '(')) goto done;
    847         if (lex_ident(p, &s, &n)) goto done;
    848         if (expect_ch(p, ')')) goto done;
    849         (void)match_ch(p, ';');
    850         entry_name = lsp_slice(p, s, n);
    851         if (!entry_name.s) goto done;
    852         continue;
    853       }
    854       if (match_kw(p, "SECTIONS")) {
    855         if (saw_sections) {
    856           lsp_errf(p, p->pos, "duplicate SECTIONS block");
    857           goto done;
    858         }
    859         if (parse_sections_block(p, &top_asns, &sections)) goto done;
    860         saw_sections = 1;
    861         continue;
    862       }
    863       if (match_kw(p, "MEMORY") || match_kw(p, "OVERLAY") ||
    864           match_kw(p, "INSERT") || match_kw(p, "OUTPUT_FORMAT") ||
    865           match_kw(p, "OUTPUT_ARCH") || match_kw(p, "INPUT") ||
    866           match_kw(p, "GROUP") || match_kw(p, "VERSION") ||
    867           match_kw(p, "PROVIDE") || match_kw(p, "STARTUP") ||
    868           match_kw(p, "SEARCH_DIR") || match_kw(p, "TARGET")) {
    869         lsp_errf(p, p->pos,
    870                  "directive not supported in this linker-script subset");
    871         goto done;
    872       }
    873       lsp_errf(p, p->pos, "unknown top-level directive");
    874       goto done;
    875     }
    876     lsp_errf(p, p->pos, "unexpected '%c' at top level", (char)ch);
    877     goto done;
    878   }
    879 
    880   /* Materialize. */
    881   out->entry = entry_name;
    882   out->regions = NULL;
    883   out->nregions = 0;
    884   out->top_asns = NULL;
    885   out->ntop_asns = 0;
    886   out->sections = NULL;
    887   out->nsections = 0;
    888 
    889   if (top_asns.n) {
    890     KitLinkAssignment* a = arena_array(p->arena, KitLinkAssignment, top_asns.n);
    891     if (!a) goto done;
    892     memcpy(a, top_asns.p, sizeof(*a) * top_asns.n);
    893     out->top_asns = a;
    894     out->ntop_asns = top_asns.n;
    895   }
    896   if (sections.n) {
    897     KitLinkOutputSection* s =
    898         arena_array(p->arena, KitLinkOutputSection, sections.n);
    899     if (!s) goto done;
    900     memcpy(s, sections.p, sizeof(*s) * sections.n);
    901     out->sections = s;
    902     out->nsections = sections.n;
    903   }
    904   rc = 0;
    905 
    906 done:
    907   vec_free_(p, top_asns.p, top_asns.cap, sizeof(*top_asns.p));
    908   vec_free_(p, sections.p, sections.cap, sizeof(*sections.p));
    909   return rc;
    910 }
    911 
    912 /* ---- public API ---- */
    913 
    914 KitStatus kit_link_script_parse(const KitContext* ctx, KitSlice text,
    915                                 KitLinkScript** out) {
    916   ScriptOwner* owner;
    917   LSP p;
    918   int rc;
    919   Heap* h;
    920 
    921   if (!out) return KIT_INVALID;
    922   *out = NULL;
    923   if (!ctx || !ctx->heap || !text.s) return KIT_INVALID;
    924 
    925   h = ctx->heap;
    926   owner = (ScriptOwner*)h->alloc(h, sizeof(*owner), _Alignof(ScriptOwner));
    927   if (!owner) return KIT_NOMEM;
    928   memset(owner, 0, sizeof(*owner));
    929   /* 16 KiB blocks: matches the linker's tu arena defaults and is plenty
    930    * for the script subset we support. */
    931   arena_init(&owner->arena, h, 16u * 1024u);
    932 
    933   memset(&p, 0, sizeof(p));
    934   p.arena = &owner->arena;
    935   p.heap = h;
    936   p.diag = ctx->diag;
    937   p.src = text.s;
    938   p.len = text.len;
    939 
    940   rc = parse_top(&p, &owner->script);
    941   if (rc != 0 || p.err) {
    942     arena_fini(&owner->arena);
    943     h->free(h, owner, sizeof(*owner));
    944     return KIT_ERR;
    945   }
    946   *out = &owner->script;
    947   return KIT_OK;
    948 }
    949 
    950 void kit_link_script_free(const KitContext* ctx, KitLinkScript* s) {
    951   ScriptOwner* owner;
    952   Heap* h;
    953   if (!ctx || !ctx->heap || !s) return;
    954   owner = (ScriptOwner*)((char*)s - offsetof(ScriptOwner, script));
    955   h = ctx->heap;
    956   arena_fini(&owner->arena);
    957   h->free(h, owner, sizeof(*owner));
    958 }