commit bf6d1df8204f07146354dce4f52030f65135cf08
parent 5336a9a232e0099fbfbbabb4eb2be3e4cab93c02
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 10:04:00 -0700
Move assembler parsing under src/asm
Diffstat:
20 files changed, 1983 insertions(+), 1230 deletions(-)
diff --git a/src/api/ar.c b/src/api/ar.c
@@ -1,6 +1,6 @@
/* POSIX ar archive reader/writer (cfree_ar_write / cfree_ar_iter /
* cfree_ar_list). Pure format I/O over CfreeWriter and a const byte
- * range — no pp/lex/parse/cg/obj dependencies. Kept in its own TU so
+ * range — no C frontend/cg/obj dependencies. Kept in its own TU so
* consumers that only need the ar surface (e.g. test/ar_test, the
* driver's `ar` and `ld` paths once split out) don't drag in the full
* compile/link pipeline through the linker.
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -7,12 +7,12 @@
#include <cfree.h>
#include "arch/arch.h"
+#include "asm/asm.h"
#include "core/arena.h"
#include "core/heap.h"
#include "core/pool.h"
#include "link/link.h"
#include "obj/obj.h"
-#include "parse/parse.h"
/* CfreeCompiler lifecycle (cfree_compiler_new / cfree_compiler_free)
* lives in src/api/lifecycle.c so consumers that only need lifecycle
@@ -46,13 +46,13 @@ static _Noreturn void panic_bad_options(Compiler* c, const char* msg) {
/* One-TU compile against a fresh ObjBuilder. The builder is finalized on
* exit so it is immediately consumable by the linker or an emit_* function.
- * The input bytes must outlive this call. Branches on input->lang: C goes
- * through the preprocessor + C parser + codegen; ASM bypasses pp/cg and
- * feeds tokens straight to the assembler. */
+ * The input bytes must outlive this call. Registered language frontends own
+ * their compile path; ASM remains the built-in fallback and feeds tokens
+ * straight to the assembler. */
static void compile_into(Compiler* c, const CfreeCompileOptions* opts,
const CfreeBytesInput* input, ObjBuilder* ob) {
CfreeCompileFn frontend = NULL;
- Lexer* lex;
+ AsmLexer* lex;
MCEmitter* mc;
if (input->lang < CFREE_LANG_COUNT) {
@@ -66,17 +66,19 @@ static void compile_into(Compiler* c, const CfreeCompileOptions* opts,
return;
}
- lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
- mc = mc_new(c, ob);
-
if (input->lang == CFREE_LANG_ASM) {
+ lex = asm_lex_open_mem(c, input->name, (const char*)input->data, input->len);
+ mc = mc_new(c, ob);
/* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */
- parse_asm(c, lex, mc);
+ asm_parse(c, lex, mc);
obj_finalize(ob);
mc_free(mc);
/* The assembler owns the lexer it was handed; no pp_free release. */
return;
}
+
+ compiler_panic(c, no_loc(), "no frontend registered for input language: %u",
+ (u32)input->lang);
}
/* Suffix-based language inference. See header. */
@@ -378,7 +380,7 @@ int cfree_pipeline_link_jit(CfreePipeline* p, const CfreeLinkOptions* opts,
* plus their detect_* helpers) live in src/api/detect.c — pure byte parsing,
* no internal-libcfree dependencies, kept separate so consumers that only
* detect inputs (e.g. cfree-roundtrip tests) don't drag this TU's
- * lex/pp/parse/cg/etc. dependencies in through the linker. */
+ * C frontend/cg/etc. dependencies in through the linker. */
#if 0 /* moved to src/api/detect.c */
CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len)
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -15,10 +15,10 @@
#include <cfree.h>
#include "arch/arch.h"
+#include "asm/asm.h"
#include "debug/debug.h"
#include "link/link.h"
#include "obj/obj.h"
-#include "parse/parse.h"
/* Internal panic stub used when a not-yet-implemented subsystem is invoked
* with a Compiler in hand. Public-API stubs that don't have a Compiler
@@ -30,7 +30,7 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) {
/* C preprocessing is owned by the C frontend. */
-/* parse_asm lives in src/parse/parse_asm.c. */
+/* asm_parse lives in src/asm/asm.c. */
/* mc_new / mc_free live in src/arch/mc.c.
* cgtarget_new / cgtarget_finalize / cgtarget_free live in src/arch/<target>.c
diff --git a/src/arch/aa64_asm.c b/src/arch/aa64_asm.c
@@ -23,9 +23,9 @@
#include "core/arena.h"
#include "core/pool.h"
#include "core/strbuf.h"
-#include "lex/lex.h"
+#include "asm/asm_lex.h"
#include "obj/obj.h"
-#include "parse/parse_asm_helpers.h"
+#include "asm/asm_helpers.h"
/* ---- public handle ---- */
@@ -73,7 +73,7 @@ void aa64_inline_bind(AA64Asm* a,
/* ---- helpers ---- */
-static int tok_punct(Tok t, u32 p) { return asm_driver_tok_is_punct(t, p); }
+static int tok_punct(AsmTok t, u32 p) { return asm_driver_tok_is_punct(t, p); }
static int icase_eq(const char* a, size_t an, const char* b) {
size_t i;
@@ -203,19 +203,19 @@ static int parse_fp_d_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) {
}
static AA64Reg parse_reg(AsmDriver* d) {
- Tok t = asm_driver_next(d);
+ AsmTok t = asm_driver_next(d);
AA64Reg r;
memset(&r, 0, sizeof r);
- if (t.kind != TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r))
+ if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r))
asm_driver_panic(d, "asm: expected register");
return r;
}
static AA64Reg parse_ldstp_reg(AsmDriver* d) {
- Tok t = asm_driver_next(d);
+ AsmTok t = asm_driver_next(d);
AA64Reg r;
memset(&r, 0, sizeof r);
- if (t.kind != TOK_IDENT ||
+ if (t.kind != ASM_TOK_IDENT ||
(!parse_reg_from_ident(d, t.v.ident, &r) &&
!parse_fp_d_reg_from_ident(d, t.v.ident, &r))) {
asm_driver_panic(d, "asm: expected register");
@@ -281,9 +281,9 @@ static int parse_cond_from_ident(AsmDriver* d, Sym ident, u32* out) {
}
static u32 parse_cond(AsmDriver* d, const char* what) {
- Tok t = asm_driver_next(d);
+ AsmTok t = asm_driver_next(d);
u32 cond = 0;
- if (t.kind != TOK_IDENT || !parse_cond_from_ident(d, t.v.ident, &cond))
+ if (t.kind != ASM_TOK_IDENT || !parse_cond_from_ident(d, t.v.ident, &cond))
asm_driver_panic(d, "asm: %s: expected condition code", what);
return cond;
}
@@ -333,8 +333,8 @@ static void p_nop(AsmDriver* d) {
* clrex [#imm4] ; option defaults to sy (15) when omitted */
static u32 parse_barrier_option(AsmDriver* d, int allow_dmb_ld_st) {
if (asm_driver_at_eol(d)) return AA64_BARRIER_OPT_SY;
- Tok t = asm_driver_peek(d);
- if (t.kind == TOK_IDENT) {
+ AsmTok t = asm_driver_peek(d);
+ if (t.kind == ASM_TOK_IDENT) {
(void)asm_driver_next(d);
size_t n = 0;
const char* s = pool_str(asm_driver_pool(d), t.v.ident, &n);
@@ -386,8 +386,8 @@ static void p_clrex(AsmDriver* d) {
static void p_mov(AsmDriver* d) {
AA64Reg rd = parse_reg(d);
expect_comma(d, "mov");
- Tok t = asm_driver_peek(d);
- if (t.kind == TOK_IDENT) {
+ AsmTok t = asm_driver_peek(d);
+ if (t.kind == ASM_TOK_IDENT) {
AA64Reg src;
memset(&src, 0, sizeof src);
if (parse_reg_from_ident(d, t.v.ident, &src)) {
@@ -455,8 +455,8 @@ static void p_movwide(AsmDriver* d, u32 opc) {
u32 hw = 0;
if (asm_driver_eat_comma(d)) {
/* lsl #N (N is 0/16/32/48). */
- Tok lid = asm_driver_next(d);
- if (lid.kind != TOK_IDENT)
+ AsmTok lid = asm_driver_next(d);
+ if (lid.kind != ASM_TOK_IDENT)
asm_driver_panic(d, "asm: expected 'lsl'");
size_t ln = 0;
const char* lp = pool_str(asm_driver_pool(d), lid.v.ident, &ln);
@@ -493,8 +493,8 @@ static void p_except(AsmDriver* d, u32 form) {
/* Read optional `, lsl|lsr|asr|ror #imm` shift modifier. Returns 1 if
* present. */
static int parse_shift_mod(AsmDriver* d, u32* shift_out, u32* imm6_out) {
- Tok t = asm_driver_peek(d);
- if (t.kind != TOK_IDENT) return 0;
+ AsmTok t = asm_driver_peek(d);
+ if (t.kind != ASM_TOK_IDENT) return 0;
size_t n = 0;
const char* p = pool_str(asm_driver_pool(d), t.v.ident, &n);
u32 sh;
@@ -523,8 +523,8 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) {
expect_comma(d, "add/sub");
AA64Reg rn = parse_reg(d);
expect_comma(d, "add/sub");
- Tok t = asm_driver_peek(d);
- if (tok_punct(t, '#') || t.kind == TOK_NUM || tok_punct(t, '-') ||
+ AsmTok t = asm_driver_peek(d);
+ if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') ||
tok_punct(t, '+')) {
/* immediate form */
if (rd.is64 != rn.is64)
@@ -538,8 +538,8 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) {
i64 imm = parse_imm_const(d);
u32 sh = 0;
if (asm_driver_eat_comma(d)) {
- Tok lid = asm_driver_next(d);
- if (lid.kind != TOK_IDENT)
+ AsmTok lid = asm_driver_next(d);
+ if (lid.kind != ASM_TOK_IDENT)
asm_driver_panic(d, "asm: expected 'lsl #12'");
size_t ln = 0;
const char* lp = pool_str(asm_driver_pool(d), lid.v.ident, &ln);
@@ -581,17 +581,17 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) {
static void p_cmp(AsmDriver* d, int is_neg /* cmn flips op */) {
AA64Reg rn = parse_reg(d);
expect_comma(d, "cmp");
- Tok t = asm_driver_peek(d);
- if (tok_punct(t, '#') || t.kind == TOK_NUM || tok_punct(t, '-') ||
+ AsmTok t = asm_driver_peek(d);
+ if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') ||
tok_punct(t, '+')) {
require_sp_spelling(d, rn, "cmp imm");
i64 imm = parse_imm_const(d);
u32 sh = 0;
if (asm_driver_eat_comma(d)) {
- Tok lid = asm_driver_next(d);
+ AsmTok lid = asm_driver_next(d);
size_t ln = 0;
const char* lp =
- (lid.kind == TOK_IDENT)
+ (lid.kind == ASM_TOK_IDENT)
? pool_str(asm_driver_pool(d), lid.v.ident, &ln)
: NULL;
if (!lp || !icase_eq(lp, ln, "lsl"))
@@ -1173,36 +1173,36 @@ static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text,
}
if (i == len) return;
- Lexer* lx = lex_open_mem(a->c, "<inline-asm>", text, len);
+ AsmLexer* lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len);
AsmDriver* d = asm_driver_open_inline(a->c, mc, lx);
/* The first non-trivial token must be the mnemonic identifier (or a
* `.directive`, but inline asm doesn't normally use directives — leave
* that path unsupported until needed). */
- Tok t = asm_driver_peek(d);
- while (t.kind == TOK_NEWLINE || t.kind == TOK_PP_HASH) {
+ AsmTok t = asm_driver_peek(d);
+ while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) {
(void)asm_driver_next(d);
- if (t.kind == TOK_PP_HASH) {
+ if (t.kind == ASM_TOK_HASH) {
/* Skip cpp linemarker rest of line. */
while (!asm_driver_at_eol(d)) (void)asm_driver_next(d);
}
t = asm_driver_peek(d);
}
- if (t.kind == TOK_EOF) {
+ if (t.kind == ASM_TOK_EOF) {
asm_driver_close_inline(d);
- lex_close(lx);
+ asm_lex_close(lx);
return;
}
- if (t.kind != TOK_IDENT)
+ if (t.kind != ASM_TOK_IDENT)
inline_panic(a, "expected mnemonic at start of inline asm line");
(void)asm_driver_next(d);
Sym mn = t.v.ident;
/* Compose `b.eq` etc. — same trick as the standalone driver. */
- Tok dot = asm_driver_peek(d);
+ AsmTok dot = asm_driver_peek(d);
if (asm_driver_tok_is_punct(dot, '.')) {
(void)asm_driver_next(d);
- Tok rest = asm_driver_next(d);
- if (rest.kind != TOK_IDENT)
+ AsmTok rest = asm_driver_next(d);
+ if (rest.kind != ASM_TOK_IDENT)
inline_panic(a, "composite mnemonic: expected ident after '.'");
size_t hn = 0, rn = 0;
const char* hp = pool_str(asm_driver_pool(d), mn, &hn);
@@ -1217,7 +1217,7 @@ static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text,
}
aa64_asm_insn(a, d, mn);
asm_driver_close_inline(d);
- lex_close(lx);
+ asm_lex_close(lx);
}
/* Substitute placeholders into one line's StrBuf, then dispatch.
diff --git a/src/arch/aa64_asm.h b/src/arch/aa64_asm.h
@@ -13,7 +13,7 @@
* resolution and label management live on the driver side. */
#include "core/core.h"
-#include "lex/lex.h"
+#include "asm/asm_lex.h"
typedef struct AsmDriver AsmDriver;
@@ -27,8 +27,8 @@ void aa64_asm_close(AA64Asm*);
/* Parse one mnemonic line. `mnemonic` is the first identifier on the
* line (or "b.cond" composite). The driver has already consumed the
* mnemonic identifier and any trailing dot-suffix. This function
- * consumes operands up to (but not including) the next TOK_NEWLINE or
- * TOK_EOF, and writes the encoded instruction(s) through the driver's
+ * consumes operands up to (but not including) the next ASM_TOK_NEWLINE or
+ * ASM_TOK_EOF, and writes the encoded instruction(s) through the driver's
* MCEmitter. Diagnostics on parse failure go through compiler_panic. */
void aa64_asm_insn(AA64Asm*, AsmDriver*, Sym mnemonic);
diff --git a/src/arch/mc.c b/src/arch/mc.c
@@ -1,6 +1,6 @@
/* Generic MCEmitter implementation.
*
- * MCEmitter sits between CGTarget (or parse_asm) and ObjBuilder. It owns
+ * MCEmitter sits between CGTarget (or asm_parse) and ObjBuilder. It owns
* the current section, byte position, machine label table, and forwards
* relocations / source-location stamps. Encoding is the caller's job —
* MCEmitter writes whatever bytes it's handed.
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -0,0 +1,1032 @@
+/* GNU-as compatible assembler driver — arch-agnostic.
+ *
+ * Reads tokens from an AsmLexer, dispatches directives, manages labels and
+ * section state, and forwards mnemonic lines to the per-arch instruction
+ * parser. Output goes through MCEmitter against an ObjBuilder.
+ *
+ * AsmLexer quirks worked around here:
+ * - `#` is both the immediate marker in asm and the token used for
+ * preprocessed-assembler line markers.
+ * `#` at BOL is a cpp linemarker → skip to next newline; elsewhere
+ * the per-arch parser treats it as the immediate prefix.
+ * - composite mnemonics (`b.eq`, `b.ne`, ...) arrive as IDENT '.' IDENT
+ * and are reassembled before dispatch.
+ * - `.text` etc. arrive as PUNCT('.') + IDENT and are stitched here.
+ *
+ * Symbol bookkeeping: a Sym→ObjSymId map records the symbols introduced
+ * by labels, `.globl`, and operand references so a forward reference
+ * (`b foo` before `foo:`) shares one symbol with its later definition.
+ * A second Sym→AsmEqu map carries `.set`/`.equ` constants. */
+
+#include "asm/asm.h"
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "arch/aa64_asm.h"
+#include "arch/arch.h"
+#include "asm/asm_helpers.h"
+#include "asm/asm_lex.h"
+#include "core/arena.h"
+#include "core/hashmap.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+
+HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32); /* section name -> section id; consulted by ensure_section */
+HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32); /* symbol name -> symbol id; consulted by intern_sym */
+
+typedef struct AsmEqu { /* one entry of the Sym -> AsmEqu constant table */
+ i64 value; /* the constant, or the offset part when has_sym is set */
+ ObjSymId sym; /* nonzero when value is `sym + offset` */
+ u8 has_sym; /* when set, parse_primary yields `sym + value` rather than a plain constant */
+ u8 pad[3];
+} AsmEqu;
+HASHMAP_DEFINE(SymEquMap, Sym, AsmEqu, hash_u32);
+
+struct AsmDriver { /* parser state shared by the directive code and the per-arch instruction parser */
+ Compiler* c; /* passed to compiler_panicv for diagnostics */
+ AsmLexer* lex; /* token source read by d_peek */
+ MCEmitter* mc; /* receives section switches, bytes and relocations */
+ ObjBuilder* ob; /* sections and symbols are created here */
+ Pool* pool; /* resolves Sym spellings and interns new names */
+ Heap* heap; /* scratch allocations (decode_string) */
+
+ AsmTok cur; /* one-token lookahead filled by d_peek */
+ int has_cur; /* nonzero while `cur` holds an unconsumed token */
+
+ /* OBJ_SEC_NONE until first emit / explicit `.text` etc. */
+ ObjSecId cur_sec;
+
+ SymSecMap sec_map; /* sections already created, keyed by name */
+ SymSymMap sym_map; /* symbols already interned, keyed by name */
+ SymEquMap equ_map; /* constants looked up by parse_primary */
+
+ Sym n_text, n_data, n_rodata, n_bss; /* interned on first use as ".text", ".data", ".rodata", ".bss" */
+
+ /* Per-arch handle. Phase-3 ships aa64 only; phase-5 adds dispatch. */
+ AA64Asm* aa64;
+};
+
+/* ---- token plumbing ---- */
+
+/* Return the lookahead token without consuming it. When the one-slot
+ * buffer is empty, a token is pulled from the lexer and cached first. */
+static AsmTok d_peek(AsmDriver* d) {
+  if (d->has_cur) return d->cur;
+  d->cur = asm_lex_next(d->lex);
+  d->has_cur = 1;
+  return d->cur;
+}
+
+/* Consume and return the next token: fill the lookahead slot if needed,
+ * hand its contents back, and mark the slot empty. */
+static AsmTok d_next(AsmDriver* d) {
+  AsmTok taken;
+  (void)d_peek(d); /* guarantees d->cur is populated */
+  taken = d->cur;
+  d->has_cur = 0;
+  return taken;
+}
+
+/* Nonzero when the lookahead token ends the current line (newline or
+ * end of input). Does not consume anything. */
+static int d_is_eol(AsmDriver* d) {
+  AsmTok ahead = d_peek(d);
+  if (ahead.kind == ASM_TOK_EOF) return 1;
+  if (ahead.kind == ASM_TOK_NEWLINE) return 1;
+  return 0;
+}
+
+/* Discard tokens up to, but not including, the newline or EOF that ends
+ * the current line. */
+static void d_skip_to_eol(AsmDriver* d) {
+  for (;;) {
+    if (d_is_eol(d)) return;
+    (void)d_next(d);
+  }
+}
+
+/* Consume a single newline token if one is next; leave anything else
+ * (including EOF) in place. */
+static void d_eat_eol(AsmDriver* d) {
+  if (d_peek(d).kind != ASM_TOK_NEWLINE) return;
+  (void)d_next(d);
+}
+
+/* Source location for diagnostics: the buffered lookahead token's
+ * location when one is pending, otherwise whatever the lexer reports. */
+static SrcLoc d_loc(AsmDriver* d) {
+  return d->has_cur ? d->cur.loc : asm_lex_loc(d->lex);
+}
+
+/* Abort with a printf-style diagnostic positioned at d_loc(d). Never
+ * returns. */
+_Noreturn static void d_panicf(AsmDriver* d, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  /* compiler_panicv is _Noreturn, so a va_end here could never run. */
+  compiler_panicv(d->c, d_loc(d), fmt, args);
+}
+
+/* ---- spelling helpers ---- */
+
+/* Look up the spelling of `s` in the driver's pool; the length is stored
+ * through `nout`. */
+static const char* asm_str(AsmDriver* d, Sym s, size_t* nout) {
+  Pool* pool = d->pool;
+  return pool_str(pool, s, nout);
+}
+
+/* Nonzero when the pooled spelling of `s` is exactly the NUL-terminated
+ * string `lit`. A symbol with no spelling never matches. */
+static int sym_eq(AsmDriver* d, Sym s, const char* lit) {
+  size_t len = 0;
+  const char* text = asm_str(d, s, &len);
+  if (!text) return 0;
+  /* Equal lengths plus equal bytes; strlen stops at the first NUL in
+   * `lit`, so a shorter literal is rejected. */
+  if (strlen(lit) != len) return 0;
+  return memcmp(text, lit, len) == 0;
+}
+
+/* Nonzero when the pooled spelling of `s` begins with the NUL-terminated
+ * string `prefix` (an empty prefix always matches). */
+static int starts_with(AsmDriver* d, Sym s, const char* prefix) {
+  size_t len = 0;
+  const char* text = asm_str(d, s, &len);
+  if (!text) return 0;
+  size_t want = strlen(prefix);
+  if (want > len) return 0;
+  return memcmp(text, prefix, want) == 0;
+}
+
+/* ---- section management ---- */
+
+/* Return the section id registered under `name`, creating the section
+ * in the ObjBuilder (and recording it in sec_map) on first request. The
+ * kind/flags/align arguments are only used when the section is created. */
+static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
+                               u32 align) {
+  ObjSecId* cached = SymSecMap_get(&d->sec_map, name);
+  if (!cached) {
+    ObjSecId fresh = obj_section(d->ob, name, kind, flags, align);
+    SymSecMap_set(&d->sec_map, name, fresh);
+    return fresh;
+  }
+  return *cached;
+}
+
+/* Make `name` the current section: look it up or create it, remember it
+ * in cur_sec, and point the emitter at it. */
+static void set_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
+                        u32 align) {
+  d->cur_sec = ensure_section(d, name, kind, flags, align);
+  d->mc->set_section(d->mc, d->cur_sec);
+}
+
+/* ---- symbol management ---- */
+
+/* Map a symbol name to its ObjSymId. Lookup order: the driver's own
+ * sym_map, then the ObjBuilder's symbol table, and finally a freshly
+ * created local, untyped, section-less symbol. The result is cached in
+ * sym_map either way. */
+static ObjSymId intern_sym(AsmDriver* d, Sym name) {
+  ObjSymId* cached = SymSymMap_get(&d->sym_map, name);
+  if (cached) return *cached;
+  ObjSymId resolved = obj_symbol_find(d->ob, name);
+  if (resolved == OBJ_SYM_NONE)
+    resolved = obj_symbol_ex(d->ob, name, SB_LOCAL, SV_DEFAULT, SK_NOTYPE,
+                             OBJ_SEC_NONE, 0, 0, 0);
+  SymSymMap_set(&d->sym_map, name, resolved);
+  return resolved;
+}
+
+/* Writable view of a symbol record. obj_symbol_get only hands out a
+ * const pointer; the record itself lives in the builder's arena and may
+ * be edited before finalize, so the const is stripped here and nowhere
+ * else. */
+static ObjSym* sym_mut(AsmDriver* d, ObjSymId id) {
+  const ObjSym* readonly = obj_symbol_get(d->ob, id);
+  return (ObjSym*)readonly;
+}
+
+/* ---- expression evaluator (constants + sym ± const) ---- */
+
+typedef struct AsmExpr { /* result of the expression evaluator: a constant or `sym + value` */
+ ObjSymId sym; /* OBJ_SYM_NONE for a pure constant (see expr_c) */
+ i64 value; /* the constant, or the addend applied to sym */
+} AsmExpr;
+
+/* Build a pure-constant expression result. */
+static AsmExpr expr_c(i64 v) {
+  AsmExpr result;
+  result.sym = OBJ_SYM_NONE;
+  result.value = v;
+  return result;
+}
+/* Build a symbolic expression result `s + v`. */
+static AsmExpr expr_s(ObjSymId s, i64 v) {
+  AsmExpr result;
+  result.sym = s;
+  result.value = v;
+  return result;
+}
+
+/* Nonzero when `t` is a PUNCT token whose punctuator code equals `p`. */
+static int tok_is_punct(AsmTok t, u32 p) {
+  if (t.kind != ASM_TOK_PUNCT) return 0;
+  return t.v.punct == p;
+}
+
+/* Convert the spelling of an integer literal to its value.
+ *
+ * Radix comes from the prefix: 0x/0X hex, 0b/0B binary, any other
+ * leading 0 octal, otherwise decimal. Conversion stops at the first
+ * u/U/l/L suffix character. A character that is not a hex digit, or a
+ * digit too large for the radix, aborts via d_panicf. Accumulation is in
+ * u64, so oversized literals wrap silently. A missing or empty spelling
+ * yields 0. */
+static i64 lit_to_i64(AsmDriver* d, Sym spelling) {
+  size_t len = 0;
+  const char* text = asm_str(d, spelling, &len);
+  if (!text || len == 0) return 0;
+  u32 radix = 10;
+  size_t pos = 0;
+  if (text[0] == '0') {
+    char tag = (len >= 2) ? text[1] : '\0';
+    if (tag == 'x' || tag == 'X') {
+      radix = 16;
+      pos = 2;
+    } else if (tag == 'b' || tag == 'B') {
+      radix = 2;
+      pos = 2;
+    } else {
+      radix = 8;
+      pos = 1;
+    }
+  }
+  u64 acc = 0;
+  for (; pos < len; ++pos) {
+    char ch = text[pos];
+    u32 digit;
+    if (ch == 'u' || ch == 'U' || ch == 'l' || ch == 'L') break;
+    if (ch >= '0' && ch <= '9') {
+      digit = (u32)(ch - '0');
+    } else if (ch >= 'a' && ch <= 'f') {
+      digit = 10 + (u32)(ch - 'a');
+    } else if (ch >= 'A' && ch <= 'F') {
+      digit = 10 + (u32)(ch - 'A');
+    } else {
+      d_panicf(d, "asm: bad digit in integer literal");
+    }
+    if (digit >= radix) d_panicf(d, "asm: digit out of base");
+    acc = acc * (u64)radix + digit;
+  }
+  return (i64)acc;
+}
+
+static AsmExpr parse_expr(AsmDriver*);
+static AsmExpr parse_unary(AsmDriver*);
+
+/* primary := NUM | IDENT | '(' expr ')'
+ *
+ * An identifier that names an equ_map entry expands to that entry's
+ * constant (or `sym + value`); any other identifier becomes a reference
+ * to the interned symbol with offset 0. Anything else aborts. */
+static AsmExpr parse_primary(AsmDriver* d) {
+  AsmTok tok = d_peek(d);
+  if (tok.kind == ASM_TOK_NUM) {
+    (void)d_next(d);
+    return expr_c(lit_to_i64(d, tok.spelling));
+  }
+  if (tok.kind == ASM_TOK_IDENT) {
+    (void)d_next(d);
+    AsmEqu* equ = SymEquMap_get(&d->equ_map, tok.v.ident);
+    if (!equ) return expr_s(intern_sym(d, tok.v.ident), 0);
+    if (equ->has_sym) return expr_s(equ->sym, equ->value);
+    return expr_c(equ->value);
+  }
+  if (!tok_is_punct(tok, '(')) d_panicf(d, "asm: expected expression");
+  (void)d_next(d);
+  AsmExpr inner = parse_expr(d);
+  if (!tok_is_punct(d_peek(d), ')')) d_panicf(d, "asm: expected ')'");
+  (void)d_next(d);
+  return inner;
+}
+
+/* unary := ('-' | '+' | '~') unary | primary
+ *
+ * '-' and '~' are only defined on constants and abort on a symbolic
+ * operand; '+' passes its operand through unchanged. */
+static AsmExpr parse_unary(AsmDriver* d) {
+  AsmTok tok = d_peek(d);
+  int negate = tok_is_punct(tok, '-');
+  int invert = tok_is_punct(tok, '~');
+  if (!negate && !invert) {
+    if (!tok_is_punct(tok, '+')) return parse_primary(d);
+    (void)d_next(d);
+    return parse_unary(d);
+  }
+  (void)d_next(d);
+  AsmExpr operand = parse_unary(d);
+  if (negate) {
+    if (operand.sym) d_panicf(d, "asm: unary '-' on symbol");
+    return expr_c(-operand.value);
+  }
+  if (operand.sym) d_panicf(d, "asm: unary '~' on symbol");
+  return expr_c(~operand.value);
+}
+
+/* mul := unary (('*' | '/' | '%') unary)*
+ *
+ * All three operators require constant operands and abort on a symbolic
+ * one; a zero divisor aborts too. Results wrap modulo 2^64:
+ *   - the product is computed in u64, since signed overflow is undefined;
+ *   - a divisor of -1 is handled separately, because `INT64_MIN / -1` and
+ *     `INT64_MIN % -1` are undefined in C and raise SIGFPE on x86, and
+ *     source such as `0x8000000000000000 / -1` can reach them. */
+static AsmExpr parse_mul(AsmDriver* d) {
+  AsmExpr a = parse_unary(d);
+  for (;;) {
+    AsmTok t = d_peek(d);
+    if (!tok_is_punct(t, '*') && !tok_is_punct(t, '/') && !tok_is_punct(t, '%'))
+      return a;
+    u32 op = t.v.punct;
+    (void)d_next(d);
+    AsmExpr b = parse_unary(d);
+    if (a.sym || b.sym) d_panicf(d, "asm: '*/%%' on symbolic operand");
+    if (op == '*') {
+      a.value = (i64)((u64)a.value * (u64)b.value);
+    } else if (op == '/') {
+      if (!b.value) d_panicf(d, "asm: division by zero");
+      if (b.value == -1)
+        a.value = (i64)(0 - (u64)a.value); /* x / -1 == -x, wrapping */
+      else
+        a.value /= b.value;
+    } else {
+      if (!b.value) d_panicf(d, "asm: modulo by zero");
+      if (b.value == -1)
+        a.value = 0; /* x % -1 is always 0 */
+      else
+        a.value %= b.value;
+    }
+  }
+}
+
+/* add := mul (('+' | '-') mul)*
+ *
+ * At most one symbol may survive: `sym + const`, `const + sym` and
+ * `sym - const` are accepted; adding two symbols or subtracting a
+ * symbol aborts. */
+static AsmExpr parse_add(AsmDriver* d) {
+  AsmExpr acc = parse_mul(d);
+  for (;;) {
+    AsmTok t = d_peek(d);
+    int plus = tok_is_punct(t, '+');
+    if (!plus && !tok_is_punct(t, '-')) break;
+    (void)d_next(d);
+    AsmExpr rhs = parse_mul(d);
+    if (!plus) {
+      if (rhs.sym) d_panicf(d, "asm: cannot subtract symbol from constant");
+      acc.value -= rhs.value;
+      continue;
+    }
+    if (acc.sym && rhs.sym) d_panicf(d, "asm: cannot add two symbols");
+    if (rhs.sym) acc.sym = rhs.sym;
+    acc.value += rhs.value;
+  }
+  return acc;
+}
+
+/* shift := add ((SHL | SHR) add)*
+ *
+ * Constant operands only. The shift count is masked to 0..63. Left
+ * shifts are done on the unsigned representation; right shifts use the
+ * signed `>>` on i64. */
+static AsmExpr parse_shift(AsmDriver* d) {
+  AsmExpr acc = parse_add(d);
+  for (;;) {
+    AsmTok t = d_peek(d);
+    int left = tok_is_punct(t, ASM_P_SHL);
+    if (!left && !tok_is_punct(t, ASM_P_SHR)) break;
+    (void)d_next(d);
+    AsmExpr count = parse_add(d);
+    if (acc.sym || count.sym) d_panicf(d, "asm: shift on symbolic operand");
+    if (left)
+      acc.value = (i64)((u64)acc.value << (count.value & 63));
+    else
+      acc.value = acc.value >> (count.value & 63);
+  }
+  return acc;
+}
+
+/* band := shift ('&' shift)*   (constant operands only) */
+static AsmExpr parse_band(AsmDriver* d) {
+  AsmExpr acc = parse_shift(d);
+  while (tok_is_punct(d_peek(d), '&')) {
+    (void)d_next(d);
+    AsmExpr rhs = parse_shift(d);
+    if (acc.sym || rhs.sym) d_panicf(d, "asm: '&' on symbolic operand");
+    acc.value &= rhs.value;
+  }
+  return acc;
+}
+
+/* bxor := band ('^' band)*   (constant operands only) */
+static AsmExpr parse_bxor(AsmDriver* d) {
+  AsmExpr acc = parse_band(d);
+  while (tok_is_punct(d_peek(d), '^')) {
+    (void)d_next(d);
+    AsmExpr rhs = parse_band(d);
+    if (acc.sym || rhs.sym) d_panicf(d, "asm: '^' on symbolic operand");
+    acc.value ^= rhs.value;
+  }
+  return acc;
+}
+
+/* bor := bxor ('|' bxor)*   (constant operands only) */
+static AsmExpr parse_bor(AsmDriver* d) {
+  AsmExpr acc = parse_bxor(d);
+  while (tok_is_punct(d_peek(d), '|')) {
+    (void)d_next(d);
+    AsmExpr rhs = parse_bxor(d);
+    if (acc.sym || rhs.sym) d_panicf(d, "asm: '|' on symbolic operand");
+    acc.value |= rhs.value;
+  }
+  return acc;
+}
+
+/* Entry point of the expression grammar; '|' is the loosest-binding
+ * operator, so parsing starts there. */
+static AsmExpr parse_expr(AsmDriver* d) {
+  AsmExpr result = parse_bor(d);
+  return result;
+}
+
+/* ---- public helpers exposed to per-arch parser ---- */
+
+/* Token access for the per-arch parser: same one-token lookahead as the
+ * driver uses internally. */
+AsmTok asm_driver_peek(AsmDriver* d) {
+  return d_peek(d);
+}
+AsmTok asm_driver_next(AsmDriver* d) {
+  return d_next(d);
+}
+/* Nonzero when the next token is a newline or EOF. */
+int asm_driver_at_eol(AsmDriver* d) {
+  return d_is_eol(d);
+}
+/* Location used for diagnostics at the current parse position. */
+SrcLoc asm_driver_loc(AsmDriver* d) {
+  return d_loc(d);
+}
+/* Plain accessors for the objects the driver was opened with. */
+MCEmitter* asm_driver_mc(AsmDriver* d) {
+  return d->mc;
+}
+ObjBuilder* asm_driver_ob(AsmDriver* d) {
+  return d->ob;
+}
+Compiler* asm_driver_compiler(AsmDriver* d) {
+  return d->c;
+}
+Pool* asm_driver_pool(AsmDriver* d) {
+  return d->pool;
+}
+
+/* Public counterpart of d_panicf: abort with a printf-style diagnostic
+ * at the driver's current location. Never returns, so the va_list is
+ * never closed. */
+_Noreturn void asm_driver_panic(AsmDriver* d, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  compiler_panicv(d->c, d_loc(d), fmt, args);
+}
+
+/* Public wrapper over intern_sym: resolve `name` to its ObjSymId,
+ * creating the symbol on first reference. */
+ObjSymId asm_driver_intern_sym(AsmDriver* d, Sym name) {
+  ObjSymId id = intern_sym(d, name);
+  return id;
+}
+
+/* Return the current section. If none has been selected yet, `.text`
+ * (alloc + exec, alignment 4) is created or reused and made current on
+ * the emitter. */
+ObjSecId asm_driver_cur_section(AsmDriver* d) {
+  if (d->cur_sec != OBJ_SEC_NONE) return d->cur_sec;
+  if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text");
+  /* set_section performs ensure_section, the cur_sec store and the
+   * emitter switch in that order. */
+  set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4);
+  return d->cur_sec;
+}
+
+/* Consume a ',' if it is the next token. Returns 1 when one was
+ * consumed, 0 (and consumes nothing) otherwise. */
+int asm_driver_eat_comma(AsmDriver* d) {
+  if (!tok_is_punct(d_peek(d), ',')) return 0;
+  (void)d_next(d);
+  return 1;
+}
+
+/* Consume punctuator `p` if it is the next token. Returns 1 when
+ * consumed, 0 otherwise. The match rule is the one in
+ * asm_driver_tok_is_punct, so a request for '#' is also satisfied by an
+ * ASM_TOK_HASH token (the immediate prefix). */
+int asm_driver_eat_punct(AsmDriver* d, u32 p) {
+  if (!asm_driver_tok_is_punct(d_peek(d), p)) return 0;
+  (void)d_next(d);
+  return 1;
+}
+
+/* Consume punctuator `p` or abort. `what` names the construct being
+ * parsed and is appended to the diagnostic.
+ *
+ * The message used to print the literal word "punct" for every `p`, so
+ * it never said which character was missing. Callers in this file pass
+ * single-character punctuators as their character value ('#', ',',
+ * '('), so a printable ASCII `p` is now shown verbatim. Other codes
+ * (e.g. multi-character operators) keep the old generic wording. */
+void asm_driver_expect_punct(AsmDriver* d, u32 p, const char* what) {
+  if (asm_driver_eat_punct(d, p)) return;
+  if (p > ' ' && p < 0x7f)
+    d_panicf(d, "asm: expected '%c' (%s)", (int)p, what);
+  d_panicf(d, "asm: expected '%s' (%s)", "punct", what);
+}
+
+/* Parse an expression that must fold to a constant; a result that still
+ * carries a symbol aborts. Returns the constant. */
+i64 asm_driver_parse_const(AsmDriver* d) {
+  AsmExpr result = parse_expr(d);
+  if (result.sym != 0) d_panicf(d, "asm: constant expression expected");
+  return result.value;
+}
+
+/* Parse an expression and report it as `sym + offset`. `*sym_out`
+ * receives the symbol (as produced by the evaluator, i.e. the "none"
+ * value for a pure constant) and `*off_out` the constant part. */
+void asm_driver_parse_sym_expr(AsmDriver* d, ObjSymId* sym_out, i64* off_out) {
+  AsmExpr result = parse_expr(d);
+  *off_out = result.value;
+  *sym_out = result.sym;
+}
+
+/* Nonzero when `t` is punctuator `p`. `#` is delivered as its own token
+ * kind (ASM_TOK_HASH) rather than as PUNCT('#'), so that kind is
+ * accepted when the caller asks for '#'. */
+int asm_driver_tok_is_punct(AsmTok t, u32 p) {
+  int hash_alias = (p == '#') && (t.kind == ASM_TOK_HASH);
+  return tok_is_punct(t, p) || hash_alias;
+}
+
+/* ---- string-literal decoding ---- */
+
+/* Decode the spelling of a string literal (quotes and escapes included)
+ * into raw bytes.
+ *
+ *   spelling  token spelling, optionally with an L/u/u8/U prefix
+ *   out       receives a buffer from d->heap->alloc; the visible caller
+ *             (.ascii/.asciz/.string) releases it via d->heap->free
+ *   nout      receives the number of decoded bytes
+ *
+ * Escapes: \n \t \r \\ \" \' \b \f \v \a, \x with up to two hex digits,
+ * and \ooo with one to three octal digits. Any other escaped character
+ * stands for itself. A spelling that is not wrapped in double quotes
+ * aborts.
+ *
+ * Fix: there used to be a dedicated `case '0'` that emitted NUL at once,
+ * so `\012` decoded to NUL,'1','2' and `\000` to NUL,'0','0'. Escapes
+ * starting with 0 now take the general octal path; a bare `\0` still
+ * decodes to one NUL byte. */
+static void decode_string(AsmDriver* d, Sym spelling, u8** out, u32* nout) {
+  size_t n = 0;
+  const char* p = asm_str(d, spelling, &n);
+  /* Skip any encoding prefix (L/u/u8/U). */
+  while (n && (*p == 'L' || *p == 'u' || *p == 'U' || *p == '8')) {
+    ++p;
+    --n;
+  }
+  if (n < 2 || p[0] != '"' || p[n - 1] != '"')
+    d_panicf(d, "asm: malformed string literal");
+  /* Each escape yields one byte from at least two source characters, so
+   * the spelling length is an upper bound on the output size. */
+  size_t cap = n;
+  u8* buf = (u8*)d->heap->alloc(d->heap, cap ? cap : 1, 1);
+  u32 k = 0;
+  /* Walk the characters between the quotes: indices 1 .. n-2. */
+  for (size_t i = 1; i + 1 < n; ++i) {
+    char c = p[i];
+    if (c != '\\') {
+      buf[k++] = (u8)c;
+      continue;
+    }
+    ++i;
+    if (i + 1 >= n) break; /* backslash directly before the closing quote */
+    char e = p[i];
+    switch (e) {
+      case 'n':
+        buf[k++] = '\n';
+        break;
+      case 't':
+        buf[k++] = '\t';
+        break;
+      case 'r':
+        buf[k++] = '\r';
+        break;
+      case '\\':
+        buf[k++] = '\\';
+        break;
+      case '"':
+        buf[k++] = '"';
+        break;
+      case '\'':
+        buf[k++] = '\'';
+        break;
+      case 'b':
+        buf[k++] = 8;
+        break;
+      case 'f':
+        buf[k++] = 12;
+        break;
+      case 'v':
+        buf[k++] = 11;
+        break;
+      case 'a':
+        buf[k++] = 7;
+        break;
+      case 'x': {
+        /* Up to two hex digits; `i + 2 < n` keeps p[i + 1] in front of
+         * the closing quote. */
+        u32 v = 0;
+        int dn = 0;
+        while (i + 2 < n) {
+          char h = p[i + 1];
+          int dv;
+          if (h >= '0' && h <= '9')
+            dv = h - '0';
+          else if (h >= 'a' && h <= 'f')
+            dv = 10 + (h - 'a');
+          else if (h >= 'A' && h <= 'F')
+            dv = 10 + (h - 'A');
+          else
+            break;
+          v = v * 16 + (u32)dv;
+          ++i;
+          if (++dn >= 2) break;
+        }
+        buf[k++] = (u8)v;
+        break;
+      }
+      default:
+        if (e >= '0' && e <= '7') {
+          /* Octal escape: the digit already read plus up to two more.
+           * This branch also handles `\0`, `\00` and `\0dd`. */
+          u32 v = (u32)(e - '0');
+          int dn = 1;
+          while (dn < 3 && i + 2 < n) {
+            char h = p[i + 1];
+            if (h < '0' || h > '7') break;
+            v = v * 8 + (u32)(h - '0');
+            ++i;
+            ++dn;
+          }
+          buf[k++] = (u8)v;
+        } else {
+          /* Unknown escape: keep the escaped character itself. */
+          buf[k++] = (u8)e;
+        }
+        break;
+    }
+  }
+  *out = buf;
+  *nout = k;
+}
+
+/* ---- directives ---- */
+
+/* Consume an identifier token and return its Sym. `what` names the
+ * directive for the diagnostic raised when the next token is not an
+ * identifier. */
+static Sym expect_ident(AsmDriver* d, const char* what) {
+  AsmTok tok = d_peek(d);
+  if (tok.kind == ASM_TOK_IDENT) {
+    (void)d_next(d);
+    return tok.v.ident;
+  }
+  d_panicf(d, "asm: %s: expected identifier", what);
+}
+
+/* Emit the low `width` bytes of `v`, least-significant byte first, into
+ * the current section (selecting the default section if none is active).
+ * Callers in this file pass widths of 1, 2, 4 or 8. */
+static void emit_le(AsmDriver* d, u64 v, u32 width) {
+  u8 bytes[8];
+  u32 shift = 0;
+  for (u32 k = 0; k < width; ++k, shift += 8) bytes[k] = (u8)(v >> shift);
+  (void)asm_driver_cur_section(d);
+  d->mc->emit_bytes(d->mc, bytes, width);
+}
+
+/* Body of the integer data directives: a comma-separated list of
+ * expressions, each emitted as a `width`-byte little-endian value.
+ *
+ * A constant is written directly. A symbolic value writes `width` zero
+ * bytes and attaches an absolute relocation (R_ABS32 for width 4,
+ * R_ABS64 for width 8) at their offset; symbolic values of any other
+ * width abort. */
+static void emit_int_directive(AsmDriver* d, u32 width) {
+  do {
+    AsmExpr item = parse_expr(d);
+    if (!item.sym) {
+      emit_le(d, (u64)item.value, width);
+      continue; /* jumps to the comma test below */
+    }
+    RelocKind kind;
+    switch (width) {
+      case 4:
+        kind = R_ABS32;
+        break;
+      case 8:
+        kind = R_ABS64;
+        break;
+      default:
+        d_panicf(d, "asm: symbolic .byte/.hword not supported");
+    }
+    (void)asm_driver_cur_section(d);
+    u32 at = d->mc->pos(d->mc);
+    u8 placeholder[8] = {0};
+    d->mc->emit_bytes(d->mc, placeholder, width);
+    d->mc->emit_reloc_at(d->mc, d->cur_sec, at, kind, item.sym, item.value, 1,
+                         0);
+  } while (asm_driver_eat_comma(d));
+}
+
+static void do_directive(AsmDriver* d, Sym name) {
+ if (sym_eq(d, name, "text")) {
+ if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text");
+ set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "data")) {
+ if (!d->n_data) d->n_data = pool_intern_cstr(d->pool, ".data");
+ set_section(d, d->n_data, SEC_DATA, (u16)(SF_ALLOC | SF_WRITE), 8);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "rodata")) {
+ if (!d->n_rodata) d->n_rodata = pool_intern_cstr(d->pool, ".rodata");
+ set_section(d, d->n_rodata, SEC_RODATA, (u16)SF_ALLOC, 8);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "bss")) {
+ if (!d->n_bss) d->n_bss = pool_intern_cstr(d->pool, ".bss");
+ set_section(d, d->n_bss, SEC_BSS, (u16)(SF_ALLOC | SF_WRITE), 8);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "section")) {
+ Sym sname = 0;
+ AsmTok t = d_peek(d);
+ if (t.kind == ASM_TOK_IDENT) {
+ sname = t.v.ident;
+ (void)d_next(d);
+ } else if (t.kind == ASM_TOK_STR) {
+ size_t n = 0;
+ const char* p = asm_str(d, t.spelling, &n);
+ if (n >= 2 && p[0] == '"') sname = pool_intern(d->pool, p + 1, n - 2);
+ (void)d_next(d);
+ } else if (tok_is_punct(t, '.')) {
+ (void)d_next(d);
+ AsmTok id = d_next(d);
+ if (id.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .section: bad name");
+ size_t ni = 0;
+ const char* nm = asm_str(d, id.v.ident, &ni);
+ char buf[128];
+ if (ni + 1 >= sizeof buf) d_panicf(d, "asm: .section: name too long");
+ buf[0] = '.';
+ for (size_t i = 0; i < ni; ++i) buf[i + 1] = nm[i];
+ sname = pool_intern(d->pool, buf, ni + 1);
+ } else {
+ d_panicf(d, "asm: .section: expected name");
+ }
+ SecKind kind = SEC_OTHER;
+ u16 flags = 0;
+ {
+ size_t nn = 0;
+ const char* p = asm_str(d, sname, &nn);
+ if (p) {
+ if (nn >= 5 && memcmp(p, ".text", 5) == 0) {
+ kind = SEC_TEXT;
+ flags = (u16)(SF_ALLOC | SF_EXEC);
+ } else if (nn >= 7 && memcmp(p, ".rodata", 7) == 0) {
+ kind = SEC_RODATA;
+ flags = (u16)SF_ALLOC;
+ } else if (nn >= 5 && memcmp(p, ".data", 5) == 0) {
+ kind = SEC_DATA;
+ flags = (u16)(SF_ALLOC | SF_WRITE);
+ } else if (nn >= 4 && memcmp(p, ".bss", 4) == 0) {
+ kind = SEC_BSS;
+ flags = (u16)(SF_ALLOC | SF_WRITE);
+ }
+ }
+ }
+ /* Skip optional remainder: flags string, type tag, etc. */
+ d_skip_to_eol(d);
+ set_section(d, sname, kind, flags, 1);
+ return;
+ }
+ if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) {
+ Sym n = expect_ident(d, ".globl");
+ sym_mut(d, intern_sym(d, n))->bind = (u16)SB_GLOBAL;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "local")) {
+ Sym n = expect_ident(d, ".local");
+ sym_mut(d, intern_sym(d, n))->bind = (u16)SB_LOCAL;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "weak")) {
+ Sym n = expect_ident(d, ".weak");
+ sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "hidden")) {
+ Sym n = expect_ident(d, ".hidden");
+ sym_mut(d, intern_sym(d, n))->vis = (u8)SV_HIDDEN;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "protected")) {
+ Sym n = expect_ident(d, ".protected");
+ sym_mut(d, intern_sym(d, n))->vis = (u8)SV_PROTECTED;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "internal")) {
+ Sym n = expect_ident(d, ".internal");
+ sym_mut(d, intern_sym(d, n))->vis = (u8)SV_INTERNAL;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "type")) {
+ Sym n = expect_ident(d, ".type");
+ ObjSymId id = intern_sym(d, n);
+ if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .type: expected ','");
+ AsmTok t = d_next(d);
+ Sym tag = 0;
+ if (tok_is_punct(t, '@') || tok_is_punct(t, '%')) {
+ AsmTok ti = d_next(d);
+ if (ti.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .type: tag");
+ tag = ti.v.ident;
+ } else if (t.kind == ASM_TOK_IDENT) {
+ tag = t.v.ident;
+ } else if (t.kind == ASM_TOK_STR) {
+ size_t sn = 0;
+ const char* sp = asm_str(d, t.spelling, &sn);
+ if (sn >= 2 && sp[0] == '"' && sp[sn - 1] == '"')
+ tag = pool_intern(d->pool, sp + 1, sn - 2);
+ } else {
+ d_panicf(d, "asm: .type: tag");
+ }
+ if (tag && sym_eq(d, tag, "function"))
+ sym_mut(d, id)->kind = (u16)SK_FUNC;
+ else if (tag && sym_eq(d, tag, "object"))
+ sym_mut(d, id)->kind = (u16)SK_OBJ;
+ else if (tag && sym_eq(d, tag, "tls_object"))
+ sym_mut(d, id)->kind = (u16)SK_TLS;
+ else if (tag && sym_eq(d, tag, "gnu_indirect_function"))
+ sym_mut(d, id)->kind = (u16)SK_IFUNC;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "size")) {
+ Sym n = expect_ident(d, ".size");
+ ObjSymId id = intern_sym(d, n);
+ if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .size: expected ','");
+ /* Recognize `. - NAME`. */
+ AsmTok t = d_peek(d);
+ i64 sz = 0;
+ if (tok_is_punct(t, '.')) {
+ (void)d_next(d);
+ if (tok_is_punct(d_peek(d), '-')) {
+ (void)d_next(d);
+ AsmTok rid = d_peek(d);
+ if (rid.kind == ASM_TOK_IDENT && rid.v.ident == n) {
+ (void)d_next(d);
+ const ObjSym* os = obj_symbol_get(d->ob, id);
+ if (os && os->section_id == d->cur_sec)
+ sz = (i64)d->mc->pos(d->mc) - (i64)os->value;
+ }
+ }
+ } else {
+ AsmExpr e = parse_expr(d);
+ if (!e.sym) sz = e.value;
+ }
+ if (sz < 0) sz = 0;
+ sym_mut(d, id)->size = (u64)sz;
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "byte")) {
+ emit_int_directive(d, 1);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "hword") || sym_eq(d, name, "short") ||
+ sym_eq(d, name, "2byte")) {
+ emit_int_directive(d, 2);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "word") || sym_eq(d, name, "long") ||
+ sym_eq(d, name, "int") || sym_eq(d, name, "4byte")) {
+ emit_int_directive(d, 4);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "quad") || sym_eq(d, name, "8byte") ||
+ sym_eq(d, name, "dword") || sym_eq(d, name, "xword")) {
+ emit_int_directive(d, 8);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "ascii") || sym_eq(d, name, "asciz") ||
+ sym_eq(d, name, "string")) {
+ int term = !sym_eq(d, name, "ascii");
+ for (;;) {
+ AsmTok t = d_peek(d);
+ if (t.kind != ASM_TOK_STR)
+ d_panicf(d, "asm: .ascii/.string: expected string");
+ (void)d_next(d);
+ u8* buf = NULL;
+ u32 n = 0;
+ decode_string(d, t.spelling, &buf, &n);
+ (void)asm_driver_cur_section(d);
+ d->mc->emit_bytes(d->mc, buf, n);
+ if (term) emit_le(d, 0, 1);
+ d->heap->free(d->heap, buf, n);
+ if (!asm_driver_eat_comma(d)) break;
+ }
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "zero") || sym_eq(d, name, "skip") ||
+ sym_eq(d, name, "space")) {
+ i64 n = asm_driver_parse_const(d);
+ i64 fill = 0;
+ if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
+ if (n > 0) {
+ (void)asm_driver_cur_section(d);
+ d->mc->emit_fill(d->mc, (size_t)n, (u8)fill);
+ }
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "fill")) {
+ i64 n = asm_driver_parse_const(d);
+ i64 size = 1, val = 0;
+ if (asm_driver_eat_comma(d)) size = asm_driver_parse_const(d);
+ if (asm_driver_eat_comma(d)) val = asm_driver_parse_const(d);
+ if (size < 1 || size > 8) d_panicf(d, "asm: .fill: size out of range");
+ (void)asm_driver_cur_section(d);
+ for (i64 i = 0; i < n; ++i) emit_le(d, (u64)val, (u32)size);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "align") || sym_eq(d, name, "balign")) {
+ i64 a = asm_driver_parse_const(d);
+ i64 fill = 0;
+ if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
+ if (a <= 0 || (a & (a - 1))) d_panicf(d, "asm: .align: not a power of 2");
+ (void)asm_driver_cur_section(d);
+ d->mc->emit_align(d->mc, (u32)a, (u8)fill);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "p2align")) {
+ i64 lg = asm_driver_parse_const(d);
+ i64 fill = 0;
+ if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
+ if (lg < 0 || lg > 16) d_panicf(d, "asm: .p2align: out of range");
+ (void)asm_driver_cur_section(d);
+ d->mc->emit_align(d->mc, 1u << (u32)lg, (u8)fill);
+ d_skip_to_eol(d);
+ return;
+ }
+ if (sym_eq(d, name, "set") || sym_eq(d, name, "equ")) {
+ Sym n = expect_ident(d, ".set");
+ if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .set: expected ','");
+ AsmExpr e = parse_expr(d);
+ AsmEqu eq;
+ eq.value = e.value;
+ eq.sym = e.sym;
+ eq.has_sym = e.sym ? 1 : 0;
+ eq.pad[0] = eq.pad[1] = eq.pad[2] = 0;
+ SymEquMap_set(&d->equ_map, n, eq);
+ d_skip_to_eol(d);
+ return;
+ }
+
+ /* CFI block + accepted-but-ignored directives. Keep parser
+ * forward-progress without aborting the whole TU. */
+ if (starts_with(d, name, "cfi_") || sym_eq(d, name, "file") ||
+ sym_eq(d, name, "loc") || sym_eq(d, name, "ident") ||
+ sym_eq(d, name, "popsection") || sym_eq(d, name, "pushsection") ||
+ sym_eq(d, name, "previous") ||
+ sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "comm") ||
+ sym_eq(d, name, "lcomm") || sym_eq(d, name, "uleb128") ||
+ sym_eq(d, name, "sleb128") || sym_eq(d, name, "macro") ||
+ sym_eq(d, name, "endm") || sym_eq(d, name, "if") ||
+ sym_eq(d, name, "endif") || sym_eq(d, name, "else") ||
+ sym_eq(d, name, "include")) {
+ d_skip_to_eol(d);
+ return;
+ }
+
+ /* Unknown directive — recover. */
+ d_skip_to_eol(d);
+}
+
+/* ---- driver loop ---- */
+
+/* Define label `name` at the emitter's current position in the current
+ * section. Panics when the symbol already belongs to a section, i.e. the
+ * label has been defined before. */
+static void process_label(AsmDriver* d, Sym name) {
+  const ObjSymId sym_id = intern_sym(d, name);
+  const ObjSym* prior;
+  u64 here;
+
+  /* Result unused; called for its effect on the driver's section state. */
+  (void)asm_driver_cur_section(d);
+  prior = obj_symbol_get(d->ob, sym_id);
+  if (prior != NULL && prior->section_id != OBJ_SEC_NONE) {
+    d_panicf(d, "asm: symbol defined twice");
+  }
+  here = (u64)d->mc->pos(d->mc);
+  obj_symbol_define(d->ob, sym_id, d->cur_sec, here, 0);
+  /* A symbol first seen as a forward reference (via a reloc) is SK_UNDEF;
+   * turn it into a real defined SK_NOTYPE symbol. An explicit
+   * `.type SYM, @function` will refine the kind later. */
+  if (prior != NULL && prior->kind == SK_UNDEF) {
+    sym_mut(d, sym_id)->kind = (u16)SK_NOTYPE;
+  }
+}
+
+/* Rebuild a dotted mnemonic. When `head` is immediately followed (no
+ * intervening whitespace) by '.', consume the '.' and the identifier after
+ * it and return the interned spelling "head.rest"; otherwise return `head`
+ * unchanged and consume nothing. Panics if the token after '.' is not an
+ * identifier or if the joined spelling would need 64 bytes or more. */
+static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) {
+  char joined[64];
+  char* out = joined;
+  const char* head_str;
+  const char* tail_str;
+  size_t head_len = 0, tail_len = 0, total, i;
+  AsmTok tail;
+  AsmTok dot = d_peek(d);
+
+  if (!tok_is_punct(dot, '.') || (dot.flags & ASM_TF_HAS_SPACE)) return head;
+
+  (void)d_next(d); /* the '.' */
+  tail = d_next(d);
+  if (tail.kind != ASM_TOK_IDENT) {
+    d_panicf(d, "asm: composite mnemonic: expected ident");
+  }
+
+  head_str = asm_str(d, head, &head_len);
+  tail_str = asm_str(d, tail.v.ident, &tail_len);
+  total = head_len + 1 + tail_len;
+  if (total >= 64) d_panicf(d, "asm: mnemonic too long");
+
+  /* No terminator is written: pool_intern is given an explicit length. */
+  for (i = 0; i != head_len; ++i) *out++ = head_str[i];
+  *out++ = '.';
+  for (i = 0; i != tail_len; ++i) *out++ = tail_str[i];
+  return pool_intern(d->pool, joined, total);
+}
+
+/* ---- inline-asm driver constructor ----
+ *
+ * Inline-asm template walkers (per-arch) re-lex pre-substituted source
+ * text through the same per-mnemonic parsers used by the standalone .s
+ * driver. This constructor builds a minimally-initialized AsmDriver
+ * around a caller-supplied memory-backed AsmLexer + MCEmitter.
+ *
+ * The driver does not own the AsmLexer or MCEmitter, does not allocate a
+ * default section (inline asm emits into whatever section the wrapping
+ * cg has selected on its MCEmitter), and skips the standalone driver's
+ * per-arch handle (`d->aa64`) — the caller has already opened its own
+ * AA64Asm to thread per-block bound state through. */
+/* Build a heap-allocated AsmDriver around a caller-owned lexer and emitter.
+ *
+ *   c    compiler whose env heap and global pool the driver uses
+ *   mc   emitter to write through; its obj and section_id are adopted
+ *   lex  token source; not owned by the driver
+ *
+ * Returns the new driver, or NULL when the heap allocation fails. Release
+ * with asm_driver_close_inline (which also accepts NULL). */
+AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, AsmLexer* lex) {
+  Heap* heap = (Heap*)c->env->heap;
+  AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver));
+  /* asm_lex_open_mem guards this same allocator call against NULL; do the
+   * same here instead of handing a null pointer to memset. */
+  if (!d) return NULL;
+  memset(d, 0, sizeof *d);
+  d->c = c;
+  d->lex = lex;
+  d->mc = mc;
+  d->ob = mc->obj;
+  d->pool = c->global;
+  d->heap = heap;
+  /* The MCEmitter's section is whatever cg has selected; do not override
+   * it. Standalone assembly starts with cur_sec == OBJ_SEC_NONE and lets
+   * asm_driver_cur_section fill it in lazily; inline asm must not take that
+   * path, so seed cur_sec from the MCEmitter up front. That also gives
+   * emit_reloc_at calls the right section id. */
+  d->cur_sec = mc->section_id;
+  SymSecMap_init(&d->sec_map, heap);
+  SymSymMap_init(&d->sym_map, heap);
+  SymEquMap_init(&d->equ_map, heap);
+  d->aa64 = NULL; /* caller owns its own AA64Asm */
+  return d;
+}
+
+/* Release a driver created by asm_driver_open_inline: tear down its three
+ * symbol maps, then return the driver's own storage to the heap it was
+ * allocated from. A NULL driver is ignored. */
+void asm_driver_close_inline(AsmDriver* d) {
+  Heap* owner;
+
+  if (d == NULL) return;
+  SymSecMap_fini(&d->sec_map);
+  SymSymMap_fini(&d->sym_map);
+  SymEquMap_fini(&d->equ_map);
+  /* Read the heap pointer out of the driver before freeing the driver. */
+  owner = d->heap;
+  owner->free(owner, d, sizeof *d);
+}
+
+/* Standalone assembler entry point: pull tokens from `l` until EOF and
+ * dispatch each source line as a directive, a label, or an instruction,
+ * emitting through `mc`. The driver state lives on the stack for the
+ * duration of the call; the lexer and emitter stay owned by the caller. */
+void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
+  AsmDriver state;
+  AsmDriver* d = &state;
+  AsmTok tok;
+
+  memset(d, 0, sizeof *d);
+  d->c = c;
+  d->lex = l;
+  d->mc = mc;
+  d->ob = mc->obj;
+  d->pool = c->global;
+  d->heap = (Heap*)c->env->heap;
+  d->cur_sec = OBJ_SEC_NONE;
+  SymSecMap_init(&d->sec_map, d->heap);
+  SymSymMap_init(&d->sym_map, d->heap);
+  SymEquMap_init(&d->equ_map, d->heap);
+  d->aa64 = aa64_asm_open(c);
+
+  while ((tok = d_peek(d)).kind != ASM_TOK_EOF) {
+    if (tok.kind == ASM_TOK_NEWLINE) {
+      /* Blank line, or the end of a line handled on an earlier pass. */
+      (void)d_next(d);
+    } else if (tok.kind == ASM_TOK_HASH) {
+      /* cpp-style linemarker; skip the whole line. */
+      d_skip_to_eol(d);
+    } else if (tok_is_punct(tok, '.')) {
+      /* Directive: '.' followed by the directive name. */
+      AsmTok dir;
+      (void)d_next(d);
+      dir = d_next(d);
+      if (dir.kind != ASM_TOK_IDENT) {
+        d_panicf(d, "asm: expected directive name after '.'");
+      }
+      do_directive(d, dir.v.ident);
+      d_eat_eol(d);
+    } else if (tok.kind == ASM_TOK_IDENT) {
+      /* `name:` is a label; any other identifier starts an instruction. */
+      Sym head = tok.v.ident;
+      (void)d_next(d);
+      if (tok_is_punct(d_peek(d), ':')) {
+        (void)d_next(d);
+        process_label(d, head);
+      } else {
+        Sym mnemonic = maybe_compose_mnemonic(d, head);
+        aa64_asm_insn(d->aa64, d, mnemonic);
+        d_skip_to_eol(d);
+      }
+    } else {
+      /* Anything else: recover by skipping the line. */
+      d_skip_to_eol(d);
+    }
+  }
+
+  aa64_asm_close(d->aa64);
+  SymSecMap_fini(&d->sec_map);
+  SymSymMap_fini(&d->sym_map);
+  SymEquMap_fini(&d->equ_map);
+}
diff --git a/src/asm/asm.h b/src/asm/asm.h
@@ -0,0 +1,11 @@
+#ifndef CFREE_ASM_H
+#define CFREE_ASM_H
+
+#include "arch/arch.h"
+#include "asm/asm_lex.h"
+
+/* Standalone assembler. Reads tokens directly from an AsmLexer; emits via
+ * MCEmitter. */
+void asm_parse(Compiler*, AsmLexer*, MCEmitter*);
+
+#endif
diff --git a/src/asm/asm_helpers.h b/src/asm/asm_helpers.h
@@ -0,0 +1,63 @@
+#ifndef CFREE_ASM_HELPERS_H
+#define CFREE_ASM_HELPERS_H
+
+/* Lightweight asm-driver surface consumed by per-arch instruction
+ * parsers. The driver itself is opaque to per-arch code; these helpers
+ * are the only seam. Implementations live in src/asm/asm.c. */
+
+#include "arch/arch.h"
+#include "asm/asm_lex.h"
+#include "core/core.h"
+#include "obj/obj.h"
+
+typedef struct AsmDriver AsmDriver;
+
+/* ---- token plumbing ---- */
+AsmTok asm_driver_peek(AsmDriver*);
+AsmTok asm_driver_next(AsmDriver*);
+int asm_driver_at_eol(AsmDriver*);
+int asm_driver_tok_is_punct(AsmTok t, u32 p);
+int asm_driver_eat_comma(AsmDriver*);
+int asm_driver_eat_punct(AsmDriver*, u32 punct);
+void asm_driver_expect_punct(AsmDriver*, u32 punct, const char* what);
+
+/* Source position for diagnostics. */
+SrcLoc asm_driver_loc(AsmDriver*);
+
+/* Owning subsystems. */
+MCEmitter* asm_driver_mc(AsmDriver*);
+ObjBuilder* asm_driver_ob(AsmDriver*);
+Compiler* asm_driver_compiler(AsmDriver*);
+Pool* asm_driver_pool(AsmDriver*);
+ObjSecId asm_driver_cur_section(AsmDriver*);
+
+/* Diagnostics: emits then longjmps via Compiler.panic. No return. */
+_Noreturn void asm_driver_panic(AsmDriver*, const char* fmt, ...);
+
+/* ---- symbol + expression parsing ---- */
+ObjSymId asm_driver_intern_sym(AsmDriver*, Sym name);
+
+/* Parse a constant integer expression. Panics if the expression
+ * references a symbol. */
+i64 asm_driver_parse_const(AsmDriver*);
+
+/* Parse a `sym ± const` expression. Both outputs valid: pure constants
+ * leave *sym_out == OBJ_SYM_NONE. */
+void asm_driver_parse_sym_expr(AsmDriver*, ObjSymId* sym_out, i64* off_out);
+
+/* ---- inline-asm constructor ----
+ *
+ * Build an AsmDriver around a memory-backed AsmLexer + caller-supplied
+ * MCEmitter. Used by inline-asm template walkers (one driver per asm
+ * line) to reuse the existing per-arch instruction parsers verbatim
+ * over a substituted source buffer.
+ *
+ * The driver is heap-allocated through c->env->heap and must be released
+ * with asm_driver_close_inline. It does not own the AsmLexer or the
+ * MCEmitter — the caller retains ownership of both. The driver does
+ * not initialize a default section; inline asm always emits into the
+ * MCEmitter's currently-active section. */
+AsmDriver* asm_driver_open_inline(Compiler*, MCEmitter*, AsmLexer*);
+void asm_driver_close_inline(AsmDriver*);
+
+#endif
diff --git a/src/asm/asm_lex.c b/src/asm/asm_lex.c
@@ -0,0 +1,705 @@
+/* Assembler lexer. Streams tokens out of a borrowed source buffer.
+ *
+ * It intentionally keeps C-like number/string spelling rules because .S
+ * sources arrive after C preprocessing and GNU as accepts those spellings
+ * in directives and expressions. It does not own macro expansion or C
+ * keyword classification.
+ *
+ * Comments are consumed as whitespace; physical newlines surface as
+ * ASM_TOK_NEWLINE so the asm driver can keep line-oriented directive and
+ * instruction parsing. */
+
+#include "asm/asm_lex.h"
+
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/pool.h"
+
+/* Lexer state over one in-memory source buffer. Created by
+ * asm_lex_open_mem and released by asm_lex_close. */
+struct AsmLexer {
+ Compiler* c; /* compiler passed to asm_lex_open_mem */
+ Pool* pool; /* c->global; token spellings are interned here */
+ Heap* heap; /* c->env->heap; scratch buffers and the lexer itself */
+ const char* src; /* borrowed source bytes ("" when opened with NULL) */
+ size_t len; /* number of bytes in src */
+ size_t pos; /* byte offset of the cursor within src */
+ u32 file_id; /* id returned by source_add_memory for this buffer */
+ u32 line; /* current line, starts at 1 */
+ u32 col; /* current column, reset to 1 after each newline */
+ u8 at_bol; /* next token gets ASM_TF_AT_BOL; set after a newline */
+ u8 had_space; /* next token gets ASM_TF_HAS_SPACE; set by ws/comments */
+};
+
+/* Line splicing (§5.1.1.2 translation phase 2): a backslash immediately
+ * followed by a newline joins two physical lines. Step over every such
+ * pair sitting at the cursor, counting each as a new line, so that l->pos
+ * never rests on the backslash that opens a splice. */
+static void skip_splices(AsmLexer* l) {
+  for (;;) {
+    if (l->pos + 1 >= l->len) return;
+    if (l->src[l->pos] != '\\' || l->src[l->pos + 1] != '\n') return;
+    l->pos += 2;
+    l->line++;
+    l->col = 1;
+  }
+}
+
+/* Look ahead without consuming: return the byte that is `off` logical
+ * positions past the cursor, where backslash-newline pairs do not count as
+ * positions. Returns -1 when the input ends first. l->pos is not touched. */
+static int peek(const AsmLexer* l, size_t off) {
+  const char* s = l->src;
+  size_t remaining = off;
+  size_t i;
+
+  for (i = l->pos; i < l->len; ++i) {
+    if (i + 1 < l->len && s[i] == '\\' && s[i + 1] == '\n') {
+      ++i; /* with the loop increment this steps over both splice bytes */
+      continue;
+    }
+    if (remaining == 0) return (unsigned char)s[i];
+    --remaining;
+  }
+  return -1;
+}
+
+/* Consume one logical byte: step over any splice at the cursor, then
+ * return the next byte and advance past it, keeping line/col in step.
+ * Returns -1 at end of input. */
+static int bump(AsmLexer* l) {
+  unsigned char byte;
+
+  skip_splices(l);
+  if (l->pos >= l->len) return -1;
+  byte = (unsigned char)l->src[l->pos];
+  l->pos++;
+  if (byte != '\n') {
+    l->col++;
+    return byte;
+  }
+  l->line++;
+  l->col = 1;
+  return '\n';
+}
+
+/* ASCII decimal digit. */
+static int is_digit(int c) { return '0' <= c && c <= '9'; }
+/* ASCII hexadecimal digit, either case. */
+static int is_hex_digit(int c) {
+  if (is_digit(c)) return 1;
+  if ('a' <= c && c <= 'f') return 1;
+  return 'A' <= c && c <= 'F';
+}
+/* Identifier-start byte (§6.4.2.1): an ASCII letter or underscore, plus
+ * any byte >= 0x80, which stands in for the implementation-defined "other
+ * characters" permitted in identifiers (in practice UTF-8 lead and
+ * continuation bytes). UCNs span several source bytes and are matched
+ * separately via ucn_len. */
+static int is_alpha(int c) {
+  if (c == '_' || c >= 0x80) return 1;
+  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+/* Identifier-continue byte: a digit or anything is_alpha accepts. */
+static int is_alnum(int c) { return is_digit(c) || is_alpha(c); }
+
+/* Try to match a universal character name starting `off` logical bytes
+ * past the cursor. Returns its full length -- 6 for \uXXXX, 10 for
+ * \UXXXXXXXX -- or 0 when the text there is not a UCN. Only the lexical
+ * form is checked; the §6.4.3 range rules (nothing below 00A0 other than
+ * $, @ and `, nothing in D800-DFFF) are left for a later phase to
+ * diagnose. */
+static int ucn_len(const AsmLexer* l, size_t off) {
+  int digits;
+  int k;
+
+  if (peek(l, off) != '\\') return 0;
+  switch (peek(l, off + 1)) {
+    case 'u':
+      digits = 4;
+      break;
+    case 'U':
+      digits = 8;
+      break;
+    default:
+      return 0;
+  }
+  for (k = 0; k < digits; ++k) {
+    if (!is_hex_digit(peek(l, off + 2 + (size_t)k))) return 0;
+  }
+  return digits + 2; /* backslash + u/U + hex digits */
+}
+
+/* Snapshot the cursor as a SrcLoc (file id, line, column). */
+static SrcLoc asm_lex_here(const AsmLexer* l) {
+  SrcLoc here;
+
+  here.col = l->col;
+  here.line = l->line;
+  here.file_id = l->file_id;
+  return here;
+}
+
+/* Create a lexer over the in-memory buffer [src, src + len).
+ *
+ * The bytes are borrowed, not copied. A NULL `src` is treated as an empty
+ * buffer. `name` is registered with the compiler's source manager to
+ * obtain the file id reported in token locations. Returns NULL when the
+ * lexer itself cannot be allocated. */
+AsmLexer* asm_lex_open_mem(Compiler* c, const char* name, const char* src,
+                           size_t len) {
+  Heap* heap = (Heap*)c->env->heap;
+  AsmLexer* lx = (AsmLexer*)heap->alloc(heap, sizeof(*lx), _Alignof(AsmLexer));
+
+  if (lx == NULL) return NULL;
+  /* Zero-fill covers pos = 0, had_space = 0 and the empty-buffer len. */
+  memset(lx, 0, sizeof(*lx));
+  lx->c = c;
+  lx->pool = c->global;
+  lx->heap = heap;
+  if (src != NULL) {
+    lx->src = src;
+    lx->len = len;
+  } else {
+    lx->src = "";
+  }
+  lx->file_id = source_add_memory(c->sources, name);
+  lx->line = 1;
+  lx->col = 1;
+  lx->at_bol = 1;
+  return lx;
+}
+
+/* Free a lexer through the heap it was allocated from. NULL is ignored.
+ * The source buffer is not touched. */
+void asm_lex_close(AsmLexer* l) {
+  if (l != NULL) {
+    Heap* owner = l->heap;
+    owner->free(owner, l, sizeof(*l));
+  }
+}
+
+/* Current cursor position (not the location of the last token). */
+SrcLoc asm_lex_loc(const AsmLexer* l) {
+  return asm_lex_here(l);
+}
+
+/* File id assigned to this lexer's buffer when it was opened. */
+u32 asm_lex_file_id(const AsmLexer* l) {
+  return l->file_id;
+}
+
+/* Literal lookup. No literal table is kept here: both arguments are
+ * ignored and the result is always NULL. */
+const AsmLitInfo* asm_lex_lit(const AsmLexer* l, AsmLitId id) {
+  (void)id;
+  (void)l;
+  return NULL;
+}
+
+/* Intern the source bytes [start, end) as they read after line splicing:
+ * every backslash-newline pair is dropped, so a token's spelling is its
+ * post-phase-2 logical text. Text without a splice is interned straight
+ * from the source buffer; otherwise it is first copied, minus the splices,
+ * into a temporary heap buffer. */
+static Sym intern_spliced(AsmLexer* l, size_t start, size_t end) {
+  const char* text = l->src + start;
+  const size_t raw_len = end - start;
+  size_t rd;
+  size_t wr = 0;
+  int spliced = 0;
+  char* scratch;
+  Sym out;
+
+  for (rd = 0; !spliced && rd + 1 < raw_len; ++rd) {
+    spliced = (text[rd] == '\\' && text[rd + 1] == '\n');
+  }
+  if (!spliced) return pool_intern(l->pool, text, raw_len);
+
+  /* The spliced text is never longer than the raw text. */
+  scratch = (char*)l->heap->alloc(l->heap, raw_len, 1);
+  rd = 0;
+  while (rd < raw_len) {
+    if (rd + 1 < raw_len && text[rd] == '\\' && text[rd + 1] == '\n') {
+      rd += 2;
+    } else {
+      scratch[wr++] = text[rd++];
+    }
+  }
+  out = pool_intern(l->pool, scratch, wr);
+  l->heap->free(l->heap, scratch, raw_len);
+  return out;
+}
+
+/* Skip horizontal whitespace, `//` line comments and block comments at the
+ * cursor, setting l->had_space whenever anything was skipped. Newlines are
+ * never consumed here on their own: a line comment stops in front of its
+ * terminating '\n', so the caller still sees it and emits the newline
+ * token. A block comment that is never closed runs to end of input. */
+static void skip_ws_and_comments(AsmLexer* l) {
+  int skipped = 1;
+
+  while (skipped) {
+    const int ch = peek(l, 0);
+    skipped = 0;
+
+    switch (ch) {
+      case ' ':
+      case '\t':
+      case '\r':
+      case '\v':
+      case '\f':
+        bump(l);
+        skipped = 1;
+        break;
+      case '/':
+        if (peek(l, 1) == '/') {
+          bump(l);
+          bump(l);
+          for (;;) {
+            const int c = peek(l, 0);
+            if (c < 0 || c == '\n') break;
+            bump(l);
+          }
+          skipped = 1;
+        } else if (peek(l, 1) == '*') {
+          bump(l);
+          bump(l);
+          for (;;) {
+            const int c = peek(l, 0);
+            if (c < 0) break;
+            if (c == '*' && peek(l, 1) == '/') {
+              bump(l);
+              bump(l);
+              break;
+            }
+            bump(l);
+          }
+          skipped = 1;
+        }
+        break;
+      default:
+        break;
+    }
+    if (skipped) l->had_space = 1;
+  }
+}
+
+/* Consume a numeric token. The cursor is positioned at the leading digit
+ * (or `.` followed by a digit) on entry.
+ *
+ * The shape is the pp-number of §6.4.8 with one deliberate difference: a
+ * `+` or `-` is absorbed only after the exponent letter that belongs to
+ * the radix in use — `p`/`P` for a 0x-prefixed number, `e`/`E` otherwise.
+ * A strict pp-number swallows `0x1e+3` whole, but in assembler source that
+ * text is the sum `0x1e + 3`, so the sign has to be left behind as its own
+ * token for the expression parser. */
+static void scan_pp_number(AsmLexer* l) {
+  const int first = peek(l, 0);
+  const int second = peek(l, 1);
+  /* Decide the radix before anything is consumed. */
+  const int is_hex = first == '0' && (second == 'x' || second == 'X');
+
+  if (first == '.') bump(l);
+  bump(l); /* first digit */
+  while (l->pos < l->len) {
+    int c = peek(l, 0);
+    int n = peek(l, 1);
+    int exp_letter =
+        is_hex ? (c == 'p' || c == 'P') : (c == 'e' || c == 'E');
+    if (exp_letter && (n == '+' || n == '-')) {
+      bump(l); /* exponent letter */
+      bump(l); /* its sign */
+    } else if (is_alnum(c) || c == '.') {
+      bump(l);
+    } else {
+      break;
+    }
+  }
+}
+
+/* Classify pp-number text s[0..n): returns 1 for a floating constant
+ * (§6.4.4.2), 0 otherwise. A number counts as floating when it contains a
+ * radix point, or — for 0x-prefixed text — a `p`/`P` exponent, or — for
+ * everything else — an `e`/`E` that is followed by a sign or a digit. */
+static int pp_number_is_float(const char* s, size_t n) {
+  const int hex = n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X');
+  size_t i;
+
+  for (i = hex ? 2 : 0; i < n; ++i) {
+    switch (s[i]) {
+      case '.':
+        return 1;
+      case 'p':
+      case 'P':
+        if (hex) return 1;
+        break;
+      case 'e':
+      case 'E':
+        /* In hex text `e` is just a digit. */
+        if (!hex && i + 1 < n) {
+          const char after = s[i + 1];
+          if (after == '+' || after == '-' || (after >= '0' && after <= '9'))
+            return 1;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  return 0;
+}
+
+/* Consume a string ('"') or character ('\'') literal body. On entry the
+ * cursor sits on the opening quote. Returns 0 once the matching closing
+ * quote has been consumed, or 1 when the literal is cut short by a newline
+ * (left unconsumed) or by end of input. A backslash consumes the byte that
+ * follows it, so an escaped quote does not close the literal. */
+static int scan_quoted(AsmLexer* l, int quote) {
+  bump(l); /* opening quote */
+  for (;;) {
+    const int ch = peek(l, 0);
+
+    if (ch < 0) break;
+    if (ch == quote) {
+      bump(l);
+      return 0;
+    }
+    if (ch == '\n') break;
+    bump(l);
+    if (ch == '\\') {
+      if (peek(l, 0) < 0) break; /* input ends right after the backslash */
+      bump(l);                   /* the escaped char */
+    }
+  }
+  return 1;
+}
+
+/* Produce the next token.
+ *
+ * Whitespace and comments are skipped first. Then, in this order, the
+ * function recognizes: end of input (ASM_TOK_EOF), a physical newline
+ * (ASM_TOK_NEWLINE), a string or character literal with optional encoding
+ * prefix, an identifier, a number, and finally a punctuator. The token's
+ * loc is the cursor position before its first byte; its spelling is the
+ * interned post-splice source text. ASM_TF_AT_BOL and ASM_TF_HAS_SPACE are
+ * taken from the lexer's at_bol / had_space state, which is then reset.
+ * EOF and NEWLINE tokens carry no spelling and no flags. */
+AsmTok asm_lex_next(AsmLexer* l) {
+ AsmTok t;
+ SrcLoc tloc;
+ size_t start;
+ int ch;
+
+ memset(&t, 0, sizeof(t));
+
+ /* Skip whitespace and comments. A newline token is emitted before any
+ * subsequent content tokens for the line that follows. */
+ for (;;) { /* runs once: every path below returns or breaks */
+ skip_ws_and_comments(l);
+ skip_splices(l);
+ if (l->pos >= l->len) {
+ t.kind = ASM_TOK_EOF;
+ t.loc = asm_lex_here(l);
+ return t;
+ }
+ if (peek(l, 0) == '\n') {
+ tloc = asm_lex_here(l);
+ bump(l);
+ t.kind = ASM_TOK_NEWLINE;
+ t.loc = tloc;
+ l->at_bol = 1;
+ l->had_space = 0;
+ return t;
+ }
+ break;
+ }
+
+ tloc = asm_lex_here(l);
+ start = l->pos;
+ ch = peek(l, 0);
+
+ /* Transfer the pending line-start / preceding-space state to this token. */
+ if (l->at_bol) t.flags |= ASM_TF_AT_BOL;
+ if (l->had_space) t.flags |= ASM_TF_HAS_SPACE;
+ l->at_bol = 0;
+ l->had_space = 0;
+ t.loc = tloc;
+
+ /* String / character literal, with optional encoding prefix. The prefix
+ * length and encoding flag are decoded together so the spelling we
+ * intern includes the prefix bytes. */
+ {
+ int sp_len = -1; /* prefix length in bytes; -1 = not a literal */
+ int is_char = 0;
+ u32 encf = 0;
+
+ if (ch == '"') {
+ sp_len = 0;
+ is_char = 0;
+ } else if (ch == '\'') {
+ sp_len = 0;
+ is_char = 1;
+ } else if (ch == 'L' && peek(l, 1) == '"') {
+ sp_len = 1;
+ is_char = 0;
+ encf = ASM_TF_STR_WIDE;
+ } else if (ch == 'L' && peek(l, 1) == '\'') {
+ sp_len = 1;
+ is_char = 1;
+ encf = ASM_TF_STR_WIDE;
+ } else if (ch == 'u' && peek(l, 1) == '8' && peek(l, 2) == '"') {
+ sp_len = 2;
+ is_char = 0;
+ encf = ASM_TF_STR_U8;
+ } else if (ch == 'u' && peek(l, 1) == '"') {
+ sp_len = 1;
+ is_char = 0;
+ encf = ASM_TF_STR_U16;
+ } else if (ch == 'u' && peek(l, 1) == '\'') {
+ sp_len = 1;
+ is_char = 1;
+ encf = ASM_TF_STR_U16;
+ } else if (ch == 'U' && peek(l, 1) == '"') {
+ sp_len = 1;
+ is_char = 0;
+ encf = ASM_TF_STR_U32;
+ } else if (ch == 'U' && peek(l, 1) == '\'') {
+ sp_len = 1;
+ is_char = 1;
+ encf = ASM_TF_STR_U32;
+ }
+
+ if (sp_len >= 0) {
+ int i;
+ for (i = 0; i < sp_len; ++i) bump(l);
+ /* A broken literal is still returned as a token, flagged as bad. */
+ if (scan_quoted(l, is_char ? '\'' : '"')) t.flags |= ASM_TF_LITERAL_BAD;
+ t.kind = (u16)(is_char ? ASM_TOK_CHR : ASM_TOK_STR);
+ t.flags |= encf;
+ t.spelling = intern_spliced(l, start, l->pos);
+ t.v.str = t.spelling;
+ return t;
+ }
+ }
+
+ /* Identifier (§6.4.2). Encoding-prefix candidates above are matched
+ * before this since L/u/U followed by a quote is a literal, not an
+ * identifier. The grammar's identifier-nondigit covers letters, _,
+ * extended source chars (impl-defined; bytes ≥ 0x80 here), and UCNs
+ * (§6.4.3) — the latter span multiple source bytes so they're matched
+ * via ucn_len rather than the per-byte is_alpha predicate. */
+ {
+ int u = ucn_len(l, 0);
+ if (is_alpha(ch) || u) {
+ if (u) {
+ int i;
+ for (i = 0; i < u; ++i) bump(l);
+ } else
+ bump(l);
+ for (;;) {
+ int c = peek(l, 0);
+ if (is_alnum(c)) {
+ bump(l);
+ } else if ((u = ucn_len(l, 0))) {
+ int i;
+ for (i = 0; i < u; ++i) bump(l);
+ } else {
+ break;
+ }
+ }
+ t.kind = ASM_TOK_IDENT;
+ t.spelling = intern_spliced(l, start, l->pos);
+ t.v.ident = t.spelling;
+ return t;
+ }
+ }
+
+ /* Preprocessor-number shaped token, classified to ASM_TOK_NUM /
+ * ASM_TOK_FLT for expression diagnostics and future directive support. */
+ if (is_digit(ch) || (ch == '.' && is_digit(peek(l, 1)))) {
+ size_t plen;
+ char* pbuf;
+ size_t i, k;
+ scan_pp_number(l);
+ /* Classify on the post-splice text (the spelling we'll intern). */
+ plen = l->pos - start;
+ pbuf = (char*)l->heap->alloc(l->heap, plen ? plen : 1, 1);
+ k = 0;
+ for (i = start; i < l->pos;) {
+ if (i + 1 < l->pos && l->src[i] == '\\' && l->src[i + 1] == '\n') {
+ i += 2;
+ continue;
+ }
+ pbuf[k++] = l->src[i++];
+ }
+ t.kind = (u16)(pp_number_is_float(pbuf, k) ? ASM_TOK_FLT : ASM_TOK_NUM);
+ /* Preserve common C-style integer/float suffixes in token flags. The
+ * current assembler expression evaluator ignores them, but keeping the
+ * spelling metadata makes the lexer useful for future directive work. */
+ if (t.kind == ASM_TOK_FLT) {
+ /* Walk trailing f/F/l/L characters from the end of the text. */
+ size_t j = k;
+ while (j > 0) {
+ char c = pbuf[j - 1];
+ if (c == 'f' || c == 'F') {
+ t.flags |= ASM_TF_FLT_F;
+ --j;
+ continue;
+ }
+ if (c == 'l' || c == 'L') {
+ t.flags |= ASM_TF_FLT_L;
+ --j;
+ continue;
+ }
+ break;
+ }
+ } else {
+ /* Walk trailing u/U/l/L characters; a doubled l/L counts as LL. */
+ size_t j = k;
+ while (j > 0) {
+ char c = pbuf[j - 1];
+ if (c == 'u' || c == 'U') {
+ t.flags |= ASM_TF_INT_U;
+ --j;
+ continue;
+ }
+ if (c == 'l' || c == 'L') {
+ if (j >= 2 && (pbuf[j - 2] == 'l' || pbuf[j - 2] == 'L')) {
+ t.flags |= ASM_TF_INT_LL;
+ j -= 2;
+ } else {
+ t.flags |= ASM_TF_INT_L;
+ --j;
+ }
+ continue;
+ }
+ break;
+ }
+ }
+ /* The spelling keeps the suffix characters; only the flags record them. */
+ t.spelling = pool_intern(l->pool, pbuf, k);
+ l->heap->free(l->heap, pbuf, plen ? plen : 1);
+ return t;
+ }
+
+ /* Punctuator, longest match. `#` is a distinct token because it is both
+ * an asm immediate marker and, at BOL in preprocessed assembler, a line
+ * marker introducer. */
+ {
+ int n0 = peek(l, 0);
+ int n1 = peek(l, 1);
+ int n2 = peek(l, 2);
+ int n3 = peek(l, 3);
+ int adv = 1; /* logical bytes the punctuator occupies */
+ u32 punct = ASM_P_NONE;
+ u16 kind = ASM_TOK_PUNCT;
+ int i;
+
+ switch (n0) {
+ case '#':
+ if (n1 == '#') {
+ adv = 2;
+ kind = ASM_TOK_HASH_HASH;
+ punct = ASM_P_HASH_HASH;
+ } else {
+ adv = 1;
+ kind = ASM_TOK_HASH;
+ punct = '#';
+ }
+ break;
+ case '.':
+ if (n1 == '.' && n2 == '.') {
+ adv = 3;
+ punct = ASM_P_ELLIPSIS;
+ } else {
+ adv = 1;
+ punct = '.';
+ }
+ break;
+ case '-':
+ if (n1 == '>') {
+ adv = 2;
+ punct = ASM_P_ARROW;
+ } else if (n1 == '-') {
+ adv = 2;
+ punct = ASM_P_DEC;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_SUB_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '-';
+ }
+ break;
+ case '+':
+ if (n1 == '+') {
+ adv = 2;
+ punct = ASM_P_INC;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_ADD_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '+';
+ }
+ break;
+ case '<':
+ if (n1 == '<' && n2 == '=') {
+ adv = 3;
+ punct = ASM_P_SHL_ASSIGN;
+ } else if (n1 == '<') {
+ adv = 2;
+ punct = ASM_P_SHL;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_LE;
+ } else if (n1 == ':') {
+ adv = 2;
+ punct = '[';
+ } /* digraph */
+ else if (n1 == '%') {
+ adv = 2;
+ punct = '{';
+ } /* digraph */
+ else {
+ adv = 1;
+ punct = '<';
+ }
+ break;
+ case '>':
+ if (n1 == '>' && n2 == '=') {
+ adv = 3;
+ punct = ASM_P_SHR_ASSIGN;
+ } else if (n1 == '>') {
+ adv = 2;
+ punct = ASM_P_SHR;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_GE;
+ } else {
+ adv = 1;
+ punct = '>';
+ }
+ break;
+ case '=':
+ if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_EQ;
+ } else {
+ adv = 1;
+ punct = '=';
+ }
+ break;
+ case '!':
+ if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_NE;
+ } else {
+ adv = 1;
+ punct = '!';
+ }
+ break;
+ case '&':
+ if (n1 == '&') {
+ adv = 2;
+ punct = ASM_P_AND;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_AND_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '&';
+ }
+ break;
+ case '|':
+ if (n1 == '|') {
+ adv = 2;
+ punct = ASM_P_OR;
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_OR_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '|';
+ }
+ break;
+ case '^':
+ if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_XOR_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '^';
+ }
+ break;
+ case '*':
+ if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_MUL_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '*';
+ }
+ break;
+ case '/':
+ if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_DIV_ASSIGN;
+ } else {
+ adv = 1;
+ punct = '/';
+ }
+ break;
+ case '%':
+ /* `%:%:` and `%:` are the digraph spellings of `##` and `#`. */
+ if (n1 == ':' && n2 == '%' && n3 == ':') {
+ adv = 4;
+ kind = ASM_TOK_HASH_HASH;
+ punct = ASM_P_HASH_HASH;
+ } else if (n1 == ':') {
+ adv = 2;
+ kind = ASM_TOK_HASH;
+ punct = '#';
+ } else if (n1 == '=') {
+ adv = 2;
+ punct = ASM_P_MOD_ASSIGN;
+ } else if (n1 == '>') {
+ adv = 2;
+ punct = '}';
+ } /* digraph */
+ else {
+ adv = 1;
+ punct = '%';
+ }
+ break;
+ case ':':
+ if (n1 == '>') {
+ adv = 2;
+ punct = ']';
+ } /* digraph */
+ else {
+ adv = 1;
+ punct = ':';
+ }
+ break;
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case ',':
+ case ';':
+ case '?':
+ case '~':
+ adv = 1;
+ punct = (u32)n0;
+ break;
+ default:
+ /* Unknown byte. Surface as a single-char punct so the token
+ * stream still progresses; the caller decides whether to diagnose. */
+ adv = 1;
+ punct = (u32)n0;
+ break;
+ }
+
+ for (i = 0; i < adv; ++i) bump(l);
+ t.kind = kind;
+ t.v.punct = punct;
+ t.spelling = intern_spliced(l, start, l->pos);
+ return t;
+ }
+}
diff --git a/src/asm/asm_lex.h b/src/asm/asm_lex.h
@@ -0,0 +1,111 @@
+#ifndef CFREE_ASM_LEX_H
+#define CFREE_ASM_LEX_H
+
+#include "core/core.h"
+
+/* Token categories produced by asm_lex_next (stored in AsmTok.kind). */
+typedef enum AsmTokKind {
+ ASM_TOK_EOF = 0, /* end of the source buffer */
+ ASM_TOK_IDENT, /* identifier; AsmTok.v.ident holds the interned name */
+ ASM_TOK_NUM, /* number-shaped token not classified as floating */
+ ASM_TOK_FLT, /* number-shaped token with a radix point or exponent */
+ ASM_TOK_STR, /* "..." literal, optionally with an encoding prefix */
+ ASM_TOK_CHR, /* '...' literal, optionally with an encoding prefix */
+ ASM_TOK_PUNCT, /* punctuator; AsmTok.v.punct identifies which */
+ ASM_TOK_HASH, /* `#` (or the `%:` digraph) */
+ ASM_TOK_HASH_HASH, /* `##` (or the `%:%:` digraph) */
+ ASM_TOK_NEWLINE, /* physical newline; comments never produce one */
+} AsmTokKind;
+
+/* Bit flags stored in AsmTok.flags. Bit 2 is not assigned. */
+typedef enum AsmTokFlag {
+ ASM_TF_AT_BOL = 1u << 0, /* first token after a newline or at file start */
+ ASM_TF_HAS_SPACE = 1u << 1, /* whitespace or a comment preceded the token */
+ ASM_TF_INT_U = 1u << 3, /* integer text ends in a u/U suffix */
+ ASM_TF_INT_L = 1u << 4, /* integer text ends in a single l/L suffix */
+ ASM_TF_INT_LL = 1u << 5, /* integer text ends in a doubled l/L suffix */
+ ASM_TF_FLT_F = 1u << 6, /* floating text ends in an f/F suffix */
+ ASM_TF_FLT_L = 1u << 7, /* floating text ends in an l/L suffix */
+ ASM_TF_STR_WIDE = 1u << 8, /* literal has the L prefix */
+ ASM_TF_STR_U8 = 1u << 9, /* string literal has the u8 prefix */
+ ASM_TF_STR_U16 = 1u << 10, /* literal has the u prefix */
+ ASM_TF_STR_U32 = 1u << 11, /* literal has the U prefix */
+ ASM_TF_LITERAL_BAD = 1u << 12, /* literal hit a newline or EOF unclosed */
+} AsmTokFlag;
+
+/* Identifiers for multi-character punctuators (stored in AsmTok.v.punct).
+ * Single-character punctuators are not listed: the lexer stores their
+ * character code directly, which is why the named values start at 256. */
+typedef enum AsmPunct {
+ ASM_P_NONE = 0,
+ ASM_P_ARROW = 256, /* -> */
+ ASM_P_INC, /* ++ */
+ ASM_P_DEC, /* -- */
+ ASM_P_SHL, /* << */
+ ASM_P_SHR, /* >> */
+ ASM_P_LE, /* <= */
+ ASM_P_GE, /* >= */
+ ASM_P_EQ, /* == */
+ ASM_P_NE, /* != */
+ ASM_P_AND, /* && */
+ ASM_P_OR, /* || */
+ ASM_P_ADD_ASSIGN, /* += */
+ ASM_P_SUB_ASSIGN, /* -= */
+ ASM_P_MUL_ASSIGN, /* *= */
+ ASM_P_DIV_ASSIGN, /* /= */
+ ASM_P_MOD_ASSIGN, /* %= */
+ ASM_P_AND_ASSIGN, /* &= */
+ ASM_P_OR_ASSIGN, /* |= */
+ ASM_P_XOR_ASSIGN, /* ^= */
+ ASM_P_SHL_ASSIGN, /* <<= */
+ ASM_P_SHR_ASSIGN, /* >>= */
+ ASM_P_ELLIPSIS, /* ... */
+ ASM_P_HASH_HASH, /* ## or %:%: (token kind is ASM_TOK_HASH_HASH) */
+} AsmPunct;
+
+typedef u32 AsmLitId;
+#define ASM_LIT_NONE 0u
+
+/* Literal categories. NOTE(review): presumably the value space of
+ * AsmLitInfo.kind; nothing in the lexer assigns these yet — confirm. */
+typedef enum AsmLitKind {
+ ASM_LIT_INT,
+ ASM_LIT_FLOAT,
+ ASM_LIT_STRING,
+ ASM_LIT_CHAR,
+} AsmLitKind;
+
+/* Literal encodings. NOTE(review): presumably the value space of
+ * AsmLitInfo.enc, paralleling the ASM_TF_STR_* token flags — confirm. */
+typedef enum AsmLitEnc {
+ ASM_LENC_ORDINARY,
+ ASM_LENC_UTF8,
+ ASM_LENC_WIDE,
+ ASM_LENC_UTF16,
+ ASM_LENC_UTF32,
+} AsmLitEnc;
+
+/* Decoded-literal record returned by asm_lex_lit. The lexer keeps no
+ * literal table at present: asm_lex_lit always returns NULL. */
+typedef struct AsmLitInfo {
+ u8 kind; /* presumably an AsmLitKind value — confirm */
+ u8 enc; /* presumably an AsmLitEnc value — confirm */
+ u16 flags;
+ Sym spelling;
+ BytesId bytes;
+} AsmLitInfo;
+
+/* One token as returned by asm_lex_next. The struct is zero-filled before
+ * the lexer sets the fields relevant to the token's kind. */
+typedef struct AsmTok {
+ u16 kind; /* an AsmTokKind value */
+ u16 flags; /* OR of AsmTokFlag bits */
+ SrcLoc loc; /* position of the token's first byte */
+ Sym spelling; /* interned post-splice text; unset for EOF and NEWLINE */
+ AsmLitId lit; /* left at 0 (ASM_LIT_NONE) by asm_lex_next */
+ union {
+ Sym ident; /* ASM_TOK_IDENT: same value as spelling */
+ Sym str; /* ASM_TOK_STR / ASM_TOK_CHR: same value as spelling */
+ u32 punct; /* PUNCT / HASH / HASH_HASH: char code or AsmPunct value */
+ } v;
+} AsmTok;
+
+typedef struct AsmLexer AsmLexer;
+
+AsmLexer* asm_lex_open_mem(Compiler*, const char* name, const char* src,
+ size_t len);
+void asm_lex_close(AsmLexer*);
+
+AsmTok asm_lex_next(AsmLexer*);
+SrcLoc asm_lex_loc(const AsmLexer*);
+u32 asm_lex_file_id(const AsmLexer*);
+const AsmLitInfo* asm_lex_lit(const AsmLexer*, AsmLitId);
+
+#endif
diff --git a/src/core/source.c b/src/core/source.c
@@ -1,5 +1,5 @@
/* SourceManager — file-id authority for diagnostics, dependency output,
- * and DWARF. The lex/pp/parse subsystems aren't part of the obj/ELF
+ * and DWARF. The C frontend subsystems aren't part of the obj/ELF
* foundation, so this implementation is minimal: it stores a flat array
* of registered files and the include-edge list, and exposes lookups.
* It does not yet support macro-expansion pseudo files or
diff --git a/src/lex/lex.h b/src/lex/lex.h
@@ -1,114 +0,0 @@
-#ifndef CFREE_LEX_H
-#define CFREE_LEX_H
-
-#include "core/core.h"
-
-typedef enum TokKind {
- TOK_EOF = 0,
- TOK_IDENT,
- TOK_NUM,
- TOK_FLT,
- TOK_STR,
- TOK_CHR,
- TOK_PUNCT,
- TOK_PP_HASH,
- TOK_PP_PASTE,
- TOK_HEADER,
- TOK_NEWLINE,
- TOK_KW_FIRST,
- TOK_KW_LAST = 0x1000,
-} TokKind;
-
-typedef enum TokFlag {
- TF_AT_BOL = 1u << 0,
- TF_HAS_SPACE = 1u << 1,
- TF_NO_EXPAND = 1u << 2,
- TF_INT_U = 1u << 3,
- TF_INT_L = 1u << 4,
- TF_INT_LL = 1u << 5,
- TF_FLT_F = 1u << 6,
- TF_FLT_L = 1u << 7,
- TF_STR_WIDE = 1u << 8,
- TF_STR_U8 = 1u << 9,
- TF_STR_U16 = 1u << 10,
- TF_STR_U32 = 1u << 11,
- TF_LITERAL_BAD = 1u << 12,
-} TokFlag;
-
-typedef enum Punct {
- P_NONE = 0,
- P_ARROW = 256,
- P_INC,
- P_DEC,
- P_SHL,
- P_SHR,
- P_LE,
- P_GE,
- P_EQ,
- P_NE,
- P_AND,
- P_OR,
- P_ADD_ASSIGN,
- P_SUB_ASSIGN,
- P_MUL_ASSIGN,
- P_DIV_ASSIGN,
- P_MOD_ASSIGN,
- P_AND_ASSIGN,
- P_OR_ASSIGN,
- P_XOR_ASSIGN,
- P_SHL_ASSIGN,
- P_SHR_ASSIGN,
- P_ELLIPSIS,
- P_HASH_HASH,
-} Punct;
-
-typedef u32 LitId;
-#define LIT_NONE 0u
-
-typedef enum LitKind {
- LIT_INT,
- LIT_FLOAT,
- LIT_STRING,
- LIT_CHAR,
-} LitKind;
-
-typedef enum LitEnc {
- LENC_ORDINARY,
- LENC_UTF8,
- LENC_WIDE,
- LENC_UTF16,
- LENC_UTF32,
-} LitEnc;
-
-typedef struct LitInfo {
- u8 kind;
- u8 enc;
- u16 flags;
- Sym spelling;
- BytesId bytes;
-} LitInfo;
-
-typedef struct Tok {
- u16 kind;
- u16 flags;
- SrcLoc loc;
- Sym spelling;
- LitId lit;
- union {
- Sym ident;
- Sym str;
- u32 punct;
- } v;
-} Tok;
-
-typedef struct Lexer Lexer;
-
-Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len);
-void lex_close(Lexer*);
-
-Tok lex_next(Lexer*);
-SrcLoc lex_loc(const Lexer*);
-u32 lex_file_id(const Lexer*);
-const LitInfo* lex_lit(const Lexer*, LitId);
-
-#endif
diff --git a/src/parse/parse.h b/src/parse/parse.h
@@ -1,11 +0,0 @@
-#ifndef CFREE_PARSE_H
-#define CFREE_PARSE_H
-
-#include "arch/arch.h"
-#include "lex/lex.h"
-
-/* Standalone assembler. Reads tokens directly from a Lexer; emits via
- * MCEmitter. */
-void parse_asm(Compiler*, Lexer*, MCEmitter*);
-
-#endif
diff --git a/src/parse/parse_asm.c b/src/parse/parse_asm.c
@@ -1,983 +0,0 @@
-/* GNU-as compatible assembler driver — arch-agnostic.
- *
- * Reads tokens from a Lexer, dispatches directives, manages labels and
- * section state, and forwards mnemonic lines to the per-arch instruction
- * parser. Output goes through MCEmitter against an ObjBuilder.
- *
- * Lexer quirks worked around here:
- * - `#` is the immediate marker in asm but TOK_PP_HASH in the C lexer.
- * `#` at BOL is a cpp linemarker → skip to next newline; elsewhere
- * the per-arch parser treats it as the immediate prefix.
- * - composite mnemonics (`b.eq`, `b.ne`, ...) arrive as IDENT '.' IDENT
- * and are reassembled before dispatch.
- * - `.text` etc. arrive as PUNCT('.') + IDENT and are stitched here.
- *
- * Symbol bookkeeping: a Sym→ObjSymId map records the symbols introduced
- * by labels, `.globl`, and operand references so a forward reference
- * (`b foo` before `foo:`) shares one symbol with its later definition.
- * A second Sym→AsmEqu map carries `.set`/`.equ` constants. */
-
-#include "parse/parse.h"
-
-#include <stdarg.h>
-#include <string.h>
-
-#include "arch/aa64_asm.h"
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/hashmap.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "lex/lex.h"
-#include "obj/obj.h"
-#include "parse/parse_asm_helpers.h"
-
-HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32);
-HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32);
-
-typedef struct AsmEqu {
- i64 value;
- ObjSymId sym; /* nonzero when value is `sym + offset` */
- u8 has_sym;
- u8 pad[3];
-} AsmEqu;
-HASHMAP_DEFINE(SymEquMap, Sym, AsmEqu, hash_u32);
-
-struct AsmDriver {
- Compiler* c;
- Lexer* lex;
- MCEmitter* mc;
- ObjBuilder* ob;
- Pool* pool;
- Heap* heap;
-
- Tok cur;
- int has_cur;
-
- /* OBJ_SEC_NONE until first emit / explicit `.text` etc. */
- ObjSecId cur_sec;
-
- SymSecMap sec_map;
- SymSymMap sym_map;
- SymEquMap equ_map;
-
- Sym n_text, n_data, n_rodata, n_bss;
-
- /* Per-arch handle. Phase-3 ships aa64 only; phase-5 adds dispatch. */
- AA64Asm* aa64;
-};
-
-/* ---- token plumbing ---- */
-
-static Tok d_peek(AsmDriver* d) {
- if (!d->has_cur) {
- d->cur = lex_next(d->lex);
- d->has_cur = 1;
- }
- return d->cur;
-}
-
-static Tok d_next(AsmDriver* d) {
- Tok t = d_peek(d);
- d->has_cur = 0;
- return t;
-}
-
-static int d_is_eol(AsmDriver* d) {
- Tok t = d_peek(d);
- return t.kind == TOK_NEWLINE || t.kind == TOK_EOF;
-}
-
-static void d_skip_to_eol(AsmDriver* d) {
- while (!d_is_eol(d)) (void)d_next(d);
-}
-
-static void d_eat_eol(AsmDriver* d) {
- Tok t = d_peek(d);
- if (t.kind == TOK_NEWLINE) (void)d_next(d);
-}
-
-static SrcLoc d_loc(AsmDriver* d) {
- if (d->has_cur) return d->cur.loc;
- return lex_loc(d->lex);
-}
-
-_Noreturn static void d_panicf(AsmDriver* d, const char* fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- compiler_panicv(d->c, d_loc(d), fmt, ap);
- /* unreachable; va_end omitted because compiler_panicv is _Noreturn */
-}
-
-/* ---- spelling helpers ---- */
-
-static const char* asm_str(AsmDriver* d, Sym s, size_t* nout) {
- return pool_str(d->pool, s, nout);
-}
-
-static int sym_eq(AsmDriver* d, Sym s, const char* lit) {
- size_t n = 0;
- const char* p = asm_str(d, s, &n);
- size_t i;
- if (!p) return 0;
- for (i = 0; i < n; ++i) {
- if (!lit[i] || p[i] != lit[i]) return 0;
- }
- return lit[n] == '\0';
-}
-
-static int starts_with(AsmDriver* d, Sym s, const char* prefix) {
- size_t n = 0;
- const char* p = asm_str(d, s, &n);
- size_t i;
- if (!p) return 0;
- for (i = 0; prefix[i]; ++i) {
- if (i >= n || p[i] != prefix[i]) return 0;
- }
- return 1;
-}
-
-/* ---- section management ---- */
-
-static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind,
- u16 flags, u32 align) {
- ObjSecId* hit = SymSecMap_get(&d->sec_map, name);
- if (hit) return *hit;
- ObjSecId id = obj_section(d->ob, name, kind, flags, align);
- SymSecMap_set(&d->sec_map, name, id);
- return id;
-}
-
-static void set_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
- u32 align) {
- ObjSecId id = ensure_section(d, name, kind, flags, align);
- d->cur_sec = id;
- d->mc->set_section(d->mc, id);
-}
-
-/* ---- symbol management ---- */
-
-static ObjSymId intern_sym(AsmDriver* d, Sym name) {
- ObjSymId* hit = SymSymMap_get(&d->sym_map, name);
- if (hit) return *hit;
- ObjSymId id = obj_symbol_find(d->ob, name);
- if (id == OBJ_SYM_NONE) {
- id = obj_symbol_ex(d->ob, name, SB_LOCAL, SV_DEFAULT, SK_NOTYPE,
- OBJ_SEC_NONE, 0, 0, 0);
- }
- SymSymMap_set(&d->sym_map, name, id);
- return id;
-}
-
-static ObjSym* sym_mut(AsmDriver* d, ObjSymId id) {
- /* obj.h gives us a const view via obj_symbol_get; the underlying
- * record lives in the builder's arena and is safe to mutate
- * pre-finalize. Wrapping the cast keeps the const-stripping in
- * one place. */
- return (ObjSym*)obj_symbol_get(d->ob, id);
-}
-
-/* ---- expression evaluator (constants + sym ± const) ---- */
-
-typedef struct AsmExpr {
- ObjSymId sym;
- i64 value;
-} AsmExpr;
-
-static AsmExpr expr_c(i64 v) { AsmExpr e = {OBJ_SYM_NONE, v}; return e; }
-static AsmExpr expr_s(ObjSymId s, i64 v) { AsmExpr e = {s, v}; return e; }
-
-static int tok_is_punct(Tok t, u32 p) {
- return t.kind == TOK_PUNCT && t.v.punct == p;
-}
-
-static i64 lit_to_i64(AsmDriver* d, Sym spelling) {
- size_t n = 0;
- const char* p = asm_str(d, spelling, &n);
- u64 v = 0;
- int base = 10;
- size_t i = 0;
- if (!p || !n) return 0;
- if (n >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- base = 16; i = 2;
- } else if (n >= 2 && p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) {
- base = 2; i = 2;
- } else if (n >= 1 && p[0] == '0') {
- base = 8; i = 1;
- }
- for (; i < n; ++i) {
- char c = p[i];
- u32 dv;
- if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break;
- if (c >= '0' && c <= '9') dv = (u32)(c - '0');
- else if (c >= 'a' && c <= 'f') dv = 10 + (u32)(c - 'a');
- else if (c >= 'A' && c <= 'F') dv = 10 + (u32)(c - 'A');
- else d_panicf(d, "asm: bad digit in integer literal");
- if (dv >= (u32)base) d_panicf(d, "asm: digit out of base");
- v = v * (u64)base + dv;
- }
- return (i64)v;
-}
-
-static AsmExpr parse_expr(AsmDriver*);
-static AsmExpr parse_unary(AsmDriver*);
-
-static AsmExpr parse_primary(AsmDriver* d) {
- Tok t = d_peek(d);
- if (t.kind == TOK_NUM) {
- (void)d_next(d);
- return expr_c(lit_to_i64(d, t.spelling));
- }
- if (t.kind == TOK_IDENT) {
- (void)d_next(d);
- AsmEqu* eq = SymEquMap_get(&d->equ_map, t.v.ident);
- if (eq) {
- if (eq->has_sym) return expr_s(eq->sym, eq->value);
- return expr_c(eq->value);
- }
- return expr_s(intern_sym(d, t.v.ident), 0);
- }
- if (tok_is_punct(t, '(')) {
- (void)d_next(d);
- AsmExpr e = parse_expr(d);
- Tok cl = d_peek(d);
- if (!tok_is_punct(cl, ')')) d_panicf(d, "asm: expected ')'");
- (void)d_next(d);
- return e;
- }
- d_panicf(d, "asm: expected expression");
-}
-
-static AsmExpr parse_unary(AsmDriver* d) {
- Tok t = d_peek(d);
- if (tok_is_punct(t, '-')) {
- (void)d_next(d);
- AsmExpr e = parse_unary(d);
- if (e.sym) d_panicf(d, "asm: unary '-' on symbol");
- return expr_c(-e.value);
- }
- if (tok_is_punct(t, '+')) {
- (void)d_next(d);
- return parse_unary(d);
- }
- if (tok_is_punct(t, '~')) {
- (void)d_next(d);
- AsmExpr e = parse_unary(d);
- if (e.sym) d_panicf(d, "asm: unary '~' on symbol");
- return expr_c(~e.value);
- }
- return parse_primary(d);
-}
-
-static AsmExpr parse_mul(AsmDriver* d) {
- AsmExpr a = parse_unary(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '*') && !tok_is_punct(t, '/') &&
- !tok_is_punct(t, '%')) return a;
- u32 op = t.v.punct;
- (void)d_next(d);
- AsmExpr b = parse_unary(d);
- if (a.sym || b.sym) d_panicf(d, "asm: '*/%%' on symbolic operand");
- if (op == '*') a.value *= b.value;
- else if (op == '/') {
- if (!b.value) d_panicf(d, "asm: division by zero");
- a.value /= b.value;
- } else {
- if (!b.value) d_panicf(d, "asm: modulo by zero");
- a.value %= b.value;
- }
- }
-}
-
-static AsmExpr parse_add(AsmDriver* d) {
- AsmExpr a = parse_mul(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '+') && !tok_is_punct(t, '-')) return a;
- u32 op = t.v.punct;
- (void)d_next(d);
- AsmExpr b = parse_mul(d);
- if (op == '+') {
- if (a.sym && b.sym) d_panicf(d, "asm: cannot add two symbols");
- if (b.sym) { a.sym = b.sym; a.value += b.value; }
- else a.value += b.value;
- } else {
- if (b.sym) d_panicf(d, "asm: cannot subtract symbol from constant");
- a.value -= b.value;
- }
- }
-}
-
-static AsmExpr parse_shift(AsmDriver* d) {
- AsmExpr a = parse_add(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, P_SHL) && !tok_is_punct(t, P_SHR)) return a;
- u32 op = t.v.punct;
- (void)d_next(d);
- AsmExpr b = parse_add(d);
- if (a.sym || b.sym) d_panicf(d, "asm: shift on symbolic operand");
- if (op == P_SHL) a.value = (i64)((u64)a.value << (b.value & 63));
- else a.value = a.value >> (b.value & 63);
- }
-}
-
-static AsmExpr parse_band(AsmDriver* d) {
- AsmExpr a = parse_shift(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '&')) return a;
- (void)d_next(d);
- AsmExpr b = parse_shift(d);
- if (a.sym || b.sym) d_panicf(d, "asm: '&' on symbolic operand");
- a.value &= b.value;
- }
-}
-
-static AsmExpr parse_bxor(AsmDriver* d) {
- AsmExpr a = parse_band(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '^')) return a;
- (void)d_next(d);
- AsmExpr b = parse_band(d);
- if (a.sym || b.sym) d_panicf(d, "asm: '^' on symbolic operand");
- a.value ^= b.value;
- }
-}
-
-static AsmExpr parse_bor(AsmDriver* d) {
- AsmExpr a = parse_bxor(d);
- for (;;) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '|')) return a;
- (void)d_next(d);
- AsmExpr b = parse_bxor(d);
- if (a.sym || b.sym) d_panicf(d, "asm: '|' on symbolic operand");
- a.value |= b.value;
- }
-}
-
-static AsmExpr parse_expr(AsmDriver* d) { return parse_bor(d); }
-
-/* ---- public helpers exposed to per-arch parser ---- */
-
-Tok asm_driver_peek(AsmDriver* d) { return d_peek(d); }
-Tok asm_driver_next(AsmDriver* d) { return d_next(d); }
-int asm_driver_at_eol(AsmDriver* d) { return d_is_eol(d); }
-SrcLoc asm_driver_loc(AsmDriver* d) { return d_loc(d); }
-MCEmitter* asm_driver_mc(AsmDriver* d) { return d->mc; }
-ObjBuilder* asm_driver_ob(AsmDriver* d) { return d->ob; }
-Compiler* asm_driver_compiler(AsmDriver* d) { return d->c; }
-Pool* asm_driver_pool(AsmDriver* d) { return d->pool; }
-
-_Noreturn void asm_driver_panic(AsmDriver* d, const char* fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- compiler_panicv(d->c, d_loc(d), fmt, ap);
-}
-
-ObjSymId asm_driver_intern_sym(AsmDriver* d, Sym name) {
- return intern_sym(d, name);
-}
-
-ObjSecId asm_driver_cur_section(AsmDriver* d) {
- if (d->cur_sec == OBJ_SEC_NONE) {
- if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text");
- d->cur_sec = ensure_section(d, d->n_text, SEC_TEXT,
- (u16)(SF_ALLOC | SF_EXEC), 4);
- d->mc->set_section(d->mc, d->cur_sec);
- }
- return d->cur_sec;
-}
-
-int asm_driver_eat_comma(AsmDriver* d) {
- Tok t = d_peek(d);
- if (tok_is_punct(t, ',')) {
- (void)d_next(d);
- return 1;
- }
- return 0;
-}
-
-int asm_driver_eat_punct(AsmDriver* d, u32 p) {
- Tok t = d_peek(d);
- if (tok_is_punct(t, p)) {
- (void)d_next(d);
- return 1;
- }
- /* `#` arrives as TOK_PP_HASH from the C lexer; accept it as the
- * immediate-prefix punctuator here. */
- if (p == '#' && t.kind == TOK_PP_HASH) {
- (void)d_next(d);
- return 1;
- }
- return 0;
-}
-
-void asm_driver_expect_punct(AsmDriver* d, u32 p, const char* what) {
- if (!asm_driver_eat_punct(d, p))
- d_panicf(d, "asm: expected '%s' (%s)", "punct", what);
-}
-
-i64 asm_driver_parse_const(AsmDriver* d) {
- AsmExpr e = parse_expr(d);
- if (e.sym) d_panicf(d, "asm: constant expression expected");
- return e.value;
-}
-
-void asm_driver_parse_sym_expr(AsmDriver* d, ObjSymId* sym_out,
- i64* off_out) {
- AsmExpr e = parse_expr(d);
- *sym_out = e.sym;
- *off_out = e.value;
-}
-
-int asm_driver_tok_is_punct(Tok t, u32 p) {
- if (tok_is_punct(t, p)) return 1;
- /* `#` arrives as TOK_PP_HASH from the C lexer. */
- if (p == '#' && t.kind == TOK_PP_HASH) return 1;
- return 0;
-}
-
-/* ---- string-literal decoding ---- */
-
-static void decode_string(AsmDriver* d, Sym spelling, u8** out, u32* nout) {
- size_t n = 0;
- const char* p = asm_str(d, spelling, &n);
- /* Skip any encoding prefix (L/u/u8/U). */
- while (n && (*p == 'L' || *p == 'u' || *p == 'U' || *p == '8')) {
- ++p;
- --n;
- }
- if (n < 2 || p[0] != '"' || p[n - 1] != '"')
- d_panicf(d, "asm: malformed string literal");
- size_t cap = n;
- u8* buf = (u8*)d->heap->alloc(d->heap, cap ? cap : 1, 1);
- u32 k = 0;
- for (size_t i = 1; i + 1 < n; ++i) {
- char c = p[i];
- if (c != '\\') {
- buf[k++] = (u8)c;
- continue;
- }
- ++i;
- if (i + 1 >= n) break;
- char e = p[i];
- switch (e) {
- case 'n': buf[k++] = '\n'; break;
- case 't': buf[k++] = '\t'; break;
- case 'r': buf[k++] = '\r'; break;
- case '\\': buf[k++] = '\\'; break;
- case '"': buf[k++] = '"'; break;
- case '\'': buf[k++] = '\''; break;
- case '0': buf[k++] = 0; break;
- case 'b': buf[k++] = 8; break;
- case 'f': buf[k++] = 12; break;
- case 'v': buf[k++] = 11; break;
- case 'a': buf[k++] = 7; break;
- case 'x': {
- u32 v = 0;
- int dn = 0;
- while (i + 2 < n) {
- char h = p[i + 1];
- int dv;
- if (h >= '0' && h <= '9') dv = h - '0';
- else if (h >= 'a' && h <= 'f') dv = 10 + (h - 'a');
- else if (h >= 'A' && h <= 'F') dv = 10 + (h - 'A');
- else break;
- v = v * 16 + (u32)dv;
- ++i;
- if (++dn >= 2) break;
- }
- buf[k++] = (u8)v;
- break;
- }
- default:
- if (e >= '0' && e <= '7') {
- u32 v = (u32)(e - '0');
- int dn = 1;
- while (dn < 3 && i + 2 < n) {
- char h = p[i + 1];
- if (h < '0' || h > '7') break;
- v = v * 8 + (u32)(h - '0');
- ++i;
- ++dn;
- }
- buf[k++] = (u8)v;
- } else {
- buf[k++] = (u8)e;
- }
- break;
- }
- }
- *out = buf;
- *nout = k;
-}
-
-/* ---- directives ---- */
-
-static Sym expect_ident(AsmDriver* d, const char* what) {
- Tok t = d_peek(d);
- if (t.kind != TOK_IDENT) d_panicf(d, "asm: %s: expected identifier", what);
- (void)d_next(d);
- return t.v.ident;
-}
-
-static void emit_le(AsmDriver* d, u64 v, u32 width) {
- u8 buf[8];
- for (u32 i = 0; i < width; ++i) buf[i] = (u8)(v >> (8 * i));
- (void)asm_driver_cur_section(d);
- d->mc->emit_bytes(d->mc, buf, width);
-}
-
-static void emit_int_directive(AsmDriver* d, u32 width) {
- for (;;) {
- AsmExpr e = parse_expr(d);
- if (e.sym) {
- RelocKind k;
- if (width == 4) k = R_ABS32;
- else if (width == 8) k = R_ABS64;
- else d_panicf(d, "asm: symbolic .byte/.hword not supported");
- (void)asm_driver_cur_section(d);
- u32 ofs = d->mc->pos(d->mc);
- u8 zero[8] = {0};
- d->mc->emit_bytes(d->mc, zero, width);
- d->mc->emit_reloc_at(d->mc, d->cur_sec, ofs, k, e.sym, e.value, 1, 0);
- } else {
- emit_le(d, (u64)e.value, width);
- }
- if (!asm_driver_eat_comma(d)) break;
- }
-}
-
-static void do_directive(AsmDriver* d, Sym name) {
- if (sym_eq(d, name, "text")) {
- if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text");
- set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "data")) {
- if (!d->n_data) d->n_data = pool_intern_cstr(d->pool, ".data");
- set_section(d, d->n_data, SEC_DATA, (u16)(SF_ALLOC | SF_WRITE), 8);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "rodata")) {
- if (!d->n_rodata) d->n_rodata = pool_intern_cstr(d->pool, ".rodata");
- set_section(d, d->n_rodata, SEC_RODATA, (u16)SF_ALLOC, 8);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "bss")) {
- if (!d->n_bss) d->n_bss = pool_intern_cstr(d->pool, ".bss");
- set_section(d, d->n_bss, SEC_BSS, (u16)(SF_ALLOC | SF_WRITE), 8);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "section")) {
- Sym sname = 0;
- Tok t = d_peek(d);
- if (t.kind == TOK_IDENT) {
- sname = t.v.ident;
- (void)d_next(d);
- } else if (t.kind == TOK_STR) {
- size_t n = 0;
- const char* p = asm_str(d, t.spelling, &n);
- if (n >= 2 && p[0] == '"') sname = pool_intern(d->pool, p + 1, n - 2);
- (void)d_next(d);
- } else if (tok_is_punct(t, '.')) {
- (void)d_next(d);
- Tok id = d_next(d);
- if (id.kind != TOK_IDENT) d_panicf(d, "asm: .section: bad name");
- size_t ni = 0;
- const char* nm = asm_str(d, id.v.ident, &ni);
- char buf[128];
- if (ni + 1 >= sizeof buf) d_panicf(d, "asm: .section: name too long");
- buf[0] = '.';
- for (size_t i = 0; i < ni; ++i) buf[i + 1] = nm[i];
- sname = pool_intern(d->pool, buf, ni + 1);
- } else {
- d_panicf(d, "asm: .section: expected name");
- }
- SecKind kind = SEC_OTHER;
- u16 flags = 0;
- {
- size_t nn = 0;
- const char* p = asm_str(d, sname, &nn);
- if (p) {
- if (nn >= 5 && memcmp(p, ".text", 5) == 0) {
- kind = SEC_TEXT;
- flags = (u16)(SF_ALLOC | SF_EXEC);
- } else if (nn >= 7 && memcmp(p, ".rodata", 7) == 0) {
- kind = SEC_RODATA;
- flags = (u16)SF_ALLOC;
- } else if (nn >= 5 && memcmp(p, ".data", 5) == 0) {
- kind = SEC_DATA;
- flags = (u16)(SF_ALLOC | SF_WRITE);
- } else if (nn >= 4 && memcmp(p, ".bss", 4) == 0) {
- kind = SEC_BSS;
- flags = (u16)(SF_ALLOC | SF_WRITE);
- }
- }
- }
- /* Skip optional remainder: flags string, type tag, etc. */
- d_skip_to_eol(d);
- set_section(d, sname, kind, flags, 1);
- return;
- }
- if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) {
- Sym n = expect_ident(d, ".globl");
- sym_mut(d, intern_sym(d, n))->bind = (u16)SB_GLOBAL;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "local")) {
- Sym n = expect_ident(d, ".local");
- sym_mut(d, intern_sym(d, n))->bind = (u16)SB_LOCAL;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "weak")) {
- Sym n = expect_ident(d, ".weak");
- sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "hidden")) {
- Sym n = expect_ident(d, ".hidden");
- sym_mut(d, intern_sym(d, n))->vis = (u8)SV_HIDDEN;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "protected")) {
- Sym n = expect_ident(d, ".protected");
- sym_mut(d, intern_sym(d, n))->vis = (u8)SV_PROTECTED;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "internal")) {
- Sym n = expect_ident(d, ".internal");
- sym_mut(d, intern_sym(d, n))->vis = (u8)SV_INTERNAL;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "type")) {
- Sym n = expect_ident(d, ".type");
- ObjSymId id = intern_sym(d, n);
- if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .type: expected ','");
- Tok t = d_next(d);
- Sym tag = 0;
- if (tok_is_punct(t, '@') || tok_is_punct(t, '%')) {
- Tok ti = d_next(d);
- if (ti.kind != TOK_IDENT) d_panicf(d, "asm: .type: tag");
- tag = ti.v.ident;
- } else if (t.kind == TOK_IDENT) {
- tag = t.v.ident;
- } else if (t.kind == TOK_STR) {
- size_t sn = 0;
- const char* sp = asm_str(d, t.spelling, &sn);
- if (sn >= 2 && sp[0] == '"' && sp[sn - 1] == '"')
- tag = pool_intern(d->pool, sp + 1, sn - 2);
- } else {
- d_panicf(d, "asm: .type: tag");
- }
- if (tag && sym_eq(d, tag, "function"))
- sym_mut(d, id)->kind = (u16)SK_FUNC;
- else if (tag && sym_eq(d, tag, "object"))
- sym_mut(d, id)->kind = (u16)SK_OBJ;
- else if (tag && sym_eq(d, tag, "tls_object"))
- sym_mut(d, id)->kind = (u16)SK_TLS;
- else if (tag && sym_eq(d, tag, "gnu_indirect_function"))
- sym_mut(d, id)->kind = (u16)SK_IFUNC;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "size")) {
- Sym n = expect_ident(d, ".size");
- ObjSymId id = intern_sym(d, n);
- if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .size: expected ','");
- /* Recognize `. - NAME`. */
- Tok t = d_peek(d);
- i64 sz = 0;
- if (tok_is_punct(t, '.')) {
- (void)d_next(d);
- if (tok_is_punct(d_peek(d), '-')) {
- (void)d_next(d);
- Tok rid = d_peek(d);
- if (rid.kind == TOK_IDENT && rid.v.ident == n) {
- (void)d_next(d);
- const ObjSym* os = obj_symbol_get(d->ob, id);
- if (os && os->section_id == d->cur_sec)
- sz = (i64)d->mc->pos(d->mc) - (i64)os->value;
- }
- }
- } else {
- AsmExpr e = parse_expr(d);
- if (!e.sym) sz = e.value;
- }
- if (sz < 0) sz = 0;
- sym_mut(d, id)->size = (u64)sz;
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "byte")) {
- emit_int_directive(d, 1);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "hword") || sym_eq(d, name, "short") ||
- sym_eq(d, name, "2byte")) {
- emit_int_directive(d, 2);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "word") || sym_eq(d, name, "long") ||
- sym_eq(d, name, "int") || sym_eq(d, name, "4byte")) {
- emit_int_directive(d, 4);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "quad") || sym_eq(d, name, "8byte") ||
- sym_eq(d, name, "dword") || sym_eq(d, name, "xword")) {
- emit_int_directive(d, 8);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "ascii") || sym_eq(d, name, "asciz") ||
- sym_eq(d, name, "string")) {
- int term = !sym_eq(d, name, "ascii");
- for (;;) {
- Tok t = d_peek(d);
- if (t.kind != TOK_STR)
- d_panicf(d, "asm: .ascii/.string: expected string");
- (void)d_next(d);
- u8* buf = NULL;
- u32 n = 0;
- decode_string(d, t.spelling, &buf, &n);
- (void)asm_driver_cur_section(d);
- d->mc->emit_bytes(d->mc, buf, n);
- if (term) emit_le(d, 0, 1);
- d->heap->free(d->heap, buf, n);
- if (!asm_driver_eat_comma(d)) break;
- }
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "zero") || sym_eq(d, name, "skip") ||
- sym_eq(d, name, "space")) {
- i64 n = asm_driver_parse_const(d);
- i64 fill = 0;
- if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
- if (n > 0) {
- (void)asm_driver_cur_section(d);
- d->mc->emit_fill(d->mc, (size_t)n, (u8)fill);
- }
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "fill")) {
- i64 n = asm_driver_parse_const(d);
- i64 size = 1, val = 0;
- if (asm_driver_eat_comma(d)) size = asm_driver_parse_const(d);
- if (asm_driver_eat_comma(d)) val = asm_driver_parse_const(d);
- if (size < 1 || size > 8) d_panicf(d, "asm: .fill: size out of range");
- (void)asm_driver_cur_section(d);
- for (i64 i = 0; i < n; ++i) emit_le(d, (u64)val, (u32)size);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "align") || sym_eq(d, name, "balign")) {
- i64 a = asm_driver_parse_const(d);
- i64 fill = 0;
- if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
- if (a <= 0 || (a & (a - 1))) d_panicf(d, "asm: .align: not a power of 2");
- (void)asm_driver_cur_section(d);
- d->mc->emit_align(d->mc, (u32)a, (u8)fill);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "p2align")) {
- i64 lg = asm_driver_parse_const(d);
- i64 fill = 0;
- if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
- if (lg < 0 || lg > 16) d_panicf(d, "asm: .p2align: out of range");
- (void)asm_driver_cur_section(d);
- d->mc->emit_align(d->mc, 1u << (u32)lg, (u8)fill);
- d_skip_to_eol(d);
- return;
- }
- if (sym_eq(d, name, "set") || sym_eq(d, name, "equ")) {
- Sym n = expect_ident(d, ".set");
- if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .set: expected ','");
- AsmExpr e = parse_expr(d);
- AsmEqu eq;
- eq.value = e.value;
- eq.sym = e.sym;
- eq.has_sym = e.sym ? 1 : 0;
- eq.pad[0] = eq.pad[1] = eq.pad[2] = 0;
- SymEquMap_set(&d->equ_map, n, eq);
- d_skip_to_eol(d);
- return;
- }
-
- /* CFI block + accepted-but-ignored directives. Keep parser
- * forward-progress without aborting the whole TU. */
- if (starts_with(d, name, "cfi_") ||
- sym_eq(d, name, "file") || sym_eq(d, name, "loc") ||
- sym_eq(d, name, "ident") || sym_eq(d, name, "popsection") ||
- sym_eq(d, name, "pushsection") || sym_eq(d, name, "previous") ||
- sym_eq(d, name, "subsections_via_symbols") ||
- sym_eq(d, name, "comm") || sym_eq(d, name, "lcomm") ||
- sym_eq(d, name, "uleb128") || sym_eq(d, name, "sleb128") ||
- sym_eq(d, name, "macro") || sym_eq(d, name, "endm") ||
- sym_eq(d, name, "if") || sym_eq(d, name, "endif") ||
- sym_eq(d, name, "else") || sym_eq(d, name, "include")) {
- d_skip_to_eol(d);
- return;
- }
-
- /* Unknown directive — recover. */
- d_skip_to_eol(d);
-}
-
-/* ---- driver loop ---- */
-
-static void process_label(AsmDriver* d, Sym name) {
- ObjSymId id = intern_sym(d, name);
- (void)asm_driver_cur_section(d);
- const ObjSym* os = obj_symbol_get(d->ob, id);
- if (os && os->section_id != OBJ_SEC_NONE)
- d_panicf(d, "asm: symbol defined twice");
- obj_symbol_define(d->ob, id, d->cur_sec, (u64)d->mc->pos(d->mc), 0);
- /* Promote SK_UNDEF (forward ref via reloc) to SK_NOTYPE so it's a
- * real defined symbol; explicit `.type SYM, @function` will refine. */
- if (os && os->kind == SK_UNDEF) sym_mut(d, id)->kind = (u16)SK_NOTYPE;
-}
-
-static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) {
- Tok t = d_peek(d);
- if (!tok_is_punct(t, '.')) return head;
- if (t.flags & TF_HAS_SPACE) return head;
- (void)d_next(d);
- Tok rest = d_next(d);
- if (rest.kind != TOK_IDENT)
- d_panicf(d, "asm: composite mnemonic: expected ident");
- size_t hn = 0, rn = 0;
- const char* hp = asm_str(d, head, &hn);
- const char* rp = asm_str(d, rest.v.ident, &rn);
- size_t n = hn + 1 + rn;
- if (n >= 64) d_panicf(d, "asm: mnemonic too long");
- char buf[64];
- for (size_t i = 0; i < hn; ++i) buf[i] = hp[i];
- buf[hn] = '.';
- for (size_t i = 0; i < rn; ++i) buf[hn + 1 + i] = rp[i];
- return pool_intern(d->pool, buf, n);
-}
-
-/* ---- inline-asm driver constructor ----
- *
- * Inline-asm template walkers (per-arch) re-lex pre-substituted source
- * text through the same per-mnemonic parsers used by the standalone .s
- * driver. This constructor builds a minimally-initialized AsmDriver
- * around a caller-supplied memory-backed Lexer + MCEmitter.
- *
- * The driver does not own the Lexer or MCEmitter, does not allocate a
- * default section (inline asm emits into whatever section the wrapping
- * cg has selected on its MCEmitter), and skips the standalone driver's
- * per-arch handle (`d->aa64`) — the caller has already opened its own
- * AA64Asm to thread per-block bound state through. */
-AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, Lexer* lex) {
- Heap* heap = (Heap*)c->env->heap;
- AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver));
- memset(d, 0, sizeof *d);
- d->c = c;
- d->lex = lex;
- d->mc = mc;
- d->ob = mc->obj;
- d->pool = c->global;
- d->heap = heap;
- /* The MCEmitter's section is whatever cg has set; do not override it.
- * cur_sec == OBJ_SEC_NONE means "ask the MCEmitter on demand" — we use
- * mc->section_id directly via asm_driver_cur_section's lazy init for
- * standalone, but inline asm should never reach that path because the
- * MCEmitter already has its section. Pre-seed cur_sec from the
- * MCEmitter so emit_reloc_at calls get the right section id. */
- d->cur_sec = mc->section_id;
- SymSecMap_init(&d->sec_map, heap);
- SymSymMap_init(&d->sym_map, heap);
- SymEquMap_init(&d->equ_map, heap);
- d->aa64 = NULL; /* caller owns its own AA64Asm */
- return d;
-}
-
-void asm_driver_close_inline(AsmDriver* d) {
- if (!d) return;
- SymSecMap_fini(&d->sec_map);
- SymSymMap_fini(&d->sym_map);
- SymEquMap_fini(&d->equ_map);
- Heap* heap = d->heap;
- heap->free(heap, d, sizeof *d);
-}
-
-void parse_asm(Compiler* c, Lexer* l, MCEmitter* mc) {
- AsmDriver d;
- memset(&d, 0, sizeof d);
- d.c = c;
- d.lex = l;
- d.mc = mc;
- d.ob = mc->obj;
- d.pool = c->global;
- d.heap = (Heap*)c->env->heap;
- d.cur_sec = OBJ_SEC_NONE;
- SymSecMap_init(&d.sec_map, d.heap);
- SymSymMap_init(&d.sym_map, d.heap);
- SymEquMap_init(&d.equ_map, d.heap);
- d.aa64 = aa64_asm_open(c);
-
- for (;;) {
- Tok t = d_peek(&d);
- if (t.kind == TOK_EOF) break;
- if (t.kind == TOK_NEWLINE) {
- (void)d_next(&d);
- continue;
- }
- if (t.kind == TOK_PP_HASH) {
- /* cpp-style linemarker; skip the whole line. */
- d_skip_to_eol(&d);
- continue;
- }
- if (tok_is_punct(t, '.')) {
- (void)d_next(&d);
- Tok id = d_next(&d);
- if (id.kind != TOK_IDENT)
- d_panicf(&d, "asm: expected directive name after '.'");
- do_directive(&d, id.v.ident);
- d_eat_eol(&d);
- continue;
- }
- if (t.kind == TOK_IDENT) {
- Sym head = t.v.ident;
- (void)d_next(&d);
- Tok nxt = d_peek(&d);
- if (tok_is_punct(nxt, ':')) {
- (void)d_next(&d);
- process_label(&d, head);
- continue;
- }
- Sym mnemonic = maybe_compose_mnemonic(&d, head);
- aa64_asm_insn(d.aa64, &d, mnemonic);
- d_skip_to_eol(&d);
- continue;
- }
- /* Anything else: recover by skipping the line. */
- d_skip_to_eol(&d);
- }
-
- aa64_asm_close(d.aa64);
- SymSecMap_fini(&d.sec_map);
- SymSymMap_fini(&d.sym_map);
- SymEquMap_fini(&d.equ_map);
-}
diff --git a/src/parse/parse_asm_helpers.h b/src/parse/parse_asm_helpers.h
@@ -1,63 +0,0 @@
-#ifndef CFREE_PARSE_ASM_HELPERS_H
-#define CFREE_PARSE_ASM_HELPERS_H
-
-/* Lightweight asm-driver surface consumed by per-arch instruction
- * parsers. The driver itself is opaque to per-arch code; these helpers
- * are the only seam. Implementations live in src/parse/parse_asm.c. */
-
-#include "arch/arch.h"
-#include "core/core.h"
-#include "lex/lex.h"
-#include "obj/obj.h"
-
-typedef struct AsmDriver AsmDriver;
-
-/* ---- token plumbing ---- */
-Tok asm_driver_peek(AsmDriver*);
-Tok asm_driver_next(AsmDriver*);
-int asm_driver_at_eol(AsmDriver*);
-int asm_driver_tok_is_punct(Tok t, u32 p);
-int asm_driver_eat_comma(AsmDriver*);
-int asm_driver_eat_punct(AsmDriver*, u32 punct);
-void asm_driver_expect_punct(AsmDriver*, u32 punct, const char* what);
-
-/* Source position for diagnostics. */
-SrcLoc asm_driver_loc(AsmDriver*);
-
-/* Owning subsystems. */
-MCEmitter* asm_driver_mc(AsmDriver*);
-ObjBuilder* asm_driver_ob(AsmDriver*);
-Compiler* asm_driver_compiler(AsmDriver*);
-Pool* asm_driver_pool(AsmDriver*);
-ObjSecId asm_driver_cur_section(AsmDriver*);
-
-/* Diagnostics: emits then longjmps via Compiler.panic. No return. */
-_Noreturn void asm_driver_panic(AsmDriver*, const char* fmt, ...);
-
-/* ---- symbol + expression parsing ---- */
-ObjSymId asm_driver_intern_sym(AsmDriver*, Sym name);
-
-/* Parse a constant integer expression. Panics if the expression
- * references a symbol. */
-i64 asm_driver_parse_const(AsmDriver*);
-
-/* Parse a `sym ± const` expression. Both outputs valid: pure constants
- * leave *sym_out == OBJ_SYM_NONE. */
-void asm_driver_parse_sym_expr(AsmDriver*, ObjSymId* sym_out, i64* off_out);
-
-/* ---- inline-asm constructor ----
- *
- * Build an AsmDriver around a memory-backed Lexer + caller-supplied
- * MCEmitter. Used by inline-asm template walkers (one driver per asm
- * line) to reuse the existing per-arch instruction parsers verbatim
- * over a substituted source buffer.
- *
- * The driver is heap-allocated through c->env->heap and must be released
- * with asm_driver_close_inline. It does not own the Lexer or the
- * MCEmitter — the caller retains ownership of both. The driver does
- * not initialize a default section; inline asm always emits into the
- * MCEmitter's currently-active section. */
-AsmDriver* asm_driver_open_inline(Compiler*, MCEmitter*, Lexer*);
-void asm_driver_close_inline(AsmDriver*);
-
-#endif
diff --git a/test/asm/CORPUS.md b/test/asm/CORPUS.md
@@ -55,7 +55,7 @@ as the exit code, mirroring the test/parse and test/cg conventions.
- `<name>.expected.lst` — golden listing for L.
- `<name>.skip` — single-line reason. The case is reported as
SKIP for every path it applies to. Every phase-1 case carries one
- because the underlying APIs (`parse_asm`, `cfree_disasm_iter_*`,
+ because the underlying APIs (`asm_parse`, `cfree_disasm_iter_*`,
`cfree_obj_disasm`) are still stubs. They drop as the matching
subsystems land.
diff --git a/test/asm/harness/asm_runner.c b/test/asm/harness/asm_runner.c
@@ -10,7 +10,7 @@
* consumers take). Built once; the shell runner walks the sub-corpora
* and invokes one mode per case-path pair.
*
- * Phase 1: parse_asm and the disasm iterator are still stubs in
+ * Phase 1: asm_parse and the disasm iterator are still stubs in
* src/api/stubs.c. The runner returns nonzero when the underlying API
* fails; smoke cases each carry a .skip sidecar so the harness reports
* them cleanly until phases 3 and 4 land.
diff --git a/test/asm/run.sh b/test/asm/run.sh
@@ -27,7 +27,7 @@
# Reuses the test/link harness binaries (link-exe-runner, jit-runner) plus
# test/link/harness/start.c verbatim — same convention as test/parse/run.sh.
#
-# Phase 1 (doc/ASM.md §5): parse_asm and the disasm iterator are still
+# Phase 1 (doc/ASM.md §5): asm_parse and the disasm iterator are still
# stubs in src/api/stubs.c. Every smoke case carries a .skip sidecar so
# the harness reports SKIP cleanly; the wiring still runs on every CI
# pass. CFREE_TEST_ALLOW_SKIP defaults to 1 here for the duration of
@@ -68,7 +68,7 @@ CLANG_TARGET="--target=$CLANG_TRIPLE"
CC="${CC:-cc}"
HARNESS_CFLAGS="-std=c11 -Wall -Wextra -I$ROOT/include -I$ROOT/test"
# Phase 1: ALLOW_SKIP defaults to 1 (smoke cases skip cleanly because
-# parse_asm / cfree_disasm_iter_* are still stubs). Flip to 0 once the
+# asm_parse / cfree_disasm_iter_* are still stubs). Flip to 0 once the
# assembler / disassembler land.
ALLOW_SKIP="${CFREE_TEST_ALLOW_SKIP:-1}"
diff --git a/test/test.mk b/test/test.mk
@@ -22,7 +22,7 @@
# - test-asm: file-driven assembler/disassembler harness in test/asm/.
# Three sub-corpora (encode/, decode/, listing/), one mode per
# sub-dir. Phase 1: every smoke case carries a .skip sidecar because
-# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds
+# asm_parse / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64