kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit bf6d1df8204f07146354dce4f52030f65135cf08
parent 5336a9a232e0099fbfbbabb4eb2be3e4cab93c02
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 10:04:00 -0700

Move assembler parsing under src/asm

Diffstat:
Msrc/api/ar.c | 2+-
Msrc/api/pipeline.c | 22++++++++++++----------
Msrc/api/stubs.c | 4++--
Msrc/arch/aa64_asm.c | 72++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/arch/aa64_asm.h | 6+++---
Msrc/arch/mc.c | 2+-
Asrc/asm/asm.c | 1032+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/asm/asm.h | 11+++++++++++
Asrc/asm/asm_helpers.h | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/asm/asm_lex.c | 705+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/asm/asm_lex.h | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/core/source.c | 2+-
Dsrc/lex/lex.h | 114-------------------------------------------------------------------------------
Dsrc/parse/parse.h | 11-----------
Dsrc/parse/parse_asm.c | 983-------------------------------------------------------------------------------
Dsrc/parse/parse_asm_helpers.h | 63---------------------------------------------------------------
Mtest/asm/CORPUS.md | 2+-
Mtest/asm/harness/asm_runner.c | 2+-
Mtest/asm/run.sh | 4++--
Mtest/test.mk | 2+-
20 files changed, 1983 insertions(+), 1230 deletions(-)

diff --git a/src/api/ar.c b/src/api/ar.c @@ -1,6 +1,6 @@ /* POSIX ar archive reader/writer (cfree_ar_write / cfree_ar_iter / * cfree_ar_list). Pure format I/O over CfreeWriter and a const byte - * range — no pp/lex/parse/cg/obj dependencies. Kept in its own TU so + * range — no C frontend/cg/obj dependencies. Kept in its own TU so * consumers that only need the ar surface (e.g. test/ar_test, the * driver's `ar` and `ld` paths once split out) don't drag in the full * compile/link pipeline through the linker. diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -7,12 +7,12 @@ #include <cfree.h> #include "arch/arch.h" +#include "asm/asm.h" #include "core/arena.h" #include "core/heap.h" #include "core/pool.h" #include "link/link.h" #include "obj/obj.h" -#include "parse/parse.h" /* CfreeCompiler lifecycle (cfree_compiler_new / cfree_compiler_free) * lives in src/api/lifecycle.c so consumers that only need lifecycle @@ -46,13 +46,13 @@ static _Noreturn void panic_bad_options(Compiler* c, const char* msg) { /* One-TU compile against a fresh ObjBuilder. The builder is finalized on * exit so it is immediately consumable by the linker or an emit_* function. - * The input bytes must outlive this call. Branches on input->lang: C goes - * through the preprocessor + C parser + codegen; ASM bypasses pp/cg and - * feeds tokens straight to the assembler. */ + * The input bytes must outlive this call. Registered language frontends own + * their compile path; ASM remains the built-in fallback and feeds tokens + * straight to the assembler. */ static void compile_into(Compiler* c, const CfreeCompileOptions* opts, const CfreeBytesInput* input, ObjBuilder* ob) { CfreeCompileFn frontend = NULL; - Lexer* lex; + AsmLexer* lex; MCEmitter* mc; if (input->lang < CFREE_LANG_COUNT) { @@ -66,17 +66,19 @@ static void compile_into(Compiler* c, const CfreeCompileOptions* opts, return; } - lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); - mc = mc_new(c, ob); - if (input->lang == CFREE_LANG_ASM) { + lex = asm_lex_open_mem(c, input->name, (const char*)input->data, input->len); + mc = mc_new(c, ob); /* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */ - parse_asm(c, lex, mc); + asm_parse(c, lex, mc); obj_finalize(ob); mc_free(mc); /* The assembler owns the lexer it was handed; no pp_free release. */ return; } + + compiler_panic(c, no_loc(), "no frontend registered for input language: %u", + (u32)input->lang); } /* Suffix-based language inference. See header. */ @@ -378,7 +380,7 @@ int cfree_pipeline_link_jit(CfreePipeline* p, const CfreeLinkOptions* opts, * plus their detect_* helpers) live in src/api/detect.c — pure byte parsing, * no internal-libcfree dependencies, kept separate so consumers that only * detect inputs (e.g. cfree-roundtrip tests) don't drag this TU's - * lex/pp/parse/cg/etc. dependencies in through the linker. */ + * C frontend/cg/etc. dependencies in through the linker. */ #if 0 /* moved to src/api/detect.c */ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -15,10 +15,10 @@ #include <cfree.h> #include "arch/arch.h" +#include "asm/asm.h" #include "debug/debug.h" #include "link/link.h" #include "obj/obj.h" -#include "parse/parse.h" /* Internal panic stub used when a not-yet-implemented subsystem is invoked * with a Compiler in hand. Public-API stubs that don't have a Compiler @@ -30,7 +30,7 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) { /* C preprocessing is owned by the C frontend. */ -/* parse_asm lives in src/parse/parse_asm.c. */ +/* asm_parse lives in src/asm/asm.c. */ /* mc_new / mc_free live in src/arch/mc.c. * cgtarget_new / cgtarget_finalize / cgtarget_free live in src/arch/<target>.c diff --git a/src/arch/aa64_asm.c b/src/arch/aa64_asm.c @@ -23,9 +23,9 @@ #include "core/arena.h" #include "core/pool.h" #include "core/strbuf.h" -#include "lex/lex.h" +#include "asm/asm_lex.h" #include "obj/obj.h" -#include "parse/parse_asm_helpers.h" +#include "asm/asm_helpers.h" /* ---- public handle ---- */ @@ -73,7 +73,7 @@ void aa64_inline_bind(AA64Asm* a, /* ---- helpers ---- */ -static int tok_punct(Tok t, u32 p) { return asm_driver_tok_is_punct(t, p); } +static int tok_punct(AsmTok t, u32 p) { return asm_driver_tok_is_punct(t, p); } static int icase_eq(const char* a, size_t an, const char* b) { size_t i; @@ -203,19 +203,19 @@ static int parse_fp_d_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) { } static AA64Reg parse_reg(AsmDriver* d) { - Tok t = asm_driver_next(d); + AsmTok t = asm_driver_next(d); AA64Reg r; memset(&r, 0, sizeof r); - if (t.kind != TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) asm_driver_panic(d, "asm: expected register"); return r; } static AA64Reg parse_ldstp_reg(AsmDriver* d) { - Tok t = asm_driver_next(d); + AsmTok t = asm_driver_next(d); AA64Reg r; memset(&r, 0, sizeof r); - if (t.kind != TOK_IDENT || + if (t.kind != ASM_TOK_IDENT || (!parse_reg_from_ident(d, t.v.ident, &r) && !parse_fp_d_reg_from_ident(d, t.v.ident, &r))) { asm_driver_panic(d, "asm: expected register"); @@ -281,9 +281,9 @@ static int parse_cond_from_ident(AsmDriver* d, Sym ident, u32* out) { } static u32 parse_cond(AsmDriver* d, const char* what) { - Tok t = asm_driver_next(d); + AsmTok t = asm_driver_next(d); u32 cond = 0; - if (t.kind != TOK_IDENT || !parse_cond_from_ident(d, t.v.ident, &cond)) + if (t.kind != ASM_TOK_IDENT || !parse_cond_from_ident(d, t.v.ident, &cond)) asm_driver_panic(d, "asm: %s: expected condition code", what); return cond; } @@ -333,8 +333,8 @@ static void p_nop(AsmDriver* d) { * clrex [#imm4] ; option defaults to sy (15) when omitted */ static u32 parse_barrier_option(AsmDriver* d, int allow_dmb_ld_st) { if (asm_driver_at_eol(d)) return AA64_BARRIER_OPT_SY; - Tok t = asm_driver_peek(d); - if (t.kind == TOK_IDENT) { + AsmTok t = asm_driver_peek(d); + if (t.kind == ASM_TOK_IDENT) { (void)asm_driver_next(d); size_t n = 0; const char* s = pool_str(asm_driver_pool(d), t.v.ident, &n); @@ -386,8 +386,8 @@ static void p_clrex(AsmDriver* d) { static void p_mov(AsmDriver* d) { AA64Reg rd = parse_reg(d); expect_comma(d, "mov"); - Tok t = asm_driver_peek(d); - if (t.kind == TOK_IDENT) { + AsmTok t = asm_driver_peek(d); + if (t.kind == ASM_TOK_IDENT) { AA64Reg src; memset(&src, 0, sizeof src); if (parse_reg_from_ident(d, t.v.ident, &src)) { @@ -455,8 +455,8 @@ static void p_movwide(AsmDriver* d, u32 opc) { u32 hw = 0; if (asm_driver_eat_comma(d)) { /* lsl #N (N is 0/16/32/48). */ - Tok lid = asm_driver_next(d); - if (lid.kind != TOK_IDENT) + AsmTok lid = asm_driver_next(d); + if (lid.kind != ASM_TOK_IDENT) asm_driver_panic(d, "asm: expected 'lsl'"); size_t ln = 0; const char* lp = pool_str(asm_driver_pool(d), lid.v.ident, &ln); @@ -493,8 +493,8 @@ static void p_except(AsmDriver* d, u32 form) { /* Read optional `, lsl|lsr|asr|ror #imm` shift modifier. Returns 1 if * present. */ static int parse_shift_mod(AsmDriver* d, u32* shift_out, u32* imm6_out) { - Tok t = asm_driver_peek(d); - if (t.kind != TOK_IDENT) return 0; + AsmTok t = asm_driver_peek(d); + if (t.kind != ASM_TOK_IDENT) return 0; size_t n = 0; const char* p = pool_str(asm_driver_pool(d), t.v.ident, &n); u32 sh; @@ -523,8 +523,8 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) { expect_comma(d, "add/sub"); AA64Reg rn = parse_reg(d); expect_comma(d, "add/sub"); - Tok t = asm_driver_peek(d); - if (tok_punct(t, '#') || t.kind == TOK_NUM || tok_punct(t, '-') || + AsmTok t = asm_driver_peek(d); + if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') || tok_punct(t, '+')) { /* immediate form */ if (rd.is64 != rn.is64) @@ -538,8 +538,8 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) { i64 imm = parse_imm_const(d); u32 sh = 0; if (asm_driver_eat_comma(d)) { - Tok lid = asm_driver_next(d); - if (lid.kind != TOK_IDENT) + AsmTok lid = asm_driver_next(d); + if (lid.kind != ASM_TOK_IDENT) asm_driver_panic(d, "asm: expected 'lsl #12'"); size_t ln = 0; const char* lp = pool_str(asm_driver_pool(d), lid.v.ident, &ln); @@ -581,17 +581,17 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) { static void p_cmp(AsmDriver* d, int is_neg /* cmn flips op */) { AA64Reg rn = parse_reg(d); expect_comma(d, "cmp"); - Tok t = asm_driver_peek(d); - if (tok_punct(t, '#') || t.kind == TOK_NUM || tok_punct(t, '-') || + AsmTok t = asm_driver_peek(d); + if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') || tok_punct(t, '+')) { require_sp_spelling(d, rn, "cmp imm"); i64 imm = parse_imm_const(d); u32 sh = 0; if (asm_driver_eat_comma(d)) { - Tok lid = asm_driver_next(d); + AsmTok lid = asm_driver_next(d); size_t ln = 0; const char* lp = - (lid.kind == TOK_IDENT) + (lid.kind == ASM_TOK_IDENT) ? pool_str(asm_driver_pool(d), lid.v.ident, &ln) : NULL; if (!lp || !icase_eq(lp, ln, "lsl")) @@ -1173,36 +1173,36 @@ static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text, } if (i == len) return; - Lexer* lx = lex_open_mem(a->c, "<inline-asm>", text, len); + AsmLexer* lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len); AsmDriver* d = asm_driver_open_inline(a->c, mc, lx); /* The first non-trivial token must be the mnemonic identifier (or a * `.directive`, but inline asm doesn't normally use directives — leave * that path unsupported until needed). */ - Tok t = asm_driver_peek(d); - while (t.kind == TOK_NEWLINE || t.kind == TOK_PP_HASH) { + AsmTok t = asm_driver_peek(d); + while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) { (void)asm_driver_next(d); - if (t.kind == TOK_PP_HASH) { + if (t.kind == ASM_TOK_HASH) { /* Skip cpp linemarker rest of line. */ while (!asm_driver_at_eol(d)) (void)asm_driver_next(d); } t = asm_driver_peek(d); } - if (t.kind == TOK_EOF) { + if (t.kind == ASM_TOK_EOF) { asm_driver_close_inline(d); - lex_close(lx); + asm_lex_close(lx); return; } - if (t.kind != TOK_IDENT) + if (t.kind != ASM_TOK_IDENT) inline_panic(a, "expected mnemonic at start of inline asm line"); (void)asm_driver_next(d); Sym mn = t.v.ident; /* Compose `b.eq` etc. — same trick as the standalone driver. */ - Tok dot = asm_driver_peek(d); + AsmTok dot = asm_driver_peek(d); if (asm_driver_tok_is_punct(dot, '.')) { (void)asm_driver_next(d); - Tok rest = asm_driver_next(d); - if (rest.kind != TOK_IDENT) + AsmTok rest = asm_driver_next(d); + if (rest.kind != ASM_TOK_IDENT) inline_panic(a, "composite mnemonic: expected ident after '.'"); size_t hn = 0, rn = 0; const char* hp = pool_str(asm_driver_pool(d), mn, &hn); @@ -1217,7 +1217,7 @@ static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text, } aa64_asm_insn(a, d, mn); asm_driver_close_inline(d); - lex_close(lx); + asm_lex_close(lx); } /* Substitute placeholders into one line's StrBuf, then dispatch. diff --git a/src/arch/aa64_asm.h b/src/arch/aa64_asm.h @@ -13,7 +13,7 @@ * resolution and label management live on the driver side. */ #include "core/core.h" -#include "lex/lex.h" +#include "asm/asm_lex.h" typedef struct AsmDriver AsmDriver; @@ -27,8 +27,8 @@ void aa64_asm_close(AA64Asm*); /* Parse one mnemonic line. `mnemonic` is the first identifier on the * line (or "b.cond" composite). The driver has already consumed the * mnemonic identifier and any trailing dot-suffix. This function - * consumes operands up to (but not including) the next TOK_NEWLINE or - * TOK_EOF, and writes the encoded instruction(s) through the driver's + * consumes operands up to (but not including) the next ASM_TOK_NEWLINE or + * ASM_TOK_EOF, and writes the encoded instruction(s) through the driver's * MCEmitter. Diagnostics on parse failure go through compiler_panic. */ void aa64_asm_insn(AA64Asm*, AsmDriver*, Sym mnemonic); diff --git a/src/arch/mc.c b/src/arch/mc.c @@ -1,6 +1,6 @@ /* Generic MCEmitter implementation. * - * MCEmitter sits between CGTarget (or parse_asm) and ObjBuilder. It owns + * MCEmitter sits between CGTarget (or asm_parse) and ObjBuilder. It owns * the current section, byte position, machine label table, and forwards * relocations / source-location stamps. Encoding is the caller's job — * MCEmitter writes whatever bytes it's handed. diff --git a/src/asm/asm.c b/src/asm/asm.c @@ -0,0 +1,1032 @@ +/* GNU-as compatible assembler driver — arch-agnostic. + * + * Reads tokens from an AsmLexer, dispatches directives, manages labels and + * section state, and forwards mnemonic lines to the per-arch instruction + * parser. Output goes through MCEmitter against an ObjBuilder. + * + * AsmLexer quirks worked around here: + * - `#` is both the immediate marker in asm and the token used for + * preprocessed-assembler line markers. + * `#` at BOL is a cpp linemarker → skip to next newline; elsewhere + * the per-arch parser treats it as the immediate prefix. + * - composite mnemonics (`b.eq`, `b.ne`, ...) arrive as IDENT '.' IDENT + * and are reassembled before dispatch. + * - `.text` etc. arrive as PUNCT('.') + IDENT and are stitched here. + * + * Symbol bookkeeping: a Sym→ObjSymId map records the symbols introduced + * by labels, `.globl`, and operand references so a forward reference + * (`b foo` before `foo:`) shares one symbol with its later definition. + * A second Sym→AsmEqu map carries `.set`/`.equ` constants. */ + +#include "asm/asm.h" + +#include <stdarg.h> +#include <string.h> + +#include "arch/aa64_asm.h" +#include "arch/arch.h" +#include "asm/asm_helpers.h" +#include "asm/asm_lex.h" +#include "core/arena.h" +#include "core/hashmap.h" +#include "core/heap.h" +#include "core/pool.h" +#include "obj/obj.h" + +HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32); +HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32); + +typedef struct AsmEqu { + i64 value; + ObjSymId sym; /* nonzero when value is `sym + offset` */ + u8 has_sym; + u8 pad[3]; +} AsmEqu; +HASHMAP_DEFINE(SymEquMap, Sym, AsmEqu, hash_u32); + +struct AsmDriver { + Compiler* c; + AsmLexer* lex; + MCEmitter* mc; + ObjBuilder* ob; + Pool* pool; + Heap* heap; + + AsmTok cur; + int has_cur; + + /* OBJ_SEC_NONE until first emit / explicit `.text` etc. */ + ObjSecId cur_sec; + + SymSecMap sec_map; + SymSymMap sym_map; + SymEquMap equ_map; + + Sym n_text, n_data, n_rodata, n_bss; + + /* Per-arch handle. Phase-3 ships aa64 only; phase-5 adds dispatch. */ + AA64Asm* aa64; +}; + +/* ---- token plumbing ---- */ + +static AsmTok d_peek(AsmDriver* d) { + if (!d->has_cur) { + d->cur = asm_lex_next(d->lex); + d->has_cur = 1; + } + return d->cur; +} + +static AsmTok d_next(AsmDriver* d) { + AsmTok t = d_peek(d); + d->has_cur = 0; + return t; +} + +static int d_is_eol(AsmDriver* d) { + AsmTok t = d_peek(d); + return t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_EOF; +} + +static void d_skip_to_eol(AsmDriver* d) { + while (!d_is_eol(d)) (void)d_next(d); +} + +static void d_eat_eol(AsmDriver* d) { + AsmTok t = d_peek(d); + if (t.kind == ASM_TOK_NEWLINE) (void)d_next(d); +} + +static SrcLoc d_loc(AsmDriver* d) { + if (d->has_cur) return d->cur.loc; + return asm_lex_loc(d->lex); +} + +_Noreturn static void d_panicf(AsmDriver* d, const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + compiler_panicv(d->c, d_loc(d), fmt, ap); + /* unreachable; va_end omitted because compiler_panicv is _Noreturn */ +} + +/* ---- spelling helpers ---- */ + +static const char* asm_str(AsmDriver* d, Sym s, size_t* nout) { + return pool_str(d->pool, s, nout); +} + +static int sym_eq(AsmDriver* d, Sym s, const char* lit) { + size_t n = 0; + const char* p = asm_str(d, s, &n); + size_t i; + if (!p) return 0; + for (i = 0; i < n; ++i) { + if (!lit[i] || p[i] != lit[i]) return 0; + } + return lit[n] == '\0'; +} + +static int starts_with(AsmDriver* d, Sym s, const char* prefix) { + size_t n = 0; + const char* p = asm_str(d, s, &n); + size_t i; + if (!p) return 0; + for (i = 0; prefix[i]; ++i) { + if (i >= n || p[i] != prefix[i]) return 0; + } + return 1; +} + +/* ---- section management ---- */ + +static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind, u16 flags, + u32 align) { + ObjSecId* hit = SymSecMap_get(&d->sec_map, name); + if (hit) return *hit; + ObjSecId id = obj_section(d->ob, name, kind, flags, align); + SymSecMap_set(&d->sec_map, name, id); + return id; +} + +static void set_section(AsmDriver* d, Sym name, SecKind kind, u16 flags, + u32 align) { + ObjSecId id = ensure_section(d, name, kind, flags, align); + d->cur_sec = id; + d->mc->set_section(d->mc, id); +} + +/* ---- symbol management ---- */ + +static ObjSymId intern_sym(AsmDriver* d, Sym name) { + ObjSymId* hit = SymSymMap_get(&d->sym_map, name); + if (hit) return *hit; + ObjSymId id = obj_symbol_find(d->ob, name); + if (id == OBJ_SYM_NONE) { + id = obj_symbol_ex(d->ob, name, SB_LOCAL, SV_DEFAULT, SK_NOTYPE, + OBJ_SEC_NONE, 0, 0, 0); + } + SymSymMap_set(&d->sym_map, name, id); + return id; +} + +static ObjSym* sym_mut(AsmDriver* d, ObjSymId id) { + /* obj.h gives us a const view via obj_symbol_get; the underlying + * record lives in the builder's arena and is safe to mutate + * pre-finalize. Wrapping the cast keeps the const-stripping in + * one place. */ + return (ObjSym*)obj_symbol_get(d->ob, id); +} + +/* ---- expression evaluator (constants + sym ± const) ---- */ + +typedef struct AsmExpr { + ObjSymId sym; + i64 value; +} AsmExpr; + +static AsmExpr expr_c(i64 v) { + AsmExpr e = {OBJ_SYM_NONE, v}; + return e; +} +static AsmExpr expr_s(ObjSymId s, i64 v) { + AsmExpr e = {s, v}; + return e; +} + +static int tok_is_punct(AsmTok t, u32 p) { + return t.kind == ASM_TOK_PUNCT && t.v.punct == p; +} + +static i64 lit_to_i64(AsmDriver* d, Sym spelling) { + size_t n = 0; + const char* p = asm_str(d, spelling, &n); + u64 v = 0; + int base = 10; + size_t i = 0; + if (!p || !n) return 0; + if (n >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + base = 16; + i = 2; + } else if (n >= 2 && p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) { + base = 2; + i = 2; + } else if (n >= 1 && p[0] == '0') { + base = 8; + i = 1; + } + for (; i < n; ++i) { + char c = p[i]; + u32 dv; + if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; + if (c >= '0' && c <= '9') + dv = (u32)(c - '0'); + else if (c >= 'a' && c <= 'f') + dv = 10 + (u32)(c - 'a'); + else if (c >= 'A' && c <= 'F') + dv = 10 + (u32)(c - 'A'); + else + d_panicf(d, "asm: bad digit in integer literal"); + if (dv >= (u32)base) d_panicf(d, "asm: digit out of base"); + v = v * (u64)base + dv; + } + return (i64)v; +} + +static AsmExpr parse_expr(AsmDriver*); +static AsmExpr parse_unary(AsmDriver*); + +static AsmExpr parse_primary(AsmDriver* d) { + AsmTok t = d_peek(d); + if (t.kind == ASM_TOK_NUM) { + (void)d_next(d); + return expr_c(lit_to_i64(d, t.spelling)); + } + if (t.kind == ASM_TOK_IDENT) { + (void)d_next(d); + AsmEqu* eq = SymEquMap_get(&d->equ_map, t.v.ident); + if (eq) { + if (eq->has_sym) return expr_s(eq->sym, eq->value); + return expr_c(eq->value); + } + return expr_s(intern_sym(d, t.v.ident), 0); + } + if (tok_is_punct(t, '(')) { + (void)d_next(d); + AsmExpr e = parse_expr(d); + AsmTok cl = d_peek(d); + if (!tok_is_punct(cl, ')')) d_panicf(d, "asm: expected ')'"); + (void)d_next(d); + return e; + } + d_panicf(d, "asm: expected expression"); +} + +static AsmExpr parse_unary(AsmDriver* d) { + AsmTok t = d_peek(d); + if (tok_is_punct(t, '-')) { + (void)d_next(d); + AsmExpr e = parse_unary(d); + if (e.sym) d_panicf(d, "asm: unary '-' on symbol"); + return expr_c(-e.value); + } + if (tok_is_punct(t, '+')) { + (void)d_next(d); + return parse_unary(d); + } + if (tok_is_punct(t, '~')) { + (void)d_next(d); + AsmExpr e = parse_unary(d); + if (e.sym) d_panicf(d, "asm: unary '~' on symbol"); + return expr_c(~e.value); + } + return parse_primary(d); +} + +static AsmExpr parse_mul(AsmDriver* d) { + AsmExpr a = parse_unary(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '*') && !tok_is_punct(t, '/') && !tok_is_punct(t, '%')) + return a; + u32 op = t.v.punct; + (void)d_next(d); + AsmExpr b = parse_unary(d); + if (a.sym || b.sym) d_panicf(d, "asm: '*/%%' on symbolic operand"); + if (op == '*') + a.value *= b.value; + else if (op == '/') { + if (!b.value) d_panicf(d, "asm: division by zero"); + a.value /= b.value; + } else { + if (!b.value) d_panicf(d, "asm: modulo by zero"); + a.value %= b.value; + } + } +} + +static AsmExpr parse_add(AsmDriver* d) { + AsmExpr a = parse_mul(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '+') && !tok_is_punct(t, '-')) return a; + u32 op = t.v.punct; + (void)d_next(d); + AsmExpr b = parse_mul(d); + if (op == '+') { + if (a.sym && b.sym) d_panicf(d, "asm: cannot add two symbols"); + if (b.sym) { + a.sym = b.sym; + a.value += b.value; + } else + a.value += b.value; + } else { + if (b.sym) d_panicf(d, "asm: cannot subtract symbol from constant"); + a.value -= b.value; + } + } +} + +static AsmExpr parse_shift(AsmDriver* d) { + AsmExpr a = parse_add(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, ASM_P_SHL) && !tok_is_punct(t, ASM_P_SHR)) return a; + u32 op = t.v.punct; + (void)d_next(d); + AsmExpr b = parse_add(d); + if (a.sym || b.sym) d_panicf(d, "asm: shift on symbolic operand"); + if (op == ASM_P_SHL) + a.value = (i64)((u64)a.value << (b.value & 63)); + else + a.value = a.value >> (b.value & 63); + } +} + +static AsmExpr parse_band(AsmDriver* d) { + AsmExpr a = parse_shift(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '&')) return a; + (void)d_next(d); + AsmExpr b = parse_shift(d); + if (a.sym || b.sym) d_panicf(d, "asm: '&' on symbolic operand"); + a.value &= b.value; + } +} + +static AsmExpr parse_bxor(AsmDriver* d) { + AsmExpr a = parse_band(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '^')) return a; + (void)d_next(d); + AsmExpr b = parse_band(d); + if (a.sym || b.sym) d_panicf(d, "asm: '^' on symbolic operand"); + a.value ^= b.value; + } +} + +static AsmExpr parse_bor(AsmDriver* d) { + AsmExpr a = parse_bxor(d); + for (;;) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '|')) return a; + (void)d_next(d); + AsmExpr b = parse_bxor(d); + if (a.sym || b.sym) d_panicf(d, "asm: '|' on symbolic operand"); + a.value |= b.value; + } +} + +static AsmExpr parse_expr(AsmDriver* d) { return parse_bor(d); } + +/* ---- public helpers exposed to per-arch parser ---- */ + +AsmTok asm_driver_peek(AsmDriver* d) { return d_peek(d); } +AsmTok asm_driver_next(AsmDriver* d) { return d_next(d); } +int asm_driver_at_eol(AsmDriver* d) { return d_is_eol(d); } +SrcLoc asm_driver_loc(AsmDriver* d) { return d_loc(d); } +MCEmitter* asm_driver_mc(AsmDriver* d) { return d->mc; } +ObjBuilder* asm_driver_ob(AsmDriver* d) { return d->ob; } +Compiler* asm_driver_compiler(AsmDriver* d) { return d->c; } +Pool* asm_driver_pool(AsmDriver* d) { return d->pool; } + +_Noreturn void asm_driver_panic(AsmDriver* d, const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + compiler_panicv(d->c, d_loc(d), fmt, ap); +} + +ObjSymId asm_driver_intern_sym(AsmDriver* d, Sym name) { + return intern_sym(d, name); +} + +ObjSecId asm_driver_cur_section(AsmDriver* d) { + if (d->cur_sec == OBJ_SEC_NONE) { + if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text"); + d->cur_sec = + ensure_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4); + d->mc->set_section(d->mc, d->cur_sec); + } + return d->cur_sec; +} + +int asm_driver_eat_comma(AsmDriver* d) { + AsmTok t = d_peek(d); + if (tok_is_punct(t, ',')) { + (void)d_next(d); + return 1; + } + return 0; +} + +int asm_driver_eat_punct(AsmDriver* d, u32 p) { + AsmTok t = d_peek(d); + if (tok_is_punct(t, p)) { + (void)d_next(d); + return 1; + } + /* `#` arrives as ASM_TOK_HASH from the C lexer; accept it as the + * immediate-prefix punctuator here. */ + if (p == '#' && t.kind == ASM_TOK_HASH) { + (void)d_next(d); + return 1; + } + return 0; +} + +void asm_driver_expect_punct(AsmDriver* d, u32 p, const char* what) { + if (!asm_driver_eat_punct(d, p)) + d_panicf(d, "asm: expected '%s' (%s)", "punct", what); +} + +i64 asm_driver_parse_const(AsmDriver* d) { + AsmExpr e = parse_expr(d); + if (e.sym) d_panicf(d, "asm: constant expression expected"); + return e.value; +} + +void asm_driver_parse_sym_expr(AsmDriver* d, ObjSymId* sym_out, i64* off_out) { + AsmExpr e = parse_expr(d); + *sym_out = e.sym; + *off_out = e.value; +} + +int asm_driver_tok_is_punct(AsmTok t, u32 p) { + if (tok_is_punct(t, p)) return 1; + /* `#` arrives as ASM_TOK_HASH from the C lexer. */ + if (p == '#' && t.kind == ASM_TOK_HASH) return 1; + return 0; +} + +/* ---- string-literal decoding ---- */ + +static void decode_string(AsmDriver* d, Sym spelling, u8** out, u32* nout) { + size_t n = 0; + const char* p = asm_str(d, spelling, &n); + /* Skip any encoding prefix (L/u/u8/U). */ + while (n && (*p == 'L' || *p == 'u' || *p == 'U' || *p == '8')) { + ++p; + --n; + } + if (n < 2 || p[0] != '"' || p[n - 1] != '"') + d_panicf(d, "asm: malformed string literal"); + size_t cap = n; + u8* buf = (u8*)d->heap->alloc(d->heap, cap ? cap : 1, 1); + u32 k = 0; + for (size_t i = 1; i + 1 < n; ++i) { + char c = p[i]; + if (c != '\\') { + buf[k++] = (u8)c; + continue; + } + ++i; + if (i + 1 >= n) break; + char e = p[i]; + switch (e) { + case 'n': + buf[k++] = '\n'; + break; + case 't': + buf[k++] = '\t'; + break; + case 'r': + buf[k++] = '\r'; + break; + case '\\': + buf[k++] = '\\'; + break; + case '"': + buf[k++] = '"'; + break; + case '\'': + buf[k++] = '\''; + break; + case '0': + buf[k++] = 0; + break; + case 'b': + buf[k++] = 8; + break; + case 'f': + buf[k++] = 12; + break; + case 'v': + buf[k++] = 11; + break; + case 'a': + buf[k++] = 7; + break; + case 'x': { + u32 v = 0; + int dn = 0; + while (i + 2 < n) { + char h = p[i + 1]; + int dv; + if (h >= '0' && h <= '9') + dv = h - '0'; + else if (h >= 'a' && h <= 'f') + dv = 10 + (h - 'a'); + else if (h >= 'A' && h <= 'F') + dv = 10 + (h - 'A'); + else + break; + v = v * 16 + (u32)dv; + ++i; + if (++dn >= 2) break; + } + buf[k++] = (u8)v; + break; + } + default: + if (e >= '0' && e <= '7') { + u32 v = (u32)(e - '0'); + int dn = 1; + while (dn < 3 && i + 2 < n) { + char h = p[i + 1]; + if (h < '0' || h > '7') break; + v = v * 8 + (u32)(h - '0'); + ++i; + ++dn; + } + buf[k++] = (u8)v; + } else { + buf[k++] = (u8)e; + } + break; + } + } + *out = buf; + *nout = k; +} + +/* ---- directives ---- */ + +static Sym expect_ident(AsmDriver* d, const char* what) { + AsmTok t = d_peek(d); + if (t.kind != ASM_TOK_IDENT) + d_panicf(d, "asm: %s: expected identifier", what); + (void)d_next(d); + return t.v.ident; +} + +static void emit_le(AsmDriver* d, u64 v, u32 width) { + u8 buf[8]; + for (u32 i = 0; i < width; ++i) buf[i] = (u8)(v >> (8 * i)); + (void)asm_driver_cur_section(d); + d->mc->emit_bytes(d->mc, buf, width); +} + +static void emit_int_directive(AsmDriver* d, u32 width) { + for (;;) { + AsmExpr e = parse_expr(d); + if (e.sym) { + RelocKind k; + if (width == 4) + k = R_ABS32; + else if (width == 8) + k = R_ABS64; + else + d_panicf(d, "asm: symbolic .byte/.hword not supported"); + (void)asm_driver_cur_section(d); + u32 ofs = d->mc->pos(d->mc); + u8 zero[8] = {0}; + d->mc->emit_bytes(d->mc, zero, width); + d->mc->emit_reloc_at(d->mc, d->cur_sec, ofs, k, e.sym, e.value, 1, 0); + } else { + emit_le(d, (u64)e.value, width); + } + if (!asm_driver_eat_comma(d)) break; + } +} + +static void do_directive(AsmDriver* d, Sym name) { + if (sym_eq(d, name, "text")) { + if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text"); + set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "data")) { + if (!d->n_data) d->n_data = pool_intern_cstr(d->pool, ".data"); + set_section(d, d->n_data, SEC_DATA, (u16)(SF_ALLOC | SF_WRITE), 8); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "rodata")) { + if (!d->n_rodata) d->n_rodata = pool_intern_cstr(d->pool, ".rodata"); + set_section(d, d->n_rodata, SEC_RODATA, (u16)SF_ALLOC, 8); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "bss")) { + if (!d->n_bss) d->n_bss = pool_intern_cstr(d->pool, ".bss"); + set_section(d, d->n_bss, SEC_BSS, (u16)(SF_ALLOC | SF_WRITE), 8); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "section")) { + Sym sname = 0; + AsmTok t = d_peek(d); + if (t.kind == ASM_TOK_IDENT) { + sname = t.v.ident; + (void)d_next(d); + } else if (t.kind == ASM_TOK_STR) { + size_t n = 0; + const char* p = asm_str(d, t.spelling, &n); + if (n >= 2 && p[0] == '"') sname = pool_intern(d->pool, p + 1, n - 2); + (void)d_next(d); + } else if (tok_is_punct(t, '.')) { + (void)d_next(d); + AsmTok id = d_next(d); + if (id.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .section: bad name"); + size_t ni = 0; + const char* nm = asm_str(d, id.v.ident, &ni); + char buf[128]; + if (ni + 1 >= sizeof buf) d_panicf(d, "asm: .section: name too long"); + buf[0] = '.'; + for (size_t i = 0; i < ni; ++i) buf[i + 1] = nm[i]; + sname = pool_intern(d->pool, buf, ni + 1); + } else { + d_panicf(d, "asm: .section: expected name"); + } + SecKind kind = SEC_OTHER; + u16 flags = 0; + { + size_t nn = 0; + const char* p = asm_str(d, sname, &nn); + if (p) { + if (nn >= 5 && memcmp(p, ".text", 5) == 0) { + kind = SEC_TEXT; + flags = (u16)(SF_ALLOC | SF_EXEC); + } else if (nn >= 7 && memcmp(p, ".rodata", 7) == 0) { + kind = SEC_RODATA; + flags = (u16)SF_ALLOC; + } else if (nn >= 5 && memcmp(p, ".data", 5) == 0) { + kind = SEC_DATA; + flags = (u16)(SF_ALLOC | SF_WRITE); + } else if (nn >= 4 && memcmp(p, ".bss", 4) == 0) { + kind = SEC_BSS; + flags = (u16)(SF_ALLOC | SF_WRITE); + } + } + } + /* Skip optional remainder: flags string, type tag, etc. */ + d_skip_to_eol(d); + set_section(d, sname, kind, flags, 1); + return; + } + if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) { + Sym n = expect_ident(d, ".globl"); + sym_mut(d, intern_sym(d, n))->bind = (u16)SB_GLOBAL; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "local")) { + Sym n = expect_ident(d, ".local"); + sym_mut(d, intern_sym(d, n))->bind = (u16)SB_LOCAL; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "weak")) { + Sym n = expect_ident(d, ".weak"); + sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "hidden")) { + Sym n = expect_ident(d, ".hidden"); + sym_mut(d, intern_sym(d, n))->vis = (u8)SV_HIDDEN; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "protected")) { + Sym n = expect_ident(d, ".protected"); + sym_mut(d, intern_sym(d, n))->vis = (u8)SV_PROTECTED; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "internal")) { + Sym n = expect_ident(d, ".internal"); + sym_mut(d, intern_sym(d, n))->vis = (u8)SV_INTERNAL; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "type")) { + Sym n = expect_ident(d, ".type"); + ObjSymId id = intern_sym(d, n); + if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .type: expected ','"); + AsmTok t = d_next(d); + Sym tag = 0; + if (tok_is_punct(t, '@') || tok_is_punct(t, '%')) { + AsmTok ti = d_next(d); + if (ti.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .type: tag"); + tag = ti.v.ident; + } else if (t.kind == ASM_TOK_IDENT) { + tag = t.v.ident; + } else if (t.kind == ASM_TOK_STR) { + size_t sn = 0; + const char* sp = asm_str(d, t.spelling, &sn); + if (sn >= 2 && sp[0] == '"' && sp[sn - 1] == '"') + tag = pool_intern(d->pool, sp + 1, sn - 2); + } else { + d_panicf(d, "asm: .type: tag"); + } + if (tag && sym_eq(d, tag, "function")) + sym_mut(d, id)->kind = (u16)SK_FUNC; + else if (tag && sym_eq(d, tag, "object")) + sym_mut(d, id)->kind = (u16)SK_OBJ; + else if (tag && sym_eq(d, tag, "tls_object")) + sym_mut(d, id)->kind = (u16)SK_TLS; + else if (tag && sym_eq(d, tag, "gnu_indirect_function")) + sym_mut(d, id)->kind = (u16)SK_IFUNC; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "size")) { + Sym n = expect_ident(d, ".size"); + ObjSymId id = intern_sym(d, n); + if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .size: expected ','"); + /* Recognize `. - NAME`. */ + AsmTok t = d_peek(d); + i64 sz = 0; + if (tok_is_punct(t, '.')) { + (void)d_next(d); + if (tok_is_punct(d_peek(d), '-')) { + (void)d_next(d); + AsmTok rid = d_peek(d); + if (rid.kind == ASM_TOK_IDENT && rid.v.ident == n) { + (void)d_next(d); + const ObjSym* os = obj_symbol_get(d->ob, id); + if (os && os->section_id == d->cur_sec) + sz = (i64)d->mc->pos(d->mc) - (i64)os->value; + } + } + } else { + AsmExpr e = parse_expr(d); + if (!e.sym) sz = e.value; + } + if (sz < 0) sz = 0; + sym_mut(d, id)->size = (u64)sz; + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "byte")) { + emit_int_directive(d, 1); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "hword") || sym_eq(d, name, "short") || + sym_eq(d, name, "2byte")) { + emit_int_directive(d, 2); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "word") || sym_eq(d, name, "long") || + sym_eq(d, name, "int") || sym_eq(d, name, "4byte")) { + emit_int_directive(d, 4); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "quad") || sym_eq(d, name, "8byte") || + sym_eq(d, name, "dword") || sym_eq(d, name, "xword")) { + emit_int_directive(d, 8); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "ascii") || sym_eq(d, name, "asciz") || + sym_eq(d, name, "string")) { + int term = !sym_eq(d, name, "ascii"); + for (;;) { + AsmTok t = d_peek(d); + if (t.kind != ASM_TOK_STR) + d_panicf(d, "asm: .ascii/.string: expected string"); + (void)d_next(d); + u8* buf = NULL; + u32 n = 0; + decode_string(d, t.spelling, &buf, &n); + (void)asm_driver_cur_section(d); + d->mc->emit_bytes(d->mc, buf, n); + if (term) emit_le(d, 0, 1); + d->heap->free(d->heap, buf, n); + if (!asm_driver_eat_comma(d)) break; + } + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "zero") || sym_eq(d, name, "skip") || + sym_eq(d, name, "space")) { + i64 n = asm_driver_parse_const(d); + i64 fill = 0; + if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); + if (n > 0) { + (void)asm_driver_cur_section(d); + d->mc->emit_fill(d->mc, (size_t)n, (u8)fill); + } + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "fill")) { + i64 n = asm_driver_parse_const(d); + i64 size = 1, val = 0; + if (asm_driver_eat_comma(d)) size = asm_driver_parse_const(d); + if (asm_driver_eat_comma(d)) val = asm_driver_parse_const(d); + if (size < 1 || size > 8) d_panicf(d, "asm: .fill: size out of range"); + (void)asm_driver_cur_section(d); + for (i64 i = 0; i < n; ++i) emit_le(d, (u64)val, (u32)size); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "align") || sym_eq(d, name, "balign")) { + i64 a = asm_driver_parse_const(d); + i64 fill = 0; + if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); + if (a <= 0 || (a & (a - 1))) d_panicf(d, "asm: .align: not a power of 2"); + (void)asm_driver_cur_section(d); + d->mc->emit_align(d->mc, (u32)a, (u8)fill); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "p2align")) { + i64 lg = asm_driver_parse_const(d); + i64 fill = 0; + if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); + if (lg < 0 || lg > 16) d_panicf(d, "asm: .p2align: out of range"); + (void)asm_driver_cur_section(d); + d->mc->emit_align(d->mc, 1u << (u32)lg, (u8)fill); + d_skip_to_eol(d); + return; + } + if (sym_eq(d, name, "set") || sym_eq(d, name, "equ")) { + Sym n = expect_ident(d, ".set"); + if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .set: expected ','"); + AsmExpr e = parse_expr(d); + AsmEqu eq; + eq.value = e.value; + eq.sym = e.sym; + eq.has_sym = e.sym ? 1 : 0; + eq.pad[0] = eq.pad[1] = eq.pad[2] = 0; + SymEquMap_set(&d->equ_map, n, eq); + d_skip_to_eol(d); + return; + } + + /* CFI block + accepted-but-ignored directives. Keep parser + * forward-progress without aborting the whole TU. */ + if (starts_with(d, name, "cfi_") || sym_eq(d, name, "file") || + sym_eq(d, name, "loc") || sym_eq(d, name, "ident") || + sym_eq(d, name, "popsection") || sym_eq(d, name, "pushsection") || + sym_eq(d, name, "previous") || + sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "comm") || + sym_eq(d, name, "lcomm") || sym_eq(d, name, "uleb128") || + sym_eq(d, name, "sleb128") || sym_eq(d, name, "macro") || + sym_eq(d, name, "endm") || sym_eq(d, name, "if") || + sym_eq(d, name, "endif") || sym_eq(d, name, "else") || + sym_eq(d, name, "include")) { + d_skip_to_eol(d); + return; + } + + /* Unknown directive — recover. */ + d_skip_to_eol(d); +} + +/* ---- driver loop ---- */ + +static void process_label(AsmDriver* d, Sym name) { + ObjSymId id = intern_sym(d, name); + (void)asm_driver_cur_section(d); + const ObjSym* os = obj_symbol_get(d->ob, id); + if (os && os->section_id != OBJ_SEC_NONE) + d_panicf(d, "asm: symbol defined twice"); + obj_symbol_define(d->ob, id, d->cur_sec, (u64)d->mc->pos(d->mc), 0); + /* Promote SK_UNDEF (forward ref via reloc) to SK_NOTYPE so it's a + * real defined symbol; explicit `.type SYM, @function` will refine. */ + if (os && os->kind == SK_UNDEF) sym_mut(d, id)->kind = (u16)SK_NOTYPE; +} + +static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) { + AsmTok t = d_peek(d); + if (!tok_is_punct(t, '.')) return head; + if (t.flags & ASM_TF_HAS_SPACE) return head; + (void)d_next(d); + AsmTok rest = d_next(d); + if (rest.kind != ASM_TOK_IDENT) + d_panicf(d, "asm: composite mnemonic: expected ident"); + size_t hn = 0, rn = 0; + const char* hp = asm_str(d, head, &hn); + const char* rp = asm_str(d, rest.v.ident, &rn); + size_t n = hn + 1 + rn; + if (n >= 64) d_panicf(d, "asm: mnemonic too long"); + char buf[64]; + for (size_t i = 0; i < hn; ++i) buf[i] = hp[i]; + buf[hn] = '.'; + for (size_t i = 0; i < rn; ++i) buf[hn + 1 + i] = rp[i]; + return pool_intern(d->pool, buf, n); +} + +/* ---- inline-asm driver constructor ---- + * + * Inline-asm template walkers (per-arch) re-lex pre-substituted source + * text through the same per-mnemonic parsers used by the standalone .s + * driver. This constructor builds a minimally-initialized AsmDriver + * around a caller-supplied memory-backed AsmLexer + MCEmitter. + * + * The driver does not own the AsmLexer or MCEmitter, does not allocate a + * default section (inline asm emits into whatever section the wrapping + * cg has selected on its MCEmitter), and skips the standalone driver's + * per-arch handle (`d->aa64`) — the caller has already opened its own + * AA64Asm to thread per-block bound state through. */ +AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, AsmLexer* lex) { + Heap* heap = (Heap*)c->env->heap; + AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver)); + memset(d, 0, sizeof *d); + d->c = c; + d->lex = lex; + d->mc = mc; + d->ob = mc->obj; + d->pool = c->global; + d->heap = heap; + /* The MCEmitter's section is whatever cg has set; do not override it. + * cur_sec == OBJ_SEC_NONE means "ask the MCEmitter on demand" — we use + * mc->section_id directly via asm_driver_cur_section's lazy init for + * standalone, but inline asm should never reach that path because the + * MCEmitter already has its section. Pre-seed cur_sec from the + * MCEmitter so emit_reloc_at calls get the right section id. */ + d->cur_sec = mc->section_id; + SymSecMap_init(&d->sec_map, heap); + SymSymMap_init(&d->sym_map, heap); + SymEquMap_init(&d->equ_map, heap); + d->aa64 = NULL; /* caller owns its own AA64Asm */ + return d; +} + +void asm_driver_close_inline(AsmDriver* d) { + if (!d) return; + SymSecMap_fini(&d->sec_map); + SymSymMap_fini(&d->sym_map); + SymEquMap_fini(&d->equ_map); + Heap* heap = d->heap; + heap->free(heap, d, sizeof *d); +} + +void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) { + AsmDriver d; + memset(&d, 0, sizeof d); + d.c = c; + d.lex = l; + d.mc = mc; + d.ob = mc->obj; + d.pool = c->global; + d.heap = (Heap*)c->env->heap; + d.cur_sec = OBJ_SEC_NONE; + SymSecMap_init(&d.sec_map, d.heap); + SymSymMap_init(&d.sym_map, d.heap); + SymEquMap_init(&d.equ_map, d.heap); + d.aa64 = aa64_asm_open(c); + + for (;;) { + AsmTok t = d_peek(&d); + if (t.kind == ASM_TOK_EOF) break; + if (t.kind == ASM_TOK_NEWLINE) { + (void)d_next(&d); + continue; + } + if (t.kind == ASM_TOK_HASH) { + /* cpp-style linemarker; skip the whole line. */ + d_skip_to_eol(&d); + continue; + } + if (tok_is_punct(t, '.')) { + (void)d_next(&d); + AsmTok id = d_next(&d); + if (id.kind != ASM_TOK_IDENT) + d_panicf(&d, "asm: expected directive name after '.'"); + do_directive(&d, id.v.ident); + d_eat_eol(&d); + continue; + } + if (t.kind == ASM_TOK_IDENT) { + Sym head = t.v.ident; + (void)d_next(&d); + AsmTok nxt = d_peek(&d); + if (tok_is_punct(nxt, ':')) { + (void)d_next(&d); + process_label(&d, head); + continue; + } + Sym mnemonic = maybe_compose_mnemonic(&d, head); + aa64_asm_insn(d.aa64, &d, mnemonic); + d_skip_to_eol(&d); + continue; + } + /* Anything else: recover by skipping the line. */ + d_skip_to_eol(&d); + } + + aa64_asm_close(d.aa64); + SymSecMap_fini(&d.sec_map); + SymSymMap_fini(&d.sym_map); + SymEquMap_fini(&d.equ_map); +} diff --git a/src/asm/asm.h b/src/asm/asm.h @@ -0,0 +1,11 @@ +#ifndef CFREE_ASM_H +#define CFREE_ASM_H + +#include "arch/arch.h" +#include "asm/asm_lex.h" + +/* Standalone assembler. Reads tokens directly from an AsmLexer; emits via + * MCEmitter. */ +void asm_parse(Compiler*, AsmLexer*, MCEmitter*); + +#endif diff --git a/src/asm/asm_helpers.h b/src/asm/asm_helpers.h @@ -0,0 +1,63 @@ +#ifndef CFREE_ASM_HELPERS_H +#define CFREE_ASM_HELPERS_H + +/* Lightweight asm-driver surface consumed by per-arch instruction + * parsers. The driver itself is opaque to per-arch code; these helpers + * are the only seam. Implementations live in src/asm/asm.c. */ + +#include "arch/arch.h" +#include "asm/asm_lex.h" +#include "core/core.h" +#include "obj/obj.h" + +typedef struct AsmDriver AsmDriver; + +/* ---- token plumbing ---- */ +AsmTok asm_driver_peek(AsmDriver*); +AsmTok asm_driver_next(AsmDriver*); +int asm_driver_at_eol(AsmDriver*); +int asm_driver_tok_is_punct(AsmTok t, u32 p); +int asm_driver_eat_comma(AsmDriver*); +int asm_driver_eat_punct(AsmDriver*, u32 punct); +void asm_driver_expect_punct(AsmDriver*, u32 punct, const char* what); + +/* Source position for diagnostics. */ +SrcLoc asm_driver_loc(AsmDriver*); + +/* Owning subsystems. */ +MCEmitter* asm_driver_mc(AsmDriver*); +ObjBuilder* asm_driver_ob(AsmDriver*); +Compiler* asm_driver_compiler(AsmDriver*); +Pool* asm_driver_pool(AsmDriver*); +ObjSecId asm_driver_cur_section(AsmDriver*); + +/* Diagnostics: emits then longjmps via Compiler.panic. No return. */ +_Noreturn void asm_driver_panic(AsmDriver*, const char* fmt, ...); + +/* ---- symbol + expression parsing ---- */ +ObjSymId asm_driver_intern_sym(AsmDriver*, Sym name); + +/* Parse a constant integer expression. Panics if the expression + * references a symbol. */ +i64 asm_driver_parse_const(AsmDriver*); + +/* Parse a `sym ± const` expression. Both outputs valid: pure constants + * leave *sym_out == OBJ_SYM_NONE. */ +void asm_driver_parse_sym_expr(AsmDriver*, ObjSymId* sym_out, i64* off_out); + +/* ---- inline-asm constructor ---- + * + * Build an AsmDriver around a memory-backed AsmLexer + caller-supplied + * MCEmitter. Used by inline-asm template walkers (one driver per asm + * line) to reuse the existing per-arch instruction parsers verbatim + * over a substituted source buffer. + * + * The driver is heap-allocated through c->env->heap and must be released + * with asm_driver_close_inline. It does not own the AsmLexer or the + * MCEmitter — the caller retains ownership of both. The driver does + * not initialize a default section; inline asm always emits into the + * MCEmitter's currently-active section. */ +AsmDriver* asm_driver_open_inline(Compiler*, MCEmitter*, AsmLexer*); +void asm_driver_close_inline(AsmDriver*); + +#endif diff --git a/src/asm/asm_lex.c b/src/asm/asm_lex.c @@ -0,0 +1,705 @@ +/* Assembler lexer. Streams tokens out of a borrowed source buffer. + * + * It intentionally keeps C-like number/string spelling rules because .S + * sources arrive after C preprocessing and GNU as accepts those spellings + * in directives and expressions. It does not own macro expansion or C + * keyword classification. + * + * Comments are consumed as whitespace; physical newlines surface as + * ASM_TOK_NEWLINE so the asm driver can keep line-oriented directive and + * instruction parsing. */ + +#include "asm/asm_lex.h" + +#include <string.h> + +#include "core/heap.h" +#include "core/pool.h" + +struct AsmLexer { + Compiler* c; + Pool* pool; + Heap* heap; + const char* src; + size_t len; + size_t pos; + u32 file_id; + u32 line; + u32 col; + u8 at_bol; + u8 had_space; +}; + +/* §5.1.1.2 translation phase 2: splice physical lines joined by + * backslash-newline. Advance past any splice sequence at l->pos so the + * cursor never rests on the leading backslash of a splice. */ +static void skip_splices(AsmLexer* l) { + while (l->pos + 1 < l->len && l->src[l->pos] == '\\' && + l->src[l->pos + 1] == '\n') { + l->pos += 2; + l->line++; + l->col = 1; + } +} + +/* Logical peek: returns the off-th post-splice byte starting at l->pos, + * or -1 at end of input. Does not mutate l->pos. */ +static int peek(const AsmLexer* l, size_t off) { + size_t pos = l->pos; + size_t k = 0; + while (pos < l->len) { + if (pos + 1 < l->len && l->src[pos] == '\\' && l->src[pos + 1] == '\n') { + pos += 2; + continue; + } + if (k == off) return (unsigned char)l->src[pos]; + ++pos; + ++k; + } + return -1; +} + +static int bump(AsmLexer* l) { + int ch; + skip_splices(l); + if (l->pos >= l->len) return -1; + ch = (unsigned char)l->src[l->pos++]; + if (ch == '\n') { + l->line++; + l->col = 1; + } else { + l->col++; + } + return ch; +} + +static int is_digit(int c) { return c >= '0' && c <= '9'; } +static int is_hex_digit(int c) { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); +} +/* Identifier-start byte (§6.4.2.1). Letters and underscore are ASCII; bytes + * ≥ 0x80 are accepted as the implementation-defined "other characters" + * permitted in identifiers — in practice UTF-8 lead/continuation bytes for + * extended source characters. UCNs are matched separately via ucn_len since + * they span multiple source bytes. */ +static int is_alpha(int c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || + c >= 0x80; +} +static int is_alnum(int c) { return is_alpha(c) || is_digit(c); } + +/* Match a UCN at offset `off` from the current position. Returns the total + * length (6 for \uXXXX, 10 for \UXXXXXXXX), or 0 if no UCN matches. The + * range constraints from §6.4.3 (no UCN < 00A0 except $/@/`, and none in + * D800–DFFF) are not enforced here — the lexical form is matched and any + * downstream phase that cares can diagnose. */ +static int ucn_len(const AsmLexer* l, size_t off) { + int n, i; + if (peek(l, off) != '\\') return 0; + if (peek(l, off + 1) == 'u') + n = 4; + else if (peek(l, off + 1) == 'U') + n = 8; + else + return 0; + for (i = 0; i < n; ++i) { + if (!is_hex_digit(peek(l, off + 2 + i))) return 0; + } + return 2 + n; +} + +static SrcLoc asm_lex_here(const AsmLexer* l) { + SrcLoc loc; + loc.file_id = l->file_id; + loc.line = l->line; + loc.col = l->col; + return loc; +} + +AsmLexer* asm_lex_open_mem(Compiler* c, const char* name, const char* src, + size_t len) { + Heap* h = (Heap*)c->env->heap; + AsmLexer* l = (AsmLexer*)h->alloc(h, sizeof(*l), _Alignof(AsmLexer)); + if (!l) return NULL; + memset(l, 0, sizeof(*l)); + l->c = c; + l->pool = c->global; + l->heap = h; + l->src = src ? src : ""; + l->len = src ? len : 0; + l->pos = 0; + l->file_id = source_add_memory(c->sources, name); + l->line = 1; + l->col = 1; + l->at_bol = 1; + l->had_space = 0; + return l; +} + +void asm_lex_close(AsmLexer* l) { + if (!l) return; + l->heap->free(l->heap, l, sizeof(*l)); +} + +SrcLoc asm_lex_loc(const AsmLexer* l) { return asm_lex_here(l); } +u32 asm_lex_file_id(const AsmLexer* l) { return l->file_id; } +const AsmLitInfo* asm_lex_lit(const AsmLexer* l, AsmLitId id) { + (void)l; + (void)id; + return NULL; +} + +/* Intern bytes [start, end) with line splices (\<newline>) removed, so token + * spellings reflect post-phase-2 logical text. */ +static Sym intern_spliced(AsmLexer* l, size_t start, size_t end) { + size_t i; + int has_splice = 0; + char* buf; + size_t k; + Sym sym; + + for (i = start; i + 1 < end; ++i) { + if (l->src[i] == '\\' && l->src[i + 1] == '\n') { + has_splice = 1; + break; + } + } + if (!has_splice) return pool_intern(l->pool, l->src + start, end - start); + + buf = (char*)l->heap->alloc(l->heap, end - start, 1); + k = 0; + for (i = start; i < end;) { + if (i + 1 < end && l->src[i] == '\\' && l->src[i + 1] == '\n') { + i += 2; + continue; + } + buf[k++] = l->src[i++]; + } + sym = pool_intern(l->pool, buf, k); + l->heap->free(l->heap, buf, end - start); + return sym; +} + +/* Skip whitespace and comments. Returns 1 if a newline boundary was crossed + * via comment consumption (caller still emits the explicit newline token on + * an in-source '\n'). */ +static void skip_ws_and_comments(AsmLexer* l) { + for (;;) { + int ch = peek(l, 0); + if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\v' || ch == '\f') { + bump(l); + l->had_space = 1; + continue; + } + if (ch == '/' && peek(l, 1) == '/') { + bump(l); + bump(l); + while (peek(l, 0) >= 0 && peek(l, 0) != '\n') bump(l); + l->had_space = 1; + continue; + } + if (ch == '/' && peek(l, 1) == '*') { + bump(l); + bump(l); + while (peek(l, 0) >= 0) { + if (peek(l, 0) == '*' && peek(l, 1) == '/') { + bump(l); + bump(l); + break; + } + bump(l); + } + l->had_space = 1; + continue; + } + break; + } +} + +/* Consume a pp-number per §6.4.8. The cursor is positioned at the leading + * digit (or `.` followed by a digit) on entry. */ +static void scan_pp_number(AsmLexer* l) { + if (peek(l, 0) == '.') bump(l); + bump(l); /* first digit */ + while (l->pos < l->len) { + int c = peek(l, 0); + int n = peek(l, 1); + if ((c == 'e' || c == 'E' || c == 'p' || c == 'P') && + (n == '+' || n == '-')) { + bump(l); + bump(l); + } else if (is_alnum(c) || c == '.') { + bump(l); + } else { + break; + } + } +} + +/* 1 if the pp-number text is a floating constant (§6.4.4.2): contains a + * radix `.`, a hex `p`/`P` exponent, or a decimal `e`/`E` exponent. */ +static int pp_number_is_float(const char* s, size_t n) { + int is_hex = 0; + size_t i = 0; + if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + is_hex = 1; + i = 2; + } + for (; i < n; ++i) { + char c = s[i]; + if (c == '.') return 1; + if (is_hex && (c == 'p' || c == 'P')) return 1; + if (!is_hex && (c == 'e' || c == 'E')) { + if (i + 1 < n) { + char nx = s[i + 1]; + if (nx == '+' || nx == '-' || (nx >= '0' && nx <= '9')) return 1; + } + } + } + return 0; +} + +/* Consume a quoted body — string ('"') or character ('\''). The cursor is + * positioned at the opening quote on entry. Returns 1 on an unterminated or + * newline-broken literal, 0 on a clean close. */ +static int scan_quoted(AsmLexer* l, int quote) { + bump(l); /* opening quote */ + for (;;) { + int ch = peek(l, 0); + if (ch < 0) return 1; + if (ch == quote) { + bump(l); + return 0; + } + if (ch == '\n') return 1; + if (ch == '\\') { + bump(l); /* backslash */ + if (peek(l, 0) < 0) return 1; + bump(l); /* the escaped char */ + continue; + } + bump(l); + } +} + +AsmTok asm_lex_next(AsmLexer* l) { + AsmTok t; + SrcLoc tloc; + size_t start; + int ch; + + memset(&t, 0, sizeof(t)); + + /* Skip whitespace and comments. A newline token is emitted before any + * subsequent content tokens for the line that follows. */ + for (;;) { + skip_ws_and_comments(l); + skip_splices(l); + if (l->pos >= l->len) { + t.kind = ASM_TOK_EOF; + t.loc = asm_lex_here(l); + return t; + } + if (peek(l, 0) == '\n') { + tloc = asm_lex_here(l); + bump(l); + t.kind = ASM_TOK_NEWLINE; + t.loc = tloc; + l->at_bol = 1; + l->had_space = 0; + return t; + } + break; + } + + tloc = asm_lex_here(l); + start = l->pos; + ch = peek(l, 0); + + if (l->at_bol) t.flags |= ASM_TF_AT_BOL; + if (l->had_space) t.flags |= ASM_TF_HAS_SPACE; + l->at_bol = 0; + l->had_space = 0; + t.loc = tloc; + + /* String / character literal, with optional encoding prefix. The prefix + * length and encoding flag are decoded together so the spelling we + * intern includes the prefix bytes. */ + { + int sp_len = -1; + int is_char = 0; + u32 encf = 0; + + if (ch == '"') { + sp_len = 0; + is_char = 0; + } else if (ch == '\'') { + sp_len = 0; + is_char = 1; + } else if (ch == 'L' && peek(l, 1) == '"') { + sp_len = 1; + is_char = 0; + encf = ASM_TF_STR_WIDE; + } else if (ch == 'L' && peek(l, 1) == '\'') { + sp_len = 1; + is_char = 1; + encf = ASM_TF_STR_WIDE; + } else if (ch == 'u' && peek(l, 1) == '8' && peek(l, 2) == '"') { + sp_len = 2; + is_char = 0; + encf = ASM_TF_STR_U8; + } else if (ch == 'u' && peek(l, 1) == '"') { + sp_len = 1; + is_char = 0; + encf = ASM_TF_STR_U16; + } else if (ch == 'u' && peek(l, 1) == '\'') { + sp_len = 1; + is_char = 1; + encf = ASM_TF_STR_U16; + } else if (ch == 'U' && peek(l, 1) == '"') { + sp_len = 1; + is_char = 0; + encf = ASM_TF_STR_U32; + } else if (ch == 'U' && peek(l, 1) == '\'') { + sp_len = 1; + is_char = 1; + encf = ASM_TF_STR_U32; + } + + if (sp_len >= 0) { + int i; + for (i = 0; i < sp_len; ++i) bump(l); + if (scan_quoted(l, is_char ? '\'' : '"')) t.flags |= ASM_TF_LITERAL_BAD; + t.kind = (u16)(is_char ? ASM_TOK_CHR : ASM_TOK_STR); + t.flags |= encf; + t.spelling = intern_spliced(l, start, l->pos); + t.v.str = t.spelling; + return t; + } + } + + /* Identifier (§6.4.2). Encoding-prefix candidates above are matched + * before this since L/u/U followed by a quote is a literal, not an + * identifier. The grammar's identifier-nondigit covers letters, _, + * extended source chars (impl-defined; bytes ≥ 0x80 here), and UCNs + * (§6.4.3) — the latter span multiple source bytes so they're matched + * via ucn_len rather than the per-byte is_alpha predicate. */ + { + int u = ucn_len(l, 0); + if (is_alpha(ch) || u) { + if (u) { + int i; + for (i = 0; i < u; ++i) bump(l); + } else + bump(l); + for (;;) { + int c = peek(l, 0); + if (is_alnum(c)) { + bump(l); + } else if ((u = ucn_len(l, 0))) { + int i; + for (i = 0; i < u; ++i) bump(l); + } else { + break; + } + } + t.kind = ASM_TOK_IDENT; + t.spelling = intern_spliced(l, start, l->pos); + t.v.ident = t.spelling; + return t; + } + } + + /* Preprocessor-number shaped token, classified to ASM_TOK_NUM / + * ASM_TOK_FLT for expression diagnostics and future directive support. */ + if (is_digit(ch) || (ch == '.' && is_digit(peek(l, 1)))) { + size_t plen; + char* pbuf; + size_t i, k; + scan_pp_number(l); + /* Classify on the post-splice text (the spelling we'll intern). */ + plen = l->pos - start; + pbuf = (char*)l->heap->alloc(l->heap, plen ? plen : 1, 1); + k = 0; + for (i = start; i < l->pos;) { + if (i + 1 < l->pos && l->src[i] == '\\' && l->src[i + 1] == '\n') { + i += 2; + continue; + } + pbuf[k++] = l->src[i++]; + } + t.kind = (u16)(pp_number_is_float(pbuf, k) ? ASM_TOK_FLT : ASM_TOK_NUM); + /* Preserve common C-style integer/float suffixes in token flags. The + * current assembler expression evaluator ignores them, but keeping the + * spelling metadata makes the lexer useful for future directive work. */ + if (t.kind == ASM_TOK_FLT) { + size_t j = k; + while (j > 0) { + char c = pbuf[j - 1]; + if (c == 'f' || c == 'F') { + t.flags |= ASM_TF_FLT_F; + --j; + continue; + } + if (c == 'l' || c == 'L') { + t.flags |= ASM_TF_FLT_L; + --j; + continue; + } + break; + } + } else { + size_t j = k; + while (j > 0) { + char c = pbuf[j - 1]; + if (c == 'u' || c == 'U') { + t.flags |= ASM_TF_INT_U; + --j; + continue; + } + if (c == 'l' || c == 'L') { + if (j >= 2 && (pbuf[j - 2] == 'l' || pbuf[j - 2] == 'L')) { + t.flags |= ASM_TF_INT_LL; + j -= 2; + } else { + t.flags |= ASM_TF_INT_L; + --j; + } + continue; + } + break; + } + } + t.spelling = pool_intern(l->pool, pbuf, k); + l->heap->free(l->heap, pbuf, plen ? plen : 1); + return t; + } + + /* Punctuator, longest match. `#` is a distinct token because it is both + * an asm immediate marker and, at BOL in preprocessed assembler, a line + * marker introducer. */ + { + int n0 = peek(l, 0); + int n1 = peek(l, 1); + int n2 = peek(l, 2); + int n3 = peek(l, 3); + int adv = 1; + u32 punct = ASM_P_NONE; + u16 kind = ASM_TOK_PUNCT; + int i; + + switch (n0) { + case '#': + if (n1 == '#') { + adv = 2; + kind = ASM_TOK_HASH_HASH; + punct = ASM_P_HASH_HASH; + } else { + adv = 1; + kind = ASM_TOK_HASH; + punct = '#'; + } + break; + case '.': + if (n1 == '.' && n2 == '.') { + adv = 3; + punct = ASM_P_ELLIPSIS; + } else { + adv = 1; + punct = '.'; + } + break; + case '-': + if (n1 == '>') { + adv = 2; + punct = ASM_P_ARROW; + } else if (n1 == '-') { + adv = 2; + punct = ASM_P_DEC; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_SUB_ASSIGN; + } else { + adv = 1; + punct = '-'; + } + break; + case '+': + if (n1 == '+') { + adv = 2; + punct = ASM_P_INC; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_ADD_ASSIGN; + } else { + adv = 1; + punct = '+'; + } + break; + case '<': + if (n1 == '<' && n2 == '=') { + adv = 3; + punct = ASM_P_SHL_ASSIGN; + } else if (n1 == '<') { + adv = 2; + punct = ASM_P_SHL; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_LE; + } else if (n1 == ':') { + adv = 2; + punct = '['; + } /* digraph */ + else if (n1 == '%') { + adv = 2; + punct = '{'; + } /* digraph */ + else { + adv = 1; + punct = '<'; + } + break; + case '>': + if (n1 == '>' && n2 == '=') { + adv = 3; + punct = ASM_P_SHR_ASSIGN; + } else if (n1 == '>') { + adv = 2; + punct = ASM_P_SHR; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_GE; + } else { + adv = 1; + punct = '>'; + } + break; + case '=': + if (n1 == '=') { + adv = 2; + punct = ASM_P_EQ; + } else { + adv = 1; + punct = '='; + } + break; + case '!': + if (n1 == '=') { + adv = 2; + punct = ASM_P_NE; + } else { + adv = 1; + punct = '!'; + } + break; + case '&': + if (n1 == '&') { + adv = 2; + punct = ASM_P_AND; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_AND_ASSIGN; + } else { + adv = 1; + punct = '&'; + } + break; + case '|': + if (n1 == '|') { + adv = 2; + punct = ASM_P_OR; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_OR_ASSIGN; + } else { + adv = 1; + punct = '|'; + } + break; + case '^': + if (n1 == '=') { + adv = 2; + punct = ASM_P_XOR_ASSIGN; + } else { + adv = 1; + punct = '^'; + } + break; + case '*': + if (n1 == '=') { + adv = 2; + punct = ASM_P_MUL_ASSIGN; + } else { + adv = 1; + punct = '*'; + } + break; + case '/': + if (n1 == '=') { + adv = 2; + punct = ASM_P_DIV_ASSIGN; + } else { + adv = 1; + punct = '/'; + } + break; + case '%': + if (n1 == ':' && n2 == '%' && n3 == ':') { + adv = 4; + kind = ASM_TOK_HASH_HASH; + punct = ASM_P_HASH_HASH; + } else if (n1 == ':') { + adv = 2; + kind = ASM_TOK_HASH; + punct = '#'; + } else if (n1 == '=') { + adv = 2; + punct = ASM_P_MOD_ASSIGN; + } else if (n1 == '>') { + adv = 2; + punct = '}'; + } /* digraph */ + else { + adv = 1; + punct = '%'; + } + break; + case ':': + if (n1 == '>') { + adv = 2; + punct = ']'; + } /* digraph */ + else { + adv = 1; + punct = ':'; + } + break; + case '(': + case ')': + case '{': + case '}': + case '[': + case ']': + case ',': + case ';': + case '?': + case '~': + adv = 1; + punct = (u32)n0; + break; + default: + /* Unknown byte. Surface as a single-char punct so the token + * stream still progresses; PP/parse may diagnose. */ + adv = 1; + punct = (u32)n0; + break; + } + + for (i = 0; i < adv; ++i) bump(l); + t.kind = kind; + t.v.punct = punct; + t.spelling = intern_spliced(l, start, l->pos); + return t; + } +} diff --git a/src/asm/asm_lex.h b/src/asm/asm_lex.h @@ -0,0 +1,111 @@ +#ifndef CFREE_ASM_LEX_H +#define CFREE_ASM_LEX_H + +#include "core/core.h" + +typedef enum AsmTokKind { + ASM_TOK_EOF = 0, + ASM_TOK_IDENT, + ASM_TOK_NUM, + ASM_TOK_FLT, + ASM_TOK_STR, + ASM_TOK_CHR, + ASM_TOK_PUNCT, + ASM_TOK_HASH, + ASM_TOK_HASH_HASH, + ASM_TOK_NEWLINE, +} AsmTokKind; + +typedef enum AsmTokFlag { + ASM_TF_AT_BOL = 1u << 0, + ASM_TF_HAS_SPACE = 1u << 1, + ASM_TF_INT_U = 1u << 3, + ASM_TF_INT_L = 1u << 4, + ASM_TF_INT_LL = 1u << 5, + ASM_TF_FLT_F = 1u << 6, + ASM_TF_FLT_L = 1u << 7, + ASM_TF_STR_WIDE = 1u << 8, + ASM_TF_STR_U8 = 1u << 9, + ASM_TF_STR_U16 = 1u << 10, + ASM_TF_STR_U32 = 1u << 11, + ASM_TF_LITERAL_BAD = 1u << 12, +} AsmTokFlag; + +typedef enum AsmPunct { + ASM_P_NONE = 0, + ASM_P_ARROW = 256, + ASM_P_INC, + ASM_P_DEC, + ASM_P_SHL, + ASM_P_SHR, + ASM_P_LE, + ASM_P_GE, + ASM_P_EQ, + ASM_P_NE, + ASM_P_AND, + ASM_P_OR, + ASM_P_ADD_ASSIGN, + ASM_P_SUB_ASSIGN, + ASM_P_MUL_ASSIGN, + ASM_P_DIV_ASSIGN, + ASM_P_MOD_ASSIGN, + ASM_P_AND_ASSIGN, + ASM_P_OR_ASSIGN, + ASM_P_XOR_ASSIGN, + ASM_P_SHL_ASSIGN, + ASM_P_SHR_ASSIGN, + ASM_P_ELLIPSIS, + ASM_P_HASH_HASH, +} AsmPunct; + +typedef u32 AsmLitId; +#define ASM_LIT_NONE 0u + +typedef enum AsmLitKind { + ASM_LIT_INT, + ASM_LIT_FLOAT, + ASM_LIT_STRING, + ASM_LIT_CHAR, +} AsmLitKind; + +typedef enum AsmLitEnc { + ASM_LENC_ORDINARY, + ASM_LENC_UTF8, + ASM_LENC_WIDE, + ASM_LENC_UTF16, + ASM_LENC_UTF32, +} AsmLitEnc; + +typedef struct AsmLitInfo { + u8 kind; + u8 enc; + u16 flags; + Sym spelling; + BytesId bytes; +} AsmLitInfo; + +typedef struct AsmTok { + u16 kind; + u16 flags; + SrcLoc loc; + Sym spelling; + AsmLitId lit; + union { + Sym ident; + Sym str; + u32 punct; + } v; +} AsmTok; + +typedef struct AsmLexer AsmLexer; + +AsmLexer* asm_lex_open_mem(Compiler*, const char* name, const char* src, + size_t len); +void asm_lex_close(AsmLexer*); + +AsmTok asm_lex_next(AsmLexer*); +SrcLoc asm_lex_loc(const AsmLexer*); +u32 asm_lex_file_id(const AsmLexer*); +const AsmLitInfo* asm_lex_lit(const AsmLexer*, AsmLitId); + +#endif diff --git a/src/core/source.c b/src/core/source.c @@ -1,5 +1,5 @@ /* SourceManager — file-id authority for diagnostics, dependency output, - * and DWARF. The lex/pp/parse subsystems aren't part of the obj/ELF + * and DWARF. The C frontend subsystems aren't part of the obj/ELF * foundation, so this implementation is minimal: it stores a flat array * of registered files and the include-edge list, and exposes lookups. * It does not yet support macro-expansion pseudo files or diff --git a/src/lex/lex.h b/src/lex/lex.h @@ -1,114 +0,0 @@ -#ifndef CFREE_LEX_H -#define CFREE_LEX_H - -#include "core/core.h" - -typedef enum TokKind { - TOK_EOF = 0, - TOK_IDENT, - TOK_NUM, - TOK_FLT, - TOK_STR, - TOK_CHR, - TOK_PUNCT, - TOK_PP_HASH, - TOK_PP_PASTE, - TOK_HEADER, - TOK_NEWLINE, - TOK_KW_FIRST, - TOK_KW_LAST = 0x1000, -} TokKind; - -typedef enum TokFlag { - TF_AT_BOL = 1u << 0, - TF_HAS_SPACE = 1u << 1, - TF_NO_EXPAND = 1u << 2, - TF_INT_U = 1u << 3, - TF_INT_L = 1u << 4, - TF_INT_LL = 1u << 5, - TF_FLT_F = 1u << 6, - TF_FLT_L = 1u << 7, - TF_STR_WIDE = 1u << 8, - TF_STR_U8 = 1u << 9, - TF_STR_U16 = 1u << 10, - TF_STR_U32 = 1u << 11, - TF_LITERAL_BAD = 1u << 12, -} TokFlag; - -typedef enum Punct { - P_NONE = 0, - P_ARROW = 256, - P_INC, - P_DEC, - P_SHL, - P_SHR, - P_LE, - P_GE, - P_EQ, - P_NE, - P_AND, - P_OR, - P_ADD_ASSIGN, - P_SUB_ASSIGN, - P_MUL_ASSIGN, - P_DIV_ASSIGN, - P_MOD_ASSIGN, - P_AND_ASSIGN, - P_OR_ASSIGN, - P_XOR_ASSIGN, - P_SHL_ASSIGN, - P_SHR_ASSIGN, - P_ELLIPSIS, - P_HASH_HASH, -} Punct; - -typedef u32 LitId; -#define LIT_NONE 0u - -typedef enum LitKind { - LIT_INT, - LIT_FLOAT, - LIT_STRING, - LIT_CHAR, -} LitKind; - -typedef enum LitEnc { - LENC_ORDINARY, - LENC_UTF8, - LENC_WIDE, - LENC_UTF16, - LENC_UTF32, -} LitEnc; - -typedef struct LitInfo { - u8 kind; - u8 enc; - u16 flags; - Sym spelling; - BytesId bytes; -} LitInfo; - -typedef struct Tok { - u16 kind; - u16 flags; - SrcLoc loc; - Sym spelling; - LitId lit; - union { - Sym ident; - Sym str; - u32 punct; - } v; -} Tok; - -typedef struct Lexer Lexer; - -Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); -void lex_close(Lexer*); - -Tok lex_next(Lexer*); -SrcLoc lex_loc(const Lexer*); -u32 lex_file_id(const Lexer*); -const LitInfo* lex_lit(const Lexer*, LitId); - -#endif diff --git a/src/parse/parse.h b/src/parse/parse.h @@ -1,11 +0,0 @@ -#ifndef CFREE_PARSE_H -#define CFREE_PARSE_H - -#include "arch/arch.h" -#include "lex/lex.h" - -/* Standalone assembler. Reads tokens directly from a Lexer; emits via - * MCEmitter. */ -void parse_asm(Compiler*, Lexer*, MCEmitter*); - -#endif diff --git a/src/parse/parse_asm.c b/src/parse/parse_asm.c @@ -1,983 +0,0 @@ -/* GNU-as compatible assembler driver — arch-agnostic. - * - * Reads tokens from a Lexer, dispatches directives, manages labels and - * section state, and forwards mnemonic lines to the per-arch instruction - * parser. Output goes through MCEmitter against an ObjBuilder. - * - * Lexer quirks worked around here: - * - `#` is the immediate marker in asm but TOK_PP_HASH in the C lexer. - * `#` at BOL is a cpp linemarker → skip to next newline; elsewhere - * the per-arch parser treats it as the immediate prefix. - * - composite mnemonics (`b.eq`, `b.ne`, ...) arrive as IDENT '.' IDENT - * and are reassembled before dispatch. - * - `.text` etc. arrive as PUNCT('.') + IDENT and are stitched here. - * - * Symbol bookkeeping: a Sym→ObjSymId map records the symbols introduced - * by labels, `.globl`, and operand references so a forward reference - * (`b foo` before `foo:`) shares one symbol with its later definition. - * A second Sym→AsmEqu map carries `.set`/`.equ` constants. */ - -#include "parse/parse.h" - -#include <stdarg.h> -#include <string.h> - -#include "arch/aa64_asm.h" -#include "arch/arch.h" -#include "core/arena.h" -#include "core/hashmap.h" -#include "core/heap.h" -#include "core/pool.h" -#include "lex/lex.h" -#include "obj/obj.h" -#include "parse/parse_asm_helpers.h" - -HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32); -HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32); - -typedef struct AsmEqu { - i64 value; - ObjSymId sym; /* nonzero when value is `sym + offset` */ - u8 has_sym; - u8 pad[3]; -} AsmEqu; -HASHMAP_DEFINE(SymEquMap, Sym, AsmEqu, hash_u32); - -struct AsmDriver { - Compiler* c; - Lexer* lex; - MCEmitter* mc; - ObjBuilder* ob; - Pool* pool; - Heap* heap; - - Tok cur; - int has_cur; - - /* OBJ_SEC_NONE until first emit / explicit `.text` etc. */ - ObjSecId cur_sec; - - SymSecMap sec_map; - SymSymMap sym_map; - SymEquMap equ_map; - - Sym n_text, n_data, n_rodata, n_bss; - - /* Per-arch handle. Phase-3 ships aa64 only; phase-5 adds dispatch. */ - AA64Asm* aa64; -}; - -/* ---- token plumbing ---- */ - -static Tok d_peek(AsmDriver* d) { - if (!d->has_cur) { - d->cur = lex_next(d->lex); - d->has_cur = 1; - } - return d->cur; -} - -static Tok d_next(AsmDriver* d) { - Tok t = d_peek(d); - d->has_cur = 0; - return t; -} - -static int d_is_eol(AsmDriver* d) { - Tok t = d_peek(d); - return t.kind == TOK_NEWLINE || t.kind == TOK_EOF; -} - -static void d_skip_to_eol(AsmDriver* d) { - while (!d_is_eol(d)) (void)d_next(d); -} - -static void d_eat_eol(AsmDriver* d) { - Tok t = d_peek(d); - if (t.kind == TOK_NEWLINE) (void)d_next(d); -} - -static SrcLoc d_loc(AsmDriver* d) { - if (d->has_cur) return d->cur.loc; - return lex_loc(d->lex); -} - -_Noreturn static void d_panicf(AsmDriver* d, const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - compiler_panicv(d->c, d_loc(d), fmt, ap); - /* unreachable; va_end omitted because compiler_panicv is _Noreturn */ -} - -/* ---- spelling helpers ---- */ - -static const char* asm_str(AsmDriver* d, Sym s, size_t* nout) { - return pool_str(d->pool, s, nout); -} - -static int sym_eq(AsmDriver* d, Sym s, const char* lit) { - size_t n = 0; - const char* p = asm_str(d, s, &n); - size_t i; - if (!p) return 0; - for (i = 0; i < n; ++i) { - if (!lit[i] || p[i] != lit[i]) return 0; - } - return lit[n] == '\0'; -} - -static int starts_with(AsmDriver* d, Sym s, const char* prefix) { - size_t n = 0; - const char* p = asm_str(d, s, &n); - size_t i; - if (!p) return 0; - for (i = 0; prefix[i]; ++i) { - if (i >= n || p[i] != prefix[i]) return 0; - } - return 1; -} - -/* ---- section management ---- */ - -static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind, - u16 flags, u32 align) { - ObjSecId* hit = SymSecMap_get(&d->sec_map, name); - if (hit) return *hit; - ObjSecId id = obj_section(d->ob, name, kind, flags, align); - SymSecMap_set(&d->sec_map, name, id); - return id; -} - -static void set_section(AsmDriver* d, Sym name, SecKind kind, u16 flags, - u32 align) { - ObjSecId id = ensure_section(d, name, kind, flags, align); - d->cur_sec = id; - d->mc->set_section(d->mc, id); -} - -/* ---- symbol management ---- */ - -static ObjSymId intern_sym(AsmDriver* d, Sym name) { - ObjSymId* hit = SymSymMap_get(&d->sym_map, name); - if (hit) return *hit; - ObjSymId id = obj_symbol_find(d->ob, name); - if (id == OBJ_SYM_NONE) { - id = obj_symbol_ex(d->ob, name, SB_LOCAL, SV_DEFAULT, SK_NOTYPE, - OBJ_SEC_NONE, 0, 0, 0); - } - SymSymMap_set(&d->sym_map, name, id); - return id; -} - -static ObjSym* sym_mut(AsmDriver* d, ObjSymId id) { - /* obj.h gives us a const view via obj_symbol_get; the underlying - * record lives in the builder's arena and is safe to mutate - * pre-finalize. Wrapping the cast keeps the const-stripping in - * one place. */ - return (ObjSym*)obj_symbol_get(d->ob, id); -} - -/* ---- expression evaluator (constants + sym ± const) ---- */ - -typedef struct AsmExpr { - ObjSymId sym; - i64 value; -} AsmExpr; - -static AsmExpr expr_c(i64 v) { AsmExpr e = {OBJ_SYM_NONE, v}; return e; } -static AsmExpr expr_s(ObjSymId s, i64 v) { AsmExpr e = {s, v}; return e; } - -static int tok_is_punct(Tok t, u32 p) { - return t.kind == TOK_PUNCT && t.v.punct == p; -} - -static i64 lit_to_i64(AsmDriver* d, Sym spelling) { - size_t n = 0; - const char* p = asm_str(d, spelling, &n); - u64 v = 0; - int base = 10; - size_t i = 0; - if (!p || !n) return 0; - if (n >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - base = 16; i = 2; - } else if (n >= 2 && p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) { - base = 2; i = 2; - } else if (n >= 1 && p[0] == '0') { - base = 8; i = 1; - } - for (; i < n; ++i) { - char c = p[i]; - u32 dv; - if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; - if (c >= '0' && c <= '9') dv = (u32)(c - '0'); - else if (c >= 'a' && c <= 'f') dv = 10 + (u32)(c - 'a'); - else if (c >= 'A' && c <= 'F') dv = 10 + (u32)(c - 'A'); - else d_panicf(d, "asm: bad digit in integer literal"); - if (dv >= (u32)base) d_panicf(d, "asm: digit out of base"); - v = v * (u64)base + dv; - } - return (i64)v; -} - -static AsmExpr parse_expr(AsmDriver*); -static AsmExpr parse_unary(AsmDriver*); - -static AsmExpr parse_primary(AsmDriver* d) { - Tok t = d_peek(d); - if (t.kind == TOK_NUM) { - (void)d_next(d); - return expr_c(lit_to_i64(d, t.spelling)); - } - if (t.kind == TOK_IDENT) { - (void)d_next(d); - AsmEqu* eq = SymEquMap_get(&d->equ_map, t.v.ident); - if (eq) { - if (eq->has_sym) return expr_s(eq->sym, eq->value); - return expr_c(eq->value); - } - return expr_s(intern_sym(d, t.v.ident), 0); - } - if (tok_is_punct(t, '(')) { - (void)d_next(d); - AsmExpr e = parse_expr(d); - Tok cl = d_peek(d); - if (!tok_is_punct(cl, ')')) d_panicf(d, "asm: expected ')'"); - (void)d_next(d); - return e; - } - d_panicf(d, "asm: expected expression"); -} - -static AsmExpr parse_unary(AsmDriver* d) { - Tok t = d_peek(d); - if (tok_is_punct(t, '-')) { - (void)d_next(d); - AsmExpr e = parse_unary(d); - if (e.sym) d_panicf(d, "asm: unary '-' on symbol"); - return expr_c(-e.value); - } - if (tok_is_punct(t, '+')) { - (void)d_next(d); - return parse_unary(d); - } - if (tok_is_punct(t, '~')) { - (void)d_next(d); - AsmExpr e = parse_unary(d); - if (e.sym) d_panicf(d, "asm: unary '~' on symbol"); - return expr_c(~e.value); - } - return parse_primary(d); -} - -static AsmExpr parse_mul(AsmDriver* d) { - AsmExpr a = parse_unary(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '*') && !tok_is_punct(t, '/') && - !tok_is_punct(t, '%')) return a; - u32 op = t.v.punct; - (void)d_next(d); - AsmExpr b = parse_unary(d); - if (a.sym || b.sym) d_panicf(d, "asm: '*/%%' on symbolic operand"); - if (op == '*') a.value *= b.value; - else if (op == '/') { - if (!b.value) d_panicf(d, "asm: division by zero"); - a.value /= b.value; - } else { - if (!b.value) d_panicf(d, "asm: modulo by zero"); - a.value %= b.value; - } - } -} - -static AsmExpr parse_add(AsmDriver* d) { - AsmExpr a = parse_mul(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '+') && !tok_is_punct(t, '-')) return a; - u32 op = t.v.punct; - (void)d_next(d); - AsmExpr b = parse_mul(d); - if (op == '+') { - if (a.sym && b.sym) d_panicf(d, "asm: cannot add two symbols"); - if (b.sym) { a.sym = b.sym; a.value += b.value; } - else a.value += b.value; - } else { - if (b.sym) d_panicf(d, "asm: cannot subtract symbol from constant"); - a.value -= b.value; - } - } -} - -static AsmExpr parse_shift(AsmDriver* d) { - AsmExpr a = parse_add(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, P_SHL) && !tok_is_punct(t, P_SHR)) return a; - u32 op = t.v.punct; - (void)d_next(d); - AsmExpr b = parse_add(d); - if (a.sym || b.sym) d_panicf(d, "asm: shift on symbolic operand"); - if (op == P_SHL) a.value = (i64)((u64)a.value << (b.value & 63)); - else a.value = a.value >> (b.value & 63); - } -} - -static AsmExpr parse_band(AsmDriver* d) { - AsmExpr a = parse_shift(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '&')) return a; - (void)d_next(d); - AsmExpr b = parse_shift(d); - if (a.sym || b.sym) d_panicf(d, "asm: '&' on symbolic operand"); - a.value &= b.value; - } -} - -static AsmExpr parse_bxor(AsmDriver* d) { - AsmExpr a = parse_band(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '^')) return a; - (void)d_next(d); - AsmExpr b = parse_band(d); - if (a.sym || b.sym) d_panicf(d, "asm: '^' on symbolic operand"); - a.value ^= b.value; - } -} - -static AsmExpr parse_bor(AsmDriver* d) { - AsmExpr a = parse_bxor(d); - for (;;) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '|')) return a; - (void)d_next(d); - AsmExpr b = parse_bxor(d); - if (a.sym || b.sym) d_panicf(d, "asm: '|' on symbolic operand"); - a.value |= b.value; - } -} - -static AsmExpr parse_expr(AsmDriver* d) { return parse_bor(d); } - -/* ---- public helpers exposed to per-arch parser ---- */ - -Tok asm_driver_peek(AsmDriver* d) { return d_peek(d); } -Tok asm_driver_next(AsmDriver* d) { return d_next(d); } -int asm_driver_at_eol(AsmDriver* d) { return d_is_eol(d); } -SrcLoc asm_driver_loc(AsmDriver* d) { return d_loc(d); } -MCEmitter* asm_driver_mc(AsmDriver* d) { return d->mc; } -ObjBuilder* asm_driver_ob(AsmDriver* d) { return d->ob; } -Compiler* asm_driver_compiler(AsmDriver* d) { return d->c; } -Pool* asm_driver_pool(AsmDriver* d) { return d->pool; } - -_Noreturn void asm_driver_panic(AsmDriver* d, const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - compiler_panicv(d->c, d_loc(d), fmt, ap); -} - -ObjSymId asm_driver_intern_sym(AsmDriver* d, Sym name) { - return intern_sym(d, name); -} - -ObjSecId asm_driver_cur_section(AsmDriver* d) { - if (d->cur_sec == OBJ_SEC_NONE) { - if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text"); - d->cur_sec = ensure_section(d, d->n_text, SEC_TEXT, - (u16)(SF_ALLOC | SF_EXEC), 4); - d->mc->set_section(d->mc, d->cur_sec); - } - return d->cur_sec; -} - -int asm_driver_eat_comma(AsmDriver* d) { - Tok t = d_peek(d); - if (tok_is_punct(t, ',')) { - (void)d_next(d); - return 1; - } - return 0; -} - -int asm_driver_eat_punct(AsmDriver* d, u32 p) { - Tok t = d_peek(d); - if (tok_is_punct(t, p)) { - (void)d_next(d); - return 1; - } - /* `#` arrives as TOK_PP_HASH from the C lexer; accept it as the - * immediate-prefix punctuator here. */ - if (p == '#' && t.kind == TOK_PP_HASH) { - (void)d_next(d); - return 1; - } - return 0; -} - -void asm_driver_expect_punct(AsmDriver* d, u32 p, const char* what) { - if (!asm_driver_eat_punct(d, p)) - d_panicf(d, "asm: expected '%s' (%s)", "punct", what); -} - -i64 asm_driver_parse_const(AsmDriver* d) { - AsmExpr e = parse_expr(d); - if (e.sym) d_panicf(d, "asm: constant expression expected"); - return e.value; -} - -void asm_driver_parse_sym_expr(AsmDriver* d, ObjSymId* sym_out, - i64* off_out) { - AsmExpr e = parse_expr(d); - *sym_out = e.sym; - *off_out = e.value; -} - -int asm_driver_tok_is_punct(Tok t, u32 p) { - if (tok_is_punct(t, p)) return 1; - /* `#` arrives as TOK_PP_HASH from the C lexer. */ - if (p == '#' && t.kind == TOK_PP_HASH) return 1; - return 0; -} - -/* ---- string-literal decoding ---- */ - -static void decode_string(AsmDriver* d, Sym spelling, u8** out, u32* nout) { - size_t n = 0; - const char* p = asm_str(d, spelling, &n); - /* Skip any encoding prefix (L/u/u8/U). */ - while (n && (*p == 'L' || *p == 'u' || *p == 'U' || *p == '8')) { - ++p; - --n; - } - if (n < 2 || p[0] != '"' || p[n - 1] != '"') - d_panicf(d, "asm: malformed string literal"); - size_t cap = n; - u8* buf = (u8*)d->heap->alloc(d->heap, cap ? cap : 1, 1); - u32 k = 0; - for (size_t i = 1; i + 1 < n; ++i) { - char c = p[i]; - if (c != '\\') { - buf[k++] = (u8)c; - continue; - } - ++i; - if (i + 1 >= n) break; - char e = p[i]; - switch (e) { - case 'n': buf[k++] = '\n'; break; - case 't': buf[k++] = '\t'; break; - case 'r': buf[k++] = '\r'; break; - case '\\': buf[k++] = '\\'; break; - case '"': buf[k++] = '"'; break; - case '\'': buf[k++] = '\''; break; - case '0': buf[k++] = 0; break; - case 'b': buf[k++] = 8; break; - case 'f': buf[k++] = 12; break; - case 'v': buf[k++] = 11; break; - case 'a': buf[k++] = 7; break; - case 'x': { - u32 v = 0; - int dn = 0; - while (i + 2 < n) { - char h = p[i + 1]; - int dv; - if (h >= '0' && h <= '9') dv = h - '0'; - else if (h >= 'a' && h <= 'f') dv = 10 + (h - 'a'); - else if (h >= 'A' && h <= 'F') dv = 10 + (h - 'A'); - else break; - v = v * 16 + (u32)dv; - ++i; - if (++dn >= 2) break; - } - buf[k++] = (u8)v; - break; - } - default: - if (e >= '0' && e <= '7') { - u32 v = (u32)(e - '0'); - int dn = 1; - while (dn < 3 && i + 2 < n) { - char h = p[i + 1]; - if (h < '0' || h > '7') break; - v = v * 8 + (u32)(h - '0'); - ++i; - ++dn; - } - buf[k++] = (u8)v; - } else { - buf[k++] = (u8)e; - } - break; - } - } - *out = buf; - *nout = k; -} - -/* ---- directives ---- */ - -static Sym expect_ident(AsmDriver* d, const char* what) { - Tok t = d_peek(d); - if (t.kind != TOK_IDENT) d_panicf(d, "asm: %s: expected identifier", what); - (void)d_next(d); - return t.v.ident; -} - -static void emit_le(AsmDriver* d, u64 v, u32 width) { - u8 buf[8]; - for (u32 i = 0; i < width; ++i) buf[i] = (u8)(v >> (8 * i)); - (void)asm_driver_cur_section(d); - d->mc->emit_bytes(d->mc, buf, width); -} - -static void emit_int_directive(AsmDriver* d, u32 width) { - for (;;) { - AsmExpr e = parse_expr(d); - if (e.sym) { - RelocKind k; - if (width == 4) k = R_ABS32; - else if (width == 8) k = R_ABS64; - else d_panicf(d, "asm: symbolic .byte/.hword not supported"); - (void)asm_driver_cur_section(d); - u32 ofs = d->mc->pos(d->mc); - u8 zero[8] = {0}; - d->mc->emit_bytes(d->mc, zero, width); - d->mc->emit_reloc_at(d->mc, d->cur_sec, ofs, k, e.sym, e.value, 1, 0); - } else { - emit_le(d, (u64)e.value, width); - } - if (!asm_driver_eat_comma(d)) break; - } -} - -static void do_directive(AsmDriver* d, Sym name) { - if (sym_eq(d, name, "text")) { - if (!d->n_text) d->n_text = pool_intern_cstr(d->pool, ".text"); - set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "data")) { - if (!d->n_data) d->n_data = pool_intern_cstr(d->pool, ".data"); - set_section(d, d->n_data, SEC_DATA, (u16)(SF_ALLOC | SF_WRITE), 8); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "rodata")) { - if (!d->n_rodata) d->n_rodata = pool_intern_cstr(d->pool, ".rodata"); - set_section(d, d->n_rodata, SEC_RODATA, (u16)SF_ALLOC, 8); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "bss")) { - if (!d->n_bss) d->n_bss = pool_intern_cstr(d->pool, ".bss"); - set_section(d, d->n_bss, SEC_BSS, (u16)(SF_ALLOC | SF_WRITE), 8); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "section")) { - Sym sname = 0; - Tok t = d_peek(d); - if (t.kind == TOK_IDENT) { - sname = t.v.ident; - (void)d_next(d); - } else if (t.kind == TOK_STR) { - size_t n = 0; - const char* p = asm_str(d, t.spelling, &n); - if (n >= 2 && p[0] == '"') sname = pool_intern(d->pool, p + 1, n - 2); - (void)d_next(d); - } else if (tok_is_punct(t, '.')) { - (void)d_next(d); - Tok id = d_next(d); - if (id.kind != TOK_IDENT) d_panicf(d, "asm: .section: bad name"); - size_t ni = 0; - const char* nm = asm_str(d, id.v.ident, &ni); - char buf[128]; - if (ni + 1 >= sizeof buf) d_panicf(d, "asm: .section: name too long"); - buf[0] = '.'; - for (size_t i = 0; i < ni; ++i) buf[i + 1] = nm[i]; - sname = pool_intern(d->pool, buf, ni + 1); - } else { - d_panicf(d, "asm: .section: expected name"); - } - SecKind kind = SEC_OTHER; - u16 flags = 0; - { - size_t nn = 0; - const char* p = asm_str(d, sname, &nn); - if (p) { - if (nn >= 5 && memcmp(p, ".text", 5) == 0) { - kind = SEC_TEXT; - flags = (u16)(SF_ALLOC | SF_EXEC); - } else if (nn >= 7 && memcmp(p, ".rodata", 7) == 0) { - kind = SEC_RODATA; - flags = (u16)SF_ALLOC; - } else if (nn >= 5 && memcmp(p, ".data", 5) == 0) { - kind = SEC_DATA; - flags = (u16)(SF_ALLOC | SF_WRITE); - } else if (nn >= 4 && memcmp(p, ".bss", 4) == 0) { - kind = SEC_BSS; - flags = (u16)(SF_ALLOC | SF_WRITE); - } - } - } - /* Skip optional remainder: flags string, type tag, etc. */ - d_skip_to_eol(d); - set_section(d, sname, kind, flags, 1); - return; - } - if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) { - Sym n = expect_ident(d, ".globl"); - sym_mut(d, intern_sym(d, n))->bind = (u16)SB_GLOBAL; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "local")) { - Sym n = expect_ident(d, ".local"); - sym_mut(d, intern_sym(d, n))->bind = (u16)SB_LOCAL; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "weak")) { - Sym n = expect_ident(d, ".weak"); - sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "hidden")) { - Sym n = expect_ident(d, ".hidden"); - sym_mut(d, intern_sym(d, n))->vis = (u8)SV_HIDDEN; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "protected")) { - Sym n = expect_ident(d, ".protected"); - sym_mut(d, intern_sym(d, n))->vis = (u8)SV_PROTECTED; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "internal")) { - Sym n = expect_ident(d, ".internal"); - sym_mut(d, intern_sym(d, n))->vis = (u8)SV_INTERNAL; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "type")) { - Sym n = expect_ident(d, ".type"); - ObjSymId id = intern_sym(d, n); - if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .type: expected ','"); - Tok t = d_next(d); - Sym tag = 0; - if (tok_is_punct(t, '@') || tok_is_punct(t, '%')) { - Tok ti = d_next(d); - if (ti.kind != TOK_IDENT) d_panicf(d, "asm: .type: tag"); - tag = ti.v.ident; - } else if (t.kind == TOK_IDENT) { - tag = t.v.ident; - } else if (t.kind == TOK_STR) { - size_t sn = 0; - const char* sp = asm_str(d, t.spelling, &sn); - if (sn >= 2 && sp[0] == '"' && sp[sn - 1] == '"') - tag = pool_intern(d->pool, sp + 1, sn - 2); - } else { - d_panicf(d, "asm: .type: tag"); - } - if (tag && sym_eq(d, tag, "function")) - sym_mut(d, id)->kind = (u16)SK_FUNC; - else if (tag && sym_eq(d, tag, "object")) - sym_mut(d, id)->kind = (u16)SK_OBJ; - else if (tag && sym_eq(d, tag, "tls_object")) - sym_mut(d, id)->kind = (u16)SK_TLS; - else if (tag && sym_eq(d, tag, "gnu_indirect_function")) - sym_mut(d, id)->kind = (u16)SK_IFUNC; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "size")) { - Sym n = expect_ident(d, ".size"); - ObjSymId id = intern_sym(d, n); - if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .size: expected ','"); - /* Recognize `. - NAME`. */ - Tok t = d_peek(d); - i64 sz = 0; - if (tok_is_punct(t, '.')) { - (void)d_next(d); - if (tok_is_punct(d_peek(d), '-')) { - (void)d_next(d); - Tok rid = d_peek(d); - if (rid.kind == TOK_IDENT && rid.v.ident == n) { - (void)d_next(d); - const ObjSym* os = obj_symbol_get(d->ob, id); - if (os && os->section_id == d->cur_sec) - sz = (i64)d->mc->pos(d->mc) - (i64)os->value; - } - } - } else { - AsmExpr e = parse_expr(d); - if (!e.sym) sz = e.value; - } - if (sz < 0) sz = 0; - sym_mut(d, id)->size = (u64)sz; - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "byte")) { - emit_int_directive(d, 1); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "hword") || sym_eq(d, name, "short") || - sym_eq(d, name, "2byte")) { - emit_int_directive(d, 2); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "word") || sym_eq(d, name, "long") || - sym_eq(d, name, "int") || sym_eq(d, name, "4byte")) { - emit_int_directive(d, 4); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "quad") || sym_eq(d, name, "8byte") || - sym_eq(d, name, "dword") || sym_eq(d, name, "xword")) { - emit_int_directive(d, 8); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "ascii") || sym_eq(d, name, "asciz") || - sym_eq(d, name, "string")) { - int term = !sym_eq(d, name, "ascii"); - for (;;) { - Tok t = d_peek(d); - if (t.kind != TOK_STR) - d_panicf(d, "asm: .ascii/.string: expected string"); - (void)d_next(d); - u8* buf = NULL; - u32 n = 0; - decode_string(d, t.spelling, &buf, &n); - (void)asm_driver_cur_section(d); - d->mc->emit_bytes(d->mc, buf, n); - if (term) emit_le(d, 0, 1); - d->heap->free(d->heap, buf, n); - if (!asm_driver_eat_comma(d)) break; - } - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "zero") || sym_eq(d, name, "skip") || - sym_eq(d, name, "space")) { - i64 n = asm_driver_parse_const(d); - i64 fill = 0; - if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); - if (n > 0) { - (void)asm_driver_cur_section(d); - d->mc->emit_fill(d->mc, (size_t)n, (u8)fill); - } - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "fill")) { - i64 n = asm_driver_parse_const(d); - i64 size = 1, val = 0; - if (asm_driver_eat_comma(d)) size = asm_driver_parse_const(d); - if (asm_driver_eat_comma(d)) val = asm_driver_parse_const(d); - if (size < 1 || size > 8) d_panicf(d, "asm: .fill: size out of range"); - (void)asm_driver_cur_section(d); - for (i64 i = 0; i < n; ++i) emit_le(d, (u64)val, (u32)size); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "align") || sym_eq(d, name, "balign")) { - i64 a = asm_driver_parse_const(d); - i64 fill = 0; - if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); - if (a <= 0 || (a & (a - 1))) d_panicf(d, "asm: .align: not a power of 2"); - (void)asm_driver_cur_section(d); - d->mc->emit_align(d->mc, (u32)a, (u8)fill); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "p2align")) { - i64 lg = asm_driver_parse_const(d); - i64 fill = 0; - if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d); - if (lg < 0 || lg > 16) d_panicf(d, "asm: .p2align: out of range"); - (void)asm_driver_cur_section(d); - d->mc->emit_align(d->mc, 1u << (u32)lg, (u8)fill); - d_skip_to_eol(d); - return; - } - if (sym_eq(d, name, "set") || sym_eq(d, name, "equ")) { - Sym n = expect_ident(d, ".set"); - if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .set: expected ','"); - AsmExpr e = parse_expr(d); - AsmEqu eq; - eq.value = e.value; - eq.sym = e.sym; - eq.has_sym = e.sym ? 1 : 0; - eq.pad[0] = eq.pad[1] = eq.pad[2] = 0; - SymEquMap_set(&d->equ_map, n, eq); - d_skip_to_eol(d); - return; - } - - /* CFI block + accepted-but-ignored directives. Keep parser - * forward-progress without aborting the whole TU. */ - if (starts_with(d, name, "cfi_") || - sym_eq(d, name, "file") || sym_eq(d, name, "loc") || - sym_eq(d, name, "ident") || sym_eq(d, name, "popsection") || - sym_eq(d, name, "pushsection") || sym_eq(d, name, "previous") || - sym_eq(d, name, "subsections_via_symbols") || - sym_eq(d, name, "comm") || sym_eq(d, name, "lcomm") || - sym_eq(d, name, "uleb128") || sym_eq(d, name, "sleb128") || - sym_eq(d, name, "macro") || sym_eq(d, name, "endm") || - sym_eq(d, name, "if") || sym_eq(d, name, "endif") || - sym_eq(d, name, "else") || sym_eq(d, name, "include")) { - d_skip_to_eol(d); - return; - } - - /* Unknown directive — recover. */ - d_skip_to_eol(d); -} - -/* ---- driver loop ---- */ - -static void process_label(AsmDriver* d, Sym name) { - ObjSymId id = intern_sym(d, name); - (void)asm_driver_cur_section(d); - const ObjSym* os = obj_symbol_get(d->ob, id); - if (os && os->section_id != OBJ_SEC_NONE) - d_panicf(d, "asm: symbol defined twice"); - obj_symbol_define(d->ob, id, d->cur_sec, (u64)d->mc->pos(d->mc), 0); - /* Promote SK_UNDEF (forward ref via reloc) to SK_NOTYPE so it's a - * real defined symbol; explicit `.type SYM, @function` will refine. */ - if (os && os->kind == SK_UNDEF) sym_mut(d, id)->kind = (u16)SK_NOTYPE; -} - -static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) { - Tok t = d_peek(d); - if (!tok_is_punct(t, '.')) return head; - if (t.flags & TF_HAS_SPACE) return head; - (void)d_next(d); - Tok rest = d_next(d); - if (rest.kind != TOK_IDENT) - d_panicf(d, "asm: composite mnemonic: expected ident"); - size_t hn = 0, rn = 0; - const char* hp = asm_str(d, head, &hn); - const char* rp = asm_str(d, rest.v.ident, &rn); - size_t n = hn + 1 + rn; - if (n >= 64) d_panicf(d, "asm: mnemonic too long"); - char buf[64]; - for (size_t i = 0; i < hn; ++i) buf[i] = hp[i]; - buf[hn] = '.'; - for (size_t i = 0; i < rn; ++i) buf[hn + 1 + i] = rp[i]; - return pool_intern(d->pool, buf, n); -} - -/* ---- inline-asm driver constructor ---- - * - * Inline-asm template walkers (per-arch) re-lex pre-substituted source - * text through the same per-mnemonic parsers used by the standalone .s - * driver. This constructor builds a minimally-initialized AsmDriver - * around a caller-supplied memory-backed Lexer + MCEmitter. - * - * The driver does not own the Lexer or MCEmitter, does not allocate a - * default section (inline asm emits into whatever section the wrapping - * cg has selected on its MCEmitter), and skips the standalone driver's - * per-arch handle (`d->aa64`) — the caller has already opened its own - * AA64Asm to thread per-block bound state through. */ -AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, Lexer* lex) { - Heap* heap = (Heap*)c->env->heap; - AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver)); - memset(d, 0, sizeof *d); - d->c = c; - d->lex = lex; - d->mc = mc; - d->ob = mc->obj; - d->pool = c->global; - d->heap = heap; - /* The MCEmitter's section is whatever cg has set; do not override it. - * cur_sec == OBJ_SEC_NONE means "ask the MCEmitter on demand" — we use - * mc->section_id directly via asm_driver_cur_section's lazy init for - * standalone, but inline asm should never reach that path because the - * MCEmitter already has its section. Pre-seed cur_sec from the - * MCEmitter so emit_reloc_at calls get the right section id. */ - d->cur_sec = mc->section_id; - SymSecMap_init(&d->sec_map, heap); - SymSymMap_init(&d->sym_map, heap); - SymEquMap_init(&d->equ_map, heap); - d->aa64 = NULL; /* caller owns its own AA64Asm */ - return d; -} - -void asm_driver_close_inline(AsmDriver* d) { - if (!d) return; - SymSecMap_fini(&d->sec_map); - SymSymMap_fini(&d->sym_map); - SymEquMap_fini(&d->equ_map); - Heap* heap = d->heap; - heap->free(heap, d, sizeof *d); -} - -void parse_asm(Compiler* c, Lexer* l, MCEmitter* mc) { - AsmDriver d; - memset(&d, 0, sizeof d); - d.c = c; - d.lex = l; - d.mc = mc; - d.ob = mc->obj; - d.pool = c->global; - d.heap = (Heap*)c->env->heap; - d.cur_sec = OBJ_SEC_NONE; - SymSecMap_init(&d.sec_map, d.heap); - SymSymMap_init(&d.sym_map, d.heap); - SymEquMap_init(&d.equ_map, d.heap); - d.aa64 = aa64_asm_open(c); - - for (;;) { - Tok t = d_peek(&d); - if (t.kind == TOK_EOF) break; - if (t.kind == TOK_NEWLINE) { - (void)d_next(&d); - continue; - } - if (t.kind == TOK_PP_HASH) { - /* cpp-style linemarker; skip the whole line. */ - d_skip_to_eol(&d); - continue; - } - if (tok_is_punct(t, '.')) { - (void)d_next(&d); - Tok id = d_next(&d); - if (id.kind != TOK_IDENT) - d_panicf(&d, "asm: expected directive name after '.'"); - do_directive(&d, id.v.ident); - d_eat_eol(&d); - continue; - } - if (t.kind == TOK_IDENT) { - Sym head = t.v.ident; - (void)d_next(&d); - Tok nxt = d_peek(&d); - if (tok_is_punct(nxt, ':')) { - (void)d_next(&d); - process_label(&d, head); - continue; - } - Sym mnemonic = maybe_compose_mnemonic(&d, head); - aa64_asm_insn(d.aa64, &d, mnemonic); - d_skip_to_eol(&d); - continue; - } - /* Anything else: recover by skipping the line. */ - d_skip_to_eol(&d); - } - - aa64_asm_close(d.aa64); - SymSecMap_fini(&d.sec_map); - SymSymMap_fini(&d.sym_map); - SymEquMap_fini(&d.equ_map); -} diff --git a/src/parse/parse_asm_helpers.h b/src/parse/parse_asm_helpers.h @@ -1,63 +0,0 @@ -#ifndef CFREE_PARSE_ASM_HELPERS_H -#define CFREE_PARSE_ASM_HELPERS_H - -/* Lightweight asm-driver surface consumed by per-arch instruction - * parsers. The driver itself is opaque to per-arch code; these helpers - * are the only seam. Implementations live in src/parse/parse_asm.c. */ - -#include "arch/arch.h" -#include "core/core.h" -#include "lex/lex.h" -#include "obj/obj.h" - -typedef struct AsmDriver AsmDriver; - -/* ---- token plumbing ---- */ -Tok asm_driver_peek(AsmDriver*); -Tok asm_driver_next(AsmDriver*); -int asm_driver_at_eol(AsmDriver*); -int asm_driver_tok_is_punct(Tok t, u32 p); -int asm_driver_eat_comma(AsmDriver*); -int asm_driver_eat_punct(AsmDriver*, u32 punct); -void asm_driver_expect_punct(AsmDriver*, u32 punct, const char* what); - -/* Source position for diagnostics. */ -SrcLoc asm_driver_loc(AsmDriver*); - -/* Owning subsystems. */ -MCEmitter* asm_driver_mc(AsmDriver*); -ObjBuilder* asm_driver_ob(AsmDriver*); -Compiler* asm_driver_compiler(AsmDriver*); -Pool* asm_driver_pool(AsmDriver*); -ObjSecId asm_driver_cur_section(AsmDriver*); - -/* Diagnostics: emits then longjmps via Compiler.panic. No return. */ -_Noreturn void asm_driver_panic(AsmDriver*, const char* fmt, ...); - -/* ---- symbol + expression parsing ---- */ -ObjSymId asm_driver_intern_sym(AsmDriver*, Sym name); - -/* Parse a constant integer expression. Panics if the expression - * references a symbol. */ -i64 asm_driver_parse_const(AsmDriver*); - -/* Parse a `sym ± const` expression. Both outputs valid: pure constants - * leave *sym_out == OBJ_SYM_NONE. */ -void asm_driver_parse_sym_expr(AsmDriver*, ObjSymId* sym_out, i64* off_out); - -/* ---- inline-asm constructor ---- - * - * Build an AsmDriver around a memory-backed Lexer + caller-supplied - * MCEmitter. Used by inline-asm template walkers (one driver per asm - * line) to reuse the existing per-arch instruction parsers verbatim - * over a substituted source buffer. - * - * The driver is heap-allocated through c->env->heap and must be released - * with asm_driver_close_inline. It does not own the Lexer or the - * MCEmitter — the caller retains ownership of both. The driver does - * not initialize a default section; inline asm always emits into the - * MCEmitter's currently-active section. */ -AsmDriver* asm_driver_open_inline(Compiler*, MCEmitter*, Lexer*); -void asm_driver_close_inline(AsmDriver*); - -#endif diff --git a/test/asm/CORPUS.md b/test/asm/CORPUS.md @@ -55,7 +55,7 @@ as the exit code, mirroring the test/parse and test/cg conventions. - `<name>.expected.lst` — golden listing for L. - `<name>.skip` — single-line reason. The case is reported as SKIP for every path it applies to. Every phase-1 case carries one - because the underlying APIs (`parse_asm`, `cfree_disasm_iter_*`, + because the underlying APIs (`asm_parse`, `cfree_disasm_iter_*`, `cfree_obj_disasm`) are still stubs. They drop as the matching subsystems land. diff --git a/test/asm/harness/asm_runner.c b/test/asm/harness/asm_runner.c @@ -10,7 +10,7 @@ * consumers take). Built once; the shell runner walks the sub-corpora * and invokes one mode per case-path pair. * - * Phase 1: parse_asm and the disasm iterator are still stubs in + * Phase 1: asm_parse and the disasm iterator are still stubs in * src/api/stubs.c. The runner returns nonzero when the underlying API * fails; smoke cases each carry a .skip sidecar so the harness reports * them cleanly until phases 3 and 4 land. diff --git a/test/asm/run.sh b/test/asm/run.sh @@ -27,7 +27,7 @@ # Reuses the test/link harness binaries (link-exe-runner, jit-runner) plus # test/link/harness/start.c verbatim — same convention as test/parse/run.sh. # -# Phase 1 (doc/ASM.md §5): parse_asm and the disasm iterator are still +# Phase 1 (doc/ASM.md §5): asm_parse and the disasm iterator are still # stubs in src/api/stubs.c. Every smoke case carries a .skip sidecar so # the harness reports SKIP cleanly; the wiring still runs on every CI # pass. CFREE_TEST_ALLOW_SKIP defaults to 1 here for the duration of @@ -68,7 +68,7 @@ CLANG_TARGET="--target=$CLANG_TRIPLE" CC="${CC:-cc}" HARNESS_CFLAGS="-std=c11 -Wall -Wextra -I$ROOT/include -I$ROOT/test" # Phase 1: ALLOW_SKIP defaults to 1 (smoke cases skip cleanly because -# parse_asm / cfree_disasm_iter_* are still stubs). Flip to 0 once the +# asm_parse / cfree_disasm_iter_* are still stubs). Flip to 0 once the # assembler / disassembler land. ALLOW_SKIP="${CFREE_TEST_ALLOW_SKIP:-1}" diff --git a/test/test.mk b/test/test.mk @@ -22,7 +22,7 @@ # - test-asm: file-driven assembler/disassembler harness in test/asm/. # Three sub-corpora (encode/, decode/, listing/), one mode per # sub-dir. Phase 1: every smoke case carries a .skip sidecar because -# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds +# asm_parse / cfree_disasm_iter_* are still stubs; the harness builds # and runs end-to-end so the wiring stays exercised. See doc/ASM.md. .PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64