kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 662ea223050b63f620b330a9f129a1b352dfc0af
parent 44c6219f7bef26da23a7e87681417a7b51ffca1c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 23 May 2026 14:19:04 -0700

lang: extract C preprocessor into lang/cpp/ behind CFREE_LANG_CPP_ENABLED

Pulls the lexer and preprocessor out of lang/c/ so the driver is
generic over frontends and a cpp-only build (no C parser/types) is
possible. lang/c/ now depends on lang/cpp/.

  - lang/cpp/lex/, lang/cpp/pp/: moved verbatim from lang/c/
  - lang/cpp/cpp_support.h: shared substrate (typedefs, Pool, arena
    macros, compiler_panic) extracted from lang/c/c_support.h
  - lang/cpp/cpp.c: cfree_cpp_preprocess() — moved from lang/c/c.c
  - include/cfree/preprocess.h: new public header owning
    CfreePreprocessOptions/CfreeDefine and declaring
    cfree_cpp_preprocess(); compile.h re-includes it
  - CFREE_LANG_CPP_ENABLED flag in config.h + mk/config.mk; the
    Makefile gates lang/cpp/ and driver/cpp.c on it and threads
    -Ilang/cpp into lang/c/ builds; main.c gates the `cpp` subcommand
  - _Static_assert in src/api/lang_registry.c enforces C ⇒ CPP
  - cfree_c_preprocess() replaced by cfree_cpp_preprocess() at all
    callers (driver/cpp.c, cc.c, as.c, runtime.c) — public API break

Verified: default all-on build clean; test-pp 85/85, test-pp-err
15/15, test-cg-api 955/0, test-toy, test-smoke-x64 pass. cpp-only
build (CFREE_LANG_C_ENABLED=0) builds clean and `cfree cpp` works
end-to-end. The bad combo (C on, CPP off) trips the static assert.

Diffstat:
M Makefile | 16 +++++++++++++++-
M driver/as.c | 6 +++---
M driver/cc.c | 8 +++++---
M driver/cpp.c | 4 ++--
M driver/main.c | 6 ++++++
M driver/runtime.c | 5 ++---
M include/cfree/compile.h | 19 +------------------
M include/cfree/config.h | 8 +++++++-
A include/cfree/preprocess.h | 37 +++++++++++++++++++++++++++++++++++++
M lang/c/c.c | 50 +++-----------------------------------------------
M lang/c/c.h | 8 ++------
M lang/c/c_support.h | 88 ++++---------------------------------------------------------------------------
D lang/c/lex/lex.h | 127 -------------------------------------------------------------------------------
D lang/c/pp/pp_priv.h | 328 -------------------------------------------------------------------------------
A lang/cpp/cpp.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lang/cpp/cpp_support.h | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R lang/c/lex/lex.c -> lang/cpp/lex/lex.c | 0
A lang/cpp/lex/lex.h | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R lang/c/pp/pp.c -> lang/cpp/pp/pp.c | 0
R lang/c/pp/pp.h -> lang/cpp/pp/pp.h | 0
R lang/c/pp/pp_directive.c -> lang/cpp/pp/pp_directive.c | 0
R lang/c/pp/pp_expand.c -> lang/cpp/pp/pp_expand.c | 0
A lang/cpp/pp/pp_priv.h | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M mk/config.mk | 1 +
M src/api/lang_registry.c | 3 +++
25 files changed, 729 insertions(+), 623 deletions(-)

diff --git a/Makefile b/Makefile @@ -51,11 +51,15 @@ endif # Per-frontend source sets. Each is gated by its CFREE_LANG_*_ENABLED flag # from mk/config.mk so the matching `#if` in src/api/lang_registry.c and # the build agree on which frontends are compiled in. +LANG_CPP_SRCS = $(shell find lang/cpp -name '*.c' 2>/dev/null) LANG_C_SRCS = $(shell find lang/c -name '*.c' 2>/dev/null) LANG_WASM_SRCS = $(shell find lang/wasm -name '*.c' 2>/dev/null) LANG_TOY_SRCS = $(wildcard lang/toy/*.c) LANG_OBJS = +ifeq ($(CFREE_LANG_CPP_ENABLED),1) +LANG_OBJS += $(patsubst lang/cpp/%.c,build/lang/cpp/%.o,$(LANG_CPP_SRCS)) +endif ifeq ($(CFREE_LANG_C_ENABLED),1) LANG_OBJS += $(patsubst lang/c/%.c,build/lang/c/%.o,$(LANG_C_SRCS)) endif @@ -73,6 +77,9 @@ LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) \ LIB_DEPS = $(LIB_OBJS:.o=.d) DRIVER_SRCS = $(wildcard driver/*.c) +ifneq ($(CFREE_LANG_CPP_ENABLED),1) +DRIVER_SRCS := $(filter-out driver/cpp.c,$(DRIVER_SRCS)) +endif DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS)) DRIVER_DEPS = $(DRIVER_OBJS:.o=.d) @@ -107,9 +114,16 @@ build/lib/api/lang_registry.o: src/api/lang_registry.c Makefile @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) -Ilang $(DEPFLAGS) -c $< -o $@ +build/lang/cpp/%.o: lang/cpp/%.c Makefile + @mkdir -p $(dir $@) + $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp $(DEPFLAGS) -c $< -o $@ + +# The C frontend includes the lexer and preprocessor headers (pp/pp.h, +# lex/lex.h) which now live under lang/cpp/, and cpp_support.h is the +# shared substrate. So lang/c objects build with -Ilang/cpp -Ilang/c. 
build/lang/c/%.o: lang/c/%.c Makefile @mkdir -p $(dir $@) - $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/c $(DEPFLAGS) -c $< -o $@ + $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp -Ilang/c $(DEPFLAGS) -c $< -o $@ build/lang/wasm/%.o: lang/wasm/%.c Makefile @mkdir -p $(dir $@) diff --git a/driver/as.c b/driver/as.c @@ -1,15 +1,15 @@ #include <cfree/compile.h> #include <cfree/core.h> +#include <cfree/preprocess.h> #include <stdint.h> #include <string.h> #include "cflags.h" #include "driver.h" -#include "lang/c/c.h" /* `cfree as` — standalone assembler. Reads a single text source, writes a * relocatable object via a CfreeCompileSession. `.S` inputs are - * preprocessed first via cfree_c_preprocess; `.s` inputs are not. The + * preprocessed first via cfree_cpp_preprocess; `.s` inputs are not. The * accepted input is a GAS subset (AT&T syntax on x86). */ #define AS_TOOL "as" @@ -196,7 +196,7 @@ int driver_as(int argc, char** argv) { driver_errf(AS_TOOL, "out of memory"); goto out; } - if (cfree_c_preprocess(compiler, &pp, &input, pp_writer) != CFREE_OK) + if (cfree_cpp_preprocess(compiler, &pp, &input, pp_writer) != CFREE_OK) goto out; if (cfree_writer_status(pp_writer) != CFREE_OK) { driver_errf(AS_TOOL, "failed to preprocess: %s", o.source); diff --git a/driver/cc.c b/driver/cc.c @@ -3,6 +3,7 @@ #include <cfree/compile.h> #include <cfree/core.h> #include <cfree/link.h> +#include <cfree/preprocess.h> #include <stdint.h> #include <string.h> @@ -1620,8 +1621,9 @@ static int cc_preprocess(DriverEnv* env, const CcOptions* o, goto out; } - rc = - cfree_c_preprocess(compiler, pp_opts, &input, writer) == CFREE_OK ? 0 : 1; + rc = cfree_cpp_preprocess(compiler, pp_opts, &input, writer) == CFREE_OK + ? 
0 + : 1; out: if (compiler) driver_compiler_free(compiler); @@ -2003,7 +2005,7 @@ static int cc_run_deps_only(DriverEnv* env, const CcOptions* o, goto out; } - if (cfree_c_preprocess(compiler, pp, &input, discard) != CFREE_OK) goto out; + if (cfree_cpp_preprocess(compiler, pp, &input, discard) != CFREE_OK) goto out; rc = cc_dep_finish(env, &ctx, compiler, o); diff --git a/driver/cpp.c b/driver/cpp.c @@ -2,10 +2,10 @@ #include <cfree/compile.h> #include <cfree/core.h> +#include <cfree/preprocess.h> #include "cflags.h" #include "driver.h" -#include "lang/c/c.h" /* `cfree cpp` — standalone C preprocessor. Reads one C source (path or * `-` for stdin), writes the preprocessed token stream to `-o PATH` or @@ -210,7 +210,7 @@ int driver_cpp(int argc, char** argv) { } rc = - cfree_c_preprocess(compiler, &pp, &input, writer) == CFREE_OK ? 0 : 1; + cfree_cpp_preprocess(compiler, &pp, &input, writer) == CFREE_OK ? 0 : 1; out: if (compiler) driver_compiler_free(compiler); diff --git a/driver/main.c b/driver/main.c @@ -1,3 +1,5 @@ +#include <cfree/config.h> + #include "driver.h" /* Multi-call dispatch. 
Looks at argv[0]'s basename for "cc", "as", "ld", @@ -21,7 +23,9 @@ static int dispatch(const char* name, int argc, char** argv) { if (driver_streq(name, "cc")) return driver_cc(argc, argv); +#if CFREE_LANG_CPP_ENABLED if (driver_streq(name, "cpp")) return driver_cpp(argc, argv); +#endif if (driver_streq(name, "as")) return driver_as(argc, argv); if (driver_streq(name, "ld")) return driver_ld(argc, argv); if (driver_streq(name, "ar")) return driver_ar(argc, argv); @@ -42,10 +46,12 @@ static int print_tool_help(const char* name) { driver_help_cc(); return 0; } +#if CFREE_LANG_CPP_ENABLED if (driver_streq(name, "cpp")) { driver_help_cpp(); return 0; } +#endif if (driver_streq(name, "as")) { driver_help_as(); return 0; diff --git a/driver/runtime.c b/driver/runtime.c @@ -3,11 +3,10 @@ #include <cfree/archive.h> #include <cfree/compile.h> #include <cfree/core.h> +#include <cfree/preprocess.h> #include <stdint.h> #include <string.h> -#include "lang/c/c.h" - #define RT_TOOL "cc" typedef struct RuntimeVariant { @@ -538,7 +537,7 @@ static int rt_compile_source(DriverEnv* env, rt_free_pp(env, &pp, owned_dirs, owned_sizes, owned_count); goto out; } - st = cfree_c_preprocess(compiler, &pp, &input, pp_writer); + st = cfree_cpp_preprocess(compiler, &pp, &input, pp_writer); rt_free_pp(env, &pp, owned_dirs, owned_sizes, owned_count); if (st != CFREE_OK || cfree_writer_status(pp_writer) != CFREE_OK) goto out; diff --git a/include/cfree/compile.h b/include/cfree/compile.h @@ -3,6 +3,7 @@ #include <cfree/core.h> #include <cfree/object.h> +#include <cfree/preprocess.h> /* * Source compiler embedding API. 
@@ -21,24 +22,6 @@ typedef enum CfreeLanguage { CFREE_LANG_COUNT = 4, } CfreeLanguage; -typedef struct CfreeDefine { - const char* name; - const char* body; /* NULL means "1" */ -} CfreeDefine; - -typedef CfreeDefine CfreePredefinedMacro; - -typedef struct CfreePreprocessOptions { - const char* const* include_dirs; - uint32_t ninclude_dirs; - const char* const* system_include_dirs; - uint32_t nsystem_include_dirs; - const CfreeDefine* defines; - uint32_t ndefines; - const char* const* undefines; - uint32_t nundefines; -} CfreePreprocessOptions; - typedef struct CfreeDiagnosticOptions { bool warnings_are_errors; uint32_t max_errors; /* 0 means unlimited */ diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -33,7 +33,13 @@ #define CFREE_OBJ_COFF_ENABLED 1 /* Language frontends. The assembler frontend is unconditional: it lives - * inside libcfree as part of the codegen substrate. */ + * inside libcfree as part of the codegen substrate. + * + * CFREE_LANG_CPP_ENABLED gates the C preprocessor (lang/cpp/). The C + * frontend depends on it; enabling CFREE_LANG_C_ENABLED without + * CFREE_LANG_CPP_ENABLED is a build-time error. The preprocessor can + * be enabled standalone (e.g. for `cfree cpp` only). */ +#define CFREE_LANG_CPP_ENABLED 1 #define CFREE_LANG_C_ENABLED 1 #define CFREE_LANG_TOY_ENABLED 1 #define CFREE_LANG_WASM_ENABLED 1 diff --git a/include/cfree/preprocess.h b/include/cfree/preprocess.h @@ -0,0 +1,37 @@ +#ifndef CFREE_PREPROCESS_H +#define CFREE_PREPROCESS_H + +#include <cfree/core.h> + +/* + * Public surface for the cfree C preprocessor (lang/cpp). + * + * The preprocessor consumes a single translation unit's source bytes + * and produces preprocessed C text. It is independent of the C + * frontend: when CFREE_LANG_CPP_ENABLED is set but CFREE_LANG_C is + * not, cfree_cpp_preprocess() is still available (used by `cfree cpp` + * and `cfree cc -E`). 
+ */ + +typedef struct CfreeDefine { + const char* name; + const char* body; /* NULL means "1" */ +} CfreeDefine; + +typedef CfreeDefine CfreePredefinedMacro; + +typedef struct CfreePreprocessOptions { + const char* const* include_dirs; + uint32_t ninclude_dirs; + const char* const* system_include_dirs; + uint32_t nsystem_include_dirs; + const CfreeDefine* defines; + uint32_t ndefines; + const char* const* undefines; + uint32_t nundefines; +} CfreePreprocessOptions; + +CfreeStatus cfree_cpp_preprocess(CfreeCompiler*, const CfreePreprocessOptions*, + const CfreeBytes*, CfreeWriter*); + +#endif diff --git a/lang/c/c.c b/lang/c/c.c @@ -19,10 +19,6 @@ static _Noreturn void c_bad_options(Compiler* c, const char* msg) { compiler_panic(c, c_no_loc(), "bad C frontend options: %s", msg); } -typedef struct CFrontend { - CfreeCompiler* c; -} CFrontend; - static void c_apply_pp_options(Pp* pp, const CfreePreprocessOptions* opts) { u32 i; @@ -41,49 +37,9 @@ static void c_apply_pp_options(Pp* pp, const CfreePreprocessOptions* opts) { } } -typedef struct CPreprocessRun { - const CfreePreprocessOptions* opts; - const CfreeBytes* input; - CfreeWriter* out; -} CPreprocessRun; - -static CfreeStatus c_preprocess_body(CfreeCompiler* c, void* user) { - CPreprocessRun* r = (CPreprocessRun*)user; - Lexer* lex; - Pp* pp; - - const CfreePreprocessOptions* opts = r->opts; - const CfreeBytes* input = r->input; - CfreeWriter* out = r->out; - - if (!opts || !input || !out) { - c_bad_options(c, "preprocess args missing"); - } - if (!input->name) c_bad_options(c, "input name is NULL"); - if (!input->data && input->len != 0) { - c_bad_options(c, "input data is NULL but len > 0"); - } - - lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); - pp = pp_new(c); - if (!lex || !pp) - compiler_panic(c, c_no_loc(), "C preprocessor out of memory"); - c_apply_pp_options(pp, opts); - pp_push_input(pp, lex); - pp_emit_text(pp, out); - pp_free(pp); - return CFREE_OK; -} - -CfreeStatus 
cfree_c_preprocess(CfreeCompiler* c, - const CfreePreprocessOptions* opts, - const CfreeBytes* input, CfreeWriter* out) { - CPreprocessRun r; - r.opts = opts; - r.input = input; - r.out = out; - return cfree_frontend_run(c, c_preprocess_body, &r); -} +typedef struct CFrontend { + CfreeCompiler* c; +} CFrontend; static CfreeFrontendState* c_frontend_new(CfreeCompiler* c) { CfreeHeap* h; diff --git a/lang/c/c.h b/lang/c/c.h @@ -13,18 +13,14 @@ * frontend casts language_options back to CfreeCCompileOptions* to recover * the preprocessor and diagnostic configuration. * - * cfree_c_preprocess is a standalone helper driven by the driver's -E - * mode and by -S/.s preprocessing; it runs under the same frontend panic - * boundary as cfree_c_compile via cfree_frontend_run. */ + * Standalone preprocessing has moved to <cfree/preprocess.h>'s + * cfree_cpp_preprocess(). */ #include <cfree/compile.h> #include <cfree/core.h> #include <cfree/frontend.h> #include <cfree/object.h> -CfreeStatus cfree_c_preprocess(CfreeCompiler*, const CfreePreprocessOptions*, - const CfreeBytes*, CfreeWriter*); - extern const CfreeFrontendVTable cfree_c_frontend_vtable; #endif diff --git a/lang/c/c_support.h b/lang/c/c_support.h @@ -1,97 +1,17 @@ #ifndef CFREE_LANG_C_SUPPORT_H #define CFREE_LANG_C_SUPPORT_H -#include <cfree/frontend.h> -#include <cfree/support/hashmap.h> -#include <stdarg.h> -#include <stddef.h> -#include <stdint.h> +/* C-frontend support layer. Re-exports the lexer/preprocessor shared + * substrate (cpp_support.h) and adds C-specific helpers used by the + * parser, declarations, and types. 
*/ -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; +#include "cpp_support.h" -typedef CfreeCompiler Compiler; -typedef CfreeHeap Heap; -typedef CfreeWriter Writer; -typedef CfreeSym Sym; -typedef CfreeSrcLoc SrcLoc; typedef CfreeCompiler TargetABI; -typedef u32 BytesId; typedef struct SrcRange { SrcLoc begin; SrcLoc end; } SrcRange; -typedef struct Pool { - Compiler* c; - CfreeArena* arena; - void* type_cache; -} Pool; - -static inline Pool* c_pool_new(Compiler* c) { - Heap* h = cfree_compiler_context(c)->heap; - Pool* p = h ? (Pool*)h->alloc(h, sizeof(*p), _Alignof(Pool)) : NULL; - if (!p) return NULL; - p->c = c; - p->arena = NULL; - p->type_cache = NULL; - if (cfree_arena_new(h, 0, &p->arena) != CFREE_OK || !p->arena) { - h->free(h, p, sizeof(*p)); - return NULL; - } - return p; -} - -static inline void c_pool_free(Pool* p) { - Heap* h; - if (!p) return; - h = cfree_compiler_context(p->c)->heap; - cfree_arena_free(p->arena); - if (h) h->free(h, p, sizeof(*p)); -} - -static inline Sym pool_intern(Pool* p, const char* s, size_t len) { - return cfree_sym_intern_len(p->c, s, len); -} - -static inline Sym pool_intern_cstr(Pool* p, const char* s) { - return cfree_sym_intern(p->c, s); -} - -static inline const char* pool_str(Pool* p, Sym sym, size_t* len_out) { - return cfree_sym_str(p->c, sym, len_out); -} - -static inline const char* compiler_sym_str(Compiler* c, Sym sym, - size_t* len_out) { - return cfree_sym_str(c, sym, len_out); -} - -#define arena_alloc(a, size, align) cfree_arena_alloc((a), (size), (align)) -#define arena_zalloc(a, size, align) cfree_arena_zalloc((a), (size), (align)) -#define arena_strdup(a, s, len) cfree_arena_strdup((a), (s), (len)) -#define arena_new(a, T) cfree_arena_new_obj((a), T) -#define arena_znew(a, T) cfree_arena_znew_obj((a), T) -#define arena_array(a, T, n) cfree_arena_array((a), T, n) -#define 
arena_zarray(a, T, n) cfree_arena_zarray((a), T, n) - -_Noreturn static inline void compiler_panic(Compiler* c, SrcLoc loc, - const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - cfree_frontend_vfatal(c, loc, fmt, ap); -} - -_Noreturn static inline void compiler_panicv(Compiler* c, SrcLoc loc, - const char* fmt, va_list ap) { - cfree_frontend_vfatal(c, loc, fmt, ap); -} - #endif diff --git a/lang/c/lex/lex.h b/lang/c/lex/lex.h @@ -1,127 +0,0 @@ -#ifndef CFREE_LEX_H -#define CFREE_LEX_H - -#include "c_support.h" - -typedef enum TokKind { - TOK_EOF = 0, - TOK_IDENT, /* v.ident */ - TOK_NUM, /* lit */ - TOK_FLT, /* lit */ - TOK_STR, /* lit; v.str is decoded bytes if target-independent */ - TOK_CHR, /* lit */ - TOK_PUNCT, /* v.punct */ - TOK_PP_HASH, /* # */ - TOK_PP_PASTE, /* ## */ - TOK_HEADER, /* header-name in #include / #embed */ - TOK_NEWLINE, /* visible to PP only */ - TOK_KW_FIRST, - /* C11 keywords are inserted into this range by parse_c via pool */ - TOK_KW_LAST = 0x1000, -} TokKind; - -typedef enum TokFlag { - TF_AT_BOL = 1u << 0, - TF_HAS_SPACE = 1u << 1, - TF_NO_EXPAND = 1u << 2, - TF_INT_U = 1u << 3, - TF_INT_L = 1u << 4, - TF_INT_LL = 1u << 5, - TF_FLT_F = 1u << 6, - TF_FLT_L = 1u << 7, - TF_STR_WIDE = 1u << 8, - TF_STR_U8 = 1u << 9, - TF_STR_U16 = 1u << 10, - TF_STR_U32 = 1u << 11, - TF_LITERAL_BAD = 1u << 12, -} TokFlag; - -typedef enum Punct { - P_NONE = 0, - /* Single-char punctuators reuse their ASCII codepoint here. 
*/ - P_ARROW = 256, - P_INC, - P_DEC, - P_SHL, - P_SHR, - P_LE, - P_GE, - P_EQ, - P_NE, - P_AND, - P_OR, - P_ADD_ASSIGN, - P_SUB_ASSIGN, - P_MUL_ASSIGN, - P_DIV_ASSIGN, - P_MOD_ASSIGN, - P_AND_ASSIGN, - P_OR_ASSIGN, - P_XOR_ASSIGN, - P_SHL_ASSIGN, - P_SHR_ASSIGN, - P_ELLIPSIS, - P_HASH_HASH, -} Punct; - -typedef u32 LitId; -#define LIT_NONE 0u - -typedef enum LitKind { - LIT_INT, - LIT_FLOAT, - LIT_STRING, - LIT_CHAR, -} LitKind; - -typedef enum LitEnc { - LENC_ORDINARY, - LENC_UTF8, - LENC_WIDE, - LENC_UTF16, - LENC_UTF32, -} LitEnc; - -typedef struct LitInfo { - u8 kind; /* LitKind */ - u8 enc; /* LitEnc for strings/chars */ - u16 flags; /* TokFlag suffix/encoding bits */ - Sym spelling; /* exact source spelling */ - BytesId bytes; /* decoded bytes/code units, if already decoded */ -} LitInfo; - -typedef struct Tok { - u16 kind; - u16 flags; - SrcLoc loc; - Sym spelling; /* exact token spelling for diagnostics/#/## */ - LitId lit; /* literal-table handle; LIT_NONE otherwise */ - union { - Sym ident; - Sym str; - u32 punct; - } v; -} Tok; - -typedef struct Lexer Lexer; - -/* lex_open_mem borrows (src, len). The lexer does not copy source bytes; - * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics - * and the preprocessor's directive scanner read from the borrowed buffer. - * - * Ownership: a Lexer that has been handed to pp_push_input is owned by PP - * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call - * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly). - * - * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed - * Lexer means outliving pp_free. */ -Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); -void lex_close(Lexer*); - -/* Streaming. Returns TOK_EOF repeatedly at end of input. 
*/ -Tok lex_next(Lexer*); -SrcLoc lex_loc(const Lexer*); -u32 lex_file_id(const Lexer*); -const LitInfo* lex_lit(const Lexer*, LitId); - -#endif diff --git a/lang/c/pp/pp_priv.h b/lang/c/pp/pp_priv.h @@ -1,328 +0,0 @@ -/* pp_priv.h — shared types, helpers, and cross-module forward declarations - * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c). - * NOT part of the public API; included only within lang/c/pp/. */ - -#ifndef CFREE_PP_PRIV_H -#define CFREE_PP_PRIV_H - -#include <stdlib.h> -#include <string.h> - -#include "c_support.h" -#include "pp/pp.h" - -/* ============================================================ - * Internal token kinds - * ============================================================ */ - -/* Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */ -#define TOK_PP_PARAM ((u16)0x1100) -#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */ - -/* ============================================================ - * Types - * ============================================================ */ - -typedef struct Macro { - Sym name; - SrcLoc def_loc; - u8 is_func; - u8 is_variadic; - u8 pad[2]; - u32 n_params; - Sym* params; /* parameter names */ - Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */ - u32 body_len; -} Macro; - -typedef u32 HidesetId; -#define HS_EMPTY 0u - -typedef struct Hideset { - u32 n; - Sym names[1]; /* flexible; allocated with extra trailing slots */ -} Hideset; - -typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind; - -typedef struct TokSrc { - u8 kind; - /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is - * the top source and it's exhausted, instead of popping. The caller - * (e.g. argument pre-expansion) explicitly pops the scope when done. - * This bounds expansion to a single argument's token stream. */ - u8 scope_top; - u8 pad[2]; - /* SRC_LEX */ - Lexer* lex; - /* SRC_BUF */ - Tok* toks; - HidesetId* hs; - u32 i; - u32 n; - /* #line state (SRC_LEX only). 
line_delta is added to every emitted - * token's loc.line on its way out so __LINE__ and the output cursor - * see user-visible numbering. file_override is the Sym (without - * surrounding quotes) used by __FILE__ when set. */ - i32 line_delta; - Sym file_override; -} TokSrc; - -typedef enum IfState { - IF_INCLUDE = 1, /* group active, emit code */ - IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */ - IF_DONE = 3, /* skip, already had a true branch */ -} IfState; - -typedef struct IfFrame { - u8 state; - u8 has_else; - u8 pad[2]; - SrcLoc loc; -} IfFrame; - -/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with - * deletion (#undef). See core/hashmap.h. */ -#include <cfree/support/hashmap.h> -static inline u32 macro_hash_(Sym s) { return cfree_hash_u32((u32)s); } -CFREE_HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_); - -/* ============================================================ - * Pp struct (definition shared across all three TUs) - * ============================================================ */ - -struct Pp { - Compiler* c; - Pool* pool; - - /* Source stack — top of stack is sources[nsources-1]. */ - TokSrc* sources; - u32 nsources; - u32 sources_cap; - - /* Macro table (open-addressed; key = Sym, value = Macro*). */ - MacroMap mtab; - - /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */ - IfFrame* ifstk; - u32 ifstk_n; - u32 ifstk_cap; - - /* Hideset table. Element 0 reserved as HS_EMPTY. */ - Hideset** hsets; - u32 hsets_n; - u32 hsets_cap; - - /* Include directories (stage 9). */ - struct { - const char* path; - u8 system; - }* inc_dirs; - u32 ninc_dirs; - u32 inc_dirs_cap; - - /* Current #pragma pack maximum field alignment. 0 means natural. */ - u32 pack_align; - u32 pack_stack[16]; - u32 pack_stack_n; - - /* Internal arena: macro bodies, hidesets, expansion buffers, file - * data for #include. Lives until pp_free. 
*/ - CfreeArena* arena; - - /* Cached interned identifiers used for directive recognition. */ - Sym sym_define; - Sym sym_undef; - Sym sym_include; - Sym sym_if; - Sym sym_ifdef; - Sym sym_ifndef; - Sym sym_elif; - Sym sym_else; - Sym sym_endif; - Sym sym_line; - Sym sym_pragma; - Sym sym_error; - Sym sym_warning; - Sym sym_embed; - Sym sym_defined; - Sym sym_va_args; - Sym sym_line__; /* __LINE__ */ - Sym sym_file__; /* __FILE__ */ - Sym sym_date__; /* __DATE__ */ - Sym sym_time__; /* __TIME__ */ - Sym sym_stdc__; /* __STDC__ */ - Sym sym_stdc_hosted__; - Sym sym_stdc_version__; - Sym sym__pragma; /* _Pragma operator */ - Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */ - - /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for - * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or - * time(NULL) if unset). */ - Sym val_date_str; - Sym val_time_str; - - /* Defined-operator handling during #if expansion. - * - * The first prepass in eval_if_expr replaces `defined X` / `defined - * (X)` literally found in the directive line, but `defined()` can - * also come from macro bodies (mingw's intrin-impl.h uses - * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When - * the expander processes such a body, the identifier inside - * `defined(...)` must NOT be macro-expanded — otherwise an empty - * macro X would turn `defined(X)` into `defined()` and the - * post-expansion prepass would reject it. - * - * This pair of fields tracks the state across `pp_next_raw` calls - * within `expand_for_if`: - * in_if_expansion: 1 inside an #if's expand_arg_to_eof call - * defined_skip: 0 normally; 1 after emitting `defined` - * (consume one IDENT before clearing); 2 after - * emitting `defined (` (waiting for IDENT then - * `)`). - * The expander uses these to mark the operand IDENT TF_NO_EXPAND - * before the macro-expansion check at the head of pp_next_raw. 
*/ - u8 in_if_expansion; - u8 defined_skip; -}; - -/* ============================================================ - * Allocation helpers (defined in pp.c, used everywhere) - * ============================================================ */ - -static inline Heap* pp_heap(Pp* pp) { return cfree_compiler_context(pp->c)->heap; } - -static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n, - size_t align) { - Heap* h = pp_heap(pp); - void* q = h->realloc(h, p, old_n, new_n, align); - if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory"); - return q; -} - -static inline void pp_xfree(Pp* pp, void* p, size_t n) { - if (p) pp_heap(pp)->free(pp_heap(pp), p, n); -} - -/* ============================================================ - * Token-vector helpers - * ============================================================ */ - -typedef struct TokVec { - Tok* data; - u32 n; - u32 cap; -} TokVec; - -typedef struct HsVec { - HidesetId* data; - u32 n; - u32 cap; -} HsVec; - -static inline void tv_grow(Pp* pp, TokVec* v, u32 want) { - u32 nc; - if (v->cap >= want) return; - nc = v->cap ? v->cap * 2 : 8; - while (nc < want) nc *= 2; - { - Tok* nb = arena_array(pp->arena, Tok, nc); - if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n); - v->data = nb; - v->cap = nc; - } -} - -static inline void tv_push(Pp* pp, TokVec* v, Tok t) { - tv_grow(pp, v, v->n + 1); - v->data[v->n++] = t; -} - -static inline void hsv_grow(Pp* pp, HsVec* v, u32 want) { - u32 nc; - if (v->cap >= want) return; - nc = v->cap ? v->cap * 2 : 8; - while (nc < want) nc *= 2; - { - HidesetId* nb = arena_array(pp->arena, HidesetId, nc); - if (v->n) memcpy(nb, v->data, sizeof(HidesetId) * v->n); - v->data = nb; - v->cap = nc; - } -} - -static inline void hsv_push(Pp* pp, HsVec* v, HidesetId hs) { - hsv_grow(pp, v, v->n + 1); - v->data[v->n++] = hs; -} - -/* Growable char buffer (arena-backed). 
*/ -typedef struct CharBuf { - char* data; - u32 len; - u32 cap; -} CharBuf; - -static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) { - if (b->len + n > b->cap) { - u32 nc = b->cap ? b->cap * 2 : 64; - while (nc < b->len + n) nc *= 2; - { - char* nb = (char*)arena_alloc(pp->arena, nc, 1); - if (b->len) memcpy(nb, b->data, b->len); - b->data = nb; - b->cap = nc; - } - } - if (n) memcpy(b->data + b->len, s, n); - b->len += n; -} - -static inline void cb_putc(Pp* pp, CharBuf* b, char c) { - cb_append(pp, b, &c, 1); -} - -/* ============================================================ - * Cross-module forward declarations - * ============================================================ */ - -/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */ -Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out); -void src_push(Pp* pp, TokSrc s); -void src_pop(Pp* pp); -void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n); - -/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it, - * and pp_next_raw drives directives and expansion. Declared non-static so - * pp_expand.c can call it without a forward decl each time. 
*/ -Tok pp_next_raw(Pp* pp); - -/* --- pp_expand.c → pp.c, pp_directive.c --- */ -HidesetId hs_add(Pp* pp, HidesetId id, Sym s); -int hs_contains(Pp* pp, HidesetId id, Sym s); -Macro* mt_get(Pp* pp, Sym name); -void mt_put(Pp* pp, Sym name, Macro* m); -void mt_del(Pp* pp, Sym name); -void expand_arg_to_eof(Pp* pp, Tok* in, HidesetId* hs, u32 nin, TokVec* out); - -/* --- pp_directive.c → pp_expand.c --- */ -i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc); -void process_directive(Pp* pp, SrcLoc hash_loc); - -/* --- pp_directive.c internal helpers called from pp_expand.c --- */ -void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc); -int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out); -int try_expand_pragma_op(Pp* pp, const Tok* invoke); - -/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */ -void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n); - -/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */ -void do_define(Pp* pp, const Tok* line, u32 n); -void do_undef(Pp* pp, const Tok* line, u32 n); - -/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */ -TokSrc* current_lex_src(Pp* pp); - -#endif /* CFREE_PP_PRIV_H */ diff --git a/lang/cpp/cpp.c b/lang/cpp/cpp.c @@ -0,0 +1,86 @@ +/* cpp.c — public entry point for the cfree C preprocessor. + * + * cfree_cpp_preprocess() runs the preprocessor under the standard + * frontend panic boundary and writes the resulting token stream as + * text to the caller's writer. This is the implementation behind + * `cfree cpp` and `cfree cc -E`; the full C frontend reuses it too. 
*/ + +#include <cfree/preprocess.h> + +#include "cpp_support.h" +#include "lex/lex.h" +#include "pp/pp.h" + +static SrcLoc cpp_no_loc(void) { + SrcLoc loc; + loc.file_id = 0; + loc.line = 0; + loc.col = 0; + return loc; +} + +static _Noreturn void cpp_bad_options(Compiler* c, const char* msg) { + compiler_panic(c, cpp_no_loc(), "bad preprocess options: %s", msg); +} + +static void cpp_apply_options(Pp* pp, const CfreePreprocessOptions* opts) { + u32 i; + + for (i = 0; i < opts->ninclude_dirs; ++i) { + pp_add_include_dir(pp, opts->include_dirs[i], 0); + } + for (i = 0; i < opts->nsystem_include_dirs; ++i) { + pp_add_include_dir(pp, opts->system_include_dirs[i], 1); + } + for (i = 0; i < opts->ndefines; ++i) { + const char* body = opts->defines[i].body ? opts->defines[i].body : "1"; + pp_define(pp, opts->defines[i].name, body); + } + for (i = 0; i < opts->nundefines; ++i) { + pp_undef(pp, opts->undefines[i]); + } +} + +typedef struct CppRun { + const CfreePreprocessOptions* opts; + const CfreeBytes* input; + CfreeWriter* out; +} CppRun; + +static CfreeStatus cpp_preprocess_body(CfreeCompiler* c, void* user) { + CppRun* r = (CppRun*)user; + Lexer* lex; + Pp* pp; + + const CfreePreprocessOptions* opts = r->opts; + const CfreeBytes* input = r->input; + CfreeWriter* out = r->out; + + if (!opts || !input || !out) { + cpp_bad_options(c, "preprocess args missing"); + } + if (!input->name) cpp_bad_options(c, "input name is NULL"); + if (!input->data && input->len != 0) { + cpp_bad_options(c, "input data is NULL but len > 0"); + } + + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); + pp = pp_new(c); + if (!lex || !pp) + compiler_panic(c, cpp_no_loc(), "C preprocessor out of memory"); + cpp_apply_options(pp, opts); + pp_push_input(pp, lex); + pp_emit_text(pp, out); + pp_free(pp); + return CFREE_OK; +} + +CfreeStatus cfree_cpp_preprocess(CfreeCompiler* c, + const CfreePreprocessOptions* opts, + const CfreeBytes* input, CfreeWriter* out) { + CppRun r; + 
r.opts = opts; + r.input = input; + r.out = out; + return cfree_frontend_run(c, cpp_preprocess_body, &r); /* the body receives &r as its user pointer */ +} diff --git a/lang/cpp/cpp_support.h b/lang/cpp/cpp_support.h @@ -0,0 +1,97 @@ +#ifndef CFREE_LANG_CPP_SUPPORT_H +#define CFREE_LANG_CPP_SUPPORT_H + +/* Shared substrate for the lexer and preprocessor: width-typed integer + * aliases, the Compiler/Heap/Writer/Sym/SrcLoc typedefs, the Pool + * abstraction over a per-frontend arena, arena allocation macros, and + * the panic helpers. Used by lang/cpp/ directly and re-exported by + * lang/c/c_support.h for the C frontend. */ + +#include <cfree/frontend.h> +#include <cfree/support/hashmap.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef CfreeCompiler Compiler; +typedef CfreeHeap Heap; +typedef CfreeWriter Writer; +typedef CfreeSym Sym; +typedef CfreeSrcLoc SrcLoc; +typedef u32 BytesId; /* type of LitInfo.bytes in lex/lex.h */ + +typedef struct Pool { + Compiler* c; /* compiler used by the pool_* wrappers and to find the heap in c_pool_free */ + CfreeArena* arena; /* created in c_pool_new, released in c_pool_free */ + void* type_cache; /* opaque slot owned by the C frontend; unused by cpp */ +} Pool; + +static inline Pool* c_pool_new(Compiler* c) { /* returns NULL when the context has no heap or an allocation fails */ + Heap* h = cfree_compiler_context(c)->heap; + Pool* p = h ? 
(Pool*)h->alloc(h, sizeof(*p), _Alignof(Pool)) : NULL; + if (!p) return NULL; + p->c = c; + p->arena = NULL; + p->type_cache = NULL; + if (cfree_arena_new(h, 0, &p->arena) != CFREE_OK || !p->arena) { + h->free(h, p, sizeof(*p)); /* arena creation failed: give the Pool storage back */ + return NULL; + } + return p; +} + +static inline void c_pool_free(Pool* p) { /* releases the arena, then the Pool itself; NULL is a no-op */ + Heap* h; + if (!p) return; + h = cfree_compiler_context(p->c)->heap; + cfree_arena_free(p->arena); + if (h) h->free(h, p, sizeof(*p)); +} + +static inline Sym pool_intern(Pool* p, const char* s, size_t len) { /* forwards to cfree_sym_intern_len on the pool's compiler */ + return cfree_sym_intern_len(p->c, s, len); +} + +static inline Sym pool_intern_cstr(Pool* p, const char* s) { /* forwards to cfree_sym_intern on the pool's compiler */ + return cfree_sym_intern(p->c, s); +} + +static inline const char* pool_str(Pool* p, Sym sym, size_t* len_out) { /* forwards to cfree_sym_str on the pool's compiler */ + return cfree_sym_str(p->c, sym, len_out); +} + +static inline const char* compiler_sym_str(Compiler* c, Sym sym, + size_t* len_out) { /* same lookup as pool_str for callers holding a Compiler instead of a Pool */ + return cfree_sym_str(c, sym, len_out); +} + +#define arena_alloc(a, size, align) cfree_arena_alloc((a), (size), (align)) +#define arena_zalloc(a, size, align) cfree_arena_zalloc((a), (size), (align)) +#define arena_strdup(a, s, len) cfree_arena_strdup((a), (s), (len)) +#define arena_new(a, T) cfree_arena_new_obj((a), T) +#define arena_znew(a, T) cfree_arena_znew_obj((a), T) +#define arena_array(a, T, n) cfree_arena_array((a), T, n) +#define arena_zarray(a, T, n) cfree_arena_zarray((a), T, n) + +_Noreturn static inline void compiler_panic(Compiler* c, SrcLoc loc, + const char* fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); /* no va_end: this function is _Noreturn and relies on cfree_frontend_vfatal not returning */ + cfree_frontend_vfatal(c, loc, fmt, ap); +} + +_Noreturn static inline void compiler_panicv(Compiler* c, SrcLoc loc, + const char* fmt, va_list ap) { /* va_list form of compiler_panic */ + cfree_frontend_vfatal(c, loc, fmt, ap); +} + +#endif diff --git a/lang/c/lex/lex.c b/lang/cpp/lex/lex.c diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h @@ -0,0 +1,127 @@ +#ifndef CFREE_LEX_H +#define CFREE_LEX_H + +#include "cpp_support.h" + +typedef enum TokKind { /* presumably the values stored in the u16 Tok.kind field — confirm */ + TOK_EOF = 0, + TOK_IDENT, /* v.ident */ + TOK_NUM, /* lit */ + TOK_FLT, /* lit */ + TOK_STR, /* lit; v.str is decoded bytes if target-independent */ + TOK_CHR, /* lit */ + TOK_PUNCT, /* v.punct */ + TOK_PP_HASH, /* # */ + TOK_PP_PASTE, /* ## */ + TOK_HEADER, /* header-name in #include / #embed */ + TOK_NEWLINE, /* visible to PP only */ + TOK_KW_FIRST, + /* C11 keywords are inserted into this range by parse_c via pool */ + TOK_KW_LAST = 0x1000, /* pp_priv.h places its internal TOK_PP_* kinds above this value */ +} TokKind; + +typedef enum TokFlag { /* bit flags; every value fits the u16 flags field of LitInfo (and presumably Tok) */ + TF_AT_BOL = 1u << 0, + TF_HAS_SPACE = 1u << 1, + TF_NO_EXPAND = 1u << 2, /* the preprocessor sets this on a defined() operand so it is not macro-expanded */ + TF_INT_U = 1u << 3, + TF_INT_L = 1u << 4, + TF_INT_LL = 1u << 5, + TF_FLT_F = 1u << 6, + TF_FLT_L = 1u << 7, + TF_STR_WIDE = 1u << 8, + TF_STR_U8 = 1u << 9, + TF_STR_U16 = 1u << 10, + TF_STR_U32 = 1u << 11, + TF_LITERAL_BAD = 1u << 12, +} TokFlag; + +typedef enum Punct { + P_NONE = 0, + /* Single-char punctuators reuse their ASCII codepoint here. 
*/ + P_ARROW = 256, /* first multi-character punctuator; above every single-byte code */ + P_INC, + P_DEC, + P_SHL, + P_SHR, + P_LE, + P_GE, + P_EQ, + P_NE, + P_AND, + P_OR, + P_ADD_ASSIGN, + P_SUB_ASSIGN, + P_MUL_ASSIGN, + P_DIV_ASSIGN, + P_MOD_ASSIGN, + P_AND_ASSIGN, + P_OR_ASSIGN, + P_XOR_ASSIGN, + P_SHL_ASSIGN, + P_SHR_ASSIGN, + P_ELLIPSIS, + P_HASH_HASH, +} Punct; + +typedef u32 LitId; +#define LIT_NONE 0u + +typedef enum LitKind { + LIT_INT, + LIT_FLOAT, + LIT_STRING, + LIT_CHAR, +} LitKind; + +typedef enum LitEnc { + LENC_ORDINARY, + LENC_UTF8, + LENC_WIDE, + LENC_UTF16, + LENC_UTF32, +} LitEnc; + +typedef struct LitInfo { + u8 kind; /* LitKind */ + u8 enc; /* LitEnc for strings/chars */ + u16 flags; /* TokFlag suffix/encoding bits */ + Sym spelling; /* exact source spelling */ + BytesId bytes; /* decoded bytes/code units, if already decoded */ +} LitInfo; + +typedef struct Tok { + u16 kind; /* presumably a TokKind, or an internal TOK_PP_* kind inside the preprocessor — confirm */ + u16 flags; /* presumably TokFlag bits — confirm */ + SrcLoc loc; + Sym spelling; /* exact token spelling for diagnostics/#/## */ + LitId lit; /* literal-table handle; LIT_NONE otherwise */ + union { + Sym ident; /* TOK_IDENT */ + Sym str; /* TOK_STR */ + u32 punct; /* TOK_PUNCT; macro bodies also store a parameter index here for TOK_PP_PARAM */ + } v; +} Tok; + +typedef struct Lexer Lexer; + +/* lex_open_mem borrows (src, len). The lexer does not copy source bytes; + * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics + * and the preprocessor's directive scanner read from the borrowed buffer. + * + * Ownership: a Lexer that has been handed to pp_push_input is owned by PP + * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call + * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly). + * + * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed + * Lexer means outliving pp_free. */ +Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); +void lex_close(Lexer*); + +/* Streaming. Returns TOK_EOF repeatedly at end of input. 
*/ +Tok lex_next(Lexer*); +SrcLoc lex_loc(const Lexer*); +u32 lex_file_id(const Lexer*); +const LitInfo* lex_lit(const Lexer*, LitId); + +#endif diff --git a/lang/c/pp/pp.c b/lang/cpp/pp/pp.c diff --git a/lang/c/pp/pp.h b/lang/cpp/pp/pp.h diff --git a/lang/c/pp/pp_directive.c b/lang/cpp/pp/pp_directive.c diff --git a/lang/c/pp/pp_expand.c b/lang/cpp/pp/pp_expand.c diff --git a/lang/cpp/pp/pp_priv.h b/lang/cpp/pp/pp_priv.h @@ -0,0 +1,328 @@ +/* pp_priv.h — shared types, helpers, and cross-module forward declarations + * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c). + * NOT part of the public API; included only within lang/cpp/pp/. */ + +#ifndef CFREE_PP_PRIV_H +#define CFREE_PP_PRIV_H + +#include <stdlib.h> +#include <string.h> + +#include "cpp_support.h" +#include "pp/pp.h" + +/* ============================================================ + * Internal token kinds + * ============================================================ */ + +/* Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */ +#define TOK_PP_PARAM ((u16)0x1100) +#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */ + +/* ============================================================ + * Types + * ============================================================ */ + +typedef struct Macro { + Sym name; + SrcLoc def_loc; + u8 is_func; /* presumably non-zero for a function-like macro — confirm in do_define */ + u8 is_variadic; + u8 pad[2]; + u32 n_params; /* presumably the number of entries in params — confirm */ + Sym* params; /* parameter names */ + Tok* body; /* body tokens; a parameter reference has kind TOK_PP_PARAM and v.punct = parameter index */ + u32 body_len; /* presumably the number of tokens in body — confirm */ +} Macro; + +typedef u32 HidesetId; +#define HS_EMPTY 0u + +typedef struct Hideset { + u32 n; /* presumably the number of names stored — confirm */ + Sym names[1]; /* flexible; allocated with extra trailing slots */ +} Hideset; + +typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind; /* presumably the tag stored in TokSrc.kind — confirm */ + +typedef struct TokSrc { + u8 kind; + /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is + * the top source and it's exhausted, instead of popping. The caller + * (e.g. 
argument pre-expansion) explicitly pops the scope when done. + * This bounds expansion to a single argument's token stream. */ + u8 scope_top; + u8 pad[2]; + /* SRC_LEX */ + Lexer* lex; + /* SRC_BUF */ + Tok* toks; + HidesetId* hs; /* presumably one hideset id per entry of toks — confirm in push_buf */ + u32 i; /* presumably the read cursor into toks — confirm */ + u32 n; /* presumably the number of entries in toks — confirm */ + /* #line state (SRC_LEX only). line_delta is added to every emitted + * token's loc.line on its way out so __LINE__ and the output cursor + * see user-visible numbering. file_override is the Sym (without + * surrounding quotes) used by __FILE__ when set. */ + i32 line_delta; + Sym file_override; +} TokSrc; + +typedef enum IfState { + IF_INCLUDE = 1, /* group active, emit code */ + IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */ + IF_DONE = 3, /* skip, already had a true branch */ +} IfState; + +typedef struct IfFrame { + u8 state; /* presumably an IfState value — confirm */ + u8 has_else; + u8 pad[2]; + SrcLoc loc; +} IfFrame; + +/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with + * deletion (#undef). See <cfree/support/hashmap.h> (also pulled in via + * cpp_support.h; the include below is redundant but harmless). */ +#include <cfree/support/hashmap.h> +static inline u32 macro_hash_(Sym s) { return cfree_hash_u32((u32)s); } +CFREE_HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_); + +/* ============================================================ + * Pp struct (definition shared across all three TUs) + * ============================================================ */ + +struct Pp { + Compiler* c; + Pool* pool; + + /* Source stack — top of stack is sources[nsources-1]. */ + TokSrc* sources; + u32 nsources; + u32 sources_cap; + + /* Macro table (open-addressed; key = Sym, value = Macro*). */ + MacroMap mtab; + + /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */ + IfFrame* ifstk; + u32 ifstk_n; + u32 ifstk_cap; + + /* Hideset table. Element 0 reserved as HS_EMPTY. */ + Hideset** hsets; + u32 hsets_n; + u32 hsets_cap; + + /* Include directories (stage 9). 
*/ + struct { + const char* path; + u8 system; /* presumably the system flag passed to pp_add_include_dir (1 = system dir) — confirm */ + }* inc_dirs; + u32 ninc_dirs; + u32 inc_dirs_cap; + + /* Current #pragma pack maximum field alignment. 0 means natural. */ + u32 pack_align; + u32 pack_stack[16]; /* NOTE(review): fixed depth of 16; presumably saved pack_align values for pack(push) — confirm the overflow handling in pp_directive.c */ + u32 pack_stack_n; + + /* Internal arena: macro bodies, hidesets, expansion buffers, file + * data for #include. Lives until pp_free. */ + CfreeArena* arena; + + /* Cached interned identifiers used for directive recognition. */ + Sym sym_define; + Sym sym_undef; + Sym sym_include; + Sym sym_if; + Sym sym_ifdef; + Sym sym_ifndef; + Sym sym_elif; + Sym sym_else; + Sym sym_endif; + Sym sym_line; + Sym sym_pragma; + Sym sym_error; + Sym sym_warning; + Sym sym_embed; + Sym sym_defined; + Sym sym_va_args; /* presumably __VA_ARGS__ — confirm in pp_new */ + Sym sym_line__; /* __LINE__ */ + Sym sym_file__; /* __FILE__ */ + Sym sym_date__; /* __DATE__ */ + Sym sym_time__; /* __TIME__ */ + Sym sym_stdc__; /* __STDC__ */ + Sym sym_stdc_hosted__; /* presumably __STDC_HOSTED__ — confirm in pp_new */ + Sym sym_stdc_version__; /* presumably __STDC_VERSION__ — confirm in pp_new */ + Sym sym__pragma; /* _Pragma operator */ + Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma; NOTE(review): looks like it may duplicate sym_pragma — confirm */ + + /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for + * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or + * time(NULL) if unset). */ + Sym val_date_str; + Sym val_time_str; + + /* Defined-operator handling during #if expansion. + * + * The first prepass in eval_if_expr replaces `defined X` / `defined + * (X)` literally found in the directive line, but `defined()` can + * also come from macro bodies (mingw's intrin-impl.h uses + * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When + * the expander processes such a body, the identifier inside + * `defined(...)` must NOT be macro-expanded — otherwise an empty + * macro X would turn `defined(X)` into `defined()` and the + * post-expansion prepass would reject it. 
+ * + * This pair of fields tracks the state across `pp_next_raw` calls + * within `expand_for_if`: + * in_if_expansion: 1 inside an #if's expand_arg_to_eof call + * defined_skip: 0 normally; 1 after emitting `defined` + * (consume one IDENT before clearing); 2 after + * emitting `defined (` (waiting for IDENT then + * `)`). + * The expander uses these to mark the operand IDENT TF_NO_EXPAND + * before the macro-expansion check at the head of pp_next_raw. */ + u8 in_if_expansion; + u8 defined_skip; +}; + +/* ============================================================ + * Allocation helpers (static inline, defined here; used by every pp TU) + * ============================================================ */ + +/* Heap of the compiler context that owns pp. */ +static inline Heap* pp_heap(Pp* pp) { return cfree_compiler_context(pp->c)->heap; } + +/* Heap realloc that never returns NULL: a NULL result from the heap is + * reported through compiler_panic instead of being handed to the caller. */ +static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n, + size_t align) { + Heap* h = pp_heap(pp); + void* q = h->realloc(h, p, old_n, new_n, align); + if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory"); + return q; +} + +/* Heap free that tolerates p == NULL. n is the size passed to the heap. */ +static inline void pp_xfree(Pp* pp, void* p, size_t n) { + if (p) pp_heap(pp)->free(pp_heap(pp), p, n); +} + +/* ============================================================ + * Token-vector helpers + * ============================================================ */ + +/* Growable array of tokens; storage comes from pp->arena (see tv_grow). */ +typedef struct TokVec { + Tok* data; + u32 n; + u32 cap; +} TokVec; + +/* Growable array of hideset ids; storage comes from pp->arena. */ +typedef struct HsVec { + HidesetId* data; + u32 n; + u32 cap; +} HsVec; + +/* Ensure v can hold at least `want` tokens. Capacity starts at 8 and + * doubles; the new buffer comes from pp->arena and the old one is simply + * left behind in the arena. Panics rather than letting the doubled u32 + * capacity wrap to 0, which would make the doubling loop spin forever. */ +static inline void tv_grow(Pp* pp, TokVec* v, u32 want) { + u32 nc; + if (v->cap >= want) return; + /* Start one step below the first candidate so every doubling, including + * the first, goes through the overflow check. */ + nc = v->cap ? v->cap : 4; + do { + if (nc > UINT32_MAX / 2u) + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: token vector too large"); + nc *= 2; + } while (nc < want); + { + Tok* nb = arena_array(pp->arena, Tok, nc); + if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n); + v->data = nb; + v->cap = nc; + } +} + +/* Append t to v, growing it first if needed. */ +static inline void tv_push(Pp* pp, TokVec* v, Tok t) { + tv_grow(pp, v, v->n + 1); + v->data[v->n++] = t; +} + +/* Ensure v can hold at least `want` hideset ids (capacity starts at 8 and + * doubles; storage comes from pp->arena). */ +static inline void hsv_grow(Pp* pp, HsVec* v, u32 want) { + u32 nc; + if (v->cap >= want) return; + nc = v->cap ? 
v->cap * 2 : 8; + /* nc == 0 here means the doubling above wrapped; panic rather than spin + * forever multiplying zero. The same check stops later doublings from + * wrapping. */ + while (nc < want) { + if (nc == 0 || nc > UINT32_MAX / 2u) + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: hideset vector too large"); + nc *= 2; + } + { + HidesetId* nb = arena_array(pp->arena, HidesetId, nc); + if (v->n) memcpy(nb, v->data, sizeof(HidesetId) * v->n); + v->data = nb; + v->cap = nc; + } +} + +/* Append hs to v, growing it first if needed. */ +static inline void hsv_push(Pp* pp, HsVec* v, HidesetId hs) { + hsv_grow(pp, v, v->n + 1); + v->data[v->n++] = hs; +} + +/* Growable char buffer (arena-backed). */ +typedef struct CharBuf { + char* data; + u32 len; + u32 cap; +} CharBuf; + +/* Append the n bytes at s to b. Capacity starts at 64 and doubles; the new + * buffer comes from pp->arena. All size arithmetic is checked so that a + * total length beyond the u32 range panics instead of wrapping, which would + * skip the grow and let memcpy write past the buffer. n == 0 is a no-op. */ +static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) { + if (n > b->cap - b->len) { /* len <= cap, so the subtraction cannot wrap */ + u32 need; + u32 nc = b->cap ? b->cap : 32; /* one step below the first candidate */ + if (n > UINT32_MAX - b->len) + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: string buffer too large"); + need = b->len + n; + do { + if (nc > UINT32_MAX / 2u) + compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: string buffer too large"); + nc *= 2; + } while (nc < need); + { + char* nb = (char*)arena_alloc(pp->arena, nc, 1); + if (b->len) memcpy(nb, b->data, b->len); + b->data = nb; + b->cap = nc; + } + } + if (n) memcpy(b->data + b->len, s, n); + b->len += n; +} + +/* Append the single character c to b. */ +static inline void cb_putc(Pp* pp, CharBuf* b, char c) { + cb_append(pp, b, &c, 1); +} + +/* ============================================================ + * Cross-module forward declarations + * ============================================================ */ + +/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */ +Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out); +void src_push(Pp* pp, TokSrc s); +void src_pop(Pp* pp); +void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n); + +/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it, + * and pp_next_raw drives directives and expansion. Declared non-static so + * pp_expand.c can call it without a forward decl each time. 
*/ +Tok pp_next_raw(Pp* pp); + +/* --- pp_expand.c → pp.c, pp_directive.c --- */ +HidesetId hs_add(Pp* pp, HidesetId id, Sym s); +int hs_contains(Pp* pp, HidesetId id, Sym s); +Macro* mt_get(Pp* pp, Sym name); +void mt_put(Pp* pp, Sym name, Macro* m); +void mt_del(Pp* pp, Sym name); +void expand_arg_to_eof(Pp* pp, Tok* in, HidesetId* hs, u32 nin, TokVec* out); + +/* --- pp_directive.c → pp_expand.c --- */ +i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc); +void process_directive(Pp* pp, SrcLoc hash_loc); + +/* --- pp_directive.c internal helpers called from pp_expand.c --- */ +void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc); +int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out); +int try_expand_pragma_op(Pp* pp, const Tok* invoke); + +/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */ +void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n); + +/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */ +void do_define(Pp* pp, const Tok* line, u32 n); +void do_undef(Pp* pp, const Tok* line, u32 n); + +/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */ +TokSrc* current_lex_src(Pp* pp); + +#endif /* CFREE_PP_PRIV_H */ diff --git a/mk/config.mk b/mk/config.mk @@ -14,6 +14,7 @@ CFREE_OBJ_ELF_ENABLED := $(call cfg_flag,CFREE_OBJ_ELF_ENABLED) CFREE_OBJ_MACHO_ENABLED := $(call cfg_flag,CFREE_OBJ_MACHO_ENABLED) CFREE_OBJ_COFF_ENABLED := $(call cfg_flag,CFREE_OBJ_COFF_ENABLED) +CFREE_LANG_CPP_ENABLED := $(call cfg_flag,CFREE_LANG_CPP_ENABLED) CFREE_LANG_C_ENABLED := $(call cfg_flag,CFREE_LANG_C_ENABLED) CFREE_LANG_TOY_ENABLED := $(call cfg_flag,CFREE_LANG_TOY_ENABLED) CFREE_LANG_WASM_ENABLED := $(call cfg_flag,CFREE_LANG_WASM_ENABLED) diff --git a/src/api/lang_registry.c b/src/api/lang_registry.c @@ -21,6 +21,9 @@ #include "cfree/config.h" +_Static_assert(!CFREE_LANG_C_ENABLED || CFREE_LANG_CPP_ENABLED, + "CFREE_LANG_C_ENABLED requires CFREE_LANG_CPP_ENABLED"); + /* Defined in 
src/api/compile.c, alongside the asm frontend's * new/compile/free functions. Treated as part of the codegen substrate * (no per-frontend lang/ directory), so its declaration lives here