commit 662ea223050b63f620b330a9f129a1b352dfc0af
parent 44c6219f7bef26da23a7e87681417a7b51ffca1c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 23 May 2026 14:19:04 -0700
lang: extract C preprocessor into lang/cpp/ behind CFREE_LANG_CPP_ENABLED
Pulls the lexer and preprocessor out of lang/c/ so the driver is
generic over frontends and a cpp-only build (no C parser/types) is
possible. lang/c/ now depends on lang/cpp/.
- lang/cpp/lex/, lang/cpp/pp/: moved verbatim from lang/c/
- lang/cpp/cpp_support.h: shared substrate (typedefs, Pool, arena
macros, compiler_panic) extracted from lang/c/c_support.h
- lang/cpp/cpp.c: cfree_cpp_preprocess() — moved from lang/c/c.c
- include/cfree/preprocess.h: new public header owning
CfreePreprocessOptions/CfreeDefine and declaring
cfree_cpp_preprocess(); compile.h re-includes it
- CFREE_LANG_CPP_ENABLED flag in config.h + mk/config.mk; the
Makefile gates lang/cpp/ and driver/cpp.c, and threads -Ilang/cpp
into lang/c/ builds; main.c gates the `cpp` subcommand
- _Static_assert in src/api/lang_registry.c enforces C ⇒ CPP
- cfree_c_preprocess() replaced by cfree_cpp_preprocess() at all
callers (driver/cpp.c, cc.c, as.c, runtime.c) — public API break
Verified: default all-on build clean; test-pp 85/85, test-pp-err
15/15, test-cg-api 955/0, test-toy, test-smoke-x64 pass. cpp-only
build (CFREE_LANG_C_ENABLED=0) builds clean and `cfree cpp` works
end-to-end. The bad combo (C on, CPP off) trips the static assert.
Diffstat:
25 files changed, 729 insertions(+), 623 deletions(-)
diff --git a/Makefile b/Makefile
@@ -51,11 +51,15 @@ endif
# Per-frontend source sets. Each is gated by its CFREE_LANG_*_ENABLED flag
# from mk/config.mk so the matching `#if` in src/api/lang_registry.c and
# the build agree on which frontends are compiled in.
+LANG_CPP_SRCS = $(shell find lang/cpp -name '*.c' 2>/dev/null)
LANG_C_SRCS = $(shell find lang/c -name '*.c' 2>/dev/null)
LANG_WASM_SRCS = $(shell find lang/wasm -name '*.c' 2>/dev/null)
LANG_TOY_SRCS = $(wildcard lang/toy/*.c)
LANG_OBJS =
+ifeq ($(CFREE_LANG_CPP_ENABLED),1)
+LANG_OBJS += $(patsubst lang/cpp/%.c,build/lang/cpp/%.o,$(LANG_CPP_SRCS))
+endif
ifeq ($(CFREE_LANG_C_ENABLED),1)
LANG_OBJS += $(patsubst lang/c/%.c,build/lang/c/%.o,$(LANG_C_SRCS))
endif
@@ -73,6 +77,9 @@ LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) \
LIB_DEPS = $(LIB_OBJS:.o=.d)
DRIVER_SRCS = $(wildcard driver/*.c)
+ifneq ($(CFREE_LANG_CPP_ENABLED),1)
+DRIVER_SRCS := $(filter-out driver/cpp.c,$(DRIVER_SRCS))
+endif
DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS))
DRIVER_DEPS = $(DRIVER_OBJS:.o=.d)
@@ -107,9 +114,16 @@ build/lib/api/lang_registry.o: src/api/lang_registry.c Makefile
@mkdir -p $(dir $@)
$(CC) $(LIB_CFLAGS) -Ilang $(DEPFLAGS) -c $< -o $@
+build/lang/cpp/%.o: lang/cpp/%.c Makefile
+ @mkdir -p $(dir $@)
+ $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp $(DEPFLAGS) -c $< -o $@
+
+# The C frontend includes the lexer and preprocessor headers (pp/pp.h,
+# lex/lex.h) which now live under lang/cpp/, and cpp_support.h is the
+# shared substrate. So lang/c objects build with -Ilang/cpp -Ilang/c.
build/lang/c/%.o: lang/c/%.c Makefile
@mkdir -p $(dir $@)
- $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/c $(DEPFLAGS) -c $< -o $@
+ $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp -Ilang/c $(DEPFLAGS) -c $< -o $@
build/lang/wasm/%.o: lang/wasm/%.c Makefile
@mkdir -p $(dir $@)
diff --git a/driver/as.c b/driver/as.c
@@ -1,15 +1,15 @@
#include <cfree/compile.h>
#include <cfree/core.h>
+#include <cfree/preprocess.h>
#include <stdint.h>
#include <string.h>
#include "cflags.h"
#include "driver.h"
-#include "lang/c/c.h"
/* `cfree as` — standalone assembler. Reads a single text source, writes a
* relocatable object via a CfreeCompileSession. `.S` inputs are
- * preprocessed first via cfree_c_preprocess; `.s` inputs are not. The
+ * preprocessed first via cfree_cpp_preprocess; `.s` inputs are not. The
* accepted input is a GAS subset (AT&T syntax on x86). */
#define AS_TOOL "as"
@@ -196,7 +196,7 @@ int driver_as(int argc, char** argv) {
driver_errf(AS_TOOL, "out of memory");
goto out;
}
- if (cfree_c_preprocess(compiler, &pp, &input, pp_writer) != CFREE_OK)
+ if (cfree_cpp_preprocess(compiler, &pp, &input, pp_writer) != CFREE_OK)
goto out;
if (cfree_writer_status(pp_writer) != CFREE_OK) {
driver_errf(AS_TOOL, "failed to preprocess: %s", o.source);
diff --git a/driver/cc.c b/driver/cc.c
@@ -3,6 +3,7 @@
#include <cfree/compile.h>
#include <cfree/core.h>
#include <cfree/link.h>
+#include <cfree/preprocess.h>
#include <stdint.h>
#include <string.h>
@@ -1620,8 +1621,9 @@ static int cc_preprocess(DriverEnv* env, const CcOptions* o,
goto out;
}
- rc =
- cfree_c_preprocess(compiler, pp_opts, &input, writer) == CFREE_OK ? 0 : 1;
+ rc = cfree_cpp_preprocess(compiler, pp_opts, &input, writer) == CFREE_OK
+ ? 0
+ : 1;
out:
if (compiler) driver_compiler_free(compiler);
@@ -2003,7 +2005,7 @@ static int cc_run_deps_only(DriverEnv* env, const CcOptions* o,
goto out;
}
- if (cfree_c_preprocess(compiler, pp, &input, discard) != CFREE_OK) goto out;
+ if (cfree_cpp_preprocess(compiler, pp, &input, discard) != CFREE_OK) goto out;
rc = cc_dep_finish(env, &ctx, compiler, o);
diff --git a/driver/cpp.c b/driver/cpp.c
@@ -2,10 +2,10 @@
#include <cfree/compile.h>
#include <cfree/core.h>
+#include <cfree/preprocess.h>
#include "cflags.h"
#include "driver.h"
-#include "lang/c/c.h"
/* `cfree cpp` — standalone C preprocessor. Reads one C source (path or
* `-` for stdin), writes the preprocessed token stream to `-o PATH` or
@@ -210,7 +210,7 @@ int driver_cpp(int argc, char** argv) {
}
rc =
- cfree_c_preprocess(compiler, &pp, &input, writer) == CFREE_OK ? 0 : 1;
+ cfree_cpp_preprocess(compiler, &pp, &input, writer) == CFREE_OK ? 0 : 1;
out:
if (compiler) driver_compiler_free(compiler);
diff --git a/driver/main.c b/driver/main.c
@@ -1,3 +1,5 @@
+#include <cfree/config.h>
+
#include "driver.h"
/* Multi-call dispatch. Looks at argv[0]'s basename for "cc", "as", "ld",
@@ -21,7 +23,9 @@
static int dispatch(const char* name, int argc, char** argv) {
if (driver_streq(name, "cc")) return driver_cc(argc, argv);
+#if CFREE_LANG_CPP_ENABLED
if (driver_streq(name, "cpp")) return driver_cpp(argc, argv);
+#endif
if (driver_streq(name, "as")) return driver_as(argc, argv);
if (driver_streq(name, "ld")) return driver_ld(argc, argv);
if (driver_streq(name, "ar")) return driver_ar(argc, argv);
@@ -42,10 +46,12 @@ static int print_tool_help(const char* name) {
driver_help_cc();
return 0;
}
+#if CFREE_LANG_CPP_ENABLED
if (driver_streq(name, "cpp")) {
driver_help_cpp();
return 0;
}
+#endif
if (driver_streq(name, "as")) {
driver_help_as();
return 0;
diff --git a/driver/runtime.c b/driver/runtime.c
@@ -3,11 +3,10 @@
#include <cfree/archive.h>
#include <cfree/compile.h>
#include <cfree/core.h>
+#include <cfree/preprocess.h>
#include <stdint.h>
#include <string.h>
-#include "lang/c/c.h"
-
#define RT_TOOL "cc"
typedef struct RuntimeVariant {
@@ -538,7 +537,7 @@ static int rt_compile_source(DriverEnv* env,
rt_free_pp(env, &pp, owned_dirs, owned_sizes, owned_count);
goto out;
}
- st = cfree_c_preprocess(compiler, &pp, &input, pp_writer);
+ st = cfree_cpp_preprocess(compiler, &pp, &input, pp_writer);
rt_free_pp(env, &pp, owned_dirs, owned_sizes, owned_count);
if (st != CFREE_OK || cfree_writer_status(pp_writer) != CFREE_OK)
goto out;
diff --git a/include/cfree/compile.h b/include/cfree/compile.h
@@ -3,6 +3,7 @@
#include <cfree/core.h>
#include <cfree/object.h>
+#include <cfree/preprocess.h>
/*
* Source compiler embedding API.
@@ -21,24 +22,6 @@ typedef enum CfreeLanguage {
CFREE_LANG_COUNT = 4,
} CfreeLanguage;
-typedef struct CfreeDefine {
- const char* name;
- const char* body; /* NULL means "1" */
-} CfreeDefine;
-
-typedef CfreeDefine CfreePredefinedMacro;
-
-typedef struct CfreePreprocessOptions {
- const char* const* include_dirs;
- uint32_t ninclude_dirs;
- const char* const* system_include_dirs;
- uint32_t nsystem_include_dirs;
- const CfreeDefine* defines;
- uint32_t ndefines;
- const char* const* undefines;
- uint32_t nundefines;
-} CfreePreprocessOptions;
-
typedef struct CfreeDiagnosticOptions {
bool warnings_are_errors;
uint32_t max_errors; /* 0 means unlimited */
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -33,7 +33,13 @@
#define CFREE_OBJ_COFF_ENABLED 1
/* Language frontends. The assembler frontend is unconditional: it lives
- * inside libcfree as part of the codegen substrate. */
+ * inside libcfree as part of the codegen substrate.
+ *
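+ * CFREE_LANG_CPP_ENABLED gates the C preprocessor (lang/cpp/). The C
+ * frontend depends on it; enabling CFREE_LANG_C_ENABLED without
+ * CFREE_LANG_CPP_ENABLED is a build-time error. The preprocessor can
+ * be enabled standalone (e.g. for `cfree cpp` only).
+ *
+ * NOTE(review): driver/as.c, driver/cc.c and driver/runtime.c also call
+ * cfree_cpp_preprocess(); confirm that a build with this flag set to 0
+ * still links. */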
+#define CFREE_LANG_CPP_ENABLED 1
#define CFREE_LANG_C_ENABLED 1
#define CFREE_LANG_TOY_ENABLED 1
#define CFREE_LANG_WASM_ENABLED 1
diff --git a/include/cfree/preprocess.h b/include/cfree/preprocess.h
@@ -0,0 +1,37 @@
+#ifndef CFREE_PREPROCESS_H
+#define CFREE_PREPROCESS_H
+
+#include <cfree/core.h>
+
+/*
+ * Public surface for the cfree C preprocessor (lang/cpp).
+ *
+ * The preprocessor consumes a single translation unit's source bytes
+ * and produces preprocessed C text. It is independent of the C
+ * frontend: when CFREE_LANG_CPP_ENABLED is set but
+ * CFREE_LANG_C_ENABLED is not, cfree_cpp_preprocess() is still
+ * available (used by `cfree cpp` and `cfree cc -E`).
+ */
+
+typedef struct CfreeDefine {
+ const char* name; /* macro name to define */
+ const char* body; /* replacement text; NULL means "1" */
+} CfreeDefine;
+
+typedef CfreeDefine CfreePredefinedMacro; /* alias of CfreeDefine */
+
+typedef struct CfreePreprocessOptions {
+ const char* const* include_dirs; /* include dirs, added before the system ones */
+ uint32_t ninclude_dirs; /* number of entries in include_dirs */
+ const char* const* system_include_dirs; /* system include dirs, added after include_dirs */
+ uint32_t nsystem_include_dirs; /* number of entries in system_include_dirs */
+ const CfreeDefine* defines; /* macros to define, applied in array order */
+ uint32_t ndefines; /* number of entries in defines */
+ const char* const* undefines; /* macro names undefined after all defines are applied */
+ uint32_t nundefines; /* number of entries in undefines */
+} CfreePreprocessOptions;
+
+/* Preprocess one source buffer and write the resulting text to `out`.
+ * `opts`, `input`, `out` and `input->name` must be non-NULL, and
+ * `input->data` may be NULL only when `input->len` is 0; violations are
+ * reported as a fatal frontend error instead of being dereferenced.
+ * Returns CFREE_OK on success. */
+CfreeStatus cfree_cpp_preprocess(CfreeCompiler* compiler,
+                                 const CfreePreprocessOptions* opts,
+                                 const CfreeBytes* input, CfreeWriter* out);
+
+#endif
diff --git a/lang/c/c.c b/lang/c/c.c
@@ -19,10 +19,6 @@ static _Noreturn void c_bad_options(Compiler* c, const char* msg) {
compiler_panic(c, c_no_loc(), "bad C frontend options: %s", msg);
}
-typedef struct CFrontend {
- CfreeCompiler* c;
-} CFrontend;
-
static void c_apply_pp_options(Pp* pp, const CfreePreprocessOptions* opts) {
u32 i;
@@ -41,49 +37,9 @@ static void c_apply_pp_options(Pp* pp, const CfreePreprocessOptions* opts) {
}
}
-typedef struct CPreprocessRun {
- const CfreePreprocessOptions* opts;
- const CfreeBytes* input;
- CfreeWriter* out;
-} CPreprocessRun;
-
-static CfreeStatus c_preprocess_body(CfreeCompiler* c, void* user) {
- CPreprocessRun* r = (CPreprocessRun*)user;
- Lexer* lex;
- Pp* pp;
-
- const CfreePreprocessOptions* opts = r->opts;
- const CfreeBytes* input = r->input;
- CfreeWriter* out = r->out;
-
- if (!opts || !input || !out) {
- c_bad_options(c, "preprocess args missing");
- }
- if (!input->name) c_bad_options(c, "input name is NULL");
- if (!input->data && input->len != 0) {
- c_bad_options(c, "input data is NULL but len > 0");
- }
-
- lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
- pp = pp_new(c);
- if (!lex || !pp)
- compiler_panic(c, c_no_loc(), "C preprocessor out of memory");
- c_apply_pp_options(pp, opts);
- pp_push_input(pp, lex);
- pp_emit_text(pp, out);
- pp_free(pp);
- return CFREE_OK;
-}
-
-CfreeStatus cfree_c_preprocess(CfreeCompiler* c,
- const CfreePreprocessOptions* opts,
- const CfreeBytes* input, CfreeWriter* out) {
- CPreprocessRun r;
- r.opts = opts;
- r.input = input;
- r.out = out;
- return cfree_frontend_run(c, c_preprocess_body, &r);
-}
+typedef struct CFrontend {
+ CfreeCompiler* c;
+} CFrontend;
static CfreeFrontendState* c_frontend_new(CfreeCompiler* c) {
CfreeHeap* h;
diff --git a/lang/c/c.h b/lang/c/c.h
@@ -13,18 +13,14 @@
* frontend casts language_options back to CfreeCCompileOptions* to recover
* the preprocessor and diagnostic configuration.
*
- * cfree_c_preprocess is a standalone helper driven by the driver's -E
- * mode and by -S/.s preprocessing; it runs under the same frontend panic
- * boundary as cfree_c_compile via cfree_frontend_run. */
+ * Standalone preprocessing has moved to <cfree/preprocess.h>'s
+ * cfree_cpp_preprocess(). */
#include <cfree/compile.h>
#include <cfree/core.h>
#include <cfree/frontend.h>
#include <cfree/object.h>
-CfreeStatus cfree_c_preprocess(CfreeCompiler*, const CfreePreprocessOptions*,
- const CfreeBytes*, CfreeWriter*);
-
extern const CfreeFrontendVTable cfree_c_frontend_vtable;
#endif
diff --git a/lang/c/c_support.h b/lang/c/c_support.h
@@ -1,97 +1,17 @@
#ifndef CFREE_LANG_C_SUPPORT_H
#define CFREE_LANG_C_SUPPORT_H
-#include <cfree/frontend.h>
-#include <cfree/support/hashmap.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
+/* C-frontend support layer. Re-exports the lexer/preprocessor shared
+ * substrate (cpp_support.h) and adds C-specific helpers used by the
+ * parser, declarations, and types. */
-typedef int8_t i8;
-typedef int16_t i16;
-typedef int32_t i32;
-typedef int64_t i64;
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
+#include "cpp_support.h"
-typedef CfreeCompiler Compiler;
-typedef CfreeHeap Heap;
-typedef CfreeWriter Writer;
-typedef CfreeSym Sym;
-typedef CfreeSrcLoc SrcLoc;
typedef CfreeCompiler TargetABI;
-typedef u32 BytesId;
typedef struct SrcRange {
SrcLoc begin;
SrcLoc end;
} SrcRange;
-typedef struct Pool {
- Compiler* c;
- CfreeArena* arena;
- void* type_cache;
-} Pool;
-
-static inline Pool* c_pool_new(Compiler* c) {
- Heap* h = cfree_compiler_context(c)->heap;
- Pool* p = h ? (Pool*)h->alloc(h, sizeof(*p), _Alignof(Pool)) : NULL;
- if (!p) return NULL;
- p->c = c;
- p->arena = NULL;
- p->type_cache = NULL;
- if (cfree_arena_new(h, 0, &p->arena) != CFREE_OK || !p->arena) {
- h->free(h, p, sizeof(*p));
- return NULL;
- }
- return p;
-}
-
-static inline void c_pool_free(Pool* p) {
- Heap* h;
- if (!p) return;
- h = cfree_compiler_context(p->c)->heap;
- cfree_arena_free(p->arena);
- if (h) h->free(h, p, sizeof(*p));
-}
-
-static inline Sym pool_intern(Pool* p, const char* s, size_t len) {
- return cfree_sym_intern_len(p->c, s, len);
-}
-
-static inline Sym pool_intern_cstr(Pool* p, const char* s) {
- return cfree_sym_intern(p->c, s);
-}
-
-static inline const char* pool_str(Pool* p, Sym sym, size_t* len_out) {
- return cfree_sym_str(p->c, sym, len_out);
-}
-
-static inline const char* compiler_sym_str(Compiler* c, Sym sym,
- size_t* len_out) {
- return cfree_sym_str(c, sym, len_out);
-}
-
-#define arena_alloc(a, size, align) cfree_arena_alloc((a), (size), (align))
-#define arena_zalloc(a, size, align) cfree_arena_zalloc((a), (size), (align))
-#define arena_strdup(a, s, len) cfree_arena_strdup((a), (s), (len))
-#define arena_new(a, T) cfree_arena_new_obj((a), T)
-#define arena_znew(a, T) cfree_arena_znew_obj((a), T)
-#define arena_array(a, T, n) cfree_arena_array((a), T, n)
-#define arena_zarray(a, T, n) cfree_arena_zarray((a), T, n)
-
-_Noreturn static inline void compiler_panic(Compiler* c, SrcLoc loc,
- const char* fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- cfree_frontend_vfatal(c, loc, fmt, ap);
-}
-
-_Noreturn static inline void compiler_panicv(Compiler* c, SrcLoc loc,
- const char* fmt, va_list ap) {
- cfree_frontend_vfatal(c, loc, fmt, ap);
-}
-
#endif
diff --git a/lang/c/lex/lex.h b/lang/c/lex/lex.h
@@ -1,127 +0,0 @@
-#ifndef CFREE_LEX_H
-#define CFREE_LEX_H
-
-#include "c_support.h"
-
-typedef enum TokKind {
- TOK_EOF = 0,
- TOK_IDENT, /* v.ident */
- TOK_NUM, /* lit */
- TOK_FLT, /* lit */
- TOK_STR, /* lit; v.str is decoded bytes if target-independent */
- TOK_CHR, /* lit */
- TOK_PUNCT, /* v.punct */
- TOK_PP_HASH, /* # */
- TOK_PP_PASTE, /* ## */
- TOK_HEADER, /* header-name in #include / #embed */
- TOK_NEWLINE, /* visible to PP only */
- TOK_KW_FIRST,
- /* C11 keywords are inserted into this range by parse_c via pool */
- TOK_KW_LAST = 0x1000,
-} TokKind;
-
-typedef enum TokFlag {
- TF_AT_BOL = 1u << 0,
- TF_HAS_SPACE = 1u << 1,
- TF_NO_EXPAND = 1u << 2,
- TF_INT_U = 1u << 3,
- TF_INT_L = 1u << 4,
- TF_INT_LL = 1u << 5,
- TF_FLT_F = 1u << 6,
- TF_FLT_L = 1u << 7,
- TF_STR_WIDE = 1u << 8,
- TF_STR_U8 = 1u << 9,
- TF_STR_U16 = 1u << 10,
- TF_STR_U32 = 1u << 11,
- TF_LITERAL_BAD = 1u << 12,
-} TokFlag;
-
-typedef enum Punct {
- P_NONE = 0,
- /* Single-char punctuators reuse their ASCII codepoint here. */
- P_ARROW = 256,
- P_INC,
- P_DEC,
- P_SHL,
- P_SHR,
- P_LE,
- P_GE,
- P_EQ,
- P_NE,
- P_AND,
- P_OR,
- P_ADD_ASSIGN,
- P_SUB_ASSIGN,
- P_MUL_ASSIGN,
- P_DIV_ASSIGN,
- P_MOD_ASSIGN,
- P_AND_ASSIGN,
- P_OR_ASSIGN,
- P_XOR_ASSIGN,
- P_SHL_ASSIGN,
- P_SHR_ASSIGN,
- P_ELLIPSIS,
- P_HASH_HASH,
-} Punct;
-
-typedef u32 LitId;
-#define LIT_NONE 0u
-
-typedef enum LitKind {
- LIT_INT,
- LIT_FLOAT,
- LIT_STRING,
- LIT_CHAR,
-} LitKind;
-
-typedef enum LitEnc {
- LENC_ORDINARY,
- LENC_UTF8,
- LENC_WIDE,
- LENC_UTF16,
- LENC_UTF32,
-} LitEnc;
-
-typedef struct LitInfo {
- u8 kind; /* LitKind */
- u8 enc; /* LitEnc for strings/chars */
- u16 flags; /* TokFlag suffix/encoding bits */
- Sym spelling; /* exact source spelling */
- BytesId bytes; /* decoded bytes/code units, if already decoded */
-} LitInfo;
-
-typedef struct Tok {
- u16 kind;
- u16 flags;
- SrcLoc loc;
- Sym spelling; /* exact token spelling for diagnostics/#/## */
- LitId lit; /* literal-table handle; LIT_NONE otherwise */
- union {
- Sym ident;
- Sym str;
- u32 punct;
- } v;
-} Tok;
-
-typedef struct Lexer Lexer;
-
-/* lex_open_mem borrows (src, len). The lexer does not copy source bytes;
- * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics
- * and the preprocessor's directive scanner read from the borrowed buffer.
- *
- * Ownership: a Lexer that has been handed to pp_push_input is owned by PP
- * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call
- * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly).
- *
- * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed
- * Lexer means outliving pp_free. */
-Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len);
-void lex_close(Lexer*);
-
-/* Streaming. Returns TOK_EOF repeatedly at end of input. */
-Tok lex_next(Lexer*);
-SrcLoc lex_loc(const Lexer*);
-u32 lex_file_id(const Lexer*);
-const LitInfo* lex_lit(const Lexer*, LitId);
-
-#endif
diff --git a/lang/c/pp/pp_priv.h b/lang/c/pp/pp_priv.h
@@ -1,328 +0,0 @@
-/* pp_priv.h — shared types, helpers, and cross-module forward declarations
- * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c).
- * NOT part of the public API; included only within lang/c/pp/. */
-
-#ifndef CFREE_PP_PRIV_H
-#define CFREE_PP_PRIV_H
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "c_support.h"
-#include "pp/pp.h"
-
-/* ============================================================
- * Internal token kinds
- * ============================================================ */
-
-/* Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */
-#define TOK_PP_PARAM ((u16)0x1100)
-#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */
-
-/* ============================================================
- * Types
- * ============================================================ */
-
-typedef struct Macro {
- Sym name;
- SrcLoc def_loc;
- u8 is_func;
- u8 is_variadic;
- u8 pad[2];
- u32 n_params;
- Sym* params; /* parameter names */
- Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */
- u32 body_len;
-} Macro;
-
-typedef u32 HidesetId;
-#define HS_EMPTY 0u
-
-typedef struct Hideset {
- u32 n;
- Sym names[1]; /* flexible; allocated with extra trailing slots */
-} Hideset;
-
-typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind;
-
-typedef struct TokSrc {
- u8 kind;
- /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is
- * the top source and it's exhausted, instead of popping. The caller
- * (e.g. argument pre-expansion) explicitly pops the scope when done.
- * This bounds expansion to a single argument's token stream. */
- u8 scope_top;
- u8 pad[2];
- /* SRC_LEX */
- Lexer* lex;
- /* SRC_BUF */
- Tok* toks;
- HidesetId* hs;
- u32 i;
- u32 n;
- /* #line state (SRC_LEX only). line_delta is added to every emitted
- * token's loc.line on its way out so __LINE__ and the output cursor
- * see user-visible numbering. file_override is the Sym (without
- * surrounding quotes) used by __FILE__ when set. */
- i32 line_delta;
- Sym file_override;
-} TokSrc;
-
-typedef enum IfState {
- IF_INCLUDE = 1, /* group active, emit code */
- IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */
- IF_DONE = 3, /* skip, already had a true branch */
-} IfState;
-
-typedef struct IfFrame {
- u8 state;
- u8 has_else;
- u8 pad[2];
- SrcLoc loc;
-} IfFrame;
-
-/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with
- * deletion (#undef). See core/hashmap.h. */
-#include <cfree/support/hashmap.h>
-static inline u32 macro_hash_(Sym s) { return cfree_hash_u32((u32)s); }
-CFREE_HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_);
-
-/* ============================================================
- * Pp struct (definition shared across all three TUs)
- * ============================================================ */
-
-struct Pp {
- Compiler* c;
- Pool* pool;
-
- /* Source stack — top of stack is sources[nsources-1]. */
- TokSrc* sources;
- u32 nsources;
- u32 sources_cap;
-
- /* Macro table (open-addressed; key = Sym, value = Macro*). */
- MacroMap mtab;
-
- /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */
- IfFrame* ifstk;
- u32 ifstk_n;
- u32 ifstk_cap;
-
- /* Hideset table. Element 0 reserved as HS_EMPTY. */
- Hideset** hsets;
- u32 hsets_n;
- u32 hsets_cap;
-
- /* Include directories (stage 9). */
- struct {
- const char* path;
- u8 system;
- }* inc_dirs;
- u32 ninc_dirs;
- u32 inc_dirs_cap;
-
- /* Current #pragma pack maximum field alignment. 0 means natural. */
- u32 pack_align;
- u32 pack_stack[16];
- u32 pack_stack_n;
-
- /* Internal arena: macro bodies, hidesets, expansion buffers, file
- * data for #include. Lives until pp_free. */
- CfreeArena* arena;
-
- /* Cached interned identifiers used for directive recognition. */
- Sym sym_define;
- Sym sym_undef;
- Sym sym_include;
- Sym sym_if;
- Sym sym_ifdef;
- Sym sym_ifndef;
- Sym sym_elif;
- Sym sym_else;
- Sym sym_endif;
- Sym sym_line;
- Sym sym_pragma;
- Sym sym_error;
- Sym sym_warning;
- Sym sym_embed;
- Sym sym_defined;
- Sym sym_va_args;
- Sym sym_line__; /* __LINE__ */
- Sym sym_file__; /* __FILE__ */
- Sym sym_date__; /* __DATE__ */
- Sym sym_time__; /* __TIME__ */
- Sym sym_stdc__; /* __STDC__ */
- Sym sym_stdc_hosted__;
- Sym sym_stdc_version__;
- Sym sym__pragma; /* _Pragma operator */
- Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */
-
- /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for
- * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or
- * time(NULL) if unset). */
- Sym val_date_str;
- Sym val_time_str;
-
- /* Defined-operator handling during #if expansion.
- *
- * The first prepass in eval_if_expr replaces `defined X` / `defined
- * (X)` literally found in the directive line, but `defined()` can
- * also come from macro bodies (mingw's intrin-impl.h uses
- * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When
- * the expander processes such a body, the identifier inside
- * `defined(...)` must NOT be macro-expanded — otherwise an empty
- * macro X would turn `defined(X)` into `defined()` and the
- * post-expansion prepass would reject it.
- *
- * This pair of fields tracks the state across `pp_next_raw` calls
- * within `expand_for_if`:
- * in_if_expansion: 1 inside an #if's expand_arg_to_eof call
- * defined_skip: 0 normally; 1 after emitting `defined`
- * (consume one IDENT before clearing); 2 after
- * emitting `defined (` (waiting for IDENT then
- * `)`).
- * The expander uses these to mark the operand IDENT TF_NO_EXPAND
- * before the macro-expansion check at the head of pp_next_raw. */
- u8 in_if_expansion;
- u8 defined_skip;
-};
-
-/* ============================================================
- * Allocation helpers (defined in pp.c, used everywhere)
- * ============================================================ */
-
-static inline Heap* pp_heap(Pp* pp) { return cfree_compiler_context(pp->c)->heap; }
-
-static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n,
- size_t align) {
- Heap* h = pp_heap(pp);
- void* q = h->realloc(h, p, old_n, new_n, align);
- if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory");
- return q;
-}
-
-static inline void pp_xfree(Pp* pp, void* p, size_t n) {
- if (p) pp_heap(pp)->free(pp_heap(pp), p, n);
-}
-
-/* ============================================================
- * Token-vector helpers
- * ============================================================ */
-
-typedef struct TokVec {
- Tok* data;
- u32 n;
- u32 cap;
-} TokVec;
-
-typedef struct HsVec {
- HidesetId* data;
- u32 n;
- u32 cap;
-} HsVec;
-
-static inline void tv_grow(Pp* pp, TokVec* v, u32 want) {
- u32 nc;
- if (v->cap >= want) return;
- nc = v->cap ? v->cap * 2 : 8;
- while (nc < want) nc *= 2;
- {
- Tok* nb = arena_array(pp->arena, Tok, nc);
- if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n);
- v->data = nb;
- v->cap = nc;
- }
-}
-
-static inline void tv_push(Pp* pp, TokVec* v, Tok t) {
- tv_grow(pp, v, v->n + 1);
- v->data[v->n++] = t;
-}
-
-static inline void hsv_grow(Pp* pp, HsVec* v, u32 want) {
- u32 nc;
- if (v->cap >= want) return;
- nc = v->cap ? v->cap * 2 : 8;
- while (nc < want) nc *= 2;
- {
- HidesetId* nb = arena_array(pp->arena, HidesetId, nc);
- if (v->n) memcpy(nb, v->data, sizeof(HidesetId) * v->n);
- v->data = nb;
- v->cap = nc;
- }
-}
-
-static inline void hsv_push(Pp* pp, HsVec* v, HidesetId hs) {
- hsv_grow(pp, v, v->n + 1);
- v->data[v->n++] = hs;
-}
-
-/* Growable char buffer (arena-backed). */
-typedef struct CharBuf {
- char* data;
- u32 len;
- u32 cap;
-} CharBuf;
-
-static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) {
- if (b->len + n > b->cap) {
- u32 nc = b->cap ? b->cap * 2 : 64;
- while (nc < b->len + n) nc *= 2;
- {
- char* nb = (char*)arena_alloc(pp->arena, nc, 1);
- if (b->len) memcpy(nb, b->data, b->len);
- b->data = nb;
- b->cap = nc;
- }
- }
- if (n) memcpy(b->data + b->len, s, n);
- b->len += n;
-}
-
-static inline void cb_putc(Pp* pp, CharBuf* b, char c) {
- cb_append(pp, b, &c, 1);
-}
-
-/* ============================================================
- * Cross-module forward declarations
- * ============================================================ */
-
-/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */
-Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out);
-void src_push(Pp* pp, TokSrc s);
-void src_pop(Pp* pp);
-void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n);
-
-/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it,
- * and pp_next_raw drives directives and expansion. Declared non-static so
- * pp_expand.c can call it without a forward decl each time. */
-Tok pp_next_raw(Pp* pp);
-
-/* --- pp_expand.c → pp.c, pp_directive.c --- */
-HidesetId hs_add(Pp* pp, HidesetId id, Sym s);
-int hs_contains(Pp* pp, HidesetId id, Sym s);
-Macro* mt_get(Pp* pp, Sym name);
-void mt_put(Pp* pp, Sym name, Macro* m);
-void mt_del(Pp* pp, Sym name);
-void expand_arg_to_eof(Pp* pp, Tok* in, HidesetId* hs, u32 nin, TokVec* out);
-
-/* --- pp_directive.c → pp_expand.c --- */
-i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
-void process_directive(Pp* pp, SrcLoc hash_loc);
-
-/* --- pp_directive.c internal helpers called from pp_expand.c --- */
-void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
-int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out);
-int try_expand_pragma_op(Pp* pp, const Tok* invoke);
-
-/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */
-void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n);
-
-/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */
-void do_define(Pp* pp, const Tok* line, u32 n);
-void do_undef(Pp* pp, const Tok* line, u32 n);
-
-/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */
-TokSrc* current_lex_src(Pp* pp);
-
-#endif /* CFREE_PP_PRIV_H */
diff --git a/lang/cpp/cpp.c b/lang/cpp/cpp.c
@@ -0,0 +1,86 @@
+/* cpp.c — public entry point for the cfree C preprocessor.
+ *
+ * cfree_cpp_preprocess() runs the preprocessor under the standard
+ * frontend panic boundary and writes the resulting token stream as
+ * text to the caller's writer. This is the implementation behind
+ * `cfree cpp`, `cfree cc -E` and the `.S` path of `cfree as`; the full
+ * C frontend shares the same lex/ and pp/ modules. */
+
+#include <cfree/preprocess.h>
+
+#include "cpp_support.h"
+#include "lex/lex.h"
+#include "pp/pp.h"
+
+/* Location used for diagnostics that have no source position: file id,
+ * line and column are all zero. */
+static SrcLoc cpp_no_loc(void) {
+  SrcLoc nowhere = {.file_id = 0, .line = 0, .col = 0};
+  return nowhere;
+}
+
+/* Report MSG as a "bad preprocess options" error through compiler_panic(),
+ * with no source location attached. Does not return. */
+static _Noreturn void cpp_bad_options(Compiler* c, const char* msg) {
+  SrcLoc nowhere = cpp_no_loc();
+  compiler_panic(c, nowhere, "bad preprocess options: %s", msg);
+}
+
+/* Forward the caller's options to PP in a fixed order: include dirs,
+ * system include dirs, macro definitions, then macro undefinitions. A
+ * definition whose body is NULL is defined as "1". */
+static void cpp_apply_options(Pp* pp, const CfreePreprocessOptions* opts) {
+  u32 k;
+
+  for (k = 0; k < opts->ninclude_dirs; k++)
+    pp_add_include_dir(pp, opts->include_dirs[k], 0);
+
+  for (k = 0; k < opts->nsystem_include_dirs; k++)
+    pp_add_include_dir(pp, opts->system_include_dirs[k], 1);
+
+  for (k = 0; k < opts->ndefines; k++) {
+    const CfreeDefine* def = &opts->defines[k];
+    const char* body = "1";
+    if (def->body) body = def->body;
+    pp_define(pp, def->name, body);
+  }
+
+  for (k = 0; k < opts->nundefines; k++)
+    pp_undef(pp, opts->undefines[k]);
+}
+
+typedef struct CppRun {
+ const CfreePreprocessOptions* opts; /* caller's options, forwarded unchanged */
+ const CfreeBytes* input; /* caller's source buffer */
+ CfreeWriter* out; /* caller's output writer */
+} CppRun;
+
+/* Frontend-run callback behind cfree_cpp_preprocess(). USER is the CppRun
+ * built by that function. Validates the arguments, opens a lexer over the
+ * input bytes, applies the caller's options to a fresh Pp, and emits the
+ * preprocessed text to the writer. Returns CFREE_OK; the failure paths
+ * visible here go through compiler_panic()/cpp_bad_options(), which do
+ * not return. */
+static CfreeStatus cpp_preprocess_body(CfreeCompiler* c, void* user) {
+  CppRun* r = (CppRun*)user;
+  Lexer* lex;
+  Pp* pp;
+
+  const CfreePreprocessOptions* opts = r->opts;
+  const CfreeBytes* input = r->input;
+  CfreeWriter* out = r->out;
+
+  if (!opts || !input || !out) {
+    cpp_bad_options(c, "preprocess args missing");
+  }
+  if (!input->name) cpp_bad_options(c, "input name is NULL");
+  if (!input->data && input->len != 0) {
+    cpp_bad_options(c, "input data is NULL but len > 0");
+  }
+
+  lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
+  pp = pp_new(c);
+  if (!lex || !pp) {
+    /* compiler_panic() never returns, so whichever object was created
+     * must be released here or it is never released. The lexer has not
+     * been pushed yet, so it is still ours to close. */
+    if (pp) pp_free(pp);
+    if (lex) lex_close(lex);
+    compiler_panic(c, cpp_no_loc(), "C preprocessor out of memory");
+  }
+  cpp_apply_options(pp, opts);
+  pp_push_input(pp, lex); /* from here on the Pp owns the lexer */
+  pp_emit_text(pp, out);
+  pp_free(pp);
+  return CFREE_OK;
+}
+
+/* Public entry point: bundle the arguments into a CppRun and execute
+ * cpp_preprocess_body() through cfree_frontend_run(), returning its
+ * status. */
+CfreeStatus cfree_cpp_preprocess(CfreeCompiler* c,
+                                 const CfreePreprocessOptions* opts,
+                                 const CfreeBytes* input, CfreeWriter* out) {
+  CppRun run = {.opts = opts, .input = input, .out = out};
+  return cfree_frontend_run(c, cpp_preprocess_body, &run);
+}
diff --git a/lang/cpp/cpp_support.h b/lang/cpp/cpp_support.h
@@ -0,0 +1,97 @@
+#ifndef CFREE_LANG_CPP_SUPPORT_H
+#define CFREE_LANG_CPP_SUPPORT_H
+
+/* Shared substrate for the lexer and preprocessor: width-typed integer
+ * aliases, the Compiler/Heap/Writer/Sym/SrcLoc typedefs, the Pool
+ * abstraction over a per-frontend arena, arena allocation macros, and
+ * the panic helpers. Used by lang/cpp/ directly and re-exported by
+ * lang/c/c_support.h for the C frontend. */
+
+#include <cfree/frontend.h>
+#include <cfree/support/hashmap.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+/* Fixed-width integer shorthands. */
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+/* Short local aliases for the public Cfree* types. */
+typedef CfreeCompiler Compiler;
+typedef CfreeHeap Heap;
+typedef CfreeWriter Writer;
+typedef CfreeSym Sym;
+typedef CfreeSrcLoc SrcLoc;
+typedef u32 BytesId; /* type of LitInfo.bytes in lex/lex.h */
+
+typedef struct Pool {
+ Compiler* c; /* compiler that pool_intern / pool_str forward to */
+ CfreeArena* arena; /* created by c_pool_new, released by c_pool_free */
+ void* type_cache; /* opaque slot owned by the C frontend; unused by cpp */
+} Pool;
+/* Allocate a Pool on the compiler's heap and give it a fresh arena.
+ * Returns NULL when there is no heap or when either allocation fails. */
+static inline Pool* c_pool_new(Compiler* c) {
+  Heap* heap = cfree_compiler_context(c)->heap;
+  Pool* pool;
+  if (!heap) return NULL;
+  pool = (Pool*)heap->alloc(heap, sizeof(*pool), _Alignof(Pool));
+  if (!pool) return NULL;
+  *pool = (Pool){.c = c, .arena = NULL, .type_cache = NULL};
+  if (cfree_arena_new(heap, 0, &pool->arena) == CFREE_OK && pool->arena)
+    return pool;
+  heap->free(heap, pool, sizeof(*pool)); /* arena failed: undo the Pool */
+  return NULL;
+}
+/* Release p's arena, then p itself through the compiler heap; NULL is a no-op. */
+static inline void c_pool_free(Pool* p) {
+  if (p) {
+    Heap* heap = cfree_compiler_context(p->c)->heap;
+    cfree_arena_free(p->arena);
+    if (heap) heap->free(heap, p, sizeof(*p));
+  }
+}
+/* Intern the len-byte string s via cfree_sym_intern_len on the pool's compiler. */
+static inline Sym pool_intern(Pool* p, const char* s, size_t len) {
+ return cfree_sym_intern_len(p->c, s, len);
+}
+/* Intern s via cfree_sym_intern (no length given; presumably NUL-terminated). */
+static inline Sym pool_intern_cstr(Pool* p, const char* s) {
+ return cfree_sym_intern(p->c, s);
+}
+/* Look up sym's text via cfree_sym_str; len_out is forwarded to it unchanged. */
+static inline const char* pool_str(Pool* p, Sym sym, size_t* len_out) {
+ return cfree_sym_str(p->c, sym, len_out);
+}
+/* Same lookup as pool_str, for callers holding a Compiler instead of a Pool. */
+static inline const char* compiler_sym_str(Compiler* c, Sym sym,
+ size_t* len_out) {
+ return cfree_sym_str(c, sym, len_out);
+}
+/* Short spellings for the cfree_arena_* API; arguments pass straight through. */
+#define arena_alloc(a, size, align) cfree_arena_alloc((a), (size), (align))
+#define arena_zalloc(a, size, align) cfree_arena_zalloc((a), (size), (align))
+#define arena_strdup(a, s, len) cfree_arena_strdup((a), (s), (len))
+#define arena_new(a, T) cfree_arena_new_obj((a), T)
+#define arena_znew(a, T) cfree_arena_znew_obj((a), T)
+#define arena_array(a, T, n) cfree_arena_array((a), T, n)
+#define arena_zarray(a, T, n) cfree_arena_zarray((a), T, n)
+/* Variadic fatal error at loc. No va_end: as a _Noreturn function, control must not come back. */
+_Noreturn static inline void compiler_panic(Compiler* c, SrcLoc loc,
+ const char* fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ cfree_frontend_vfatal(c, loc, fmt, ap);
+}
+/* va_list form of compiler_panic: forwards straight to cfree_frontend_vfatal. */
+_Noreturn static inline void compiler_panicv(Compiler* c, SrcLoc loc,
+ const char* fmt, va_list ap) {
+ cfree_frontend_vfatal(c, loc, fmt, ap);
+}
+
+#endif
diff --git a/lang/c/lex/lex.c b/lang/cpp/lex/lex.c
diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h
@@ -0,0 +1,127 @@
+#ifndef CFREE_LEX_H
+#define CFREE_LEX_H
+
+#include "cpp_support.h"
+/* Token kinds; stored in Tok.kind. Per-kind comments name the Tok field in use. */
+typedef enum TokKind {
+ TOK_EOF = 0,
+ TOK_IDENT, /* v.ident */
+ TOK_NUM, /* lit */
+ TOK_FLT, /* lit */
+ TOK_STR, /* lit; v.str is decoded bytes if target-independent */
+ TOK_CHR, /* lit */
+ TOK_PUNCT, /* v.punct */
+ TOK_PP_HASH, /* # */
+ TOK_PP_PASTE, /* ## */
+ TOK_HEADER, /* header-name in #include / #embed */
+ TOK_NEWLINE, /* visible to PP only */
+ TOK_KW_FIRST,
+ /* C11 keywords are inserted into this range by parse_c via pool */
+ TOK_KW_LAST = 0x1000,
+} TokKind;
+/* Bit flags for the u16 Tok.flags / LitInfo.flags fields; highest bit used is 12. */
+typedef enum TokFlag {
+ TF_AT_BOL = 1u << 0,
+ TF_HAS_SPACE = 1u << 1,
+ TF_NO_EXPAND = 1u << 2,
+ TF_INT_U = 1u << 3,
+ TF_INT_L = 1u << 4,
+ TF_INT_LL = 1u << 5,
+ TF_FLT_F = 1u << 6,
+ TF_FLT_L = 1u << 7,
+ TF_STR_WIDE = 1u << 8,
+ TF_STR_U8 = 1u << 9,
+ TF_STR_U16 = 1u << 10,
+ TF_STR_U32 = 1u << 11,
+ TF_LITERAL_BAD = 1u << 12,
+} TokFlag;
+/* Punctuator codes for Tok.v.punct; multi-character ones start at 256, above ASCII. */
+typedef enum Punct {
+ P_NONE = 0,
+ /* Single-char punctuators reuse their ASCII codepoint here. */
+ P_ARROW = 256,
+ P_INC,
+ P_DEC,
+ P_SHL,
+ P_SHR,
+ P_LE,
+ P_GE,
+ P_EQ,
+ P_NE,
+ P_AND,
+ P_OR,
+ P_ADD_ASSIGN,
+ P_SUB_ASSIGN,
+ P_MUL_ASSIGN,
+ P_DIV_ASSIGN,
+ P_MOD_ASSIGN,
+ P_AND_ASSIGN,
+ P_OR_ASSIGN,
+ P_XOR_ASSIGN,
+ P_SHL_ASSIGN,
+ P_SHR_ASSIGN,
+ P_ELLIPSIS,
+ P_HASH_HASH,
+} Punct;
+/* Literal-table handle carried in Tok.lit and resolved by lex_lit; LIT_NONE = no literal. */
+typedef u32 LitId;
+#define LIT_NONE 0u
+/* Literal category; stored in LitInfo.kind. */
+typedef enum LitKind {
+ LIT_INT,
+ LIT_FLOAT,
+ LIT_STRING,
+ LIT_CHAR,
+} LitKind;
+/* String/char literal encoding; stored in LitInfo.enc. */
+typedef enum LitEnc {
+ LENC_ORDINARY,
+ LENC_UTF8,
+ LENC_WIDE,
+ LENC_UTF16,
+ LENC_UTF32,
+} LitEnc;
+/* Per-literal record; lex_lit returns a pointer to one for a given LitId. */
+typedef struct LitInfo {
+ u8 kind; /* LitKind */
+ u8 enc; /* LitEnc for strings/chars */
+ u16 flags; /* TokFlag suffix/encoding bits */
+ Sym spelling; /* exact source spelling */
+ BytesId bytes; /* decoded bytes/code units, if already decoded */
+} LitInfo;
+/* One token; which member of v is meaningful depends on kind (see TokKind). */
+typedef struct Tok {
+ u16 kind;
+ u16 flags;
+ SrcLoc loc;
+ Sym spelling; /* exact token spelling for diagnostics/#/## */
+ LitId lit; /* literal-table handle; LIT_NONE otherwise */
+ union {
+ Sym ident;
+ Sym str;
+ u32 punct;
+ } v;
+} Tok;
+
+typedef struct Lexer Lexer;
+
+/* lex_open_mem borrows (src, len). The lexer does not copy source bytes;
+ * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics
+ * and the preprocessor's directive scanner read from the borrowed buffer.
+ *
+ * Ownership: a Lexer that has been handed to pp_push_input is owned by PP
+ * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call
+ * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly).
+ *
+ * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed
+ * Lexer means outliving pp_free. */
+Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len);
+void lex_close(Lexer*);
+
+/* Streaming. Returns TOK_EOF repeatedly at end of input. */
+Tok lex_next(Lexer*);
+SrcLoc lex_loc(const Lexer*);
+u32 lex_file_id(const Lexer*);
+const LitInfo* lex_lit(const Lexer*, LitId);
+
+#endif
diff --git a/lang/c/pp/pp.c b/lang/cpp/pp/pp.c
diff --git a/lang/c/pp/pp.h b/lang/cpp/pp/pp.h
diff --git a/lang/c/pp/pp_directive.c b/lang/cpp/pp/pp_directive.c
diff --git a/lang/c/pp/pp_expand.c b/lang/cpp/pp/pp_expand.c
diff --git a/lang/cpp/pp/pp_priv.h b/lang/cpp/pp/pp_priv.h
@@ -0,0 +1,328 @@
+/* pp_priv.h — shared types, helpers, and cross-module forward declarations
+ * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c).
+ * NOT part of the public API; included only within lang/cpp/pp/. */
+
+#ifndef CFREE_PP_PRIV_H
+#define CFREE_PP_PRIV_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpp_support.h"
+#include "pp/pp.h"
+
+/* ============================================================
+ * Internal token kinds
+ * ============================================================ */
+
+/* Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */
+#define TOK_PP_PARAM ((u16)0x1100) /* macro-body parameter ref; v.punct = index */
+#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */
+
+/* ============================================================
+ * Types
+ * ============================================================ */
+/* One macro definition; stored by pointer as the value type of MacroMap. */
+typedef struct Macro {
+ Sym name;
+ SrcLoc def_loc;
+ u8 is_func;
+ u8 is_variadic;
+ u8 pad[2];
+ u32 n_params;
+ Sym* params; /* parameter names */
+ Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */
+ u32 body_len;
+} Macro;
+/* Hideset handle; per struct Pp, element 0 of Pp.hsets is reserved as HS_EMPTY. */
+typedef u32 HidesetId;
+#define HS_EMPTY 0u
+/* NOTE(review): names[1] is the pre-C99 trailing-array idiom; sizing is done by the allocator. */
+typedef struct Hideset {
+ u32 n;
+ Sym names[1]; /* flexible; allocated with extra trailing slots */
+} Hideset;
+/* Discriminator for TokSrc.kind: a live lexer or a token buffer. */
+typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind;
+/* One entry on the Pp.sources stack; `kind` selects the SRC_LEX or SRC_BUF fields. */
+typedef struct TokSrc {
+ u8 kind;
+ /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is
+ * the top source and it's exhausted, instead of popping. The caller
+ * (e.g. argument pre-expansion) explicitly pops the scope when done.
+ * This bounds expansion to a single argument's token stream. */
+ u8 scope_top;
+ u8 pad[2];
+ /* SRC_LEX */
+ Lexer* lex;
+ /* SRC_BUF */
+ Tok* toks;
+ HidesetId* hs;
+ u32 i;
+ u32 n;
+ /* #line state (SRC_LEX only). line_delta is added to every emitted
+ * token's loc.line on its way out so __LINE__ and the output cursor
+ * see user-visible numbering. file_override is the Sym (without
+ * surrounding quotes) used by __FILE__ when set. */
+ i32 line_delta;
+ Sym file_override;
+} TokSrc;
+/* State of one conditional group; stored in IfFrame.state. */
+typedef enum IfState {
+ IF_INCLUDE = 1, /* group active, emit code */
+ IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */
+ IF_DONE = 3, /* skip, already had a true branch */
+} IfState;
+/* One entry on the Pp.ifstk conditional-inclusion stack. */
+typedef struct IfFrame {
+ u8 state; /* IfState */
+ u8 has_else;
+ u8 pad[2];
+ SrcLoc loc;
+} IfFrame;
+
+/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with
+ * deletion (#undef). See <cfree/support/hashmap.h>. */
+#include <cfree/support/hashmap.h>
+static inline u32 macro_hash_(Sym s) { return cfree_hash_u32((u32)s); }
+CFREE_HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_);
+
+/* ============================================================
+ * Pp struct (definition shared across all three TUs)
+ * ============================================================ */
+
+struct Pp {
+ Compiler* c; /* used for panics (pp_xrealloc) and heap lookup (pp_heap) */
+ Pool* pool;
+
+ /* Source stack — top of stack is sources[nsources-1]. */
+ TokSrc* sources;
+ u32 nsources;
+ u32 sources_cap;
+
+ /* Macro table (open-addressed; key = Sym, value = Macro*). */
+ MacroMap mtab;
+
+ /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */
+ IfFrame* ifstk;
+ u32 ifstk_n;
+ u32 ifstk_cap;
+
+ /* Hideset table. Element 0 reserved as HS_EMPTY. */
+ Hideset** hsets;
+ u32 hsets_n;
+ u32 hsets_cap;
+
+ /* Include directories (stage 9). */
+ struct {
+ const char* path;
+ u8 system;
+ }* inc_dirs;
+ u32 ninc_dirs;
+ u32 inc_dirs_cap;
+
+ /* Current #pragma pack maximum field alignment. 0 means natural. */
+ u32 pack_align;
+ u32 pack_stack[16]; /* fixed capacity: at most 16 saved values */
+ u32 pack_stack_n; /* presumably the number of entries in use — confirm */
+
+ /* Internal arena: macro bodies, hidesets, expansion buffers, file
+ * data for #include. Lives until pp_free. */
+ CfreeArena* arena;
+
+ /* Cached interned identifiers used for directive recognition. */
+ Sym sym_define;
+ Sym sym_undef;
+ Sym sym_include;
+ Sym sym_if;
+ Sym sym_ifdef;
+ Sym sym_ifndef;
+ Sym sym_elif;
+ Sym sym_else;
+ Sym sym_endif;
+ Sym sym_line;
+ Sym sym_pragma;
+ Sym sym_error;
+ Sym sym_warning;
+ Sym sym_embed;
+ Sym sym_defined;
+ Sym sym_va_args;
+ Sym sym_line__; /* __LINE__ */
+ Sym sym_file__; /* __FILE__ */
+ Sym sym_date__; /* __DATE__ */
+ Sym sym_time__; /* __TIME__ */
+ Sym sym_stdc__; /* __STDC__ */
+ Sym sym_stdc_hosted__;
+ Sym sym_stdc_version__;
+ Sym sym__pragma; /* _Pragma operator */
+ Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */
+
+ /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for
+ * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or
+ * time(NULL) if unset). */
+ Sym val_date_str;
+ Sym val_time_str;
+
+ /* Defined-operator handling during #if expansion.
+ *
+ * The first prepass in eval_if_expr replaces `defined X` / `defined
+ * (X)` literally found in the directive line, but `defined()` can
+ * also come from macro bodies (mingw's intrin-impl.h uses
+ * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When
+ * the expander processes such a body, the identifier inside
+ * `defined(...)` must NOT be macro-expanded — otherwise an empty
+ * macro X would turn `defined(X)` into `defined()` and the
+ * post-expansion prepass would reject it.
+ *
+ * This pair of fields tracks the state across `pp_next_raw` calls
+ * within `expand_for_if`:
+ * in_if_expansion: 1 inside an #if's expand_arg_to_eof call
+ * defined_skip: 0 normally; 1 after emitting `defined`
+ * (consume one IDENT before clearing); 2 after
+ * emitting `defined (` (waiting for IDENT then
+ * `)`).
+ * The expander uses these to mark the operand IDENT TF_NO_EXPAND
+ * before the macro-expansion check at the head of pp_next_raw. */
+ u8 in_if_expansion;
+ u8 defined_skip;
+};
+
+/* ============================================================
+ * Allocation helpers (static inline here; shared by all three pp TUs)
+ * ============================================================ */
+/* The heap recorded in the compiler context of pp->c. */
+static inline Heap* pp_heap(Pp* pp) { return cfree_compiler_context(pp->c)->heap; }
+/* Heap realloc that never hands back NULL: a NULL result is a fatal pp error. */
+static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n,
+                                size_t align) {
+  Heap* heap = pp_heap(pp);
+  void* grown = heap->realloc(heap, p, old_n, new_n, align);
+  if (grown) return grown;
+  compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory"); /* _Noreturn */
+}
+/* Give an n-byte block back to the compiler heap; a NULL p is ignored. */
+static inline void pp_xfree(Pp* pp, void* p, size_t n) {
+ if (p) pp_heap(pp)->free(pp_heap(pp), p, n);
+}
+
+/* ============================================================
+ * Token-vector helpers
+ * ============================================================ */
+/* Growable Tok array; storage comes from pp->arena (see tv_grow). */
+typedef struct TokVec {
+ Tok* data;
+ u32 n; /* elements in use */
+ u32 cap; /* elements allocated */
+} TokVec;
+/* Growable HidesetId array; storage comes from pp->arena (see hsv_grow). */
+typedef struct HsVec {
+ HidesetId* data;
+ u32 n; /* elements in use */
+ u32 cap; /* elements allocated */
+} HsVec;
+
+/* Make room for `want` tokens. Capacity doubles (starting at 8) into a new
+ * arena array; existing tokens are copied and the old array is not freed. */
+static inline void tv_grow(Pp* pp, TokVec* v, u32 want) {
+  u32 room = v->cap;
+  Tok* fresh;
+  if (want <= room) return;
+  for (room = room ? room * 2 : 8; room < want; room *= 2) {}
+  fresh = arena_array(pp->arena, Tok, room);
+  if (v->n != 0) memcpy(fresh, v->data, sizeof(Tok) * v->n);
+  v->data = fresh;
+  v->cap = room;
+}
+/* Append t to v, calling tv_grow first so that slot v->n exists. */
+static inline void tv_push(Pp* pp, TokVec* v, Tok t) {
+ tv_grow(pp, v, v->n + 1);
+ v->data[v->n++] = t;
+}
+
+/* HidesetId counterpart of tv_grow: make room for `want` ids by doubling
+ * (starting at 8) into a new arena array and copying the ids already stored. */
+static inline void hsv_grow(Pp* pp, HsVec* v, u32 want) {
+  u32 room;
+  if (want <= v->cap) return;
+  room = v->cap == 0 ? 8 : v->cap * 2;
+  while (room < want) room += room;
+  HidesetId* fresh = arena_array(pp->arena, HidesetId, room);
+  if (v->n != 0) memcpy(fresh, v->data, v->n * sizeof(HidesetId));
+  v->data = fresh;
+  v->cap = room;
+}
+/* Append hs to v, calling hsv_grow first so that slot v->n exists. */
+static inline void hsv_push(Pp* pp, HsVec* v, HidesetId hs) {
+ hsv_grow(pp, v, v->n + 1);
+ v->data[v->n++] = hs;
+}
+
+/* Growable char buffer (arena-backed). */
+typedef struct CharBuf {
+ char* data;
+ u32 len; /* bytes in use; cb_append adds no terminator */
+ u32 cap; /* bytes allocated */
+} CharBuf;
+/* Append n bytes of s to b, doubling the arena buffer as needed (u32 sizes). */
+static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) {
+  u64 need = (u64)b->len + n, nc; /* 64-bit so len + n cannot wrap */
+  if (need > b->cap) {
+    for (nc = b->cap ? (u64)b->cap * 2 : 64; nc < need; nc *= 2) {}
+    if (nc > UINT32_MAX) /* u32 math here used to wrap: hang or overrun */
+      compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: buffer too large");
+    char* nb = (char*)arena_alloc(pp->arena, (size_t)nc, 1);
+    if (b->len) memcpy(nb, b->data, b->len);
+    b->data = nb;
+    b->cap = (u32)nc;
+  }
+  if (n) memcpy(b->data + b->len, s, n);
+  b->len += n;
+}
+/* Append the single byte c to b via cb_append. */
+static inline void cb_putc(Pp* pp, CharBuf* b, char c) {
+ cb_append(pp, b, &c, 1);
+}
+
+/* ============================================================
+ * Cross-module forward declarations
+ * ============================================================ */
+
+/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */
+Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out);
+void src_push(Pp* pp, TokSrc s);
+void src_pop(Pp* pp);
+void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n);
+
+/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it,
+ * and pp_next_raw drives directives and expansion. Declared non-static so
+ * pp_expand.c can call it without a forward decl each time. */
+Tok pp_next_raw(Pp* pp);
+
+/* --- pp_expand.c → pp.c, pp_directive.c --- */
+HidesetId hs_add(Pp* pp, HidesetId id, Sym s);
+int hs_contains(Pp* pp, HidesetId id, Sym s);
+Macro* mt_get(Pp* pp, Sym name);
+void mt_put(Pp* pp, Sym name, Macro* m);
+void mt_del(Pp* pp, Sym name);
+void expand_arg_to_eof(Pp* pp, Tok* in, HidesetId* hs, u32 nin, TokVec* out);
+
+/* --- pp_directive.c → pp_expand.c --- */
+i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
+void process_directive(Pp* pp, SrcLoc hash_loc);
+
+/* --- pp_directive.c internal helpers called from pp_expand.c --- */
+void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
+int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out);
+int try_expand_pragma_op(Pp* pp, const Tok* invoke);
+
+/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */
+void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n);
+
+/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */
+void do_define(Pp* pp, const Tok* line, u32 n);
+void do_undef(Pp* pp, const Tok* line, u32 n);
+
+/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */
+TokSrc* current_lex_src(Pp* pp);
+
+#endif /* CFREE_PP_PRIV_H */
diff --git a/mk/config.mk b/mk/config.mk
@@ -14,6 +14,7 @@ CFREE_OBJ_ELF_ENABLED := $(call cfg_flag,CFREE_OBJ_ELF_ENABLED)
CFREE_OBJ_MACHO_ENABLED := $(call cfg_flag,CFREE_OBJ_MACHO_ENABLED)
CFREE_OBJ_COFF_ENABLED := $(call cfg_flag,CFREE_OBJ_COFF_ENABLED)
+CFREE_LANG_CPP_ENABLED := $(call cfg_flag,CFREE_LANG_CPP_ENABLED)
CFREE_LANG_C_ENABLED := $(call cfg_flag,CFREE_LANG_C_ENABLED)
CFREE_LANG_TOY_ENABLED := $(call cfg_flag,CFREE_LANG_TOY_ENABLED)
CFREE_LANG_WASM_ENABLED := $(call cfg_flag,CFREE_LANG_WASM_ENABLED)
diff --git a/src/api/lang_registry.c b/src/api/lang_registry.c
@@ -21,6 +21,9 @@
#include "cfree/config.h"
+_Static_assert(!CFREE_LANG_C_ENABLED || CFREE_LANG_CPP_ENABLED,
+ "CFREE_LANG_C_ENABLED requires CFREE_LANG_CPP_ENABLED");
+
/* Defined in src/api/compile.c, alongside the asm frontend's
* new/compile/free functions. Treated as part of the codegen substrate
* (no per-frontend lang/ directory), so its declaration lives here