pp.c (36749B)
1 /* C11 preprocessor (translation phase 4). 2 * 3 * Streams tokens via pp_next: directives are consumed, macro invocations are 4 * expanded, and TOK_NEWLINE is preserved so pp_emit_text can reconstruct the 5 * line structure of the source. 6 * 7 * The token-source stack carries either a Lexer (file or #include'd file) or 8 * a pre-built Tok[] buffer (macro expansion). Each buffer token carries a 9 * hideset (Prosser, the standard's "nested-replacement" rule) recording 10 * which macro names it must not be re-expanded by during rescan. 11 * 12 * Residual module: source stack, pp_next / pp_next_raw (public streaming), 13 * pp_new/free, predefined macros, lifecycle, keyword interning. */ 14 15 #include <kit/compile.h> 16 17 #include "pp/pp_priv.h" 18 19 /* ============================================================ 20 * Source stack 21 * ============================================================ */ 22 23 static TokSrc* src_top(Pp* pp) { 24 return pp->nsources ? &pp->sources[pp->nsources - 1] : NULL; 25 } 26 27 void src_push(Pp* pp, TokSrc s) { 28 if (pp->nsources == pp->sources_cap) { 29 u32 nc = pp->sources_cap ? pp->sources_cap * 2 : 8; 30 pp->sources = 31 (TokSrc*)pp_xrealloc(pp, pp->sources, sizeof(TokSrc) * pp->sources_cap, 32 sizeof(TokSrc) * nc, _Alignof(TokSrc)); 33 pp->sources_cap = nc; 34 } 35 pp->sources[pp->nsources++] = s; 36 } 37 38 void src_pop(Pp* pp) { 39 TokSrc* t; 40 if (!pp->nsources) return; 41 t = &pp->sources[pp->nsources - 1]; 42 if (t->kind == SRC_LEX && t->lex) { 43 lex_close(t->lex); 44 t->lex = NULL; 45 } 46 --pp->nsources; 47 } 48 49 /* Read next raw token from the top source. Returns TOK_EOF when stack is 50 * empty. Pops empty buffer/lexer sources as it descends. `src_kind_out`, 51 * if non-NULL, receives the kind of the source the token came from 52 * (SRC_LEX vs SRC_BUF). 
Used by pp_next_raw to gate directive recognition 53 * to lex-sourced tokens only — a `#` produced by macro expansion never 54 * starts a directive (§6.10.3.4 ¶3, covered by `63_rescan_not_directive`). */ 55 Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) { 56 Tok t; 57 TokSrc* s; 58 while ((s = src_top(pp)) != NULL) { 59 if (s->kind == SRC_BUF) { 60 if (s->i < s->n) { 61 t = s->toks[s->i]; 62 if (hs_out) *hs_out = s->hs ? s->hs[s->i] : HS_EMPTY; 63 if (src_kind_out) *src_kind_out = SRC_BUF; 64 ++s->i; 65 return t; 66 } 67 if (s->scope_top) { 68 memset(&t, 0, sizeof(t)); 69 t.kind = TOK_EOF; 70 if (hs_out) *hs_out = HS_EMPTY; 71 if (src_kind_out) *src_kind_out = SRC_BUF; 72 return t; 73 } 74 src_pop(pp); 75 continue; 76 } 77 /* SRC_LEX */ 78 t = lex_next(s->lex); 79 if (t.kind == TOK_EOF) { 80 if (pp->nsources > 1) { 81 src_pop(pp); 82 continue; 83 } 84 if (hs_out) *hs_out = HS_EMPTY; 85 if (src_kind_out) *src_kind_out = SRC_LEX; 86 return t; 87 } 88 /* Apply #line line-number delta on the way out so the rest of 89 * the pipeline sees user-visible line numbers (matters for 90 * __LINE__ expansion and for line-tracking output cursors). 
*/ 91 if (s->line_delta) { 92 t.loc.line = (u32)((i32)t.loc.line + s->line_delta); 93 } 94 if (hs_out) *hs_out = HS_EMPTY; 95 if (src_kind_out) *src_kind_out = SRC_LEX; 96 return t; 97 } 98 memset(&t, 0, sizeof(t)); 99 t.kind = TOK_EOF; 100 if (hs_out) *hs_out = HS_EMPTY; 101 if (src_kind_out) *src_kind_out = SRC_LEX; 102 return t; 103 } 104 105 /* ============================================================ 106 * Buffer source push helpers 107 * ============================================================ */ 108 109 void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) { 110 TokSrc s; 111 memset(&s, 0, sizeof(s)); 112 s.kind = SRC_BUF; 113 s.toks = toks; 114 s.hs = hs; 115 s.i = 0; 116 s.n = n; 117 src_push(pp, s); 118 } 119 120 /* ============================================================ 121 * Public streaming entries 122 * ============================================================ */ 123 124 Tok pp_next(Pp* pp) { 125 /* Public: filter newlines so consumers like the C parser don't need 126 * to handle them. pp_emit_text uses pp_next_raw via its own loop. 127 * 128 * Also drop forwarded `#pragma` lines: do_pragma pushes the directive 129 * back onto the source stack so pp_emit_text can re-emit it verbatim 130 * in cpp mode, but the C parser (cc mode) would see the trailing 131 * tokens as stray identifiers. When we see TOK_PP_HASH followed by 132 * `pragma`, swallow tokens through the next NEWLINE. */ 133 for (;;) { 134 Tok t = pp_next_raw(pp); 135 if (t.kind == TOK_NEWLINE) continue; 136 if (t.kind == TOK_PP_HASH) { 137 Tok t2 = pp_next_raw(pp); 138 if (t2.kind == TOK_IDENT && t2.v.ident == pp->sym_pragma) { 139 for (;;) { 140 Tok tt = pp_next_raw(pp); 141 if (tt.kind == TOK_NEWLINE || tt.kind == TOK_EOF) break; 142 } 143 continue; 144 } 145 /* Not a pragma — push the peeked token back as a 1-element buffer 146 * so the next pp_next_raw returns it, and surface the hash now. 
*/ 147 Tok* keep = arena_array(pp->arena, Tok, 1); 148 HidesetId* hs = arena_array(pp->arena, HidesetId, 1); 149 keep[0] = t2; 150 hs[0] = HS_EMPTY; 151 push_buf(pp, keep, hs, 1); 152 return t; 153 } 154 return t; 155 } 156 } 157 158 /* ============================================================ 159 * pp_emit_text 160 * ============================================================ */ 161 162 static void w_str(Writer* w, const char* s, size_t n) { 163 if (n) (void)kit_writer_write(w, s, n); 164 } 165 166 void pp_emit_text(Pp* pp, Writer* out) { 167 int at_bol = 1; 168 for (;;) { 169 Tok t = pp_next_raw(pp); 170 if (t.kind == TOK_EOF) break; 171 if (t.kind == TOK_NEWLINE) { 172 w_str(out, "\n", 1); 173 at_bol = 1; 174 continue; 175 } 176 if (!at_bol && (t.flags & (TF_HAS_SPACE | TF_AT_BOL))) { 177 /* TF_AT_BOL on a non-leading output token means the source 178 * had a line break here that the line-tracking cursor isn't 179 * preserving — fall back to a single space so the tokens 180 * don't run together. 
*/ 181 w_str(out, " ", 1); 182 } 183 if (t.spelling) { 184 KitSlice s = kit_sym_str(pp->pool->c, t.spelling); 185 w_str(out, s.s, s.len); 186 } 187 at_bol = 0; 188 } 189 } 190 191 /* ============================================================ 192 * Lifecycle and configuration 193 * ============================================================ */ 194 195 static void pp_intern_keywords(Pp* pp) { 196 Pool* p = pp->pool; 197 pp->sym_define = kit_sym_intern(p->c, KIT_SLICE_LIT("define")); 198 pp->sym_undef = kit_sym_intern(p->c, KIT_SLICE_LIT("undef")); 199 pp->sym_include = kit_sym_intern(p->c, KIT_SLICE_LIT("include")); 200 pp->sym_if = kit_sym_intern(p->c, KIT_SLICE_LIT("if")); 201 pp->sym_ifdef = kit_sym_intern(p->c, KIT_SLICE_LIT("ifdef")); 202 pp->sym_ifndef = kit_sym_intern(p->c, KIT_SLICE_LIT("ifndef")); 203 pp->sym_elif = kit_sym_intern(p->c, KIT_SLICE_LIT("elif")); 204 pp->sym_else = kit_sym_intern(p->c, KIT_SLICE_LIT("else")); 205 pp->sym_endif = kit_sym_intern(p->c, KIT_SLICE_LIT("endif")); 206 pp->sym_line = kit_sym_intern(p->c, KIT_SLICE_LIT("line")); 207 pp->sym_pragma = kit_sym_intern(p->c, KIT_SLICE_LIT("pragma")); 208 pp->sym_pragma_kw = pp->sym_pragma; 209 pp->sym_error = kit_sym_intern(p->c, KIT_SLICE_LIT("error")); 210 pp->sym_warning = kit_sym_intern(p->c, KIT_SLICE_LIT("warning")); 211 pp->sym_embed = kit_sym_intern(p->c, KIT_SLICE_LIT("embed")); 212 pp->sym_defined = kit_sym_intern(p->c, KIT_SLICE_LIT("defined")); 213 pp->sym_va_args = kit_sym_intern(p->c, KIT_SLICE_LIT("__VA_ARGS__")); 214 pp->sym_line__ = kit_sym_intern(p->c, KIT_SLICE_LIT("__LINE__")); 215 pp->sym_file__ = kit_sym_intern(p->c, KIT_SLICE_LIT("__FILE__")); 216 pp->sym_date__ = kit_sym_intern(p->c, KIT_SLICE_LIT("__DATE__")); 217 pp->sym_time__ = kit_sym_intern(p->c, KIT_SLICE_LIT("__TIME__")); 218 pp->sym_stdc__ = kit_sym_intern(p->c, KIT_SLICE_LIT("__STDC__")); 219 pp->sym_stdc_hosted__ = 220 kit_sym_intern(p->c, KIT_SLICE_LIT("__STDC_HOSTED__")); 221 pp->sym_stdc_version__ = 
222 kit_sym_intern(p->c, KIT_SLICE_LIT("__STDC_VERSION__")); 223 pp->sym__pragma = kit_sym_intern(p->c, KIT_SLICE_LIT("_Pragma")); 224 } 225 226 /* Decompose unix seconds into UTC y/M/d/h/m/s. Algorithm: Howard Hinnant, 227 * "chrono-compatible Low-Level Date Algorithms" (civil_from_days). Valid 228 * for any int64 input; uses floor division so negative epoch values 229 * (pre-1970) work correctly. */ 230 typedef struct PpYMD { 231 int y; /* full year, e.g. 2026 */ 232 int M; /* 1..12 */ 233 int d; /* 1..31 */ 234 int h; /* 0..23 */ 235 int m; /* 0..59 */ 236 int s; /* 0..59 */ 237 } PpYMD; 238 239 static void pp_break_time(int64_t t, PpYMD* out) { 240 int64_t days, secs; 241 int64_t z, era, doe, yoe, y, doy, mp, d, mo; 242 /* Floor-divide t by 86400. */ 243 days = t / 86400; 244 secs = t - days * 86400; 245 if (secs < 0) { 246 secs += 86400; 247 days -= 1; 248 } 249 out->h = (int)(secs / 3600); 250 out->m = (int)((secs / 60) % 60); 251 out->s = (int)(secs % 60); 252 253 z = days + 719468; /* shift to era starting 0000-03-01 */ 254 era = (z >= 0 ? z : z - 146096) / 146097; 255 doe = z - era * 146097; /* [0,146096] */ 256 yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; /* [0,399] */ 257 y = yoe + era * 400; 258 doy = doe - (365 * yoe + yoe / 4 - yoe / 100); /* [0,365] */ 259 mp = (5 * doy + 2) / 153; /* [0,11], Mar=0 */ 260 d = doy - (153 * mp + 2) / 5 + 1; /* [1,31] */ 261 mo = mp + (mp < 10 ? 3 : -9); /* [1,12] */ 262 y += (mo <= 2); 263 out->y = (int)y; 264 out->M = (int)mo; 265 out->d = (int)d; 266 } 267 268 /* Compute __DATE__ and __TIME__ from env->now (unix seconds, host-supplied; 269 * negative means "no clock"). Per C11 §6.10.8.1: __DATE__ is "Mmm dd yyyy" 270 * (dd is space-padded if < 10), __TIME__ is "hh:mm:ss". Both quoted. 
*/ 271 static void compute_date_time(Pp* pp) { 272 static const char* mons[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", 273 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; 274 char date[24]; 275 char tm[16]; 276 int64_t t = kit_compiler_context(pp->c)->now; 277 PpYMD ymd; 278 if (t < 0) { 279 pp->val_date_str = 280 kit_sym_intern(pp->pool->c, KIT_SLICE_LIT("\"??? ?? ????\"")); 281 pp->val_time_str = 282 kit_sym_intern(pp->pool->c, KIT_SLICE_LIT("\"??:??:??\"")); 283 return; 284 } 285 pp_break_time(t, &ymd); 286 { 287 int dd = ymd.d, yyyy = ymd.y; 288 int p = 0; 289 date[p++] = '"'; 290 memcpy(date + p, mons[ymd.M - 1], 3); 291 p += 3; 292 date[p++] = ' '; 293 date[p++] = (dd >= 10) ? (char)('0' + dd / 10) : ' '; 294 date[p++] = (char)('0' + dd % 10); 295 date[p++] = ' '; 296 date[p++] = (char)('0' + (yyyy / 1000) % 10); 297 date[p++] = (char)('0' + (yyyy / 100) % 10); 298 date[p++] = (char)('0' + (yyyy / 10) % 10); 299 date[p++] = (char)('0' + (yyyy) % 10); 300 date[p++] = '"'; 301 pp->val_date_str = 302 kit_sym_intern(pp->pool->c, (KitSlice){.s = date, .len = (size_t)p}); 303 } 304 { 305 int hh = ymd.h, mm = ymd.m, ss = ymd.s; 306 int p = 0; 307 tm[p++] = '"'; 308 tm[p++] = (char)('0' + (hh / 10) % 10); 309 tm[p++] = (char)('0' + hh % 10); 310 tm[p++] = ':'; 311 tm[p++] = (char)('0' + (mm / 10) % 10); 312 tm[p++] = (char)('0' + mm % 10); 313 tm[p++] = ':'; 314 tm[p++] = (char)('0' + (ss / 10) % 10); 315 tm[p++] = (char)('0' + ss % 10); 316 tm[p++] = '"'; 317 pp->val_time_str = 318 kit_sym_intern(pp->pool->c, (KitSlice){.s = tm, .len = (size_t)p}); 319 } 320 } 321 322 static void pp_register_static_predefined(Pp* pp) { 323 pp_define(pp, "__kit__", "1"); 324 pp_define(pp, "__kit_major__", "0"); 325 pp_define(pp, "__kit_minor__", "0"); 326 pp_define(pp, "__kit_patchlevel__", "0"); 327 pp_define(pp, "__STDC__", "1"); 328 pp_define(pp, "__STDC_HOSTED__", "0"); 329 pp_define(pp, "__STDC_VERSION__", "201112L"); 330 /* C11 memory_order constants used by __atomic_* builtins. 
Values match the 331 * `MemOrder` enum in src/arch/arch.h so eval_const_int -> MemOrder is a 332 * direct cast. */ 333 pp_define(pp, "__ATOMIC_RELAXED", "0"); 334 pp_define(pp, "__ATOMIC_CONSUME", "1"); 335 pp_define(pp, "__ATOMIC_ACQUIRE", "2"); 336 pp_define(pp, "__ATOMIC_RELEASE", "3"); 337 pp_define(pp, "__ATOMIC_ACQ_REL", "4"); 338 pp_define(pp, "__ATOMIC_SEQ_CST", "5"); 339 /* GNU `__extension__` is a pedantic-quiet prefix on non-standard constructs 340 * (statement exprs, anonymous structs, `long long` in C89, ...). kit's parser 341 * is permissive about those already and the keyword has no effect on parsing, 342 * so erase it. Needed on every OS, not just mingw: glibc's headers (e.g. 343 * <stdlib.h>'s `__extension__ typedef struct { ... } lldiv_t;`) use it 344 * pervasively, and musl's cleaner ISO-C headers simply never tripped it. */ 345 pp_define(pp, "__extension__", ""); 346 /* GCC keyword spellings of the C qualifiers/`signed`. GNU libc and the Linux 347 * kernel UAPI headers use these directly in GCC mode (e.g. 348 * <asm-generic/int-ll64.h>'s `typedef __signed__ char __s8;`). kit parses the 349 * canonical keywords; map the GCC spellings onto them. (`__restrict` is 350 * additionally a parser keyword alias for the configs that #undef the macro.) */ 351 pp_define(pp, "__volatile__", "volatile"); 352 pp_define(pp, "__const__", "const"); 353 pp_define(pp, "__signed__", "signed"); 354 } 355 356 /* OS-keyed predefined macros: the single place to extend per operating system. 357 * Each OS arm owns its full set of OS-specific predefines (including the 358 * arch-specific MSVC machine macros, which are an OS-flavor concern, not a 359 * data-model one). Add a new `case` to support another OS personality; the 360 * data-model / type-width macros stay in pp_register_target_predefined since 361 * they are OS-independent. 
*/ 362 static void pp_register_os_predefined(Pp* pp, KitTargetSpec target) { 363 switch (target.os) { 364 case KIT_OS_WINDOWS: 365 /* Windows / mingw predefined macros. kit targets the mingw 366 * flavor (DWARF debug info, mingwex CRT) rather than MSVC, so we 367 * advertise __MINGW{32,64}__ and friends but never set _MSC_VER. 368 * Both _WIN32 and the legacy unprefixed WIN32 are defined; _WIN64 369 * is set on 64-bit targets only. The MSVC-compat machine macros 370 * (_M_X64 / _M_AMD64 / _M_ARM64) are useful for headers that gate 371 * on them but harmless to set everywhere — mingw's own headers 372 * tolerate them. */ 373 pp_define(pp, "_WIN32", "1"); 374 pp_define(pp, "WIN32", "1"); 375 pp_define(pp, "__MINGW32__", "1"); 376 if (target.ptr_size == 8) { 377 pp_define(pp, "_WIN64", "1"); 378 pp_define(pp, "__MINGW64__", "1"); 379 } 380 if (target.arch == KIT_ARCH_X86_64) { 381 pp_define(pp, "_M_X64", "100"); 382 pp_define(pp, "_M_AMD64", "100"); 383 } else if (target.arch == KIT_ARCH_ARM_64) { 384 pp_define(pp, "_M_ARM64", "1"); 385 } 386 /* mingw's <vadefs.h> / many CRT headers gate __builtin_va_list / 387 * __gnuc_va_list on __GNUC__. kit implements the va_* builtins 388 * and __builtin_va_list with the GCC contract, so impersonating a 389 * conservative GCC vintage lets the mingw header tree compile. 390 * We pick 4.0 — old enough that no header expects GCC-specific 391 * extensions kit doesn't implement (e.g. transactional memory, 392 * GIMPLE plugins), but new enough to clear every __GNUC__ >= N 393 * gate we've seen in practice. */ 394 pp_define(pp, "__GNUC__", "4"); 395 pp_define(pp, "__GNUC_MINOR__", "0"); 396 pp_define(pp, "__GNUC_PATCHLEVEL__", "0"); 397 /* __has_builtin / __has_attribute / __has_include_next: clang/GCC 398 * preprocessor extensions. mingw's _mingw.h gates inline-asm 399 * intrinsic definitions on whether the compiler claims to have 400 * them as builtins (e.g. __debugbreak, __fastfail, __prefetch). 
401 * kit doesn't model individual builtin lookups; claim "yes" 402 * uniformly so mingw skips its inline-asm fallbacks (which use 403 * intel/{$}-form asm syntax kit's parser doesn't accept). */ 404 pp_define(pp, "__has_builtin(x)", "1"); 405 pp_define(pp, "__has_feature(x)", "0"); 406 pp_define(pp, "__has_attribute(x)", "0"); 407 /* MSVC fixed-width integer types. mingw's corecrt.h uses these 408 * directly (e.g. `typedef unsigned __int64 size_t;`). Map to the 409 * C standard equivalents. */ 410 pp_define(pp, "__int8", "char"); 411 pp_define(pp, "__int16", "short"); 412 pp_define(pp, "__int32", "int"); 413 pp_define(pp, "__int64", "long long"); 414 /* mingw's psdk_inc/intrin-impl.h emits an inline implementation 415 * for every MSVC intrinsic (_lrotl, _BitScanForward, ...) and 416 * gates them with __INTRINSIC_PROLOG, which uses ## to paste the 417 * intrinsic's name into a `defined(__INTRINSIC_DEFINED_<name>)` 418 * test. Once an intrinsic gets defined, a later re-invocation of 419 * the same gate macro hits a kit pp bug where a *defined* 420 * symbol referenced inside `defined()` gets expanded before the 421 * `defined` operator captures it. Predefining 422 * __INTRINSIC_ONLYSPECIAL flips the gate's second clause so 423 * none of the inline intrinsics are emitted (mingw expects this 424 * idiom for non-special builds; the linker pulls them from 425 * libmingwex/libmsvcrt instead). This sidesteps the pp bug 426 * entirely. */ 427 pp_define(pp, "__INTRINSIC_ONLYSPECIAL", "1"); 428 /* __declspec(...) is the MSVC syntax for attributes. mingw uses 429 * it in headers for dllimport/dllexport, alignment, noreturn, 430 * etc. kit's COFF linker routes externs through the IAT 431 * regardless of the dllimport hint and doesn't yet model 432 * dllexport via this attribute — so we erase it as a no-op 433 * macro. (Note: this is at the preprocessor layer; the parser 434 * still needs to handle the syntax if/when the macro is removed.) 
435 */ 436 pp_define(pp, "__declspec(x)", ""); 437 /* __extension__ is erased unconditionally in pp_register_static_predefined. */ 438 /* __restrict / __restrict__: GCC-flavored alternates to the C99 439 * `restrict` keyword. kit parses `restrict` already; map the 440 * GCC spellings onto it. */ 441 pp_define(pp, "__restrict", "restrict"); 442 pp_define(pp, "__restrict__", "restrict"); 443 /* __volatile__/__const__/__signed__ erased->canonical unconditionally in 444 * pp_register_static_predefined (GNU spellings glibc/UAPI headers use). */ 445 /* MSVC calling-convention attributes. On x86_64 they're no-ops 446 * (every function uses the Win64 ABI) and on ARM64 likewise; on 447 * i386 they actually mean something but kit doesn't target it. 448 * Defining them as empty macros lets mingw headers that say 449 * `void __cdecl foo(void)` parse correctly. Same posture mingw's 450 * own GCC takes: __MINGW_USYMBOL((__cdecl__)). */ 451 /* MSVC calling-convention attributes — no-ops on Win64. kit 452 * pre-defines them empty *only when* mingw's headers don't 453 * themselves redefine them; we use the __MINGW_<x>_REDEFINE form 454 * via `#undef` first to play nicely with mingw's own 455 * redefinitions (mingw's _mingw.h does `#define __cdecl 456 * __attribute__((__cdecl__))` further down). Setting them empty 457 * here is safe because kit's parser will see the redefinition 458 * before any header uses them. */ 459 pp_define(pp, "__cdecl", ""); 460 pp_define(pp, "__stdcall", ""); 461 pp_define(pp, "__fastcall", ""); 462 pp_define(pp, "__thiscall", ""); 463 pp_define(pp, "__vectorcall", ""); 464 pp_define(pp, "_cdecl", ""); 465 pp_define(pp, "_stdcall", ""); 466 pp_define(pp, "_fastcall", ""); 467 /* __forceinline / __inline / __w64: mingw's _mingw.h redefines 468 * them itself when __GNUC__ is set, so we leave them alone here 469 * to avoid a redefinition-with-different-replacement error. 
*/ 470 break; 471 case KIT_OS_LINUX: 472 case KIT_OS_MACOS: 473 case KIT_OS_FREEBSD: 474 case KIT_OS_FREESTANDING: 475 case KIT_OS_WASI: 476 /* No OS-specific predefines beyond the shared data-model set. */ 477 break; 478 } 479 } 480 481 /* Target-dependent predefined macros consumed by rt/include/stddef.h and 482 * rt/include/stdint.h. The set mirrors the subset of GCC/Clang's __*_TYPE__ 483 * / __*_MAX__ namespace that those headers reference. We split only on 484 * pointer width plus the target data model: LP64 for Unix-like 64-bit targets, 485 * LLP64 for 64-bit Windows, and ILP32 for 32-bit targets. */ 486 static void pp_register_target_predefined(Pp* pp) { 487 KitTargetSpec target = kit_compiler_target_spec(pp->c); 488 const KitPredefinedMacro* arch_defs = NULL; 489 uint32_t narch_defs = kit_compiler_arch_predefines(pp->c, &arch_defs); 490 uint32_t i; 491 int ptr64 = (target.ptr_size == 8); 492 int lp64 = kit_target_uses_lp64(target); 493 /* sizeof(wchar_t) is a resolved data-model fact carried on the spec. */ 494 int wchar16 = (target.wchar_size == 2); 495 496 for (i = 0; i < narch_defs; ++i) { 497 pp_define(pp, arch_defs[i].name.s, arch_defs[i].body.s); 498 } 499 500 /* __USER_LABEL_PREFIX__ is the C source-symbol prefix the object format 501 * prepends ("_" for Mach-O, "" else); read it from the CG target rather 502 * than re-deriving from the object-format identity. */ 503 pp_define(pp, "__USER_LABEL_PREFIX__", kit_cg_target_c_label_prefix(pp->c)); 504 505 /* Byte / type sizes. kit uses a single LP64 (or ILP32) model across 506 * every supported target: int=4, short=2, long-long=8, float=4, double=8, 507 * long-double=8 (sharing the double representation — see the 508 * __LDBL_* block below). long and pointer-derived types track ptr_size. 509 * These macros let portable C code probe widths without first pulling in 510 * <limits.h> / <stddef.h>. 
*/ 511 pp_define(pp, "__CHAR_BIT__", "8"); 512 pp_define(pp, "__SIZEOF_SHORT__", "2"); 513 pp_define(pp, "__SIZEOF_INT__", "4"); 514 pp_define(pp, "__SIZEOF_LONG__", lp64 ? "8" : "4"); 515 pp_define(pp, "__SIZEOF_LONG_LONG__", "8"); 516 pp_define(pp, "__SIZEOF_POINTER__", ptr64 ? "8" : "4"); 517 pp_define(pp, "__SIZEOF_SIZE_T__", ptr64 ? "8" : "4"); 518 pp_define(pp, "__SIZEOF_PTRDIFF_T__", ptr64 ? "8" : "4"); 519 pp_define(pp, "__SIZEOF_WCHAR_T__", wchar16 ? "2" : "4"); 520 pp_define(pp, "__SIZEOF_WINT_T__", "4"); 521 pp_define(pp, "__SIZEOF_FLOAT__", "4"); 522 pp_define(pp, "__SIZEOF_DOUBLE__", "8"); 523 pp_define(pp, "__SIZEOF_LONG_DOUBLE__", 524 kit_target_long_double_is_binary128(target) ? "16" : "8"); 525 526 /* OS-specific predefines (Windows/mingw + MSVC machine macros) live in one 527 * os-keyed table — pp_register_os_predefined — so adding an OS personality 528 * touches a single place. Emitted here, between the data-model size macros 529 * and the stddef.h type aliases, to preserve the predefined-macro ordering. */ 530 pp_register_os_predefined(pp, target); 531 532 /* stddef.h base aliases */ 533 if (lp64) { 534 pp_define(pp, "__SIZE_TYPE__", "unsigned long"); 535 pp_define(pp, "__PTRDIFF_TYPE__", "long"); 536 } else if (ptr64) { 537 pp_define(pp, "__SIZE_TYPE__", "unsigned long long"); 538 pp_define(pp, "__PTRDIFF_TYPE__", "long long"); 539 } else { 540 pp_define(pp, "__SIZE_TYPE__", "unsigned int"); 541 pp_define(pp, "__PTRDIFF_TYPE__", "int"); 542 } 543 pp_define(pp, "__WCHAR_TYPE__", wchar16 ? 
"unsigned short" : "int"); 544 pp_define(pp, "__CHAR16_TYPE__", "unsigned short"); 545 pp_define(pp, "__CHAR32_TYPE__", "unsigned int"); 546 547 /* stdint.h exact-width aliases (widths <= 32 are model-independent) */ 548 pp_define(pp, "__INT8_TYPE__", "signed char"); 549 pp_define(pp, "__INT16_TYPE__", "short"); 550 pp_define(pp, "__INT32_TYPE__", "int"); 551 pp_define(pp, "__UINT8_TYPE__", "unsigned char"); 552 pp_define(pp, "__UINT16_TYPE__", "unsigned short"); 553 pp_define(pp, "__UINT32_TYPE__", "unsigned int"); 554 pp_define(pp, "__INT64_TYPE__", lp64 ? "long" : "long long"); 555 pp_define(pp, "__UINT64_TYPE__", 556 lp64 ? "unsigned long" : "unsigned long long"); 557 558 /* Least-width == exact-width on every target kit knows about */ 559 pp_define(pp, "__INT_LEAST8_TYPE__", "signed char"); 560 pp_define(pp, "__INT_LEAST16_TYPE__", "short"); 561 pp_define(pp, "__INT_LEAST32_TYPE__", "int"); 562 pp_define(pp, "__UINT_LEAST8_TYPE__", "unsigned char"); 563 pp_define(pp, "__UINT_LEAST16_TYPE__", "unsigned short"); 564 pp_define(pp, "__UINT_LEAST32_TYPE__", "unsigned int"); 565 pp_define(pp, "__INT_LEAST64_TYPE__", lp64 ? "long" : "long long"); 566 pp_define(pp, "__UINT_LEAST64_TYPE__", 567 lp64 ? "unsigned long" : "unsigned long long"); 568 569 /* Fast types: fast8 stays at `signed char`; fast16/32/64 widen to the 570 * register-width integer so the operation fits in a single instruction. */ 571 pp_define(pp, "__INT_FAST8_TYPE__", "signed char"); 572 pp_define(pp, "__UINT_FAST8_TYPE__", "unsigned char"); 573 pp_define(pp, "__INT_FAST8_MAX__", "127"); 574 pp_define(pp, "__UINT_FAST8_MAX__", "255"); 575 if (ptr64) { 576 pp_define(pp, "__INT_FAST16_TYPE__", lp64 ? "long" : "long long"); 577 pp_define(pp, "__INT_FAST32_TYPE__", lp64 ? "long" : "long long"); 578 pp_define(pp, "__INT_FAST64_TYPE__", lp64 ? "long" : "long long"); 579 pp_define(pp, "__UINT_FAST16_TYPE__", 580 lp64 ? 
"unsigned long" : "unsigned long long"); 581 pp_define(pp, "__UINT_FAST32_TYPE__", 582 lp64 ? "unsigned long" : "unsigned long long"); 583 pp_define(pp, "__UINT_FAST64_TYPE__", 584 lp64 ? "unsigned long" : "unsigned long long"); 585 pp_define(pp, "__INT_FAST16_MAX__", 586 lp64 ? "9223372036854775807L" : "9223372036854775807LL"); 587 pp_define(pp, "__INT_FAST32_MAX__", 588 lp64 ? "9223372036854775807L" : "9223372036854775807LL"); 589 pp_define(pp, "__INT_FAST64_MAX__", 590 lp64 ? "9223372036854775807L" : "9223372036854775807LL"); 591 pp_define(pp, "__UINT_FAST16_MAX__", 592 lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); 593 pp_define(pp, "__UINT_FAST32_MAX__", 594 lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); 595 pp_define(pp, "__UINT_FAST64_MAX__", 596 lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); 597 } else { 598 pp_define(pp, "__INT_FAST16_TYPE__", "int"); 599 pp_define(pp, "__INT_FAST32_TYPE__", "int"); 600 pp_define(pp, "__INT_FAST64_TYPE__", "long long"); 601 pp_define(pp, "__UINT_FAST16_TYPE__", "unsigned int"); 602 pp_define(pp, "__UINT_FAST32_TYPE__", "unsigned int"); 603 pp_define(pp, "__UINT_FAST64_TYPE__", "unsigned long long"); 604 pp_define(pp, "__INT_FAST16_MAX__", "2147483647"); 605 pp_define(pp, "__INT_FAST32_MAX__", "2147483647"); 606 pp_define(pp, "__INT_FAST64_MAX__", "9223372036854775807LL"); 607 pp_define(pp, "__UINT_FAST16_MAX__", "4294967295U"); 608 pp_define(pp, "__UINT_FAST32_MAX__", "4294967295U"); 609 pp_define(pp, "__UINT_FAST64_MAX__", "18446744073709551615ULL"); 610 } 611 612 /* Pointer-holding integers + ptrdiff/size maxes */ 613 if (lp64) { 614 pp_define(pp, "__LONG_MAX__", "9223372036854775807L"); 615 pp_define(pp, "__INTPTR_TYPE__", "long"); 616 pp_define(pp, "__UINTPTR_TYPE__", "unsigned long"); 617 pp_define(pp, "__INTPTR_MAX__", "9223372036854775807L"); 618 pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615UL"); 619 pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807L"); 620 
pp_define(pp, "__SIZE_MAX__", "18446744073709551615UL"); 621 } else if (ptr64) { 622 pp_define(pp, "__LONG_MAX__", "2147483647L"); 623 pp_define(pp, "__INTPTR_TYPE__", "long long"); 624 pp_define(pp, "__UINTPTR_TYPE__", "unsigned long long"); 625 pp_define(pp, "__INTPTR_MAX__", "9223372036854775807LL"); 626 pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615ULL"); 627 pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807LL"); 628 pp_define(pp, "__SIZE_MAX__", "18446744073709551615ULL"); 629 } else { 630 pp_define(pp, "__LONG_MAX__", "2147483647L"); 631 pp_define(pp, "__INTPTR_TYPE__", "int"); 632 pp_define(pp, "__UINTPTR_TYPE__", "unsigned int"); 633 pp_define(pp, "__INTPTR_MAX__", "2147483647"); 634 pp_define(pp, "__UINTPTR_MAX__", "4294967295U"); 635 pp_define(pp, "__PTRDIFF_MAX__", "2147483647"); 636 pp_define(pp, "__SIZE_MAX__", "4294967295U"); 637 } 638 639 /* Greatest-width integers + matching _C() suffix macros */ 640 if (lp64) { 641 pp_define(pp, "__INTMAX_TYPE__", "long"); 642 pp_define(pp, "__UINTMAX_TYPE__", "unsigned long"); 643 pp_define(pp, "__INTMAX_MAX__", "9223372036854775807L"); 644 pp_define(pp, "__UINTMAX_MAX__", "18446744073709551615UL"); 645 pp_define(pp, "__INT64_C(c)", "c ## L"); 646 pp_define(pp, "__UINT64_C(c)", "c ## UL"); 647 pp_define(pp, "__INTMAX_C(c)", "c ## L"); 648 pp_define(pp, "__UINTMAX_C(c)", "c ## UL"); 649 /* Suffix tokens (the form <stdint.h> uses to build INT64_C/etc.). GCC and 650 * clang predefine these; kit must too so kit-compiled TUs that include its 651 * freestanding <stdint.h> get the right-typed 64-bit constants. 
*/ 652 pp_define(pp, "__INT64_C_SUFFIX__", "L"); 653 pp_define(pp, "__UINT64_C_SUFFIX__", "UL"); 654 pp_define(pp, "__INTMAX_C_SUFFIX__", "L"); 655 pp_define(pp, "__UINTMAX_C_SUFFIX__", "UL"); 656 } else { 657 pp_define(pp, "__INTMAX_TYPE__", "long long"); 658 pp_define(pp, "__UINTMAX_TYPE__", "unsigned long long"); 659 pp_define(pp, "__INTMAX_MAX__", "9223372036854775807LL"); 660 pp_define(pp, "__UINTMAX_MAX__", "18446744073709551615ULL"); 661 pp_define(pp, "__INT64_C(c)", "c ## LL"); 662 pp_define(pp, "__UINT64_C(c)", "c ## ULL"); 663 pp_define(pp, "__INTMAX_C(c)", "c ## LL"); 664 pp_define(pp, "__UINTMAX_C(c)", "c ## ULL"); 665 pp_define(pp, "__INT64_C_SUFFIX__", "LL"); 666 pp_define(pp, "__UINT64_C_SUFFIX__", "ULL"); 667 pp_define(pp, "__INTMAX_C_SUFFIX__", "LL"); 668 pp_define(pp, "__UINTMAX_C_SUFFIX__", "ULL"); 669 } 670 671 pp_define(pp, "__WCHAR_MAX__", wchar16 ? "65535" : "2147483647"); 672 pp_define(pp, "__WCHAR_MIN__", wchar16 ? "0" : "(-__WCHAR_MAX__ - 1)"); 673 pp_define(pp, "__WINT_MAX__", "2147483647"); 674 pp_define(pp, "__WINT_MIN__", "(-__WINT_MAX__ - 1)"); 675 pp_define(pp, "__SIG_ATOMIC_MAX__", "2147483647"); 676 pp_define(pp, "__SIG_ATOMIC_MIN__", "(-__SIG_ATOMIC_MAX__ - 1)"); 677 678 /* C11 <stdatomic.h> lock-free macros. The currently supported primary 679 * targets have naturally lock-free scalar and pointer atomics through the 680 * machine-word sizes used by these typedefs. 
*/ 681 pp_define(pp, "__ATOMIC_BOOL_LOCK_FREE", "2"); 682 pp_define(pp, "__ATOMIC_CHAR_LOCK_FREE", "2"); 683 pp_define(pp, "__ATOMIC_CHAR16_T_LOCK_FREE", "2"); 684 pp_define(pp, "__ATOMIC_CHAR32_T_LOCK_FREE", "2"); 685 pp_define(pp, "__ATOMIC_WCHAR_T_LOCK_FREE", "2"); 686 pp_define(pp, "__ATOMIC_SHORT_LOCK_FREE", "2"); 687 pp_define(pp, "__ATOMIC_INT_LOCK_FREE", "2"); 688 pp_define(pp, "__ATOMIC_LONG_LOCK_FREE", "2"); 689 pp_define(pp, "__ATOMIC_LLONG_LOCK_FREE", "2"); 690 pp_define(pp, "__ATOMIC_POINTER_LOCK_FREE", "2"); 691 692 pp_define(pp, "__FLT_EVAL_METHOD__", "0"); 693 pp_define(pp, "__FLT_HAS_DENORM__", "1"); 694 pp_define(pp, "__FLT_MANT_DIG__", "24"); 695 pp_define(pp, "__FLT_DECIMAL_DIG__", "9"); 696 pp_define(pp, "__FLT_DIG__", "6"); 697 pp_define(pp, "__FLT_MIN_EXP__", "(-125)"); 698 pp_define(pp, "__FLT_MIN_10_EXP__", "(-37)"); 699 pp_define(pp, "__FLT_MAX_EXP__", "128"); 700 pp_define(pp, "__FLT_MAX_10_EXP__", "38"); 701 pp_define(pp, "__FLT_MAX__", "0x1.fffffep+127F"); 702 pp_define(pp, "__FLT_EPSILON__", "0x1p-23F"); 703 pp_define(pp, "__FLT_MIN__", "0x1p-126F"); 704 pp_define(pp, "__FLT_DENORM_MIN__", "0x1p-149F"); 705 706 pp_define(pp, "__DBL_HAS_DENORM__", "1"); 707 pp_define(pp, "__DBL_MANT_DIG__", "53"); 708 pp_define(pp, "__DBL_DECIMAL_DIG__", "17"); 709 pp_define(pp, "__DBL_DIG__", "15"); 710 pp_define(pp, "__DBL_MIN_EXP__", "(-1021)"); 711 pp_define(pp, "__DBL_MIN_10_EXP__", "(-307)"); 712 pp_define(pp, "__DBL_MAX_EXP__", "1024"); 713 pp_define(pp, "__DBL_MAX_10_EXP__", "308"); 714 pp_define(pp, "__DBL_MAX__", "0x1.fffffffffffffp+1023"); 715 pp_define(pp, "__DBL_EPSILON__", "0x1p-52"); 716 pp_define(pp, "__DBL_MIN__", "0x1p-1022"); 717 pp_define(pp, "__DBL_DENORM_MIN__", "0x1p-1074"); 718 719 /* Targets that follow the IEEE-754 binary128 quad psABI for `long double` 720 * (RISC-V, aarch64-linux, wasm32) get the 113-bit-mantissa characteristics; 721 * everything else aliases `double`. 
The wasm backend still reports f128 as 722 * unsupported when a value is actually materialized. See 723 * kit_target_long_double_is_binary128. */ 724 if (kit_target_long_double_is_binary128(target)) { 725 pp_define(pp, "__LDBL_HAS_DENORM__", "1"); 726 pp_define(pp, "__LDBL_MANT_DIG__", "113"); 727 pp_define(pp, "__LDBL_DECIMAL_DIG__", "36"); 728 pp_define(pp, "__LDBL_DIG__", "33"); 729 pp_define(pp, "__LDBL_MIN_EXP__", "(-16381)"); 730 pp_define(pp, "__LDBL_MIN_10_EXP__", "(-4931)"); 731 pp_define(pp, "__LDBL_MAX_EXP__", "16384"); 732 pp_define(pp, "__LDBL_MAX_10_EXP__", "4932"); 733 pp_define(pp, "__LDBL_MAX__", "0x1.ffffffffffffffffffffffffffffp+16383L"); 734 pp_define(pp, "__LDBL_EPSILON__", "0x1p-112L"); 735 pp_define(pp, "__LDBL_MIN__", "0x1p-16382L"); 736 pp_define(pp, "__LDBL_DENORM_MIN__", "0x1p-16494L"); 737 pp_define(pp, "__DECIMAL_DIG__", "36"); 738 } else { 739 pp_define(pp, "__LDBL_HAS_DENORM__", "1"); 740 pp_define(pp, "__LDBL_MANT_DIG__", "53"); 741 pp_define(pp, "__LDBL_DECIMAL_DIG__", "17"); 742 pp_define(pp, "__LDBL_DIG__", "15"); 743 pp_define(pp, "__LDBL_MIN_EXP__", "(-1021)"); 744 pp_define(pp, "__LDBL_MIN_10_EXP__", "(-307)"); 745 pp_define(pp, "__LDBL_MAX_EXP__", "1024"); 746 pp_define(pp, "__LDBL_MAX_10_EXP__", "308"); 747 pp_define(pp, "__LDBL_MAX__", "0x1.fffffffffffffp+1023L"); 748 pp_define(pp, "__LDBL_EPSILON__", "0x1p-52L"); 749 pp_define(pp, "__LDBL_MIN__", "0x1p-1022L"); 750 pp_define(pp, "__LDBL_DENORM_MIN__", "0x1p-1074L"); 751 pp_define(pp, "__DECIMAL_DIG__", "17"); 752 } 753 }

/* Allocate and initialise a preprocessor bound to compiler `c`.
 *
 * The Pp object comes from the compiler context's heap and is zero-filled;
 * it then gets its own pool and a 64 KiB arena. Returns NULL when the Pp
 * object, the pool, or the arena cannot be created (anything already
 * created is released first).
 *
 * On success the hideset table, macro table, keyword symbols, date/time
 * state, and the static and target predefined macros are all set up before
 * the pointer is returned. Release with pp_free. */
Pp* pp_new(Compiler* c) {
  Heap* h = (Heap*)kit_compiler_context(c)->heap;
  Pp* pp = (Pp*)h->alloc(h, sizeof(*pp), _Alignof(Pp));
  if (!pp) return NULL;
  memset(pp, 0, sizeof(*pp));
  pp->c = c;
  pp->pool = c_pool_new(c);
  pp->arena = NULL;
  /* The status result is ignored on purpose: success is judged by whether
   * pp->arena was filled in. */
  (void)kit_arena_new(h, 64 * 1024, &pp->arena);
  if (!pp->pool || !pp->arena) {
    c_pool_free(pp->pool);
    kit_arena_free(pp->arena);
    h->free(h, pp, sizeof(*pp));
    return NULL;
  }
  /* Reserve hideset slot 0 for HS_EMPTY. The slot is unused but the
   * indexing convention costs only a pointer. */
  pp->hsets_cap = 8;
  pp->hsets = (Hideset**)pp_xrealloc(
      pp, NULL, 0, sizeof(Hideset*) * pp->hsets_cap, _Alignof(Hideset*));
  pp->hsets[0] = NULL;
  pp->hsets_n = 1;
  MacroMap_init_cap(&pp->mtab, h, 32u);
  pp_intern_keywords(pp);
  compute_date_time(pp);
  pp_register_static_predefined(pp);
  pp_register_target_predefined(pp);
  return pp;
}

/* Destroy a preprocessor created by pp_new. A NULL `pp` is a no-op.
 *
 * Every source still on the stack is popped (src_pop closes lexer sources),
 * then the source stack, macro table, hideset table, conditional stack and
 * include-directory list are released, followed by the pool, the arena and
 * finally the Pp object itself. */
void pp_free(Pp* pp) {
  Heap* h;
  if (!pp) return;
  h = pp_heap(pp);
  /* Pop / close any remaining lex sources. */
  while (pp->nsources) src_pop(pp);
  pp_xfree(pp, pp->sources, sizeof(TokSrc) * pp->sources_cap);
  MacroMap_fini(&pp->mtab);
  pp_xfree(pp, pp->hsets, sizeof(Hideset*) * pp->hsets_cap);
  pp_xfree(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap);
  pp_xfree(pp, pp->inc_dirs, sizeof(*pp->inc_dirs) * pp->inc_dirs_cap);
  c_pool_free(pp->pool);
  kit_arena_free(pp->arena);
  h->free(h, pp, sizeof(*pp));
}

/* Push `lex` onto the token-source stack as a lexer-backed (SRC_LEX)
 * source. All other TokSrc fields are zeroed. The lexer is closed by
 * src_pop when the source is popped. */
void pp_push_input(Pp* pp, Lexer* lex) {
  TokSrc s;
  memset(&s, 0, sizeof(s));
  s.kind = SRC_LEX;
  s.lex = lex;
  src_push(pp, s);
}

/* Append `dir` to the include search list.
 *
 * `system` is normalised to 0/1 and stored alongside the path. The `dir`
 * pointer itself is stored, not a copy of the string, so the caller must
 * keep it valid for as long as the preprocessor may consult it. The list
 * grows geometrically, starting at 4 entries. */
void pp_add_include_dir(Pp* pp, const char* dir, int system) {
  if (pp->ninc_dirs == pp->inc_dirs_cap) {
    u32 nc = pp->inc_dirs_cap ? pp->inc_dirs_cap * 2 : 4;
    pp->inc_dirs =
        pp_xrealloc(pp, pp->inc_dirs, sizeof(*pp->inc_dirs) * pp->inc_dirs_cap,
                    sizeof(*pp->inc_dirs) * nc, _Alignof(void*));
    pp->inc_dirs_cap = nc;
  }
  pp->inc_dirs[pp->ninc_dirs].path = dir;
  pp->inc_dirs[pp->ninc_dirs].system = (u8)(system ? 1 : 0);
  ++pp->ninc_dirs;
}

/* Define macro `name` with replacement text `body`.
 *
 * A synthetic source line "name body\n" is built in a scratch buffer, lexed
 * from memory under the file name "<command-line>", and handed to the
 * regular define machinery (read_directive_line + do_define) so that
 * command-line -D matches the normal #define path.
 *
 * A NULL or empty `name` is ignored. A NULL `body` contributes no
 * replacement text (the line is just "name \n").
 *
 * If the scratch buffer cannot be allocated or the in-memory lexer cannot
 * be opened, the call returns without defining anything; no diagnostic is
 * emitted from here. */
void pp_define(Pp* pp, const char* name, const char* body) {
  size_t nlen;
  size_t blen;
  size_t cap;
  size_t pos = 0;
  Heap* h;
  char* buf;
  Lexer* lex;
  Tok* line;
  u32 lineN;

  if (!name || !*name) return;

  nlen = kit_slice_cstr(name).len;
  blen = body ? kit_slice_cstr(body).len : 0;
  /* "name" + " " + "body" + "\n" + NUL. Computed once so the allocation
   * and the matching free can never disagree on the size. */
  cap = nlen + 1 + blen + 1 + 1;

  h = pp_heap(pp);
  buf = (char*)h->alloc(h, cap, 1);
  if (!buf) return;

  memcpy(buf + pos, name, nlen);
  pos += nlen;
  buf[pos++] = ' ';
  if (blen) {
    memcpy(buf + pos, body, blen);
    pos += blen;
  }
  buf[pos++] = '\n';
  buf[pos] = 0;

  lex = lex_open_mem(pp->c, "<command-line>", buf, pos);
  if (!lex) {
    /* Nothing to read from: pushing a NULL lexer would make the source
     * stack call lex_next on it. */
    h->free(h, buf, cap);
    return;
  }
  pp_push_input(pp, lex);
  read_directive_line(pp, &line, &lineN);
  do_define(pp, line, lineN);
  /* Pop the synthetic source (src_pop closes its lexer) before the buffer
   * it was reading from is released. */
  src_pop(pp);
  h->free(h, buf, cap);
}

/* Remove the macro called `name`, if any. A NULL or empty `name` is
 * ignored. The name is interned to a Sym and passed to mt_del. */
void pp_undef(Pp* pp, const char* name) {
  Sym s;
  if (!name || !*name) return;
  s = kit_sym_intern(pp->pool->c, kit_slice_cstr(name));
  mt_del(pp, s);
}

/* Return the preprocessor's current pack alignment (pp->pack_align), or 0
 * when `pp` is NULL. */
uint32_t pp_pack_alignment(const Pp* pp) { return pp ? pp->pack_align : 0; }

/* Record an include edge from file `includer` to file `included` at
 * `include_loc`. Thin forwarder to kit_source_add_include using this
 * preprocessor's compiler; `system` is passed through unchanged. */
void pp_add_include_edge(Pp* pp, u32 includer, u32 included, SrcLoc include_loc,
                         int system) {
  kit_source_add_include(pp->c, includer, included, include_loc, system);
}