kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit e37202a809c3df4009250b2820d444be4817a61f
parent ae82f93f8ba736c4ba664969e703abbc4c27cce6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 12 May 2026 16:44:12 -0700

cfree/cg.h and lang/toy

Diffstat:
M Makefile | 24 ++++++++++++++++++++----
M doc/LANGS.md | 33 ++++++++++++++++++++++++++++-----
M driver/cc.c | 10 +++++-----
M driver/env.c | 13 +++++++++++--
M driver/inputs.c | 3 +--
M include/cfree.h | 35 +++++++++++++++++++++++++++--------
A include/cfree/cg.h | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lang/toy/toy.c | 374 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lang/toy/toy.h | 9 +++++++++
M src/api/lifecycle.c | 12 ++++++++++++
M src/api/pipeline.c | 22 +++++++++++++++++++---
M src/core/core.h | 1 +
12 files changed, 818 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile @@ -16,7 +16,8 @@ LIB_CFLAGS = $(CFLAGS_COMMON) -ffreestanding -Iinclude -Isrc # Driver: hosted CLI binary. Sees only the public include/ tree — that's # what makes the driver the first consumer of libcfree. -DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude +DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude -I. +LANG_CFLAGS = $(CFLAGS_COMMON) -Iinclude LIB_SRCS = $(shell find src -name '*.c') LIB_ASMS = $(shell find src -name '*.S') @@ -28,7 +29,12 @@ DRIVER_SRCS = $(wildcard driver/*.c) DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS)) DRIVER_DEPS = $(DRIVER_OBJS:.o=.d) +LANG_TOY_SRCS = $(wildcard lang/toy/*.c) +LANG_TOY_OBJS = $(patsubst lang/toy/%.c,build/lang/toy/%.o,$(LANG_TOY_SRCS)) +LANG_TOY_DEPS = $(LANG_TOY_OBJS:.o=.d) + LIB_AR = build/libcfree.a +LANG_TOY_AR = build/libcfree_toy.a BIN = build/cfree .PHONY: all lib bin format clean self self-stage2 @@ -46,8 +52,13 @@ $(LIB_AR): $(LIB_OBJS) @rm -f $@ ar rcs $@ $(LIB_OBJS) -$(BIN): $(DRIVER_OBJS) $(LIB_AR) - $(CC) $(HOST_SYSROOT_LDFLAGS) -o $@ $(DRIVER_OBJS) $(LIB_AR) +$(LANG_TOY_AR): $(LANG_TOY_OBJS) + @mkdir -p $(dir $@) + @rm -f $@ + ar rcs $@ $(LANG_TOY_OBJS) + +$(BIN): $(DRIVER_OBJS) $(LIB_AR) $(LANG_TOY_AR) + $(CC) $(HOST_SYSROOT_LDFLAGS) -o $@ $(DRIVER_OBJS) $(LIB_AR) $(LANG_TOY_AR) build/lib/%.o: src/%.c @mkdir -p $(dir $@) @@ -61,6 +72,10 @@ build/driver/%.o: driver/%.c @mkdir -p $(dir $@) $(CC) $(DRIVER_CFLAGS) $(DEPFLAGS) -c $< -o $@ +build/lang/toy/%.o: lang/toy/%.c + @mkdir -p $(dir $@) + $(CC) $(LANG_CFLAGS) $(DEPFLAGS) -c $< -o $@ + include rt/Makefile # Self-host: build cfree with clang (stage 1), then rebuild libcfree.a + @@ -96,12 +111,13 @@ self-stage2: DEPFLAGS='' format: - find src include driver test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google + find src include driver lang test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) 
-print | xargs clang-format -i --style=google clean: rm -rf build -include $(LIB_DEPS) -include $(DRIVER_DEPS) +-include $(LANG_TOY_DEPS) include test/test.mk diff --git a/doc/LANGS.md b/doc/LANGS.md @@ -23,12 +23,12 @@ before touching the C frontend. ``` lang/ toy/ - lex.c — tokenizer (names, numbers, punctuation, keywords) - parse.c — recursive-descent parser → CfreeCg calls + lex.c — tokenizer: produces `ToyToken` structs with `CfreeSrcLoc` (line, col) + parse.c — recursive-descent parser that consumes a token iterator → CfreeCg calls type.c — toy type system: int, record, array, pointer type.h - toy.h — public frontend entry: cfree_toy_compile() - Makefile — produces libcfree_toy.a + toy.h — public frontend entry: `cfree_toy_compile()` + Makefile — produces `libcfree_toy.a` ``` `lang/` is a sibling of `driver/` and `src/`. It only includes `<cfree.h>` and @@ -281,6 +281,26 @@ unary_expr ::= ("-" | "!" | "&") unary_expr | primary primary ::= number | string | name | lvalue | "(" expr ")" ``` +### Token representation + +```c +typedef enum ToyTokenKind { TOK_EOF, TOK_FN, TOK_LET, TOK_IF, TOK_INT, + TOK_IDENT, TOK_NUMBER, TOK_STRING, ... } ToyTokenKind; + +typedef struct ToyToken { + ToyTokenKind kind; + CfreeSrcLoc loc; /* file_id, line, col */ + const uint8_t* text; /* points into source buffer */ + size_t text_len; + int64_t int_value; /* valid for TOK_NUMBER */ +} ToyToken; +``` + +The lexer tracks `cur`, `end`, `bol` (beginning-of-line), and `line` so that +every emitted token gets an accurate `CfreeSrcLoc`. The parser holds a +`ToyLexer` as its token iterator and calls `toy_lexer_next()` to advance, +keeping the current token in `parser->cur`. + ### Semantics - **One integer type**: `int` is a signed integer whose width equals the target @@ -299,7 +319,10 @@ primary ::= number | string | name | lvalue | "(" expr ")" ### Frontend pipeline -1. **Lex** (`lex.c`) — plain recursive descent with 1-char lookahead. +1. 
**Lex** (`lex.c`) — token iterator (`ToyLexer`) with 1-char lookahead. + Every token carries its kind, source span (`text`/`text_len`), `CfreeSrcLoc` + (line/col), and an `int_value` for number literals. The parser calls + `toy_lexer_next()` to advance the iterator and inspects the current `ToyToken`. 2. **Type check** (`type.c`) — minimal bidirectional inference: - `let` requires an explicit type (or an initializer from which to infer). - Every expression node carries a `ToyType*`. diff --git a/driver/cc.c b/driver/cc.c @@ -22,7 +22,7 @@ * -l name -L dir * -x c (no-op; rejected for any other language) * - (stdin source) - * .c/.cc/.cpp -> source; .o/.obj -> object inputs; .a -> archive inputs. + * .c/.cc -> source; .o/.obj -> object inputs; .a -> archive inputs. * * Library resolution (-lfoo against -L paths) happens here and produces * concrete archive paths for libcfree. */ @@ -81,7 +81,7 @@ typedef struct CcOptions { DriverCflags cf; /* Positional inputs split by suffix. */ - const char** source_files; /* .c/.cc/.cpp paths */ + const char** source_files; /* .c paths */ uint32_t nsource_files; CfreeBytesInput* source_memory; /* "-" stdin slurp */ uint32_t nsource_memory; @@ -150,7 +150,7 @@ void driver_help_cc(void) { "DESCRIPTION\n" " Compiles C11 sources and links them with .o/.a inputs. Inputs are\n" " classified by suffix:\n" - " .c .cc .cpp C source\n" + " .c C source\n" " .o .obj object file (link-time input)\n" " .a static archive (link-time input)\n" " - read C source from stdin (single source only)\n" @@ -384,8 +384,7 @@ static int cc_record_wl(CcOptions* o, const char* arg) { /* Suffix predicate: is `s` a recognized C source suffix? */ static int cc_is_c_source(const char* s) { - return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".cc") || - driver_has_suffix(s, ".cpp"); + return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".toy"); } /* Decimal uint64 parse for SOURCE_DATE_EPOCH. 
Stops at the first non-digit; @@ -945,6 +944,7 @@ static int cc_load_single_source(DriverEnv* env, const CfreeEnv* cenv, in->name = o->source_files[0]; in->data = fd->data; in->len = fd->size; + in->lang = cfree_language_for_path(o->source_files[0]); (void)env; return 0; } diff --git a/driver/env.c b/driver/env.c @@ -35,6 +35,7 @@ #include <libkern/OSCacheControl.h> #endif +#include "lang/toy/toy.h" #include "driver.h" /* Dual-mapping back-ends for strict W^X. Picks per-platform: @@ -139,7 +140,10 @@ void driver_diag_set_compiler(CfreeCompiler* c) { g_diag_active_compiler = c; } * file name rather than a bare numeric file_id. */ CfreeCompiler* driver_compiler_new(CfreeTarget t, const CfreeEnv* env) { CfreeCompiler* c = cfree_compiler_new(t, env); - if (c) driver_diag_set_compiler(c); + if (c) { + (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile); + driver_diag_set_compiler(c); + } return c; } @@ -151,7 +155,11 @@ void driver_compiler_free(CfreeCompiler* c) { CfreePipeline* driver_pipeline_new(CfreeTarget t, const CfreeEnv* env) { CfreePipeline* p = cfree_pipeline_new(t, env); - if (p) driver_diag_set_compiler(cfree_pipeline_compiler(p)); + if (p) { + CfreeCompiler* c = cfree_pipeline_compiler(p); + (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile); + driver_diag_set_compiler(c); + } return p; } @@ -1234,6 +1242,7 @@ int driver_load_bytes(const CfreeFileIO* io, const char* tool, const char* path, in->name = path; in->data = out->fd.data; in->len = out->fd.size; + in->lang = cfree_language_for_path(path); return 0; } diff --git a/driver/inputs.c b/driver/inputs.c @@ -57,8 +57,7 @@ static int inputs_record_stdin(DriverInputs* in) { int driver_inputs_classify(DriverInputs* in, const char* arg) { if (driver_streq(arg, "-")) return inputs_record_stdin(in); - if (driver_has_suffix(arg, ".c") || driver_has_suffix(arg, ".cc") || - driver_has_suffix(arg, ".cpp")) { + if (driver_has_suffix(arg, ".c") || driver_has_suffix(arg, ".toy")) { 
in->sources[in->nsources++] = arg; return 1; } diff --git a/include/cfree.h b/include/cfree.h @@ -21,6 +21,9 @@ typedef struct CfreeJit CfreeJit; typedef struct CfreeJitSession CfreeJitSession; typedef struct CfreeObjFile CfreeObjFile; typedef struct CfreeDebugInfo CfreeDebugInfo; +typedef struct CfreeBytesInput CfreeBytesInput; +typedef struct CfreeCompileOptions CfreeCompileOptions; +typedef uint32_t CfreeSym; /* ============================================================ * Source locations (carried in diagnostics) @@ -416,6 +419,12 @@ void cfree_compiler_free(CfreeCompiler*); * `path:line:col` instead of the bare numeric `file_id`. */ const char* cfree_compiler_file_name(CfreeCompiler*, uint32_t file_id); +/* Intern a string into the compiler's global symbol pool. The returned symbol + * is stable until cfree_compiler_free and may be passed through public APIs + * that traffic in pre-interned names. 0 is reserved for "no symbol"; this + * entry never returns 0 for a non-NULL string. */ +CfreeSym cfree_sym_intern(CfreeCompiler*, const char* str); + /* ============================================================ * Writer dispatch (inline) * ============================================================ @@ -696,24 +705,34 @@ typedef struct CfreeDefine { typedef enum CfreeLanguage { CFREE_LANG_C = 0, CFREE_LANG_ASM = 1, + CFREE_LANG_TOY = 2, + CFREE_LANG_COUNT = 3, } CfreeLanguage; +typedef int (*CfreeCompileFn)(CfreeCompiler*, const CfreeCompileOptions*, + const CfreeBytesInput*, CfreeObjBuilder* out); + +/* Register an out-of-core language frontend for this compiler instance. + * Built-in C and asm compilation remain available without registration. + * Passing NULL clears the slot. Returns nonzero on bad args. */ +int cfree_register_frontend(CfreeCompiler*, CfreeLanguage, CfreeCompileFn); + /* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects, * and archives. 
`name` is a diagnostic label (typically a path or pseudo- * path); the linker interns it on entry. `data` may be any byte-shaped * content. `lang` is consulted only by source-consuming entries; other * entries ignore it. */ -typedef struct CfreeBytesInput { +struct CfreeBytesInput { const char* name; const uint8_t* data; size_t len; CfreeLanguage lang; -} CfreeBytesInput; +}; -/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM; `.c`, - * `.cc`, `.cpp` and any other suffix (including a path with no suffix) -> - * CFREE_LANG_C. `.S` (preprocessed asm) is not recognized — drivers must - * preprocess first and submit the result as CFREE_LANG_ASM. */ +/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM, `.toy` -> + * CFREE_LANG_TOY, `.c`, `.cc`, `.cpp` and any other suffix (including a path + * with no suffix) -> CFREE_LANG_C. `.S` (preprocessed asm) is not recognized + * — drivers must preprocess first and submit the result as CFREE_LANG_ASM. */ CfreeLanguage cfree_language_for_path(const char* path); /* Preprocessor configuration shared by compile_* and the convenience run. */ @@ -737,7 +756,7 @@ typedef struct CfreePathPrefixMap { } CfreePathPrefixMap; /* Per-TU compile knobs. */ -typedef struct CfreeCompileOptions { +struct CfreeCompileOptions { int opt_level; /* 0 direct, 1 minimal, 2 full */ int debug_info; CfreePpOptions pp; @@ -759,7 +778,7 @@ typedef struct CfreeCompileOptions { * (N+1)th is not), and compile_* returns nonzero. */ int warnings_are_errors; uint32_t max_errors; -} CfreeCompileOptions; +}; /* Preprocess one C input. 
* diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -0,0 +1,311 @@ +#ifndef CFREE_PUBLIC_CG_H +#define CFREE_PUBLIC_CG_H + +#include <cfree.h> + +typedef struct CfreeCg CfreeCg; +typedef struct CfreeCgValue CfreeCgValue; + +typedef uint32_t CfreeCgLabel; +typedef uint32_t CfreeCgScope; +typedef uint32_t CfreeCgSlot; +typedef uint32_t CfreeCgTypeId; + +#define CFREE_CG_LABEL_NONE 0u +#define CFREE_CG_SCOPE_NONE 0u +#define CFREE_CG_TYPE_NONE 0u + +typedef struct CfreeCgBuiltinTypes { + CfreeCgTypeId void_; + CfreeCgTypeId bool_; + CfreeCgTypeId i8; + CfreeCgTypeId u8; + CfreeCgTypeId i16; + CfreeCgTypeId u16; + CfreeCgTypeId i32; + CfreeCgTypeId u32; + CfreeCgTypeId i64; + CfreeCgTypeId u64; + CfreeCgTypeId isize; + CfreeCgTypeId usize; + CfreeCgTypeId f32; + CfreeCgTypeId f64; +} CfreeCgBuiltinTypes; + +typedef enum CfreeCgTypeQual { + CFREE_CG_TQ_CONST = 1u << 0, + CFREE_CG_TQ_VOLATILE = 1u << 1, + CFREE_CG_TQ_RESTRICT = 1u << 2, +} CfreeCgTypeQual; + +typedef struct CfreeCgField { + CfreeSym name; /* 0 for anonymous fields/tuple elements */ + CfreeCgTypeId type; + uint32_t align_override; /* 0 = natural, 1 = packed, >1 explicit align */ +} CfreeCgField; + +typedef struct CfreeCgEnumValue { + CfreeSym name; + int64_t value; +} CfreeCgEnumValue; + +/* Builtin ids are stable for the compiler. All constructors below allocate a + * fresh user-facing type id; construct aliases/qualified types to describe + * distinct source-language identities over the same ABI layout. 
*/ +CfreeCgBuiltinTypes cfree_cg_builtin_types(CfreeCompiler*); +CfreeCgTypeId cfree_cg_type_ptr(CfreeCompiler*, CfreeCgTypeId pointee); +CfreeCgTypeId cfree_cg_type_array(CfreeCompiler*, CfreeCgTypeId elem, + uint32_t count); +CfreeCgTypeId cfree_cg_type_qualified(CfreeCompiler*, CfreeCgTypeId base, + uint32_t quals); +CfreeCgTypeId cfree_cg_type_alias(CfreeCompiler*, CfreeSym name, + CfreeCgTypeId base); +CfreeCgTypeId cfree_cg_type_record(CfreeCompiler*, CfreeSym tag, int is_union, + const CfreeCgField* fields, + uint32_t nfields); +CfreeCgTypeId cfree_cg_type_enum(CfreeCompiler*, CfreeSym tag, + CfreeCgTypeId base, + const CfreeCgEnumValue* values, + uint32_t nvalues); +CfreeCgTypeId cfree_cg_type_func(CfreeCompiler*, CfreeCgTypeId ret, + const CfreeCgTypeId* params, uint32_t nparams, + int variadic); + +typedef enum CfreeCgVisibility { + CFREE_CG_VIS_DEFAULT, + CFREE_CG_VIS_HIDDEN, + CFREE_CG_VIS_PROTECTED, +} CfreeCgVisibility; + +typedef enum CfreeCgDeclFlag { + CFREE_CG_DECL_NONE = 0, + CFREE_CG_DECL_DEFINED = 1u << 0, + CFREE_CG_DECL_READONLY = 1u << 1, + CFREE_CG_DECL_TLS = 1u << 2, + CFREE_CG_DECL_COMMON = 1u << 3, + CFREE_CG_DECL_USED = 1u << 4, + CFREE_CG_DECL_NORETURN = 1u << 5, +} CfreeCgDeclFlag; + +typedef enum CfreeCgTlsModel { + CFREE_CG_TLS_DEFAULT, + CFREE_CG_TLS_LOCAL_EXEC, + CFREE_CG_TLS_INITIAL_EXEC, + CFREE_CG_TLS_LOCAL_DYNAMIC, + CFREE_CG_TLS_GENERAL_DYNAMIC, + CFREE_CG_TLS_TLVP, +} CfreeCgTlsModel; + +typedef struct CfreeCgDeclAttrs { + CfreeSymBind bind; + CfreeCgVisibility visibility; + CfreeCgTlsModel tls_model; + CfreeSym section; /* 0 = default section */ + uint32_t align; /* 0 = natural */ + uint32_t flags; /* CfreeCgDeclFlag */ +} CfreeCgDeclAttrs; + +typedef enum CfreeCgSymbolRefKind { + CFREE_CG_SYMREF_ADDR, + CFREE_CG_SYMREF_PCREL, + CFREE_CG_SYMREF_GOT, + CFREE_CG_SYMREF_PLT, + CFREE_CG_SYMREF_TLS_LE, + CFREE_CG_SYMREF_TLS_IE, + CFREE_CG_SYMREF_TLS_LD, + CFREE_CG_SYMREF_TLS_GD, + CFREE_CG_SYMREF_TLVP, +} CfreeCgSymbolRefKind; + 
+CfreeCg* cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out); +void cfree_cg_free(CfreeCg*); + +/* Sticky source location. Function, scope, local, param, instruction, and + * data-definition debug records use the current location. */ +void cfree_cg_set_loc(CfreeCg*, CfreeSrcLoc); + +void cfree_cg_func_decl(CfreeCg*, CfreeSym name, CfreeCgTypeId fn_type, + CfreeCgDeclAttrs attrs); +void cfree_cg_func_begin(CfreeCg*, CfreeSym name, CfreeCgTypeId fn_type, + CfreeCgDeclAttrs attrs); +void cfree_cg_func_end(CfreeCg*); + +/* Scope debug metadata is attached to the same nesting object used for + * structured control flow; pass CFREE_CG_TYPE_NONE for statement-only scopes. + * Break/continue are valid only for scopes the frontend treats as loop/block + * control-flow targets. */ +CfreeCgScope cfree_cg_scope_begin(CfreeCg*, CfreeCgTypeId result_type); +void cfree_cg_scope_end(CfreeCg*, CfreeCgScope); +void cfree_cg_break(CfreeCg*, CfreeCgScope); +void cfree_cg_break_true(CfreeCg*, CfreeCgScope); +void cfree_cg_break_false(CfreeCg*, CfreeCgScope); +void cfree_cg_continue(CfreeCg*, CfreeCgScope); +void cfree_cg_continue_true(CfreeCg*, CfreeCgScope); +void cfree_cg_continue_false(CfreeCg*, CfreeCgScope); + +CfreeCgSlot cfree_cg_local_slot(CfreeCg*, CfreeCgTypeId type, CfreeSym name); +CfreeCgSlot cfree_cg_param_slot(CfreeCg*, uint32_t index, CfreeCgTypeId type, + CfreeSym name); + +/* Dynamic stack allocation. Pops size in bytes and pushes result_ptr_type. + * `align` 0 means target default stack alignment. */ +void cfree_cg_alloca(CfreeCg*, CfreeCgTypeId result_ptr_type, uint32_t align); + +void cfree_cg_push_int(CfreeCg*, int64_t value, CfreeCgTypeId type); +void cfree_cg_push_float(CfreeCg*, double value, CfreeCgTypeId type); +/* Anonymous immutable bytes in rodata; pushes a pointer to the first byte. 
*/ +void cfree_cg_push_bytes(CfreeCg*, const uint8_t* str, size_t len); +void cfree_cg_push_local(CfreeCg*, CfreeCgSlot slot); +void cfree_cg_push_symbol(CfreeCg*, CfreeSym name, CfreeCgTypeId type, + CfreeCgSymbolRefKind kind, int64_t addend); + +void cfree_cg_load(CfreeCg*); +void cfree_cg_addr(CfreeCg*); +void cfree_cg_store(CfreeCg*); + +void cfree_cg_dup(CfreeCg*); +void cfree_cg_swap(CfreeCg*); +void cfree_cg_drop(CfreeCg*); +void cfree_cg_rot3(CfreeCg*); + +typedef enum CfreeCgBinOp { + CFREE_CG_ADD, + CFREE_CG_SUB, + CFREE_CG_MUL, + CFREE_CG_SDIV, + CFREE_CG_UDIV, + CFREE_CG_SREM, + CFREE_CG_UREM, + CFREE_CG_AND, + CFREE_CG_OR, + CFREE_CG_XOR, + CFREE_CG_SHL, + CFREE_CG_SHR_S, + CFREE_CG_SHR_U, +} CfreeCgBinOp; + +typedef enum CfreeCgCmpOp { + CFREE_CG_EQ, + CFREE_CG_NE, + CFREE_CG_LT_S, + CFREE_CG_LE_S, + CFREE_CG_GT_S, + CFREE_CG_GE_S, + CFREE_CG_LT_U, + CFREE_CG_LE_U, + CFREE_CG_GT_U, + CFREE_CG_GE_U, +} CfreeCgCmpOp; + +void cfree_cg_binop(CfreeCg*, CfreeCgBinOp); +void cfree_cg_cmp(CfreeCg*, CfreeCgCmpOp); +void cfree_cg_convert(CfreeCg*, CfreeCgTypeId dst); + +typedef enum CfreeCgIntrinsic { + CFREE_CG_INTRIN_TRAP, + CFREE_CG_INTRIN_UNREACHABLE, + CFREE_CG_INTRIN_CLZ, + CFREE_CG_INTRIN_CTZ, + CFREE_CG_INTRIN_POPCOUNT, + CFREE_CG_INTRIN_BSWAP, + CFREE_CG_INTRIN_FRAME_ADDRESS, + CFREE_CG_INTRIN_RETURN_ADDRESS, +} CfreeCgIntrinsic; + +/* Pops nargs operands and pushes result_type unless result_type is + * CFREE_CG_TYPE_NONE or void. 
*/ +void cfree_cg_intrinsic(CfreeCg*, CfreeCgIntrinsic, uint32_t nargs, + CfreeCgTypeId result_type); + +typedef enum CfreeCgAtomicOp { + CFREE_CG_ATOMIC_XCHG, + CFREE_CG_ATOMIC_ADD, + CFREE_CG_ATOMIC_SUB, + CFREE_CG_ATOMIC_AND, + CFREE_CG_ATOMIC_OR, + CFREE_CG_ATOMIC_XOR, + CFREE_CG_ATOMIC_NAND, +} CfreeCgAtomicOp; + +typedef enum CfreeCgMemOrder { + CFREE_CG_MO_RELAXED, + CFREE_CG_MO_CONSUME, + CFREE_CG_MO_ACQUIRE, + CFREE_CG_MO_RELEASE, + CFREE_CG_MO_ACQ_REL, + CFREE_CG_MO_SEQ_CST, +} CfreeCgMemOrder; + +void cfree_cg_atomic_load(CfreeCg*, CfreeCgMemOrder); +void cfree_cg_atomic_store(CfreeCg*, CfreeCgMemOrder); +void cfree_cg_atomic_rmw(CfreeCg*, CfreeCgAtomicOp, CfreeCgMemOrder); +/* Stack: [ptr, expected, desired] -> [prior, ok_i1]. */ +void cfree_cg_atomic_cmpxchg(CfreeCg*, CfreeCgMemOrder success, + CfreeCgMemOrder failure); +void cfree_cg_atomic_fence(CfreeCg*, CfreeCgMemOrder); + +typedef enum CfreeCgAsmDir { + CFREE_CG_ASM_IN, + CFREE_CG_ASM_OUT, + CFREE_CG_ASM_INOUT, +} CfreeCgAsmDir; + +typedef enum CfreeCgAsmFlag { + CFREE_CG_ASM_NONE = 0, + CFREE_CG_ASM_VOLATILE = 1u << 0, + CFREE_CG_ASM_GOTO = 1u << 1, +} CfreeCgAsmFlag; + +typedef struct CfreeCgAsmOperand { + CfreeSym constraint; /* interned GCC-style constraint string */ + CfreeSym name; /* interned symbolic operand name; 0 if absent */ + CfreeCgTypeId type; + uint8_t dir; /* CfreeCgAsmDir */ + uint8_t pad[3]; +} CfreeCgAsmOperand; + +/* Inputs are popped in declaration order. Outputs are pushed in declaration + * order as fresh values after the asm block. Template, constraints, and + * clobbers are pre-interned strings. 
*/ +void cfree_cg_inline_asm(CfreeCg*, CfreeSym tmpl, + const CfreeCgAsmOperand* outputs, uint32_t noutputs, + const CfreeCgAsmOperand* inputs, uint32_t ninputs, + const CfreeSym* clobbers, uint32_t nclobbers, + uint32_t flags); + +CfreeCgLabel cfree_cg_label_new(CfreeCg*); +void cfree_cg_label_place(CfreeCg*, CfreeCgLabel); +void cfree_cg_jump(CfreeCg*, CfreeCgLabel); +void cfree_cg_branch_true(CfreeCg*, CfreeCgLabel); +void cfree_cg_branch_false(CfreeCg*, CfreeCgLabel); + +void cfree_cg_memcpy(CfreeCg*, uint32_t size, uint32_t align); +void cfree_cg_memset(CfreeCg*, uint8_t val, uint32_t size, uint32_t align); + +/* Computes base + offset + index * elemsz and pushes the element address. + * Stack is [base, index]. elemsz is inferred from the base pointer/array + * type; index may be a constant produced by cfree_cg_push_int. */ +void cfree_cg_index(CfreeCg*, uint32_t offset); + +/* Pops record base address and pushes the field address. Offset is inferred + * from the record type and field_index. */ +void cfree_cg_field_addr(CfreeCg*, uint32_t field_index); + +void cfree_cg_call(CfreeCg*, uint32_t nargs, CfreeCgTypeId fn_type); +void cfree_cg_tail_call(CfreeCg*, uint32_t nargs, CfreeCgTypeId fn_type); +void cfree_cg_ret(CfreeCg*); +void cfree_cg_ret_void(CfreeCg*); + +/* Global data definitions. Use data_symbol for address constants in data + * initializers; code references use push_symbol. 
*/
+void cfree_cg_data_decl(CfreeCg*, CfreeSym name, CfreeCgTypeId type,
+                        CfreeCgDeclAttrs attrs);
+void cfree_cg_data_begin(CfreeCg*, CfreeSym name, CfreeCgTypeId type,
+                         CfreeCgDeclAttrs attrs);
+void cfree_cg_data_bytes(CfreeCg*, const uint8_t* data, size_t len);
+void cfree_cg_data_zero(CfreeCg*, uint64_t size);
+void cfree_cg_data_symbol(CfreeCg*, CfreeCgSymbolRefKind kind, CfreeSym target,
+                          int64_t addend, uint32_t nbytes);
+void cfree_cg_data_end(CfreeCg*);
+
+#endif
diff --git a/lang/toy/toy.c b/lang/toy/toy.c
@@ -0,0 +1,403 @@
+#include "toy.h"
+
+#include <cfree/cg.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* ============================================================
+ * Lexer / token iterator
+ * ============================================================ */
+
+typedef enum ToyTokenKind {
+  TOK_EOF = 0,
+  TOK_FN,
+  TOK_LET,
+  TOK_IF,
+  TOK_ELSE,
+  TOK_WHILE,
+  TOK_BREAK,
+  TOK_CONTINUE,
+  TOK_RETURN,
+  TOK_TYPE,
+  TOK_INT,
+  TOK_IDENT,
+  TOK_NUMBER,
+  TOK_STRING,
+  TOK_LPAREN,
+  TOK_RPAREN,
+  TOK_LBRACE,
+  TOK_RBRACE,
+  TOK_LBRACKET,
+  TOK_RBRACKET,
+  TOK_COMMA,
+  TOK_SEMI,
+  TOK_COLON,
+  TOK_EQ,
+  TOK_PLUS,
+  TOK_MINUS,
+  TOK_STAR,
+  TOK_SLASH,
+  TOK_PERCENT,
+  TOK_LT,
+  TOK_GT,
+  TOK_LE,
+  TOK_GE,
+  TOK_EQEQ,
+  TOK_NE,
+  TOK_ANDAND,
+  TOK_PIPEPIPE,
+  TOK_BANG,
+  TOK_AMPERSAND,
+  TOK_DOT,
+  TOK_DOTSTAR,
+  /* Malformed input: an unknown byte, a lone '|', an unterminated string, or
+   * a number literal that does not fit in int64_t. Kept distinct from TOK_EOF
+   * so that garbage can never be mistaken for the end of the file. */
+  TOK_ERROR,
+} ToyTokenKind;
+
+typedef struct ToyToken {
+  ToyTokenKind kind;
+  CfreeSrcLoc loc;     /* line/col of the token's first byte */
+  const uint8_t* text; /* points into source buffer */
+  size_t text_len;
+  int64_t int_value; /* valid when kind == TOK_NUMBER */
+} ToyToken;
+
+/* Scanner state. `bol`/`line` follow the scan position; `tok_bol`/`tok_line`
+ * snapshot them at the first byte of the token being scanned, so a token that
+ * crosses a newline (a string literal) is still located where it starts. */
+typedef struct ToyLexer {
+  const uint8_t* cur;
+  const uint8_t* end;
+  const uint8_t* bol; /* beginning of current line */
+  uint32_t line;
+  const uint8_t* tok_bol; /* `bol` at the start of the current token */
+  uint32_t tok_line;      /* `line` at the start of the current token */
+} ToyLexer;
+
+/* Point the lexer at `len` bytes of source starting at `data`. */
+static void toy_lexer_init(ToyLexer* lex, const uint8_t* data, size_t len) {
+  lex->cur = data;
+  lex->end = data + len;
+  lex->bol = data;
+  lex->line = 1;
+  lex->tok_bol = data;
+  lex->tok_line = 1;
+}
+
+static int toy_is_space(uint8_t c) {
+  return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' ||
+         c == '\v';
+}
+
+static int toy_is_digit(uint8_t c) { return c >= '0' && c <= '9'; }
+
+static int toy_is_alpha(uint8_t c) {
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
+}
+
+static int toy_is_alnum(uint8_t c) {
+  return toy_is_alpha(c) || toy_is_digit(c);
+}
+
+/* Does the `len`-byte span at `text` equal the NUL-terminated `lit`? Done by
+ * hand so this file keeps needing only <stddef.h>/<stdint.h> from libc. */
+static int toy_text_eq(const uint8_t* text, size_t len, const char* lit) {
+  size_t i;
+  for (i = 0; i < len; i++) {
+    if (lit[i] == '\0' || text[i] != (uint8_t)lit[i]) return 0;
+  }
+  return lit[len] == '\0';
+}
+
+/* Map an identifier span to its keyword kind, or TOK_IDENT if it is not a
+ * reserved word. */
+static ToyTokenKind toy_keyword_kind(const uint8_t* text, size_t len) {
+  static const struct {
+    const char* word;
+    ToyTokenKind kind;
+  } keywords[] = {
+      {"fn", TOK_FN},           {"int", TOK_INT},
+      {"let", TOK_LET},         {"if", TOK_IF},
+      {"else", TOK_ELSE},       {"while", TOK_WHILE},
+      {"break", TOK_BREAK},     {"continue", TOK_CONTINUE},
+      {"return", TOK_RETURN},   {"type", TOK_TYPE},
+  };
+  size_t i;
+  for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
+    if (toy_text_eq(text, len, keywords[i].word)) return keywords[i].kind;
+  }
+  return TOK_IDENT;
+}
+
+/* Call with `cur` on a '\n': the next line begins one byte later. */
+static void toy_lexer_advance_line(ToyLexer* lex) {
+  lex->bol = lex->cur + 1;
+  lex->line++;
+}
+
+static void toy_skip_ws(ToyLexer* lex) {
+  while (lex->cur < lex->end && toy_is_space(*lex->cur)) {
+    if (*lex->cur == '\n') toy_lexer_advance_line(lex);
+    lex->cur++;
+  }
+}
+
+/* Build a token of `kind` covering [start, cur), located at the line/column
+ * snapshot taken when the token began. */
+static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind,
+                               const uint8_t* start) {
+  ToyToken tok;
+  tok.kind = kind;
+  tok.loc.file_id = 0;
+  tok.loc.line = lex->tok_line;
+  tok.loc.col = (uint32_t)(start - lex->tok_bol) + 1;
+  tok.text = start;
+  tok.text_len = (size_t)(lex->cur - start);
+  tok.int_value = 0;
+  return tok;
+}
+
+/* Return the next token from the iterator. Consumes at least one byte unless
+ * the input is exhausted, in which case it keeps returning TOK_EOF. Input the
+ * lexer cannot tokenize comes back as TOK_ERROR. */
+static ToyToken toy_lexer_next(ToyLexer* lex) {
+  const uint8_t* start;
+  ToyToken tok;
+  uint8_t c;
+
+  toy_skip_ws(lex);
+  start = lex->cur;
+  lex->tok_bol = lex->bol;
+  lex->tok_line = lex->line;
+  if (lex->cur >= lex->end) return toy_lexer_emit(lex, TOK_EOF, start);
+
+  c = *lex->cur++;
+
+  /* Single-char tokens and multi-char operators */
+  switch (c) {
+    case '(':
+      return toy_lexer_emit(lex, TOK_LPAREN, start);
+    case ')':
+      return toy_lexer_emit(lex, TOK_RPAREN, start);
+    case '{':
+      return toy_lexer_emit(lex, TOK_LBRACE, start);
+    case '}':
+      return toy_lexer_emit(lex, TOK_RBRACE, start);
+    case '[':
+      return toy_lexer_emit(lex, TOK_LBRACKET, start);
+    case ']':
+      return toy_lexer_emit(lex, TOK_RBRACKET, start);
+    case ',':
+      return toy_lexer_emit(lex, TOK_COMMA, start);
+    case ';':
+      return toy_lexer_emit(lex, TOK_SEMI, start);
+    case ':':
+      return toy_lexer_emit(lex, TOK_COLON, start);
+    case '+':
+      return toy_lexer_emit(lex, TOK_PLUS, start);
+    case '*':
+      return toy_lexer_emit(lex, TOK_STAR, start);
+    case '/':
+      return toy_lexer_emit(lex, TOK_SLASH, start);
+    case '%':
+      return toy_lexer_emit(lex, TOK_PERCENT, start);
+    case '&':
+      if (lex->cur < lex->end && *lex->cur == '&') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_ANDAND, start);
+      }
+      return toy_lexer_emit(lex, TOK_AMPERSAND, start);
+    case '|':
+      if (lex->cur < lex->end && *lex->cur == '|') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_PIPEPIPE, start);
+      }
+      /* There is no token kind for a single '|'. */
+      return toy_lexer_emit(lex, TOK_ERROR, start);
+    case '=':
+      if (lex->cur < lex->end && *lex->cur == '=') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_EQEQ, start);
+      }
+      return toy_lexer_emit(lex, TOK_EQ, start);
+    case '!':
+      if (lex->cur < lex->end && *lex->cur == '=') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_NE, start);
+      }
+      return toy_lexer_emit(lex, TOK_BANG, start);
+    case '<':
+      if (lex->cur < lex->end && *lex->cur == '=') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_LE, start);
+      }
+      return toy_lexer_emit(lex, TOK_LT, start);
+    case '>':
+      if (lex->cur < lex->end && *lex->cur == '=') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_GE, start);
+      }
+      return toy_lexer_emit(lex, TOK_GT, start);
+    case '-':
+      return toy_lexer_emit(lex, TOK_MINUS, start);
+    case '.':
+      if (lex->cur < lex->end && *lex->cur == '*') {
+        lex->cur++;
+        return toy_lexer_emit(lex, TOK_DOTSTAR, start);
+      }
+      return toy_lexer_emit(lex, TOK_DOT, start);
+    default:
+      break;
+  }
+
+  /* Number literal */
+  if (toy_is_digit(c)) {
+    int64_t v = (int64_t)(c - '0');
+    int overflow = 0;
+    while (lex->cur < lex->end && toy_is_digit(*lex->cur)) {
+      int64_t digit = (int64_t)(*lex->cur - '0');
+      /* Test before multiplying: signed overflow is undefined behavior. */
+      if (v > (INT64_MAX - digit) / 10) overflow = 1;
+      if (!overflow) v = v * 10 + digit;
+      lex->cur++;
+    }
+    if (overflow) return toy_lexer_emit(lex, TOK_ERROR, start);
+    tok = toy_lexer_emit(lex, TOK_NUMBER, start);
+    tok.int_value = v;
+    return tok;
+  }
+
+  /* Identifier / keyword */
+  if (toy_is_alpha(c)) {
+    while (lex->cur < lex->end && toy_is_alnum(*lex->cur)) lex->cur++;
+    return toy_lexer_emit(
+        lex, toy_keyword_kind(start, (size_t)(lex->cur - start)), start);
+  }
+
+  /* String literal; may span lines. A missing closing quote is an error. */
+  if (c == '"') {
+    while (lex->cur < lex->end && *lex->cur != '"') {
+      if (*lex->cur == '\n') toy_lexer_advance_line(lex);
+      lex->cur++;
+    }
+    if (lex->cur >= lex->end) return toy_lexer_emit(lex, TOK_ERROR, start);
+    lex->cur++; /* closing quote */
+    return toy_lexer_emit(lex, TOK_STRING, start);
+  }
+
+  /* Unknown character */
+  return toy_lexer_emit(lex, TOK_ERROR, start);
+}
+
+/* ============================================================
+ * Parser (consumes token iterator)
+ * ============================================================ */
+
+typedef struct ToyParser {
+  ToyLexer lex;
+  ToyToken cur;  /* current token under inspection */
+  int64_t value; /* last parsed integer literal */
+} ToyParser;
+
+static void toy_parser_init(ToyParser* p, const uint8_t* data, size_t len) {
+  toy_lexer_init(&p->lex, data, len);
+  p->cur = toy_lexer_next(&p->lex);
+  p->value = 0;
+}
+
+static void toy_parser_advance(ToyParser* p) {
+  p->cur = toy_lexer_next(&p->lex);
+}
+
+/* Consume the current token if it has `kind`; returns 1 if consumed. */
+static int toy_parser_match(ToyParser* p, ToyTokenKind kind) {
+  if (p->cur.kind != kind) return 0;
+  toy_parser_advance(p);
+  return 1;
+}
+
+/* Like toy_parser_match, for tokens the grammar requires at this point. The
+ * two behave identically; callers turn a 0 result into a parse failure. */
+static int toy_parser_expect(ToyParser* p, ToyTokenKind kind) {
+  return toy_parser_match(p, kind);
+}
+
+static int toy_parser_at_end(ToyParser* p) { return p->cur.kind == TOK_EOF; }
+
+/* Parse the only program shape accepted so far:
+ *   fn main() [: int] { return [-]NUMBER; }
+ * On success stores the (possibly negated) literal in p->value and returns 1;
+ * returns 0 on any mismatch, including trailing tokens after the '}'. */
+static int toy_parse_main(ToyParser* p) {
+  int neg = 0;
+
+  if (!toy_parser_match(p, TOK_FN)) return 0;
+  if (p->cur.kind != TOK_IDENT ||
+      !toy_text_eq(p->cur.text, p->cur.text_len, "main"))
+    return 0;
+  toy_parser_advance(p);
+  if (!toy_parser_expect(p, TOK_LPAREN)) return 0;
+  if (!toy_parser_expect(p, TOK_RPAREN)) return 0;
+  if (toy_parser_match(p, TOK_COLON)) {
+    if (!toy_parser_match(p, TOK_INT)) return 0;
+  }
+  if (!toy_parser_expect(p, TOK_LBRACE)) return 0;
+  if (!toy_parser_match(p, TOK_RETURN)) return 0;
+
+  /* Optional unary minus for negative return values */
+  if (p->cur.kind == TOK_MINUS) {
+    neg = 1;
+    toy_parser_advance(p);
+  }
+  if (p->cur.kind != TOK_NUMBER) return 0;
+  /* int_value is in [0, INT64_MAX], so the negation cannot overflow. */
+  p->value = neg ? -p->cur.int_value : p->cur.int_value;
+  toy_parser_advance(p);
+
+  if (!toy_parser_expect(p, TOK_SEMI)) return 0;
+  if (!toy_parser_expect(p, TOK_RBRACE)) return 0;
+  return toy_parser_at_end(p);
+}
+
+/* Frontend entry point (CfreeCompileFn). Parses `input` as a toy program and
+ * emits a global `main` that returns the parsed constant as an i32. `opts` is
+ * currently unused. Returns 0 on success, nonzero on bad arguments, a parse
+ * failure, or if the code generator could not be created. */
+int cfree_toy_compile(CfreeCompiler* c, const CfreeCompileOptions* opts,
+                      const CfreeBytesInput* input, CfreeObjBuilder* out) {
+  ToyParser p;
+  const uint8_t* source;
+  CfreeCg* cg;
+  CfreeCgBuiltinTypes types;
+  CfreeCgTypeId fn_ty;
+  CfreeSym main_sym;
+  CfreeCgDeclAttrs main_attrs;
+
+  (void)opts;
+  if (!c || !input || !out) return 1;
+
+  /* A NULL buffer is treated as empty source rather than dereferenced. */
+  source = input->data ? input->data : (const uint8_t*)"";
+  toy_parser_init(&p, source, input->data ? input->len : 0);
+  if (!toy_parse_main(&p)) return 1;
+
+  main_attrs.bind = CFREE_SB_GLOBAL;
+  main_attrs.visibility = CFREE_CG_VIS_DEFAULT;
+  main_attrs.tls_model = CFREE_CG_TLS_DEFAULT;
+  main_attrs.section = 0;
+  main_attrs.align = 0;
+  main_attrs.flags = CFREE_CG_DECL_DEFINED;
+  main_sym = cfree_sym_intern(c, "main");
+  cg = cfree_cg_new(c, out);
+  if (!cg) return 1;
+  types = cfree_cg_builtin_types(c);
+  fn_ty = cfree_cg_type_func(c, types.i32, NULL, 0, 0);
+  cfree_cg_func_begin(cg, main_sym, fn_ty, main_attrs);
+  cfree_cg_push_int(cg, p.value, types.i32);
+  cfree_cg_ret(cg);
+  cfree_cg_func_end(cg);
+  cfree_cg_free(cg);
+  return 0;
+}
diff --git a/lang/toy/toy.h b/lang/toy/toy.h
@@ -0,0 +1,9 @@
+#ifndef CFREE_TOY_H
+#define CFREE_TOY_H
+
+#include <cfree.h>
+
+int cfree_toy_compile(CfreeCompiler*, const CfreeCompileOptions*,
+                      const CfreeBytesInput* input, CfreeObjBuilder* out);
+
+#endif
diff --git a/src/api/lifecycle.c b/src/api/lifecycle.c
@@ -35,3 +35,15 @@ const char*
cfree_compiler_file_name(CfreeCompiler* c, uint32_t file_id) {
   if (!f) return NULL;
   return pool_str(c->global, f->name, NULL);
 }
+
+/* Intern `str` in the compiler's global pool and return its symbol. Returns 0
+ * (the reserved "no symbol" value) when either argument is NULL. */
+CfreeSym cfree_sym_intern(CfreeCompiler* c, const char* str) {
+  if (!c || !str) return 0;
+  return pool_intern_cstr(c->global, str);
+}
+
+/* Install `fn` as the frontend for `lang` on this compiler; NULL clears the
+ * slot. Returns 0 on success, 1 if `c` is NULL or `lang` is out of range. */
+int cfree_register_frontend(CfreeCompiler* c, CfreeLanguage lang,
+                            CfreeCompileFn fn) {
+  /* Compare as unsigned: if the enum's underlying type is signed, a negative
+   * `lang` would otherwise pass the bound check and index before the array. */
+  if (!c || (unsigned)lang >= (unsigned)CFREE_LANG_COUNT) return 1;
+  c->frontends[lang] = fn;
+  return 0;
+}
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -201,9 +201,23 @@ int cfree_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input,
  * feeds tokens straight to the assembler. */
 static void compile_into(Compiler* c, const CfreeCompileOptions* opts,
                          const CfreeBytesInput* input, ObjBuilder* ob) {
-  Lexer* lex =
-      lex_open_mem(c, input->name, (const char*)input->data, input->len);
-  MCEmitter* mc = mc_new(c, ob);
+  CfreeCompileFn frontend = NULL;
+  Lexer* lex;
+  MCEmitter* mc;
+
+  if (input->lang < CFREE_LANG_COUNT) {
+    frontend = c->frontends[input->lang];
+  }
+  if (frontend) {
+    if (frontend(c, opts, input, ob) != 0) {
+      compiler_panic(c, no_loc(), "frontend failed for input: %s", input->name);
+    }
+    obj_finalize(ob);
+    return;
+  }
+
+  lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
+  mc = mc_new(c, ob);
 
   if (input->lang == CFREE_LANG_ASM) {
     /* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */
@@ -264,6 +278,8 @@ CfreeLanguage cfree_language_for_path(const char* path) {
     if (path[i] == '.') {
       const char* ext = path + i + 1;
       if (ext[0] == 's' && ext[1] == '\0') return CFREE_LANG_ASM;
+      if (ext[0] == 't' && ext[1] == 'o' && ext[2] == 'y' && ext[3] == '\0')
+        return CFREE_LANG_TOY;
       return CFREE_LANG_C;
     }
   }
diff --git a/src/core/core.h b/src/core/core.h
@@ -125,6 +125,7 @@ struct CfreeCompiler {
   TargetABI* abi;
   Target target;
   CompilerCleanup* cleanup; /* top of LIFO cleanup stack */
+  CfreeCompileFn frontends[CFREE_LANG_COUNT];
   void* reserved;
 };