commit e37202a809c3df4009250b2820d444be4817a61f
parent ae82f93f8ba736c4ba664969e703abbc4c27cce6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 12 May 2026 16:44:12 -0700
cfree/cg.h and lang/toy
Diffstat:
12 files changed, 818 insertions(+), 29 deletions(-)
diff --git a/Makefile b/Makefile
@@ -16,7 +16,8 @@ LIB_CFLAGS = $(CFLAGS_COMMON) -ffreestanding -Iinclude -Isrc
# Driver: hosted CLI binary. Sees only the public include/ tree — that's
# what makes the driver the first consumer of libcfree.
-DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude
+DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude -I.
+LANG_CFLAGS = $(CFLAGS_COMMON) -Iinclude
LIB_SRCS = $(shell find src -name '*.c')
LIB_ASMS = $(shell find src -name '*.S')
@@ -28,7 +29,12 @@ DRIVER_SRCS = $(wildcard driver/*.c)
DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS))
DRIVER_DEPS = $(DRIVER_OBJS:.o=.d)
+LANG_TOY_SRCS = $(wildcard lang/toy/*.c)
+LANG_TOY_OBJS = $(patsubst lang/toy/%.c,build/lang/toy/%.o,$(LANG_TOY_SRCS))
+LANG_TOY_DEPS = $(LANG_TOY_OBJS:.o=.d)
+
LIB_AR = build/libcfree.a
+LANG_TOY_AR = build/libcfree_toy.a
BIN = build/cfree
.PHONY: all lib bin format clean self self-stage2
@@ -46,8 +52,13 @@ $(LIB_AR): $(LIB_OBJS)
@rm -f $@
ar rcs $@ $(LIB_OBJS)
-$(BIN): $(DRIVER_OBJS) $(LIB_AR)
- $(CC) $(HOST_SYSROOT_LDFLAGS) -o $@ $(DRIVER_OBJS) $(LIB_AR)
+$(LANG_TOY_AR): $(LANG_TOY_OBJS)
+ @mkdir -p $(dir $@)
+ @rm -f $@
+ ar rcs $@ $(LANG_TOY_OBJS)
+
+$(BIN): $(DRIVER_OBJS) $(LIB_AR) $(LANG_TOY_AR)
+ $(CC) $(HOST_SYSROOT_LDFLAGS) -o $@ $(DRIVER_OBJS) $(LIB_AR) $(LANG_TOY_AR)
build/lib/%.o: src/%.c
@mkdir -p $(dir $@)
@@ -61,6 +72,10 @@ build/driver/%.o: driver/%.c
@mkdir -p $(dir $@)
$(CC) $(DRIVER_CFLAGS) $(DEPFLAGS) -c $< -o $@
+build/lang/toy/%.o: lang/toy/%.c
+ @mkdir -p $(dir $@)
+ $(CC) $(LANG_CFLAGS) $(DEPFLAGS) -c $< -o $@
+
include rt/Makefile
# Self-host: build cfree with clang (stage 1), then rebuild libcfree.a +
@@ -96,12 +111,13 @@ self-stage2:
DEPFLAGS=''
format:
- find src include driver test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google
+ find src include driver lang test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google
clean:
rm -rf build
-include $(LIB_DEPS)
-include $(DRIVER_DEPS)
+-include $(LANG_TOY_DEPS)
include test/test.mk
diff --git a/doc/LANGS.md b/doc/LANGS.md
@@ -23,12 +23,12 @@ before touching the C frontend.
```
lang/
toy/
- lex.c — tokenizer (names, numbers, punctuation, keywords)
- parse.c — recursive-descent parser → CfreeCg calls
+ lex.c — tokenizer: produces `ToyToken` structs with `CfreeSrcLoc` (line, col)
+ parse.c — recursive-descent parser that consumes a token iterator → CfreeCg calls
type.c — toy type system: int, record, array, pointer
type.h
- toy.h — public frontend entry: cfree_toy_compile()
- Makefile — produces libcfree_toy.a
+ toy.h — public frontend entry: `cfree_toy_compile()`
+ Makefile — produces `libcfree_toy.a`
```
`lang/` is a sibling of `driver/` and `src/`. It only includes `<cfree.h>` and
@@ -281,6 +281,26 @@ unary_expr ::= ("-" | "!" | "&") unary_expr | primary
primary ::= number | string | name | lvalue | "(" expr ")"
```
+### Token representation
+
+```c
+typedef enum ToyTokenKind { TOK_EOF, TOK_FN, TOK_LET, TOK_IF, TOK_INT,
+ TOK_IDENT, TOK_NUMBER, TOK_STRING, ... } ToyTokenKind;
+
+typedef struct ToyToken {
+ ToyTokenKind kind;
+ CfreeSrcLoc loc; /* file_id, line, col */
+ const uint8_t* text; /* points into source buffer */
+ size_t text_len;
+ int64_t int_value; /* valid for TOK_NUMBER */
+} ToyToken;
+```
+
+The lexer tracks `cur`, `end`, `bol` (beginning-of-line), and `line` so that
+every emitted token gets an accurate `CfreeSrcLoc`. The parser holds a
+`ToyLexer` as its token iterator and calls `toy_lexer_next()` to advance,
+keeping the current token in `parser->cur`.
+
### Semantics
- **One integer type**: `int` is a signed integer whose width equals the target
@@ -299,7 +319,10 @@ primary ::= number | string | name | lvalue | "(" expr ")"
### Frontend pipeline
-1. **Lex** (`lex.c`) — plain recursive descent with 1-char lookahead.
+1. **Lex** (`lex.c`) — token iterator (`ToyLexer`) with 1-char lookahead.
+ Every token carries its kind, source span (`text`/`text_len`), `CfreeSrcLoc`
+ (line/col), and an `int_value` for number literals. The parser calls
+ `toy_lexer_next()` to advance the iterator and inspects the current `ToyToken`.
2. **Type check** (`type.c`) — minimal bidirectional inference:
- `let` requires an explicit type (or an initializer from which to infer).
- Every expression node carries a `ToyType*`.
diff --git a/driver/cc.c b/driver/cc.c
@@ -22,7 +22,7 @@
* -l name -L dir
* -x c (no-op; rejected for any other language)
* - (stdin source)
- * .c/.cc/.cpp -> source; .o/.obj -> object inputs; .a -> archive inputs.
+ * .c/.toy -> source; .o/.obj -> object inputs; .a -> archive inputs.
*
* Library resolution (-lfoo against -L paths) happens here and produces
* concrete archive paths for libcfree. */
@@ -81,7 +81,7 @@ typedef struct CcOptions {
DriverCflags cf;
/* Positional inputs split by suffix. */
- const char** source_files; /* .c/.cc/.cpp paths */
+ const char** source_files; /* .c paths */
uint32_t nsource_files;
CfreeBytesInput* source_memory; /* "-" stdin slurp */
uint32_t nsource_memory;
@@ -150,7 +150,7 @@ void driver_help_cc(void) {
"DESCRIPTION\n"
" Compiles C11 sources and links them with .o/.a inputs. Inputs are\n"
" classified by suffix:\n"
- " .c .cc .cpp C source\n"
+ " .c C source\n"
" .o .obj object file (link-time input)\n"
" .a static archive (link-time input)\n"
" - read C source from stdin (single source only)\n"
@@ -384,8 +384,7 @@ static int cc_record_wl(CcOptions* o, const char* arg) {
/* Suffix predicate: is `s` a recognized C source suffix? */
static int cc_is_c_source(const char* s) {
- return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".cc") ||
- driver_has_suffix(s, ".cpp");
+ return driver_has_suffix(s, ".c") || driver_has_suffix(s, ".toy");
}
/* Decimal uint64 parse for SOURCE_DATE_EPOCH. Stops at the first non-digit;
@@ -945,6 +944,7 @@ static int cc_load_single_source(DriverEnv* env, const CfreeEnv* cenv,
in->name = o->source_files[0];
in->data = fd->data;
in->len = fd->size;
+ in->lang = cfree_language_for_path(o->source_files[0]);
(void)env;
return 0;
}
diff --git a/driver/env.c b/driver/env.c
@@ -35,6 +35,7 @@
#include <libkern/OSCacheControl.h>
#endif
+#include "lang/toy/toy.h"
#include "driver.h"
/* Dual-mapping back-ends for strict W^X. Picks per-platform:
@@ -139,7 +140,10 @@ void driver_diag_set_compiler(CfreeCompiler* c) { g_diag_active_compiler = c; }
* file name rather than a bare numeric file_id. */
CfreeCompiler* driver_compiler_new(CfreeTarget t, const CfreeEnv* env) {
CfreeCompiler* c = cfree_compiler_new(t, env);
- if (c) driver_diag_set_compiler(c);
+ if (c) {
+ (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile);
+ driver_diag_set_compiler(c);
+ }
return c;
}
@@ -151,7 +155,11 @@ void driver_compiler_free(CfreeCompiler* c) {
CfreePipeline* driver_pipeline_new(CfreeTarget t, const CfreeEnv* env) {
CfreePipeline* p = cfree_pipeline_new(t, env);
- if (p) driver_diag_set_compiler(cfree_pipeline_compiler(p));
+ if (p) {
+ CfreeCompiler* c = cfree_pipeline_compiler(p);
+ (void)cfree_register_frontend(c, CFREE_LANG_TOY, cfree_toy_compile);
+ driver_diag_set_compiler(c);
+ }
return p;
}
@@ -1234,6 +1242,7 @@ int driver_load_bytes(const CfreeFileIO* io, const char* tool, const char* path,
in->name = path;
in->data = out->fd.data;
in->len = out->fd.size;
+ in->lang = cfree_language_for_path(path);
return 0;
}
diff --git a/driver/inputs.c b/driver/inputs.c
@@ -57,8 +57,7 @@ static int inputs_record_stdin(DriverInputs* in) {
int driver_inputs_classify(DriverInputs* in, const char* arg) {
if (driver_streq(arg, "-")) return inputs_record_stdin(in);
- if (driver_has_suffix(arg, ".c") || driver_has_suffix(arg, ".cc") ||
- driver_has_suffix(arg, ".cpp")) {
+ if (driver_has_suffix(arg, ".c") || driver_has_suffix(arg, ".toy")) {
in->sources[in->nsources++] = arg;
return 1;
}
diff --git a/include/cfree.h b/include/cfree.h
@@ -21,6 +21,9 @@ typedef struct CfreeJit CfreeJit;
typedef struct CfreeJitSession CfreeJitSession;
typedef struct CfreeObjFile CfreeObjFile;
typedef struct CfreeDebugInfo CfreeDebugInfo;
+typedef struct CfreeBytesInput CfreeBytesInput;
+typedef struct CfreeCompileOptions CfreeCompileOptions;
+typedef uint32_t CfreeSym;
/* ============================================================
* Source locations (carried in diagnostics)
@@ -416,6 +419,12 @@ void cfree_compiler_free(CfreeCompiler*);
* `path:line:col` instead of the bare numeric `file_id`. */
const char* cfree_compiler_file_name(CfreeCompiler*, uint32_t file_id);
+/* Intern a string into the compiler's global symbol pool. The returned symbol
+ * is stable until cfree_compiler_free and may be passed through public APIs
+ * that traffic in pre-interned names. 0 is reserved for "no symbol"; this
+ * entry never returns 0 for a non-NULL string. */
+CfreeSym cfree_sym_intern(CfreeCompiler*, const char* str);
+
/* ============================================================
* Writer dispatch (inline)
* ============================================================
@@ -696,24 +705,34 @@ typedef struct CfreeDefine {
typedef enum CfreeLanguage {
CFREE_LANG_C = 0,
CFREE_LANG_ASM = 1,
+ CFREE_LANG_TOY = 2,
+ CFREE_LANG_COUNT = 3,
} CfreeLanguage;
+typedef int (*CfreeCompileFn)(CfreeCompiler*, const CfreeCompileOptions*,
+ const CfreeBytesInput*, CfreeObjBuilder* out);
+
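+/* Register an out-of-core language frontend for this compiler instance.
+ * Built-in C and asm compilation remain available without registration; once
+ * a frontend is registered for a language, inputs tagged with that language
+ * are handed to it instead of the built-in path. Passing NULL clears the
+ * slot. Returns nonzero on bad args (NULL compiler or a language value
+ * outside the CfreeLanguage range). */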
+int cfree_register_frontend(CfreeCompiler*, CfreeLanguage, CfreeCompileFn);
+
/* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects,
* and archives. `name` is a diagnostic label (typically a path or pseudo-
* path); the linker interns it on entry. `data` may be any byte-shaped
* content. `lang` is consulted only by source-consuming entries; other
* entries ignore it. */
-typedef struct CfreeBytesInput {
+struct CfreeBytesInput {
const char* name;
const uint8_t* data;
size_t len;
CfreeLanguage lang;
-} CfreeBytesInput;
+};
-/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM; `.c`,
- * `.cc`, `.cpp` and any other suffix (including a path with no suffix) ->
- * CFREE_LANG_C. `.S` (preprocessed asm) is not recognized — drivers must
- * preprocess first and submit the result as CFREE_LANG_ASM. */
+/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM, `.toy` ->
+ * CFREE_LANG_TOY, `.c`, `.cc`, `.cpp` and any other suffix (including a path
+ * with no suffix) -> CFREE_LANG_C. `.S` (preprocessed asm) is not recognized
+ * — drivers must preprocess first and submit the result as CFREE_LANG_ASM. */
CfreeLanguage cfree_language_for_path(const char* path);
/* Preprocessor configuration shared by compile_* and the convenience run. */
@@ -737,7 +756,7 @@ typedef struct CfreePathPrefixMap {
} CfreePathPrefixMap;
/* Per-TU compile knobs. */
-typedef struct CfreeCompileOptions {
+struct CfreeCompileOptions {
int opt_level; /* 0 direct, 1 minimal, 2 full */
int debug_info;
CfreePpOptions pp;
@@ -759,7 +778,7 @@ typedef struct CfreeCompileOptions {
* (N+1)th is not), and compile_* returns nonzero. */
int warnings_are_errors;
uint32_t max_errors;
-} CfreeCompileOptions;
+};
/* Preprocess one C input.
*
diff --git a/include/cfree/cg.h b/include/cfree/cg.h
@@ -0,0 +1,311 @@
+#ifndef CFREE_PUBLIC_CG_H
+#define CFREE_PUBLIC_CG_H
+
+#include <cfree.h>
+
+typedef struct CfreeCg CfreeCg;
+typedef struct CfreeCgValue CfreeCgValue;
+
+typedef uint32_t CfreeCgLabel;
+typedef uint32_t CfreeCgScope;
+typedef uint32_t CfreeCgSlot;
+typedef uint32_t CfreeCgTypeId;
+
+#define CFREE_CG_LABEL_NONE 0u
+#define CFREE_CG_SCOPE_NONE 0u
+#define CFREE_CG_TYPE_NONE 0u
+
+/* Type ids of the builtin scalar types, one field per type, as returned by
+ * cfree_cg_builtin_types(). `void_` and `bool_` carry a trailing underscore
+ * because the bare names cannot be used as member names in C. */
+typedef struct CfreeCgBuiltinTypes {
+ CfreeCgTypeId void_;
+ CfreeCgTypeId bool_;
+ CfreeCgTypeId i8;
+ CfreeCgTypeId u8;
+ CfreeCgTypeId i16;
+ CfreeCgTypeId u16;
+ CfreeCgTypeId i32;
+ CfreeCgTypeId u32;
+ CfreeCgTypeId i64;
+ CfreeCgTypeId u64;
+ CfreeCgTypeId isize; /* NOTE(review): presumably pointer-sized signed — confirm */
+ CfreeCgTypeId usize; /* NOTE(review): presumably pointer-sized unsigned — confirm */
+ CfreeCgTypeId f32;
+ CfreeCgTypeId f64;
+} CfreeCgBuiltinTypes;
+
+/* Type-qualifier bit flags; each enumerator is a distinct bit.
+ * NOTE(review): presumably OR-ed together into the `quals` argument of
+ * cfree_cg_type_qualified() — confirm against the implementation. */
+typedef enum CfreeCgTypeQual {
+ CFREE_CG_TQ_CONST = 1u << 0,
+ CFREE_CG_TQ_VOLATILE = 1u << 1,
+ CFREE_CG_TQ_RESTRICT = 1u << 2,
+} CfreeCgTypeQual;
+
+typedef struct CfreeCgField {
+ CfreeSym name; /* 0 for anonymous fields/tuple elements */
+ CfreeCgTypeId type;
+ uint32_t align_override; /* 0 = natural, 1 = packed, >1 explicit align */
+} CfreeCgField;
+
+typedef struct CfreeCgEnumValue {
+ CfreeSym name;
+ int64_t value;
+} CfreeCgEnumValue;
+
+/* Builtin ids are stable for the compiler. All constructors below allocate a
+ * fresh user-facing type id; construct aliases/qualified types to describe
+ * distinct source-language identities over the same ABI layout. */
+CfreeCgBuiltinTypes cfree_cg_builtin_types(CfreeCompiler*);
+CfreeCgTypeId cfree_cg_type_ptr(CfreeCompiler*, CfreeCgTypeId pointee);
+CfreeCgTypeId cfree_cg_type_array(CfreeCompiler*, CfreeCgTypeId elem,
+ uint32_t count);
+CfreeCgTypeId cfree_cg_type_qualified(CfreeCompiler*, CfreeCgTypeId base,
+ uint32_t quals);
+CfreeCgTypeId cfree_cg_type_alias(CfreeCompiler*, CfreeSym name,
+ CfreeCgTypeId base);
+CfreeCgTypeId cfree_cg_type_record(CfreeCompiler*, CfreeSym tag, int is_union,
+ const CfreeCgField* fields,
+ uint32_t nfields);
+CfreeCgTypeId cfree_cg_type_enum(CfreeCompiler*, CfreeSym tag,
+ CfreeCgTypeId base,
+ const CfreeCgEnumValue* values,
+ uint32_t nvalues);
+CfreeCgTypeId cfree_cg_type_func(CfreeCompiler*, CfreeCgTypeId ret,
+ const CfreeCgTypeId* params, uint32_t nparams,
+ int variadic);
+
+typedef enum CfreeCgVisibility {
+ CFREE_CG_VIS_DEFAULT,
+ CFREE_CG_VIS_HIDDEN,
+ CFREE_CG_VIS_PROTECTED,
+} CfreeCgVisibility;
+
+/* Bit flags carried in CfreeCgDeclAttrs.flags. Each enumerator other than
+ * CFREE_CG_DECL_NONE is a distinct bit, so several may be set at once. */
+typedef enum CfreeCgDeclFlag {
+ CFREE_CG_DECL_NONE = 0,
+ CFREE_CG_DECL_DEFINED = 1u << 0,
+ CFREE_CG_DECL_READONLY = 1u << 1,
+ CFREE_CG_DECL_TLS = 1u << 2,
+ CFREE_CG_DECL_COMMON = 1u << 3,
+ CFREE_CG_DECL_USED = 1u << 4,
+ CFREE_CG_DECL_NORETURN = 1u << 5,
+} CfreeCgDeclFlag;
+
+typedef enum CfreeCgTlsModel {
+ CFREE_CG_TLS_DEFAULT,
+ CFREE_CG_TLS_LOCAL_EXEC,
+ CFREE_CG_TLS_INITIAL_EXEC,
+ CFREE_CG_TLS_LOCAL_DYNAMIC,
+ CFREE_CG_TLS_GENERAL_DYNAMIC,
+ CFREE_CG_TLS_TLVP,
+} CfreeCgTlsModel;
+
+typedef struct CfreeCgDeclAttrs {
+ CfreeSymBind bind;
+ CfreeCgVisibility visibility;
+ CfreeCgTlsModel tls_model;
+ CfreeSym section; /* 0 = default section */
+ uint32_t align; /* 0 = natural */
+ uint32_t flags; /* CfreeCgDeclFlag */
+} CfreeCgDeclAttrs;
+
+typedef enum CfreeCgSymbolRefKind {
+ CFREE_CG_SYMREF_ADDR,
+ CFREE_CG_SYMREF_PCREL,
+ CFREE_CG_SYMREF_GOT,
+ CFREE_CG_SYMREF_PLT,
+ CFREE_CG_SYMREF_TLS_LE,
+ CFREE_CG_SYMREF_TLS_IE,
+ CFREE_CG_SYMREF_TLS_LD,
+ CFREE_CG_SYMREF_TLS_GD,
+ CFREE_CG_SYMREF_TLVP,
+} CfreeCgSymbolRefKind;
+
+CfreeCg* cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out);
+void cfree_cg_free(CfreeCg*);
+
+/* Sticky source location. Function, scope, local, param, instruction, and
+ * data-definition debug records use the current location. */
+void cfree_cg_set_loc(CfreeCg*, CfreeSrcLoc);
+
+void cfree_cg_func_decl(CfreeCg*, CfreeSym name, CfreeCgTypeId fn_type,
+ CfreeCgDeclAttrs attrs);
+void cfree_cg_func_begin(CfreeCg*, CfreeSym name, CfreeCgTypeId fn_type,
+ CfreeCgDeclAttrs attrs);
+void cfree_cg_func_end(CfreeCg*);
+
+/* Scope debug metadata is attached to the same nesting object used for
+ * structured control flow; pass CFREE_CG_TYPE_NONE for statement-only scopes.
+ * Break/continue are valid only for scopes the frontend treats as loop/block
+ * control-flow targets. */
+CfreeCgScope cfree_cg_scope_begin(CfreeCg*, CfreeCgTypeId result_type);
+void cfree_cg_scope_end(CfreeCg*, CfreeCgScope);
+void cfree_cg_break(CfreeCg*, CfreeCgScope);
+void cfree_cg_break_true(CfreeCg*, CfreeCgScope);
+void cfree_cg_break_false(CfreeCg*, CfreeCgScope);
+void cfree_cg_continue(CfreeCg*, CfreeCgScope);
+void cfree_cg_continue_true(CfreeCg*, CfreeCgScope);
+void cfree_cg_continue_false(CfreeCg*, CfreeCgScope);
+
+CfreeCgSlot cfree_cg_local_slot(CfreeCg*, CfreeCgTypeId type, CfreeSym name);
+CfreeCgSlot cfree_cg_param_slot(CfreeCg*, uint32_t index, CfreeCgTypeId type,
+ CfreeSym name);
+
+/* Dynamic stack allocation. Pops size in bytes and pushes result_ptr_type.
+ * `align` 0 means target default stack alignment. */
+void cfree_cg_alloca(CfreeCg*, CfreeCgTypeId result_ptr_type, uint32_t align);
+
+void cfree_cg_push_int(CfreeCg*, int64_t value, CfreeCgTypeId type);
+void cfree_cg_push_float(CfreeCg*, double value, CfreeCgTypeId type);
+/* Anonymous immutable bytes in rodata; pushes a pointer to the first byte. */
+void cfree_cg_push_bytes(CfreeCg*, const uint8_t* str, size_t len);
+void cfree_cg_push_local(CfreeCg*, CfreeCgSlot slot);
+void cfree_cg_push_symbol(CfreeCg*, CfreeSym name, CfreeCgTypeId type,
+ CfreeCgSymbolRefKind kind, int64_t addend);
+
+void cfree_cg_load(CfreeCg*);
+void cfree_cg_addr(CfreeCg*);
+void cfree_cg_store(CfreeCg*);
+
+void cfree_cg_dup(CfreeCg*);
+void cfree_cg_swap(CfreeCg*);
+void cfree_cg_drop(CfreeCg*);
+void cfree_cg_rot3(CfreeCg*);
+
+typedef enum CfreeCgBinOp {
+ CFREE_CG_ADD,
+ CFREE_CG_SUB,
+ CFREE_CG_MUL,
+ CFREE_CG_SDIV,
+ CFREE_CG_UDIV,
+ CFREE_CG_SREM,
+ CFREE_CG_UREM,
+ CFREE_CG_AND,
+ CFREE_CG_OR,
+ CFREE_CG_XOR,
+ CFREE_CG_SHL,
+ CFREE_CG_SHR_S,
+ CFREE_CG_SHR_U,
+} CfreeCgBinOp;
+
+typedef enum CfreeCgCmpOp {
+ CFREE_CG_EQ,
+ CFREE_CG_NE,
+ CFREE_CG_LT_S,
+ CFREE_CG_LE_S,
+ CFREE_CG_GT_S,
+ CFREE_CG_GE_S,
+ CFREE_CG_LT_U,
+ CFREE_CG_LE_U,
+ CFREE_CG_GT_U,
+ CFREE_CG_GE_U,
+} CfreeCgCmpOp;
+
+void cfree_cg_binop(CfreeCg*, CfreeCgBinOp);
+void cfree_cg_cmp(CfreeCg*, CfreeCgCmpOp);
+void cfree_cg_convert(CfreeCg*, CfreeCgTypeId dst);
+
+typedef enum CfreeCgIntrinsic {
+ CFREE_CG_INTRIN_TRAP,
+ CFREE_CG_INTRIN_UNREACHABLE,
+ CFREE_CG_INTRIN_CLZ,
+ CFREE_CG_INTRIN_CTZ,
+ CFREE_CG_INTRIN_POPCOUNT,
+ CFREE_CG_INTRIN_BSWAP,
+ CFREE_CG_INTRIN_FRAME_ADDRESS,
+ CFREE_CG_INTRIN_RETURN_ADDRESS,
+} CfreeCgIntrinsic;
+
+/* Pops nargs operands and pushes result_type unless result_type is
+ * CFREE_CG_TYPE_NONE or void. */
+void cfree_cg_intrinsic(CfreeCg*, CfreeCgIntrinsic, uint32_t nargs,
+ CfreeCgTypeId result_type);
+
+typedef enum CfreeCgAtomicOp {
+ CFREE_CG_ATOMIC_XCHG,
+ CFREE_CG_ATOMIC_ADD,
+ CFREE_CG_ATOMIC_SUB,
+ CFREE_CG_ATOMIC_AND,
+ CFREE_CG_ATOMIC_OR,
+ CFREE_CG_ATOMIC_XOR,
+ CFREE_CG_ATOMIC_NAND,
+} CfreeCgAtomicOp;
+
+typedef enum CfreeCgMemOrder {
+ CFREE_CG_MO_RELAXED,
+ CFREE_CG_MO_CONSUME,
+ CFREE_CG_MO_ACQUIRE,
+ CFREE_CG_MO_RELEASE,
+ CFREE_CG_MO_ACQ_REL,
+ CFREE_CG_MO_SEQ_CST,
+} CfreeCgMemOrder;
+
+void cfree_cg_atomic_load(CfreeCg*, CfreeCgMemOrder);
+void cfree_cg_atomic_store(CfreeCg*, CfreeCgMemOrder);
+void cfree_cg_atomic_rmw(CfreeCg*, CfreeCgAtomicOp, CfreeCgMemOrder);
+/* Stack: [ptr, expected, desired] -> [prior, ok_i1]. */
+void cfree_cg_atomic_cmpxchg(CfreeCg*, CfreeCgMemOrder success,
+ CfreeCgMemOrder failure);
+void cfree_cg_atomic_fence(CfreeCg*, CfreeCgMemOrder);
+
+typedef enum CfreeCgAsmDir {
+ CFREE_CG_ASM_IN,
+ CFREE_CG_ASM_OUT,
+ CFREE_CG_ASM_INOUT,
+} CfreeCgAsmDir;
+
+typedef enum CfreeCgAsmFlag {
+ CFREE_CG_ASM_NONE = 0,
+ CFREE_CG_ASM_VOLATILE = 1u << 0,
+ CFREE_CG_ASM_GOTO = 1u << 1,
+} CfreeCgAsmFlag;
+
+typedef struct CfreeCgAsmOperand {
+ CfreeSym constraint; /* interned GCC-style constraint string */
+ CfreeSym name; /* interned symbolic operand name; 0 if absent */
+ CfreeCgTypeId type;
+ uint8_t dir; /* CfreeCgAsmDir */
+ uint8_t pad[3];
+} CfreeCgAsmOperand;
+
+/* Inputs are popped in declaration order. Outputs are pushed in declaration
+ * order as fresh values after the asm block. Template, constraints, and
+ * clobbers are pre-interned strings. */
+void cfree_cg_inline_asm(CfreeCg*, CfreeSym tmpl,
+ const CfreeCgAsmOperand* outputs, uint32_t noutputs,
+ const CfreeCgAsmOperand* inputs, uint32_t ninputs,
+ const CfreeSym* clobbers, uint32_t nclobbers,
+ uint32_t flags);
+
+CfreeCgLabel cfree_cg_label_new(CfreeCg*);
+void cfree_cg_label_place(CfreeCg*, CfreeCgLabel);
+void cfree_cg_jump(CfreeCg*, CfreeCgLabel);
+void cfree_cg_branch_true(CfreeCg*, CfreeCgLabel);
+void cfree_cg_branch_false(CfreeCg*, CfreeCgLabel);
+
+void cfree_cg_memcpy(CfreeCg*, uint32_t size, uint32_t align);
+void cfree_cg_memset(CfreeCg*, uint8_t val, uint32_t size, uint32_t align);
+
+/* Computes base + offset + index * elemsz and pushes the element address.
+ * Stack is [base, index]. elemsz is inferred from the base pointer/array
+ * type; index may be a constant produced by cfree_cg_push_int. */
+void cfree_cg_index(CfreeCg*, uint32_t offset);
+
+/* Pops record base address and pushes the field address. Offset is inferred
+ * from the record type and field_index. */
+void cfree_cg_field_addr(CfreeCg*, uint32_t field_index);
+
+void cfree_cg_call(CfreeCg*, uint32_t nargs, CfreeCgTypeId fn_type);
+void cfree_cg_tail_call(CfreeCg*, uint32_t nargs, CfreeCgTypeId fn_type);
+void cfree_cg_ret(CfreeCg*);
+void cfree_cg_ret_void(CfreeCg*);
+
+/* Global data definitions. Use data_symbol for address constants in data
+ * initializers; code references use push_symbol. */
+void cfree_cg_data_decl(CfreeCg*, CfreeSym name, CfreeCgTypeId type,
+ CfreeCgDeclAttrs attrs);
+void cfree_cg_data_begin(CfreeCg*, CfreeSym name, CfreeCgTypeId type,
+ CfreeCgDeclAttrs attrs);
+void cfree_cg_data_bytes(CfreeCg*, const uint8_t* data, size_t len);
+void cfree_cg_data_zero(CfreeCg*, uint64_t size);
+void cfree_cg_data_symbol(CfreeCg*, CfreeCgSymbolRefKind kind, CfreeSym target,
+ int64_t addend, uint32_t nbytes);
+void cfree_cg_data_end(CfreeCg*);
+
+#endif
diff --git a/lang/toy/toy.c b/lang/toy/toy.c
@@ -0,0 +1,374 @@
+#include "toy.h"
+
+#include <cfree/cg.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* ============================================================
+ * Lexer / token iterator
+ * ============================================================ */
+
+/* Token kinds produced by toy_lexer_next(). */
+typedef enum ToyTokenKind {
+ TOK_EOF = 0,
+ /* Keywords: fn let if else while break continue return type int */
+ TOK_FN,
+ TOK_LET,
+ TOK_IF,
+ TOK_ELSE,
+ TOK_WHILE,
+ TOK_BREAK,
+ TOK_CONTINUE,
+ TOK_RETURN,
+ TOK_TYPE,
+ TOK_INT,
+ /* Identifiers and literals */
+ TOK_IDENT,
+ TOK_NUMBER,
+ TOK_STRING,
+ /* Brackets and separators: ( ) { } [ ] , ; : */
+ TOK_LPAREN,
+ TOK_RPAREN,
+ TOK_LBRACE,
+ TOK_RBRACE,
+ TOK_LBRACKET,
+ TOK_RBRACKET,
+ TOK_COMMA,
+ TOK_SEMI,
+ TOK_COLON,
+ /* Operators: = + - * / % < > <= >= == != && || ! & . .* */
+ TOK_EQ,
+ TOK_PLUS,
+ TOK_MINUS,
+ TOK_STAR,
+ TOK_SLASH,
+ TOK_PERCENT,
+ TOK_LT,
+ TOK_GT,
+ TOK_LE,
+ TOK_GE,
+ TOK_EQEQ,
+ TOK_NE,
+ TOK_ANDAND,
+ TOK_PIPEPIPE,
+ TOK_BANG,
+ TOK_AMPERSAND,
+ TOK_DOT,
+ TOK_DOTSTAR,
+} ToyTokenKind;
+
+/* One lexed token. `text`/`text_len` delimit the lexeme inside the caller's
+ * source buffer (nothing is copied); for TOK_STRING the span starts at the
+ * opening quote. */
+typedef struct ToyToken {
+ ToyTokenKind kind;
+ CfreeSrcLoc loc; /* 1-based line/col within the source buffer; file_id is 0 */
+ const uint8_t* text; /* points into source buffer */
+ size_t text_len;
+ int64_t int_value; /* valid when kind == TOK_NUMBER; 0 otherwise */
+} ToyToken;
+
+/* Lexer cursor over a caller-owned byte buffer; see toy_lexer_init(). */
+typedef struct ToyLexer {
+ const uint8_t* cur; /* next unread byte */
+ const uint8_t* end; /* one past the last byte of the buffer */
+ const uint8_t* bol; /* beginning of current line */
+ uint32_t line; /* current line number, starting at 1 */
+} ToyLexer;
+
+/* Point `lex` at the start of the `len`-byte buffer `data`. Line numbering
+ * begins at 1 and the first line begins at `data`. */
+static void toy_lexer_init(ToyLexer* lex, const uint8_t* data, size_t len) {
+  const uint8_t* const first = data;
+
+  lex->line = 1;
+  lex->bol = first;
+  lex->end = first + len;
+  lex->cur = first;
+}
+
+/* Nonzero for the ASCII whitespace bytes the lexer skips between tokens:
+ * space, tab, newline, carriage return, form feed and vertical tab. */
+static int toy_is_space(uint8_t c) {
+  switch (c) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+    case '\f':
+    case '\v':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Nonzero for the ASCII decimal digits '0'..'9'. */
+static int toy_is_digit(uint8_t c) {
+  /* Bytes below '0' become huge after the unsigned cast, so one comparison
+   * covers both ends of the range. */
+  return (unsigned)(c - '0') < 10u;
+}
+
+/* Nonzero for bytes that may start an identifier: ASCII letters and '_'. */
+static int toy_is_alpha(uint8_t c) {
+  if (c >= 'a' && c <= 'z') return 1;
+  if (c >= 'A' && c <= 'Z') return 1;
+  return c == '_';
+}
+
+/* Nonzero for bytes that may continue an identifier: letters, '_' and
+ * decimal digits. */
+static int toy_is_alnum(uint8_t c) {
+  if (toy_is_alpha(c)) return 1;
+  return toy_is_digit(c) ? 1 : 0;
+}
+
+/* Account for a newline. Must be called while `cur` still points at the
+ * '\n' byte: the next line is taken to start one byte after `cur`. */
+static void toy_lexer_advance_line(ToyLexer* lex) {
+  lex->line += 1;
+  lex->bol = &lex->cur[1];
+}
+
+/* Advance past any run of whitespace, keeping line/bol bookkeeping current
+ * for every newline crossed. Stops at the first non-space byte or at end. */
+static void toy_skip_ws(ToyLexer* lex) {
+  for (; lex->cur < lex->end; lex->cur++) {
+    const uint8_t ch = *lex->cur;
+    if (!toy_is_space(ch)) break;
+    /* Bookkeeping happens before the increment, while cur is on the '\n'. */
+    if (ch == '\n') toy_lexer_advance_line(lex);
+  }
+}
+
+/* Build a token of `kind` whose lexeme runs from `start` up to (but not
+ * including) the lexer's current position. The location is the lexer's
+ * current line and a 1-based column measured from the current line start.
+ * `int_value` is zeroed; callers fill it in for number tokens. */
+static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind,
+                               const uint8_t* start) {
+  ToyToken result;
+  const size_t span = (size_t)(lex->cur - start);
+  const uint32_t column = (uint32_t)(start - lex->bol) + 1;
+
+  result.kind = kind;
+  result.text = start;
+  result.text_len = span;
+  result.int_value = 0;
+  result.loc.file_id = 0;
+  result.loc.line = lex->line;
+  result.loc.col = column;
+  return result;
+}
+
+/* Consume the next byte if it equals `want`; returns 1 when consumed. */
+static int toy_lexer_eat(ToyLexer* lex, uint8_t want) {
+  if (lex->cur < lex->end && *lex->cur == want) {
+    lex->cur++;
+    return 1;
+  }
+  return 0;
+}
+
+/* Compare the `len`-byte span at `text` with the NUL-terminated `word`. */
+static int toy_text_is(const uint8_t* text, size_t len, const char* word) {
+  size_t i;
+  for (i = 0; i < len; i++) {
+    if (word[i] == '\0' || text[i] != (uint8_t)word[i]) return 0;
+  }
+  return word[len] == '\0';
+}
+
+/* Map an identifier-shaped lexeme to its keyword kind, or TOK_IDENT. */
+static ToyTokenKind toy_keyword_kind(const uint8_t* text, size_t len) {
+  static const struct {
+    const char* word;
+    ToyTokenKind kind;
+  } kKeywords[] = {
+      {"fn", TOK_FN},         {"int", TOK_INT},
+      {"let", TOK_LET},       {"if", TOK_IF},
+      {"else", TOK_ELSE},     {"while", TOK_WHILE},
+      {"break", TOK_BREAK},   {"continue", TOK_CONTINUE},
+      {"return", TOK_RETURN}, {"type", TOK_TYPE},
+  };
+  size_t i;
+  for (i = 0; i < sizeof kKeywords / sizeof kKeywords[0]; i++) {
+    if (toy_text_is(text, len, kKeywords[i].word)) return kKeywords[i].kind;
+  }
+  return TOK_IDENT;
+}
+
+/* Return the next token from the iterator. Always makes forward progress
+ * unless the input is exhausted, in which case TOK_EOF (with an empty
+ * lexeme) is returned on every call.
+ *
+ * Notes:
+ *  - A byte that starts no token (including a lone '|') is consumed and
+ *    reported as TOK_EOF whose lexeme covers that byte, so its text_len is
+ *    nonzero, unlike the real end-of-input token.
+ *  - Number literals that do not fit in int64_t saturate at INT64_MAX
+ *    instead of overflowing (signed overflow is undefined behavior).
+ *  - An unterminated string literal runs to end of input and is still
+ *    reported as TOK_STRING.
+ *  - A string token's location is that of its opening quote, even when the
+ *    literal spans several lines. */
+static ToyToken toy_lexer_next(ToyLexer* lex) {
+  const uint8_t* start;
+  const uint8_t* start_bol;
+  uint32_t start_line;
+  ToyTokenKind kind;
+  ToyToken tok;
+  uint8_t c;
+
+  toy_skip_ws(lex);
+  start = lex->cur;
+  start_bol = lex->bol;
+  start_line = lex->line;
+  if (lex->cur >= lex->end) return toy_lexer_emit(lex, TOK_EOF, start);
+
+  c = *lex->cur++;
+
+  /* Single-char tokens and multi-char operators */
+  switch (c) {
+    case '(':
+      return toy_lexer_emit(lex, TOK_LPAREN, start);
+    case ')':
+      return toy_lexer_emit(lex, TOK_RPAREN, start);
+    case '{':
+      return toy_lexer_emit(lex, TOK_LBRACE, start);
+    case '}':
+      return toy_lexer_emit(lex, TOK_RBRACE, start);
+    case '[':
+      return toy_lexer_emit(lex, TOK_LBRACKET, start);
+    case ']':
+      return toy_lexer_emit(lex, TOK_RBRACKET, start);
+    case ',':
+      return toy_lexer_emit(lex, TOK_COMMA, start);
+    case ';':
+      return toy_lexer_emit(lex, TOK_SEMI, start);
+    case ':':
+      return toy_lexer_emit(lex, TOK_COLON, start);
+    case '+':
+      return toy_lexer_emit(lex, TOK_PLUS, start);
+    case '-':
+      return toy_lexer_emit(lex, TOK_MINUS, start);
+    case '*':
+      return toy_lexer_emit(lex, TOK_STAR, start);
+    case '/':
+      return toy_lexer_emit(lex, TOK_SLASH, start);
+    case '%':
+      return toy_lexer_emit(lex, TOK_PERCENT, start);
+    case '&':
+      kind = toy_lexer_eat(lex, '&') ? TOK_ANDAND : TOK_AMPERSAND;
+      return toy_lexer_emit(lex, kind, start);
+    case '|':
+      if (toy_lexer_eat(lex, '|')) {
+        return toy_lexer_emit(lex, TOK_PIPEPIPE, start);
+      }
+      break; /* a lone '|' is not a toy token; reported as unknown below */
+    case '=':
+      kind = toy_lexer_eat(lex, '=') ? TOK_EQEQ : TOK_EQ;
+      return toy_lexer_emit(lex, kind, start);
+    case '!':
+      kind = toy_lexer_eat(lex, '=') ? TOK_NE : TOK_BANG;
+      return toy_lexer_emit(lex, kind, start);
+    case '<':
+      kind = toy_lexer_eat(lex, '=') ? TOK_LE : TOK_LT;
+      return toy_lexer_emit(lex, kind, start);
+    case '>':
+      kind = toy_lexer_eat(lex, '=') ? TOK_GE : TOK_GT;
+      return toy_lexer_emit(lex, kind, start);
+    case '.':
+      kind = toy_lexer_eat(lex, '*') ? TOK_DOTSTAR : TOK_DOT;
+      return toy_lexer_emit(lex, kind, start);
+    default:
+      break;
+  }
+
+  /* Number literal */
+  if (toy_is_digit(c)) {
+    int64_t v = (int64_t)(c - '0');
+    while (lex->cur < lex->end && toy_is_digit(*lex->cur)) {
+      const int64_t d = (int64_t)(*lex->cur - '0');
+      /* v * 10 + d would exceed INT64_MAX: clamp. Once clamped, the test
+       * stays true for every further digit, so the value remains clamped. */
+      if (v > (INT64_MAX - d) / 10) {
+        v = INT64_MAX;
+      } else {
+        v = v * 10 + d;
+      }
+      lex->cur++;
+    }
+    tok = toy_lexer_emit(lex, TOK_NUMBER, start);
+    tok.int_value = v;
+    return tok;
+  }
+
+  /* Identifier / keyword */
+  if (toy_is_alpha(c)) {
+    while (lex->cur < lex->end && toy_is_alnum(*lex->cur)) lex->cur++;
+    kind = toy_keyword_kind(start, (size_t)(lex->cur - start));
+    return toy_lexer_emit(lex, kind, start);
+  }
+
+  /* String literal */
+  if (c == '"') {
+    while (lex->cur < lex->end && *lex->cur != '"') {
+      if (*lex->cur == '\n') toy_lexer_advance_line(lex);
+      lex->cur++;
+    }
+    (void)toy_lexer_eat(lex, '"');
+    tok = toy_lexer_emit(lex, TOK_STRING, start);
+    /* toy_lexer_emit uses the lexer's current line state, which has moved on
+     * if the literal contained newlines; report where the token began. */
+    tok.loc.line = start_line;
+    tok.loc.col = (uint32_t)(start - start_bol) + 1;
+    return tok;
+  }
+
+  /* Unknown character */
+  return toy_lexer_emit(lex, TOK_EOF, start);
+}
+
+/* ============================================================
+ * Parser (consumes token iterator)
+ * ============================================================ */
+
+/* Parser state: the lexer acting as token iterator plus one token of
+ * lookahead. */
+typedef struct ToyParser {
+ ToyLexer lex;
+ ToyToken cur; /* current token under inspection */
+ int64_t value; /* integer returned by main, sign applied; set by toy_parse_main */
+} ToyParser;
+
+/* Prepare `p` to parse the `len`-byte buffer `data`: reset the result value,
+ * initialize the lexer and load the first token into p->cur. */
+static void toy_parser_init(ToyParser* p, const uint8_t* data, size_t len) {
+  ToyLexer* const lexer = &p->lex;
+
+  p->value = 0;
+  toy_lexer_init(lexer, data, len);
+  p->cur = toy_lexer_next(lexer);
+}
+
+/* Discard the current token and pull the next one from the lexer. */
+static void toy_parser_advance(ToyParser* p) {
+ p->cur = toy_lexer_next(&p->lex);
+}
+
+/* If the current token has the given kind, consume it and return 1;
+ * otherwise leave the parser untouched and return 0. */
+static int toy_parser_match(ToyParser* p, ToyTokenKind kind) {
+  const int hit = (p->cur.kind == kind);
+  if (hit) toy_parser_advance(p);
+  return hit;
+}
+
+/* Require a token of the given kind: consume it and return 1, or return 0
+ * without consuming anything. Currently behaves exactly like
+ * toy_parser_match(); no diagnostic is reported on a mismatch. */
+static int toy_parser_expect(ToyParser* p, ToyTokenKind kind) {
+  return toy_parser_match(p, kind);
+}
+
+/* True only at the real end of input. The lexer also reports TOK_EOF for a
+ * byte it does not recognize, but that token's lexeme covers the offending
+ * byte, whereas the genuine end-of-input token has an empty lexeme. Without
+ * the length check, garbage after an otherwise valid program would be
+ * accepted as end of input. */
+static int toy_parser_at_end(ToyParser* p) {
+  return p->cur.kind == TOK_EOF && p->cur.text_len == 0;
+}
+
+/* Recognize the only program shape supported so far:
+ *
+ *   fn main() [: int] { return [-]NUMBER; }
+ *
+ * On success stores the literal (negated if a minus sign was present) in
+ * p->value and returns the result of toy_parser_at_end(); returns 0 on any
+ * token mismatch. */
+static int toy_parse_main(ToyParser* p) {
+  static const char kMain[4] = {'m', 'a', 'i', 'n'};
+  size_t i;
+  int negative;
+
+  if (!toy_parser_match(p, TOK_FN)) return 0;
+
+  /* The function name must be exactly `main`. */
+  if (p->cur.kind != TOK_IDENT) return 0;
+  if (p->cur.text_len != sizeof kMain) return 0;
+  for (i = 0; i < sizeof kMain; i++) {
+    if (p->cur.text[i] != (uint8_t)kMain[i]) return 0;
+  }
+  toy_parser_advance(p);
+
+  /* Empty parameter list. */
+  if (!toy_parser_expect(p, TOK_LPAREN)) return 0;
+  if (!toy_parser_expect(p, TOK_RPAREN)) return 0;
+
+  /* Optional `: int` return annotation; a colon must be followed by `int`. */
+  if (toy_parser_match(p, TOK_COLON) && !toy_parser_match(p, TOK_INT)) {
+    return 0;
+  }
+
+  if (!toy_parser_expect(p, TOK_LBRACE)) return 0;
+  if (!toy_parser_match(p, TOK_RETURN)) return 0;
+
+  /* Optional unary minus for negative return values */
+  negative = toy_parser_match(p, TOK_MINUS);
+  if (p->cur.kind != TOK_NUMBER) return 0;
+  p->value = negative ? -p->cur.int_value : p->cur.int_value;
+  toy_parser_advance(p);
+
+  if (!toy_parser_expect(p, TOK_SEMI)) return 0;
+  if (!toy_parser_expect(p, TOK_RBRACE)) return 0;
+  return toy_parser_at_end(p);
+}
+
+/* Toy-language frontend entry point; its signature matches CfreeCompileFn so
+ * it can be passed to cfree_register_frontend().
+ *
+ * Parses `input` as a single `fn main()` returning an integer literal and
+ * emits a global, defined `main` into `out` that returns that value as i32.
+ * `opts` is currently unused.
+ *
+ * Returns 0 on success and 1 on NULL arguments, on a parse failure, or when
+ * the code generator cannot be created. */
+int cfree_toy_compile(CfreeCompiler* c, const CfreeCompileOptions* opts,
+                      const CfreeBytesInput* input, CfreeObjBuilder* out) {
+  ToyParser p;
+  const uint8_t* source;
+  size_t source_len;
+  CfreeCg* cg;
+  CfreeCgBuiltinTypes types;
+  CfreeCgTypeId fn_ty;
+  CfreeSym main_sym;
+  CfreeCgDeclAttrs main_attrs;
+
+  (void)opts;
+  if (!c || !input || !out) return 1;
+
+  /* A NULL buffer is treated as empty input. Its length must be ignored as
+   * well, otherwise the lexer would read past the one-byte placeholder. */
+  source = input->data ? input->data : (const uint8_t*)"";
+  source_len = input->data ? input->len : 0;
+  toy_parser_init(&p, source, source_len);
+  if (!toy_parse_main(&p)) return 1;
+
+  main_attrs.bind = CFREE_SB_GLOBAL;
+  main_attrs.visibility = CFREE_CG_VIS_DEFAULT;
+  main_attrs.tls_model = CFREE_CG_TLS_DEFAULT;
+  main_attrs.section = 0;
+  main_attrs.align = 0;
+  main_attrs.flags = CFREE_CG_DECL_DEFINED;
+  main_sym = cfree_sym_intern(c, "main");
+
+  cg = cfree_cg_new(c, out);
+  if (!cg) return 1; /* do not drive a code generator that was never made */
+
+  types = cfree_cg_builtin_types(c);
+  fn_ty = cfree_cg_type_func(c, types.i32, NULL, 0, 0);
+  cfree_cg_func_begin(cg, main_sym, fn_ty, main_attrs);
+  cfree_cg_push_int(cg, p.value, types.i32);
+  cfree_cg_ret(cg);
+  cfree_cg_func_end(cg);
+  cfree_cg_free(cg);
+  return 0;
+}
diff --git a/lang/toy/toy.h b/lang/toy/toy.h
@@ -0,0 +1,9 @@
+#ifndef CFREE_TOY_H
+#define CFREE_TOY_H
+
+#include <cfree.h>
+
+/* Compile one toy-language source buffer into `out`. The signature matches
+ * CfreeCompileFn, so this can be handed to cfree_register_frontend() for
+ * CFREE_LANG_TOY. Returns 0 on success and nonzero on bad arguments or when
+ * the input is not an accepted toy program. */
+int cfree_toy_compile(CfreeCompiler*, const CfreeCompileOptions*,
+ const CfreeBytesInput* input, CfreeObjBuilder* out);
+
+#endif
diff --git a/src/api/lifecycle.c b/src/api/lifecycle.c
@@ -35,3 +35,15 @@ const char* cfree_compiler_file_name(CfreeCompiler* c, uint32_t file_id) {
if (!f) return NULL;
return pool_str(c->global, f->name, NULL);
}
+
+/* Intern `str` in the compiler's global pool and return its symbol. Returns
+ * 0 (the "no symbol" value) when either argument is NULL. */
+CfreeSym cfree_sym_intern(CfreeCompiler* c, const char* str) {
+  const int usable = (c != NULL) && (str != NULL);
+  return usable ? pool_intern_cstr(c->global, str) : 0;
+}
+
+/* Install `fn` as the frontend for `lang` on this compiler instance; a NULL
+ * `fn` clears the slot. Returns 0 on success and 1 when `c` is NULL or
+ * `lang` is outside [0, CFREE_LANG_COUNT). */
+int cfree_register_frontend(CfreeCompiler* c, CfreeLanguage lang,
+                            CfreeCompileFn fn) {
+  /* Compare as unsigned: the enum's underlying type is implementation-defined
+   * and may be signed, in which case a negative `lang` would pass a plain
+   * `>=` test and index before the start of the table. */
+  if (!c || (unsigned)lang >= (unsigned)CFREE_LANG_COUNT) return 1;
+  c->frontends[lang] = fn;
+  return 0;
+}
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -201,9 +201,23 @@ int cfree_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input,
* feeds tokens straight to the assembler. */
static void compile_into(Compiler* c, const CfreeCompileOptions* opts,
const CfreeBytesInput* input, ObjBuilder* ob) {
- Lexer* lex =
- lex_open_mem(c, input->name, (const char*)input->data, input->len);
- MCEmitter* mc = mc_new(c, ob);
+ CfreeCompileFn frontend = NULL;
+ Lexer* lex;
+ MCEmitter* mc;
+
+ if (input->lang < CFREE_LANG_COUNT) {
+ frontend = c->frontends[input->lang];
+ }
+ if (frontend) {
+ if (frontend(c, opts, input, ob) != 0) {
+ compiler_panic(c, no_loc(), "frontend failed for input: %s", input->name);
+ }
+ obj_finalize(ob);
+ return;
+ }
+
+ lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
+ mc = mc_new(c, ob);
if (input->lang == CFREE_LANG_ASM) {
/* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */
@@ -264,6 +278,8 @@ CfreeLanguage cfree_language_for_path(const char* path) {
if (path[i] == '.') {
const char* ext = path + i + 1;
if (ext[0] == 's' && ext[1] == '\0') return CFREE_LANG_ASM;
+ if (ext[0] == 't' && ext[1] == 'o' && ext[2] == 'y' && ext[3] == '\0')
+ return CFREE_LANG_TOY;
return CFREE_LANG_C;
}
}
diff --git a/src/core/core.h b/src/core/core.h
@@ -125,6 +125,7 @@ struct CfreeCompiler {
TargetABI* abi;
Target target;
CompilerCleanup* cleanup; /* top of LIFO cleanup stack */
+ CfreeCompileFn frontends[CFREE_LANG_COUNT];
void* reserved;
};