commit 0b6eaea8eb3bdcdbc9a6ce0e51d95c3da1385ff3
parent 001f39c2a663a220893629d9ee90590e70504874
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 12 May 2026 19:00:46 -0700
toy: use structured scopes, unop, fix call stack order and store drops
Diffstat:
| M | lang/toy/toy.c | | | 938 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
1 file changed, 880 insertions(+), 58 deletions(-)
diff --git a/lang/toy/toy.c b/lang/toy/toy.c
@@ -1,8 +1,71 @@
#include "toy.h"
#include <cfree/cg.h>
+#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
+#include <string.h>
+
+/* ============================================================
+ * CG API coverage checklist — APIs NOT yet exercised by toy:
+ *
+ * Types:
+ * cfree_cg_type_array
+ * cfree_cg_type_qualified
+ * cfree_cg_type_alias
+ * cfree_cg_type_record
+ * cfree_cg_type_enum
+ * cfree_cg_type_is_ptr / ptr_pointee
+ * cfree_cg_type_is_func / func_ret / func_nparams / func_param
+ * cfree_cg_type_is_record / record_nfields / record_field
+ * cfree_cg_type_size / type_align
+ *
+ * Functions & data:
+ * cfree_cg_func_decl
+ * cfree_cg_data_decl / data_begin / data_bytes / data_zero
+ * cfree_cg_data_symbol / data_end
+ *
+ * Values:
+ * cfree_cg_push_float
+ * cfree_cg_push_bytes
+ * cfree_cg_push_symbol (non-ADDR kinds: PCREL, GOT, PLT, TLS_*)
+ * cfree_cg_alloca
+ *
+ * Stack:
+ * cfree_cg_swap
+ * cfree_cg_rot3
+ * cfree_cg_addr (only used for &var, not for general lvalues)
+ *
+ * Arithmetic:
+ * cfree_cg_binop (only ADD, SUB, MUL, SDIV, SREM used)
+ * cfree_cg_unop (NEG, NOT used; BNOT unused)
+ * cfree_cg_cmp (only EQ, NE, LT_S, GT_S, LE_S, GE_S used)
+ * cfree_cg_convert (only i1→int used)
+ *
+ * Scopes & control flow:
+ * cfree_cg_break_true (only with result_type != NONE)
+ * cfree_cg_continue_true / continue_false
+ * cfree_cg_label_new / label_place / jump / branch_true / branch_false
+ *
+ * Calls:
+ * cfree_cg_tail_call
+ *
+ * Intrinsics:
+ * cfree_cg_intrinsic (all)
+ *
+ * Atomics:
+ * cfree_cg_atomic_load / atomic_store / atomic_rmw
+ * cfree_cg_atomic_cmpxchg / atomic_fence
+ *
+ * Variadics:
+ * cfree_cg_va_start / va_arg / va_end / va_copy
+ *
+ * Memory:
+ * cfree_cg_memcpy / memset / index / field_addr
+ *
+ * Inline asm:
+ * cfree_cg_inline_asm
+ * ============================================================ */
/* ============================================================
* Lexer / token iterator
@@ -54,16 +117,16 @@ typedef enum ToyTokenKind {
typedef struct ToyToken {
ToyTokenKind kind;
- CfreeSrcLoc loc; /* line/col within the source buffer */
- const uint8_t* text; /* points into source buffer */
+ CfreeSrcLoc loc;
+ const uint8_t* text;
size_t text_len;
- int64_t int_value; /* valid when kind == TOK_NUMBER */
+ int64_t int_value;
} ToyToken;
typedef struct ToyLexer {
const uint8_t* cur;
const uint8_t* end;
- const uint8_t* bol; /* beginning of current line */
+ const uint8_t* bol;
uint32_t line;
} ToyLexer;
@@ -102,7 +165,7 @@ static void toy_skip_ws(ToyLexer* lex) {
}
static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind,
- const uint8_t* start) {
+ const uint8_t* start) {
ToyToken tok;
tok.kind = kind;
tok.loc.file_id = 0;
@@ -114,7 +177,6 @@ static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind,
return tok;
}
-/* Return the next token from the iterator. Always makes forward progress. */
static ToyToken toy_lexer_next(ToyLexer* lex) {
const uint8_t* start;
ToyToken tok;
@@ -134,7 +196,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) {
uint8_t c = *lex->cur++;
- /* Single-char tokens and multi-char operators */
switch (c) {
case '(':
return toy_lexer_emit(lex, TOK_LPAREN, start);
@@ -208,7 +269,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) {
return toy_lexer_emit(lex, TOK_DOT, start);
}
- /* Number literal */
if (toy_is_digit(c)) {
int64_t v = (int64_t)(c - '0');
while (lex->cur < lex->end && toy_is_digit(*lex->cur)) {
@@ -220,7 +280,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) {
return tok;
}
- /* Identifier / keyword */
if (toy_is_alpha(c)) {
while (lex->cur < lex->end && toy_is_alnum(*lex->cur)) lex->cur++;
size_t len = (size_t)(lex->cur - start);
@@ -256,7 +315,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) {
return toy_lexer_emit(lex, kind, start);
}
- /* String literal */
if (c == '"') {
while (lex->cur < lex->end && *lex->cur != '"') {
if (*lex->cur == '\n') toy_lexer_advance_line(lex);
@@ -266,24 +324,81 @@ static ToyToken toy_lexer_next(ToyLexer* lex) {
return toy_lexer_emit(lex, TOK_STRING, start);
}
- /* Unknown character */
return toy_lexer_emit(lex, TOK_EOF, start);
}
+/* Look one token ahead: returns what toy_lexer_next would yield next while
+ * leaving *lex untouched, because the scan runs on a by-value copy. */
+static ToyToken toy_lexer_peek(const ToyLexer* lex) {
+  ToyLexer scratch = *lex;
+  ToyToken upcoming = toy_lexer_next(&scratch);
+  return upcoming;
+}
+
/* ============================================================
- * Parser (consumes token iterator)
+ * Parser (single-pass parse -> codegen)
* ============================================================ */
+/* Fixed capacities of the parser's tables; exceeding one is reported as a
+ * compile error rather than growing the table. */
+/* Capacity of ToyParser.vars (parameters plus `let` locals of one function). */
+#define TOY_MAX_VARS 64
+/* Capacity of ToyParser.fns (functions registered in the program). */
+#define TOY_MAX_FNS 32
+/* Capacity of ToyParser.scopes (nested `while` loops). */
+#define TOY_MAX_SCOPES 16
+/* Maximum parameters per function and arguments per call. */
+#define TOY_MAX_PARAMS 16
+
+/* One entry of the per-function variable table (a parameter or a `let`). */
+typedef struct ToyVar {
+ /* Interned identifier, compared by value in toy_find_var. */
+ CfreeSym name;
+ /* Declared type of the variable. */
+ CfreeCgTypeId type;
+ /* Storage handle from cfree_cg_param_slot or cfree_cg_local_slot. */
+ CfreeCgSlot slot;
+ /* 1 for function parameters, 0 for `let` locals. */
+ int is_param;
+} ToyVar;
+
+/* One entry of the function table, recorded by toy_parse_fn before the body
+ * is parsed so that recursive and later calls can resolve it. */
+typedef struct ToyFn {
+ /* Interned function name. */
+ CfreeSym name;
+ /* Function type built with cfree_cg_type_func. */
+ CfreeCgTypeId type;
+ /* Return type (the builtin void type when no `: type` was written). */
+ CfreeCgTypeId ret;
+ /* Parameter types; only the first nparams entries are meaningful. */
+ CfreeCgTypeId params[TOY_MAX_PARAMS];
+ size_t nparams;
+} ToyFn;
+
+/* One enclosing `while` loop; `break` and `continue` target the innermost. */
+typedef struct ToyScope {
+ /* Scope handle returned by cfree_cg_scope_begin for the loop. */
+ CfreeCgScope cg_scope;
+} ToyScope;
+
+/* Parser state for the single-pass parse -> codegen pipeline. */
typedef struct ToyParser {
ToyLexer lex;
- ToyToken cur; /* current token under inspection */
- int64_t value; /* last parsed integer literal */
+ /* Current token under inspection (one token of lookahead). */
+ ToyToken cur;
+ /* Compiler and code generator handles supplied to toy_parser_init. */
+ CfreeCompiler* c;
+ CfreeCg* cg;
+ /* Builtin type ids fetched once via cfree_cg_builtin_types. */
+ CfreeCgBuiltinTypes types;
+ /* The toy language's `int` (types.isize) and pointer-to-int. */
+ CfreeCgTypeId int_type;
+ CfreeCgTypeId int_ptr_type;
+
+ /* Variables of the function currently being compiled; nvars is reset to 0
+ * at the start and end of each function. */
+ ToyVar vars[TOY_MAX_VARS];
+ size_t nvars;
+
+ /* Functions registered so far in the program. */
+ ToyFn fns[TOY_MAX_FNS];
+ size_t nfns;
+
+ /* Stack of enclosing `while` loop scopes (innermost last). */
+ ToyScope scopes[TOY_MAX_SCOPES];
+ size_t nscopes;
+
+ /* Return type of the function being parsed; types.void_ outside one. */
+ CfreeCgTypeId cur_fn_ret;
+ /* Diagnostic sink from cfree_compiler_diag_sink; toy_error tolerates NULL. */
+ CfreeDiagSink* diag;
+ /* Set to 1 by toy_error once any error has been reported. */
+ int has_error;
} ToyParser;
+/* Initialise the parser over the source bytes [data, data + len) and bind it
+ * to compiler `c` and code generator `cg`. Primes the first token, caches the
+ * builtin types, and clears the variable, function and loop-scope tables.
+ * The table arrays themselves are not zeroed; only their counts are. */
-static void toy_parser_init(ToyParser* p, const uint8_t* data, size_t len) {
+static void toy_parser_init(ToyParser* p, CfreeCompiler* c, CfreeCg* cg,
+ const uint8_t* data, size_t len) {
toy_lexer_init(&p->lex, data, len);
p->cur = toy_lexer_next(&p->lex);
- p->value = 0;
+ p->c = c;
+ p->cg = cg;
+ p->types = cfree_cg_builtin_types(c);
+ /* The toy `int` maps to the builtin isize type. */
+ p->int_type = p->types.isize;
+ p->int_ptr_type = cfree_cg_type_ptr(c, p->int_type);
+ p->nvars = 0;
+ p->nfns = 0;
+ p->nscopes = 0;
+ /* No function is being parsed yet. */
+ p->cur_fn_ret = p->types.void_;
+ p->diag = cfree_compiler_diag_sink(c);
+ p->has_error = 0;
}
static void toy_parser_advance(ToyParser* p) {
@@ -306,69 +421,776 @@ static int toy_parser_expect(ToyParser* p, ToyTokenKind kind) {
return 0;
}
-static int toy_parser_at_end(ToyParser* p) { return p->cur.kind == TOK_EOF; }
+/* Report an error at `loc`. Always marks the parser as failed; the formatted
+ * message (fmt plus variadic arguments) is forwarded to the diagnostic sink
+ * only when one is installed. */
+static void toy_error(ToyParser* p, CfreeSrcLoc loc, const char* fmt, ...) {
+  p->has_error = 1;
+  if (p->diag) {
+    va_list args;
+    va_start(args, fmt);
+    p->diag->emit(p->diag, CFREE_DIAG_ERROR, loc, fmt, args);
+    va_end(args);
+  }
+}
-static int toy_parse_main(ToyParser* p) {
- if (!toy_parser_match(p, TOK_FN)) return 0;
- if (p->cur.kind != TOK_IDENT || p->cur.text_len != 4 ||
- p->cur.text[0] != 'm' || p->cur.text[1] != 'a' || p->cur.text[2] != 'i' ||
- p->cur.text[3] != 'n')
+/* Tell the code generator the source location of the current token, when a
+ * code generator is attached. */
+static void toy_set_loc(ToyParser* p) {
+  if (!p->cg) {
+    return;
+  }
+  cfree_cg_set_loc(p->cg, p->cur.loc);
+}
+
+/* Intern the text of `tok` as a symbol. The text is copied into a local
+ * NUL-terminated buffer first; tokens of 64 bytes or more are reported as
+ * "identifier too long" and yield symbol 0. */
+static CfreeSym toy_tok_sym(ToyParser* p, ToyToken tok) {
+  char name[64];
+  size_t n = tok.text_len;
+  if (n < sizeof(name)) {
+    memcpy(name, tok.text, n);
+    name[n] = '\0';
+    return cfree_sym_intern(p->c, name);
+  }
+  toy_error(p, tok.loc, "identifier too long");
+  return 0;
+}
+
+/* ============================================================
+ * Symbol tables
+ * ============================================================ */
+
+/* Look up a variable by interned name, newest entry first, so a later
+ * declaration of the same name wins. Returns NULL when not found. */
+static ToyVar* toy_find_var(ToyParser* p, CfreeSym name) {
+  size_t idx = p->nvars;
+  while (idx-- > 0) {
+    ToyVar* cand = &p->vars[idx];
+    if (cand->name == name) {
+      return cand;
+    }
+  }
+  return NULL;
+}
+
+/* Look up a registered function by interned name, most recently registered
+ * first. Returns NULL when no function has that name. */
+static ToyFn* toy_find_fn(ToyParser* p, CfreeSym name) {
+  size_t idx = p->nfns;
+  while (idx-- > 0) {
+    ToyFn* cand = &p->fns[idx];
+    if (cand->name == name) {
+      return cand;
+    }
+  }
+  return NULL;
+}
+
+/* ============================================================
+ * Type parsing
+ * ============================================================ */
+
+/* Parse a type: `int`, or `*` followed by a type (pointer to it).
+ * Returns the type id, or CFREE_CG_TYPE_NONE after reporting an error. */
+static CfreeCgTypeId toy_parse_type(ToyParser* p) {
+  CfreeCgTypeId inner;
+
+  if (toy_parser_match(p, TOK_INT)) {
+    return p->int_type;
+  }
+  if (!toy_parser_match(p, TOK_STAR)) {
+    toy_error(p, p->cur.loc, "expected type");
+    return CFREE_CG_TYPE_NONE;
+  }
+
+  /* Pointer type: recurse for the pointee. */
+  inner = toy_parse_type(p);
+  if (inner != CFREE_CG_TYPE_NONE) {
+    return cfree_cg_type_ptr(p->c, inner);
+  }
+  toy_error(p, p->cur.loc, "expected type after '*'");
+  return CFREE_CG_TYPE_NONE;
+}
+
+/* ============================================================
+ * Expression parsing
+ * ============================================================ */
+
+static CfreeCgTypeId toy_parse_expr(ToyParser* p);
+
+/* Copy the text of `tok` into `buf` as a NUL-terminated string, truncating
+ * to cap - 1 bytes (cap must be > 0). Token text points into the source
+ * buffer and is not NUL-terminated, so it must not be passed to a "%s"
+ * conversion directly. Returns buf. */
+static const char* toy_tok_cstr(ToyToken tok, char* buf, size_t cap) {
+  size_t n = tok.text_len < cap - 1 ? tok.text_len : cap - 1;
+  memcpy(buf, tok.text, n);
+  buf[n] = '\0';
+  return buf;
+}
+
+/* Parse a primary expression and emit code for it:
+ *   - integer literal
+ *   - function call  name(arg, ...)
+ *   - variable read  name
+ *   - parenthesised expression
+ * Returns the expression's type, or CFREE_CG_TYPE_NONE after reporting an
+ * error. */
+static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) {
+  toy_set_loc(p);
+  if (p->cur.kind == TOK_NUMBER) {
+    cfree_cg_push_int(p->cg, p->cur.int_value, p->int_type);
+    toy_parser_advance(p);
+    return p->int_type;
+  }
+
+  if (p->cur.kind == TOK_IDENT) {
+    char name_buf[64]; /* NUL-terminated copy of the name for diagnostics */
+    CfreeSym name = toy_tok_sym(p, p->cur);
+    ToyToken ident_tok = p->cur;
+    toy_parser_advance(p);
+
+    if (p->cur.kind == TOK_LPAREN) {
+      /* Function call */
+      ToyFn* fn = toy_find_fn(p, name);
+      if (!fn) {
+        toy_error(p, ident_tok.loc, "undefined function '%s'",
+                  toy_tok_cstr(ident_tok, name_buf, sizeof(name_buf)));
+        return CFREE_CG_TYPE_NONE;
+      }
+      toy_parser_advance(p); /* ( */
+
+      /* The callee is pushed before its arguments. */
+      cfree_cg_push_symbol(p->cg, name, fn->type, CFREE_CG_SYMREF_ADDR, 0);
+
+      CfreeCgTypeId arg_types[TOY_MAX_PARAMS];
+      size_t nargs = 0;
+      size_t i;
+      if (p->cur.kind != TOK_RPAREN) {
+        for (;;) {
+          if (nargs >= TOY_MAX_PARAMS) {
+            toy_error(p, p->cur.loc, "too many arguments");
+            return CFREE_CG_TYPE_NONE;
+          }
+          CfreeCgTypeId arg_ty = toy_parse_expr(p);
+          if (arg_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+          arg_types[nargs++] = arg_ty;
+          if (p->cur.kind == TOK_COMMA) {
+            toy_parser_advance(p);
+          } else {
+            break;
+          }
+        }
+      }
+      if (!toy_parser_expect(p, TOK_RPAREN)) {
+        toy_error(p, p->cur.loc, "expected ')' after arguments");
+        return CFREE_CG_TYPE_NONE;
+      }
+
+      /* Verify argument count */
+      if (nargs != fn->nparams) {
+        toy_error(p, ident_tok.loc,
+                  "function '%s' expects %zu arguments, got %zu",
+                  toy_tok_cstr(ident_tok, name_buf, sizeof(name_buf)),
+                  fn->nparams, nargs);
+        return CFREE_CG_TYPE_NONE;
+      }
+
+      /* Verify argument types against the declared parameter types, in line
+       * with the checks done for let, assignment and return. */
+      for (i = 0; i < nargs; i++) {
+        if (arg_types[i] != fn->params[i]) {
+          toy_error(p, ident_tok.loc,
+                    "argument %zu of function '%s' has the wrong type", i + 1,
+                    toy_tok_cstr(ident_tok, name_buf, sizeof(name_buf)));
+          return CFREE_CG_TYPE_NONE;
+        }
+      }
+
+      cfree_cg_call(p->cg, (uint32_t)nargs, fn->type);
+      return fn->ret;
+    }
+
+    /* Variable reference */
+    ToyVar* v = toy_find_var(p, name);
+    if (!v) {
+      toy_error(p, ident_tok.loc, "undefined variable '%s'",
+                toy_tok_cstr(ident_tok, name_buf, sizeof(name_buf)));
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_push_local(p->cg, v->slot);
+    cfree_cg_load(p->cg);
+    return v->type;
+  }
+
+  if (p->cur.kind == TOK_LPAREN) {
+    toy_parser_advance(p);
+    CfreeCgTypeId ty = toy_parse_expr(p);
+    if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    if (!toy_parser_expect(p, TOK_RPAREN)) {
+      toy_error(p, p->cur.loc, "expected ')'");
+      return CFREE_CG_TYPE_NONE;
+    }
+    return ty;
+  }
+
+  toy_error(p, p->cur.loc, "expected expression");
+  return CFREE_CG_TYPE_NONE;
+}
+
+/* Parse a unary expression and emit code for it:
+ *   -e   arithmetic negation (int operand)
+ *   !e   logical not (int operand)
+ *   &v   address of a named variable; the result is a pointer to the
+ *        variable's declared type
+ *   *e   load through a pointer-to-int
+ * Anything else is handed to toy_parse_expr_primary. Returns the result
+ * type, or CFREE_CG_TYPE_NONE after reporting an error. */
+static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) {
+  toy_set_loc(p);
+  if (toy_parser_match(p, TOK_MINUS)) {
+    CfreeCgTypeId ty = toy_parse_expr_unary(p);
+    if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    if (ty != p->int_type) {
+      toy_error(p, p->cur.loc, "invalid operand for unary '-'");
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_unop(p->cg, CFREE_CG_NEG);
+    return p->int_type;
+  }
+
+  if (toy_parser_match(p, TOK_BANG)) {
+    CfreeCgTypeId ty = toy_parse_expr_unary(p);
+    if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    if (ty != p->int_type) {
+      toy_error(p, p->cur.loc, "invalid operand for '!'");
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_unop(p->cg, CFREE_CG_NOT);
+    return p->int_type;
+  }
+
+  if (toy_parser_match(p, TOK_AMPERSAND)) {
+    CfreeCgTypeId ptr_ty;
+    if (p->cur.kind != TOK_IDENT) {
+      toy_error(p, p->cur.loc, "expected identifier after '&'");
+      return CFREE_CG_TYPE_NONE;
+    }
+    CfreeSym name = toy_tok_sym(p, p->cur);
+    ToyVar* v = toy_find_var(p, name);
+    if (!v) {
+      toy_error(p, p->cur.loc, "undefined variable");
+      return CFREE_CG_TYPE_NONE;
+    }
+    /* The address of a variable of type T has type *T. Returning
+     * pointer-to-int unconditionally would let `&ptr_var` pass as `*int`. */
+    ptr_ty = cfree_cg_type_ptr(p->c, v->type);
+    if (ptr_ty == CFREE_CG_TYPE_NONE) {
+      toy_error(p, p->cur.loc, "failed to create pointer type");
+      return CFREE_CG_TYPE_NONE;
+    }
+    toy_parser_advance(p);
+    cfree_cg_push_local(p->cg, v->slot);
+    cfree_cg_addr(p->cg);
+    return ptr_ty;
+  }
+
+  if (toy_parser_match(p, TOK_STAR)) {
+    CfreeCgTypeId ty = toy_parse_expr_unary(p);
+    if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    /* Only pointer-to-int can be dereferenced at present. */
+    if (ty != p->int_ptr_type) {
+      toy_error(p, p->cur.loc, "cannot dereference non-pointer");
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_load(p->cg);
+    return p->int_type;
+  }
+
+  return toy_parse_expr_primary(p);
+}
+
+/* Multiplicative level: unary (('*' | '/' | '%') unary)*, left-associative.
+ * Both operands of every operator must be int. Returns the type of the first
+ * operand, or CFREE_CG_TYPE_NONE after an error. */
+static CfreeCgTypeId toy_parse_expr_mul(ToyParser* p) {
+  CfreeCgTypeId lhs_ty = toy_parse_expr_unary(p);
+  if (lhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+
+  for (;;) {
+    ToyTokenKind tok = p->cur.kind;
+    CfreeCgBinOp opcode;
+    CfreeCgTypeId rhs_ty;
+
+    /* Map the operator token to its binop; any other token ends the chain. */
+    if (tok == TOK_STAR) {
+      opcode = CFREE_CG_MUL;
+    } else if (tok == TOK_SLASH) {
+      opcode = CFREE_CG_SDIV;
+    } else if (tok == TOK_PERCENT) {
+      opcode = CFREE_CG_SREM;
+    } else {
+      break;
+    }
+    toy_parser_advance(p);
+
+    rhs_ty = toy_parse_expr_unary(p);
+    if (rhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    if (lhs_ty != p->int_type || rhs_ty != p->int_type) {
+      toy_error(p, p->cur.loc, "arithmetic operands must be int");
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_binop(p->cg, opcode);
+  }
+  return lhs_ty;
+}
+
+/* Additive level: mul (('+' | '-') mul)*, left-associative. Both operands
+ * of every operator must be int. Returns the type of the first operand, or
+ * CFREE_CG_TYPE_NONE after an error. */
+static CfreeCgTypeId toy_parse_expr_add(ToyParser* p) {
+  CfreeCgTypeId lhs_ty = toy_parse_expr_mul(p);
+  if (lhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+
+  for (;;) {
+    int is_plus = (p->cur.kind == TOK_PLUS);
+    CfreeCgTypeId rhs_ty;
+
+    if (!is_plus && p->cur.kind != TOK_MINUS) {
+      break;
+    }
+    toy_parser_advance(p);
+
+    rhs_ty = toy_parse_expr_mul(p);
+    if (rhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    if (lhs_ty != p->int_type || rhs_ty != p->int_type) {
+      toy_error(p, p->cur.loc, "arithmetic operands must be int");
+      return CFREE_CG_TYPE_NONE;
+    }
+    cfree_cg_binop(p->cg, is_plus ? CFREE_CG_ADD : CFREE_CG_SUB);
+  }
+  return lhs_ty;
+}
+
+/* Translate a comparison token into the matching signed compare op.
+ * Returns 1 and stores the op in *out, or 0 when `kind` is not a comparison
+ * operator (leaving *out untouched). */
+static int toy_cmp_op_from_token(ToyTokenKind kind, CfreeCgCmpOp* out) {
+  switch (kind) {
+    case TOK_EQEQ:
+      *out = CFREE_CG_EQ;
+      return 1;
+    case TOK_NE:
+      *out = CFREE_CG_NE;
+      return 1;
+    case TOK_LT:
+      *out = CFREE_CG_LT_S;
+      return 1;
+    case TOK_GT:
+      *out = CFREE_CG_GT_S;
+      return 1;
+    case TOK_LE:
+      *out = CFREE_CG_LE_S;
+      return 1;
+    case TOK_GE:
+      *out = CFREE_CG_GE_S;
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Comparison level: add [cmp-op add]. At most one comparison is parsed (no
+ * chaining). Both sides must have the same type; the compare result is
+ * converted to int. Returns the result type or CFREE_CG_TYPE_NONE. */
+static CfreeCgTypeId toy_parse_expr_cmp(ToyParser* p) {
+  CfreeCgCmpOp cmp_op;
+  CfreeCgTypeId rhs_ty;
+  CfreeCgTypeId lhs_ty = toy_parse_expr_add(p);
+  if (lhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+
+  if (!toy_cmp_op_from_token(p->cur.kind, &cmp_op)) {
+    return lhs_ty; /* no comparison operator follows */
+  }
+  toy_parser_advance(p);
+
+  rhs_ty = toy_parse_expr_add(p);
+  if (rhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+  if (lhs_ty != rhs_ty) {
+    toy_error(p, p->cur.loc, "comparison operands must have same type");
+    return CFREE_CG_TYPE_NONE;
+  }
+
+  cfree_cg_cmp(p->cg, cmp_op);
+  cfree_cg_convert(p->cg, p->int_type);
+  return p->int_type;
+}
+
+/* Logical-and level: cmp ('&&' cmp)*. Each `&&` opens an int-valued scope,
+ * tests the left operand against zero and leaves the scope early via
+ * cfree_cg_break_false when it is false; the right operand is tested the
+ * same way, and 1 is pushed when both tests pass. Returns int when at least
+ * one `&&` was parsed, otherwise the operand's own type; CFREE_CG_TYPE_NONE
+ * on error. */
+static CfreeCgTypeId toy_parse_expr_and(ToyParser* p) {
+  CfreeCgTypeId result_ty = toy_parse_expr_cmp(p);
+  if (result_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+
+  while (p->cur.kind == TOK_ANDAND) {
+    CfreeCgScope sc;
+    CfreeCgTypeId rhs_ty;
+
+    toy_parser_advance(p);
+    sc = cfree_cg_scope_begin(p->cg, p->int_type);
+
+    /* Left operand: leave the scope when it is zero. */
+    cfree_cg_push_int(p->cg, 0, p->int_type);
+    cfree_cg_cmp(p->cg, CFREE_CG_NE);
+    cfree_cg_break_false(p->cg, sc);
+    cfree_cg_drop(p->cg);
+
+    rhs_ty = toy_parse_expr_cmp(p);
+    if (rhs_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+
+    /* Right operand: same test. */
+    cfree_cg_push_int(p->cg, 0, p->int_type);
+    cfree_cg_cmp(p->cg, CFREE_CG_NE);
+    cfree_cg_break_false(p->cg, sc);
+    cfree_cg_drop(p->cg);
+
+    /* Both operands were non-zero. */
+    cfree_cg_push_int(p->cg, 1, p->int_type);
+    cfree_cg_scope_end(p->cg, sc);
+    result_ty = p->int_type;
+  }
+  return result_ty;
+}
+
+/* Logical-or level: and ('||' and)*. Mirrors toy_parse_expr_and: each `||`
+ * opens an int-valued scope, tests the left operand for "non-zero" and
+ * leaves the scope early via cfree_cg_break_true when it is true; the right
+ * operand is tested the same way, and 0 is pushed when neither test passes.
+ *
+ * The truth test must compare against 0. Comparing against 1 (`x != 1`)
+ * would short-circuit on a false (0) operand and fall through on a true (1)
+ * one, i.e. compute the wrong result for `0 || x` and `1 || 1`.
+ *
+ * Returns int when at least one `||` was parsed, otherwise the operand's own
+ * type; CFREE_CG_TYPE_NONE on error. */
+static CfreeCgTypeId toy_parse_expr_or(ToyParser* p) {
+  CfreeCgTypeId ty = toy_parse_expr_and(p);
+  if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+  while (p->cur.kind == TOK_PIPEPIPE) {
+    CfreeCgScope scope;
+    toy_parser_advance(p);
+    scope = cfree_cg_scope_begin(p->cg, p->int_type);
+    /* Left operand: leave the scope when it is non-zero. */
+    cfree_cg_push_int(p->cg, 0, p->int_type);
+    cfree_cg_cmp(p->cg, CFREE_CG_NE);
+    cfree_cg_break_true(p->cg, scope);
+    cfree_cg_drop(p->cg);
+    ty = toy_parse_expr_and(p);
+    if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE;
+    /* Right operand: same test. */
+    cfree_cg_push_int(p->cg, 0, p->int_type);
+    cfree_cg_cmp(p->cg, CFREE_CG_NE);
+    cfree_cg_break_true(p->cg, scope);
+    cfree_cg_drop(p->cg);
+    /* Neither operand was non-zero. */
+    cfree_cg_push_int(p->cg, 0, p->int_type);
+    cfree_cg_scope_end(p->cg, scope);
+    ty = p->int_type;
+  }
+  return ty;
+}
+
+static CfreeCgTypeId toy_parse_expr(ToyParser* p) {
+ return toy_parse_expr_or(p);
+}
+
+/* ============================================================
+ * Statement parsing
+ * ============================================================ */
+
+static int toy_parse_stmt(ToyParser* p);
+
+/* Parse `{ stmt* }`. Returns 1 on success and 0 after an error (a missing
+ * brace is reported here; statement errors are reported by the statement
+ * parsers). */
+static int toy_parse_block(ToyParser* p) {
+  int opened = toy_parser_expect(p, TOK_LBRACE);
+  if (!opened) {
+    toy_error(p, p->cur.loc, "expected '{'");
+    return 0;
+  }
+
+  for (;;) {
+    ToyTokenKind k = p->cur.kind;
+    if (k == TOK_RBRACE || k == TOK_EOF) {
+      break;
+    }
+    if (!toy_parse_stmt(p)) {
+      return 0;
+    }
+  }
+
+  if (toy_parser_expect(p, TOK_RBRACE)) {
+    return 1;
+  }
+  toy_error(p, p->cur.loc, "expected '}'");
+  return 0;
+}
+
+/* Parse `let name : type [= expr] ;`, allocate a local slot for the variable
+ * and register it in the variable table. When an initializer is present its
+ * type must equal the declared type and its value is stored into the slot.
+ * Returns 1 on success, 0 after reporting an error. */
+static int toy_parse_let_stmt(ToyParser* p) {
+ CfreeSym name;
+ CfreeCgTypeId ty;
+ CfreeCgSlot slot;
+ toy_parser_advance(p); /* let */
+ if (p->cur.kind != TOK_IDENT) {
+ toy_error(p, p->cur.loc, "expected identifier after 'let'");
return 0;
+ }
+ name = toy_tok_sym(p, p->cur);
toy_parser_advance(p);
- if (!toy_parser_expect(p, TOK_LPAREN)) return 0;
- if (!toy_parser_expect(p, TOK_RPAREN)) return 0;
- if (toy_parser_match(p, TOK_COLON)) {
- if (!toy_parser_match(p, TOK_INT)) return 0;
+ if (!toy_parser_expect(p, TOK_COLON)) {
+ toy_error(p, p->cur.loc, "expected ':' after identifier");
+ return 0;
+ }
+ ty = toy_parse_type(p);
+ if (ty == CFREE_CG_TYPE_NONE) return 0;
+ /* NOTE(review): the slot is allocated before the table-capacity check. */
+ slot = cfree_cg_local_slot(p->cg, ty, name);
+ if (p->nvars >= TOY_MAX_VARS) {
+ toy_error(p, p->cur.loc, "too many locals");
+ return 0;
+ }
+ /* The variable is registered before its initializer is parsed, so the
+ * initializer expression can already refer to it. */
+ p->vars[p->nvars].name = name;
+ p->vars[p->nvars].type = ty;
+ p->vars[p->nvars].slot = slot;
+ p->vars[p->nvars].is_param = 0;
+ p->nvars++;
+
+ if (toy_parser_match(p, TOK_EQ)) {
+ CfreeCgTypeId expr_ty = toy_parse_expr(p);
+ if (expr_ty == CFREE_CG_TYPE_NONE) return 0;
+ if (expr_ty != ty) {
+ toy_error(p, p->cur.loc, "type mismatch in let initializer");
+ return 0;
+ }
+ /* Value is already on the stack; push the destination, store, then drop
+ * (presumably the store leaves an entry behind; confirm against the CG
+ * API). */
+ cfree_cg_push_local(p->cg, slot);
+ cfree_cg_store(p->cg);
+ cfree_cg_drop(p->cg);
}
- if (!toy_parser_expect(p, TOK_LBRACE)) return 0;
- if (!toy_parser_match(p, TOK_RETURN)) return 0;
+ if (!toy_parser_expect(p, TOK_SEMI)) {
+ toy_error(p, p->cur.loc, "expected ';' after let");
+ return 0;
+ }
+ return 1;
+}
+
+/* Parse `if expr block [else (block | stmt)]` and emit it.
+ *
+ * Two nested scopes are used so that the else branch is reachable:
+ *
+ *   outer {
+ *     then_scope {
+ *       <cond>; break_false(then_scope)   -- false: skip the then-block only
+ *       <then-block>
+ *       break(outer)                      -- true: skip the else branch
+ *     }
+ *     <else branch, if any>
+ *   }
+ *
+ * With a single scope, both the false-condition break and the break emitted
+ * after the then-block land past the else branch, so it never runs. The
+ * condition is evaluated inside the scopes, as the while loop does.
+ *
+ * Returns 1 on success, 0 after reporting an error. */
+static int toy_parse_if_stmt(ToyParser* p) {
+  CfreeCgScope outer;
+  CfreeCgScope then_scope;
+  CfreeCgTypeId cond_ty;
+  toy_parser_advance(p); /* if */
+
+  outer = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE);
+  then_scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE);
+
+  cond_ty = toy_parse_expr(p);
+  if (cond_ty == CFREE_CG_TYPE_NONE) return 0;
+  if (cond_ty != p->int_type) {
+    toy_error(p, p->cur.loc, "if condition must be int");
+    return 0;
+  }
+
+  /* cond != 0; when false, leave then_scope and fall into the else branch. */
+  cfree_cg_push_int(p->cg, 0, p->int_type);
+  cfree_cg_cmp(p->cg, CFREE_CG_NE);
+  cfree_cg_break_false(p->cg, then_scope);
+
+  if (!toy_parse_block(p)) return 0;
+
+  /* End of the then-block: jump past the else branch (harmless when there
+   * is none, since the outer scope ends right after). */
+  cfree_cg_break(p->cg, outer);
+  cfree_cg_scope_end(p->cg, then_scope);
+
+  if (p->cur.kind == TOK_ELSE) {
+    toy_parser_advance(p); /* else */
+    if (p->cur.kind == TOK_LBRACE) {
+      if (!toy_parse_block(p)) return 0;
+    } else {
+      /* Allows `else if ...` chains. */
+      if (!toy_parse_stmt(p)) return 0;
+    }
+  }
+  cfree_cg_scope_end(p->cg, outer);
+  return 1;
+}
+
+/* Parse `while expr block` and emit it as one structured scope: the
+ * condition is evaluated inside the scope, a false condition breaks out of
+ * it, and the end of the body continues the scope (presumably a jump back to
+ * the scope start; confirm against the CG API). The scope is pushed on the
+ * parser's loop stack so `break` / `continue` in the body can target it.
+ * Returns 1 on success, 0 after reporting an error. */
+static int toy_parse_while_stmt(ToyParser* p) {
+ CfreeCgScope scope;
+ CfreeCgTypeId cond_ty;
+
+ toy_parser_advance(p); /* while */
+
+ if (p->nscopes >= TOY_MAX_SCOPES) {
+ toy_error(p, p->cur.loc, "too many nested scopes");
+ return 0;
+ }
+ scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE);
+ p->scopes[p->nscopes].cg_scope = scope;
+ p->nscopes++;
+
+ /* Every failure path below pops the loop stack again before returning. */
+ cond_ty = toy_parse_expr(p);
+ if (cond_ty == CFREE_CG_TYPE_NONE) {
+ p->nscopes--;
+ return 0;
+ }
+ if (cond_ty != p->int_type) {
+ toy_error(p, p->cur.loc, "while condition must be int");
+ p->nscopes--;
+ return 0;
+ }
+
+ /* Exit the loop when cond != 0 is false. */
+ cfree_cg_push_int(p->cg, 0, p->int_type);
+ cfree_cg_cmp(p->cg, CFREE_CG_NE);
+ cfree_cg_break_false(p->cg, scope);
- /* Optional unary minus for negative return values */
- int neg = 0;
- if (p->cur.kind == TOK_MINUS) {
- neg = 1;
+ if (!toy_parse_block(p)) {
+ p->nscopes--;
+ return 0;
+ }
+
+ cfree_cg_continue(p->cg, scope);
+ cfree_cg_scope_end(p->cg, scope);
+ p->nscopes--;
+ return 1;
+}
+
+/* Parse `break ;` and emit a break out of the innermost enclosing while
+ * loop. Returns 1 on success, 0 after reporting an error. */
+static int toy_parse_break_stmt(ToyParser* p) {
+  toy_parser_advance(p); /* break */
+
+  if (p->nscopes > 0) {
+    ToyScope* innermost = &p->scopes[p->nscopes - 1];
+    cfree_cg_break(p->cg, innermost->cg_scope);
+    if (toy_parser_expect(p, TOK_SEMI)) {
+      return 1;
+    }
+    toy_error(p, p->cur.loc, "expected ';' after break");
+    return 0;
+  }
+
+  toy_error(p, p->cur.loc, "break outside loop");
+  return 0;
+}
+
+/* Parse `continue ;` and emit a continue of the innermost enclosing while
+ * loop. Returns 1 on success, 0 after reporting an error. */
+static int toy_parse_continue_stmt(ToyParser* p) {
+  toy_parser_advance(p); /* continue */
+
+  if (p->nscopes > 0) {
+    ToyScope* innermost = &p->scopes[p->nscopes - 1];
+    cfree_cg_continue(p->cg, innermost->cg_scope);
+    if (toy_parser_expect(p, TOK_SEMI)) {
+      return 1;
+    }
+    toy_error(p, p->cur.loc, "expected ';' after continue");
+    return 0;
+  }
+
+  toy_error(p, p->cur.loc, "continue outside loop");
+  return 0;
+}
+
+/* Parse `return ;` or `return expr ;` and emit the return. A bare return is
+ * only accepted in a void function; a value return must match the current
+ * function's return type exactly. Returns 1 on success, 0 after reporting an
+ * error. */
+static int toy_parse_return_stmt(ToyParser* p) {
+ CfreeCgTypeId ty;
+ toy_parser_advance(p); /* return */
+ if (p->cur.kind == TOK_SEMI) {
toy_parser_advance(p);
+ /* The ';' is already consumed here, so the error location below is that
+ * of the following token. */
+ if (p->cur_fn_ret != p->types.void_) {
+ toy_error(p, p->cur.loc, "return without value in non-void function");
+ return 0;
+ }
+ cfree_cg_ret_void(p->cg);
+ return 1;
+ }
+ ty = toy_parse_expr(p);
+ if (ty == CFREE_CG_TYPE_NONE) return 0;
+ if (ty != p->cur_fn_ret) {
+ toy_error(p, p->cur.loc, "return type mismatch");
+ return 0;
+ }
+ /* The return is emitted before the trailing ';' is checked. */
+ cfree_cg_ret(p->cg);
+ if (!toy_parser_expect(p, TOK_SEMI)) {
+ toy_error(p, p->cur.loc, "expected ';' after return");
+ return 0;
}
- if (p->cur.kind != TOK_NUMBER) return 0;
- p->value = neg ? -p->cur.int_value : p->cur.int_value;
+ return 1;
+}
+
+/* Parse `expr ;` and drop the expression's result from the value stack.
+ * Returns 1 on success, 0 after an error. */
+static int toy_parse_expr_stmt(ToyParser* p) {
+  if (toy_parse_expr(p) == CFREE_CG_TYPE_NONE) {
+    return 0;
+  }
+  if (toy_parser_expect(p, TOK_SEMI)) {
+    cfree_cg_drop(p->cg);
+    return 1;
+  }
+  toy_error(p, p->cur.loc, "expected ';' after expression");
+  return 0;
+}
+
+/* Parse one statement, dispatching on the current token: let, if, while,
+ * break, continue, return, or a nested block. An identifier directly
+ * followed by `=` is an assignment to a named variable; everything else is
+ * an expression statement. Returns 1 on success, 0 after an error. */
+static int toy_parse_stmt(ToyParser* p) {
+  ToyVar* target;
+  CfreeCgTypeId value_ty;
+
+  toy_set_loc(p);
+  switch (p->cur.kind) {
+    case TOK_LET:
+      return toy_parse_let_stmt(p);
+    case TOK_IF:
+      return toy_parse_if_stmt(p);
+    case TOK_WHILE:
+      return toy_parse_while_stmt(p);
+    case TOK_BREAK:
+      return toy_parse_break_stmt(p);
+    case TOK_CONTINUE:
+      return toy_parse_continue_stmt(p);
+    case TOK_RETURN:
+      return toy_parse_return_stmt(p);
+    case TOK_LBRACE:
+      return toy_parse_block(p);
+    default:
+      break;
+  }
+
+  /* Anything that is not `ident =` is an expression statement; the peek is
+   * only performed when the current token is an identifier. */
+  if (p->cur.kind != TOK_IDENT || toy_lexer_peek(&p->lex).kind != TOK_EQ) {
+    return toy_parse_expr_stmt(p);
+  }
+
+  /* Assignment: ident = expr ; */
+  target = toy_find_var(p, toy_tok_sym(p, p->cur));
+  if (!target) {
+    toy_error(p, p->cur.loc, "undefined variable in assignment");
+    return 0;
+  }
+  toy_parser_advance(p); /* ident */
+  toy_parser_advance(p); /* = */
+
+  value_ty = toy_parse_expr(p);
+  if (value_ty == CFREE_CG_TYPE_NONE) {
+    return 0;
+  }
+  if (value_ty != target->type) {
+    toy_error(p, p->cur.loc, "type mismatch in assignment");
+    return 0;
+  }
+
+  /* Value is on the stack; push the destination, store, then drop. */
+  cfree_cg_push_local(p->cg, target->slot);
+  cfree_cg_store(p->cg);
+  cfree_cg_drop(p->cg);
+
+  if (toy_parser_expect(p, TOK_SEMI)) {
+    return 1;
+  }
+  toy_error(p, p->cur.loc, "expected ';' after assignment");
+  return 0;
+}
+
+/* ============================================================
+ * Function parsing
+ * ============================================================ */
+
+/* Parse and emit one function definition:
+ *   fn name ( [param : type {, param : type}] ) [: type] block
+ * The function is registered in the function table before its body is
+ * parsed, so the body can call it recursively. Parameters become the first
+ * entries of the variable table.
+ *
+ * Returns 1 on success, 0 when the current token is not `fn`, and -1 after
+ * reporting an error. */
+static int toy_parse_fn(ToyParser* p) {
+ CfreeSym name;
+ CfreeCgTypeId ret_type;
+ CfreeCgTypeId param_types[TOY_MAX_PARAMS];
+ CfreeSym param_names[TOY_MAX_PARAMS];
+ size_t nparams = 0;
+ CfreeCgDeclAttrs attrs;
+ CfreeCgTypeId fn_ty;
+ ToyFn* fn_entry;
+ size_t i;
+
+ if (!toy_parser_match(p, TOK_FN)) return 0;
+
+ if (p->cur.kind != TOK_IDENT) {
+ toy_error(p, p->cur.loc, "expected function name");
+ return -1;
+ }
+ name = toy_tok_sym(p, p->cur);
toy_parser_advance(p);
- if (!toy_parser_expect(p, TOK_SEMI)) return 0;
- if (!toy_parser_expect(p, TOK_RBRACE)) return 0;
- return toy_parser_at_end(p);
+ if (!toy_parser_expect(p, TOK_LPAREN)) {
+ toy_error(p, p->cur.loc, "expected '(' after function name");
+ return -1;
+ }
+
+ /* Parameter list: name : type, separated by commas. */
+ if (p->cur.kind != TOK_RPAREN) {
+ for (;;) {
+ if (p->cur.kind != TOK_IDENT) {
+ toy_error(p, p->cur.loc, "expected parameter name");
+ return -1;
+ }
+ if (nparams >= TOY_MAX_PARAMS) {
+ toy_error(p, p->cur.loc, "too many parameters");
+ return -1;
+ }
+ param_names[nparams] = toy_tok_sym(p, p->cur);
+ toy_parser_advance(p);
+ if (!toy_parser_expect(p, TOK_COLON)) {
+ toy_error(p, p->cur.loc, "expected ':' after parameter name");
+ return -1;
+ }
+ param_types[nparams] = toy_parse_type(p);
+ if (param_types[nparams] == CFREE_CG_TYPE_NONE) return -1;
+ nparams++;
+ if (p->cur.kind == TOK_COMMA) {
+ toy_parser_advance(p);
+ } else {
+ break;
+ }
+ }
+ }
+ if (!toy_parser_expect(p, TOK_RPAREN)) {
+ toy_error(p, p->cur.loc, "expected ')' after parameters");
+ return -1;
+ }
+
+ /* Optional `: type` return annotation; the default is void. */
+ if (toy_parser_match(p, TOK_COLON)) {
+ ret_type = toy_parse_type(p);
+ if (ret_type == CFREE_CG_TYPE_NONE) return -1;
+ } else {
+ ret_type = p->types.void_;
+ }
+
+ /* Build function type */
+ fn_ty = cfree_cg_type_func(p->c, ret_type, param_types, (uint32_t)nparams, 0);
+ if (fn_ty == CFREE_CG_TYPE_NONE) {
+ toy_error(p, p->cur.loc, "failed to create function type");
+ return -1;
+ }
+
+ /* Register function for recursion and later calls */
+ /* NOTE(review): no check for an existing function of the same name;
+ * toy_find_fn resolves to the most recently registered entry. */
+ if (p->nfns >= TOY_MAX_FNS) {
+ toy_error(p, p->cur.loc, "too many functions");
+ return -1;
+ }
+ fn_entry = &p->fns[p->nfns];
+ fn_entry->name = name;
+ fn_entry->type = fn_ty;
+ fn_entry->ret = ret_type;
+ fn_entry->nparams = nparams;
+ for (i = 0; i < nparams; i++) fn_entry->params[i] = param_types[i];
+ p->nfns++;
+
+ /* Emit function */
+ /* attrs is zeroed first so fields not set below are 0. */
+ memset(&attrs, 0, sizeof(attrs));
+ attrs.bind = CFREE_SB_GLOBAL;
+ attrs.visibility = CFREE_CG_VIS_DEFAULT;
+ attrs.tls_model = CFREE_CG_TLS_DEFAULT;
+ attrs.flags = CFREE_CG_DECL_DEFINED;
+ cfree_cg_func_begin(p->cg, name, fn_ty, attrs);
+
+ /* Setup parameter slots */
+ /* The variable table is per function: start it empty. */
+ p->nvars = 0;
+ p->cur_fn_ret = ret_type;
+ for (i = 0; i < nparams; i++) {
+ CfreeCgSlot slot =
+ cfree_cg_param_slot(p->cg, (uint32_t)i, param_types[i], param_names[i]);
+ if (p->nvars >= TOY_MAX_VARS) {
+ toy_error(p, p->cur.loc, "too many vars");
+ return -1;
+ }
+ p->vars[p->nvars].name = param_names[i];
+ p->vars[p->nvars].type = param_types[i];
+ p->vars[p->nvars].slot = slot;
+ p->vars[p->nvars].is_param = 1;
+ p->nvars++;
+ }
+
+ if (!toy_parse_block(p)) return -1;
+
+ /* Implicit return for void functions */
+ /* NOTE(review): nothing is emitted for a non-void function whose body does
+ * not end in a return; confirm how the code generator treats that. */
+ if (ret_type == p->types.void_) {
+ cfree_cg_ret_void(p->cg);
+ }
+
+ cfree_cg_func_end(p->cg);
+ p->nvars = 0;
+ p->cur_fn_ret = p->types.void_;
+ return 1;
+}
+
+/* ============================================================
+ * Program parsing
+ * ============================================================ */
+
+/* Parse a whole program: a sequence of function definitions up to end of
+ * input. Returns 1 when every function parsed, 0 on the first failure. A
+ * top-level token that does not start a function is reported here. */
+static int toy_parse_program(ToyParser* p) {
+  for (;;) {
+    int status;
+    if (p->cur.kind == TOK_EOF) {
+      return 1;
+    }
+    status = toy_parse_fn(p);
+    if (status > 0) {
+      continue;
+    }
+    if (status == 0) {
+      toy_error(p, p->cur.loc, "expected function declaration");
+    }
+    return 0;
+  }
}
+/* ============================================================
+ * Public entry point
+ * ============================================================ */
+
+/* Compile toy-language source from `input` into the object builder `out`.
+ * Creates a code generator, runs the single-pass parser/codegen over the
+ * source, and frees the code generator on every path. `opts` is unused.
+ * Returns 0 on success and 1 on invalid arguments, code generator creation
+ * failure, or any parse error. */
int cfree_toy_compile(CfreeCompiler* c, const CfreeCompileOptions* opts,
const CfreeBytesInput* input, CfreeObjBuilder* out) {
ToyParser p;
- const uint8_t* source;
CfreeCg* cg;
- CfreeCgBuiltinTypes types;
- CfreeCgTypeId fn_ty;
- CfreeSym main_sym;
- CfreeCgDeclAttrs main_attrs;
+ const uint8_t* source;
(void)opts;
if (!c || !input || !out) return 1;
+ /* A NULL data pointer is replaced by an empty string so the lexer always
+ * gets a valid pointer. */
source = input->data ? input->data : (const uint8_t*)"";
- toy_parser_init(&p, source, input->len);
- if (!toy_parse_main(&p)) return 1;
-
- main_attrs.bind = CFREE_SB_GLOBAL;
- main_attrs.visibility = CFREE_CG_VIS_DEFAULT;
- main_attrs.tls_model = CFREE_CG_TLS_DEFAULT;
- main_attrs.section = 0;
- main_attrs.align = 0;
- main_attrs.flags = CFREE_CG_DECL_DEFINED;
- main_sym = cfree_sym_intern(c, "main");
cg = cfree_cg_new(c, out);
- types = cfree_cg_builtin_types(c);
- fn_ty = cfree_cg_type_func(c, types.i32, NULL, 0, 0);
- cfree_cg_func_begin(cg, main_sym, fn_ty, main_attrs);
- cfree_cg_push_int(cg, p.value, types.i32);
- cfree_cg_ret(cg);
- cfree_cg_func_end(cg);
+ if (!cg) return 1;
+
+ toy_parser_init(&p, c, cg, source, input->len);
+ if (!toy_parse_program(&p)) {
+ cfree_cg_free(cg);
+ return 1;
+ }
+ /* toy_parse_program only returns success at TOK_EOF, so this is a
+ * defensive check. */
+ if (p.cur.kind != TOK_EOF) {
+ toy_error(&p, p.cur.loc, "unexpected token after program end");
+ cfree_cg_free(cg);
+ return 1;
+ }
+
cfree_cg_free(cg);
return 0;
}