kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 0b6eaea8eb3bdcdbc9a6ce0e51d95c3da1385ff3
parent 001f39c2a663a220893629d9ee90590e70504874
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 12 May 2026 19:00:46 -0700

toy: use structured scopes, unop, fix call stack order and store drops

Diffstat:
M lang/toy/toy.c | 938 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 880 insertions(+), 58 deletions(-)

diff --git a/lang/toy/toy.c b/lang/toy/toy.c @@ -1,8 +1,71 @@ #include "toy.h" #include <cfree/cg.h> +#include <stdarg.h> #include <stddef.h> #include <stdint.h> +#include <string.h> + +/* ============================================================ + * CG API coverage checklist — APIs NOT yet exercised by toy: + * + * Types: + * cfree_cg_type_array + * cfree_cg_type_qualified + * cfree_cg_type_alias + * cfree_cg_type_record + * cfree_cg_type_enum + * cfree_cg_type_is_ptr / ptr_pointee + * cfree_cg_type_is_func / func_ret / func_nparams / func_param + * cfree_cg_type_is_record / record_nfields / record_field + * cfree_cg_type_size / type_align + * + * Functions & data: + * cfree_cg_func_decl + * cfree_cg_data_decl / data_begin / data_bytes / data_zero + * cfree_cg_data_symbol / data_end + * + * Values: + * cfree_cg_push_float + * cfree_cg_push_bytes + * cfree_cg_push_symbol (non-ADDR kinds: PCREL, GOT, PLT, TLS_*) + * cfree_cg_alloca + * + * Stack: + * cfree_cg_swap + * cfree_cg_rot3 + * cfree_cg_addr (only used for &var, not for general lvalues) + * + * Arithmetic: + * cfree_cg_binop (only ADD, SUB, MUL, SDIV, SREM used) + * cfree_cg_unop (NEG, NOT used; BNOT unused) + * cfree_cg_cmp (only EQ, NE, LT_S, GT_S, LE_S, GE_S used) + * cfree_cg_convert (only i1→int used) + * + * Scopes & control flow: + * cfree_cg_break_true (only with result_type != NONE) + * cfree_cg_continue_true / continue_false + * cfree_cg_label_new / label_place / jump / branch_true / branch_false + * + * Calls: + * cfree_cg_tail_call + * + * Intrinsics: + * cfree_cg_intrinsic (all) + * + * Atomics: + * cfree_cg_atomic_load / atomic_store / atomic_rmw + * cfree_cg_atomic_cmpxchg / atomic_fence + * + * Variadics: + * cfree_cg_va_start / va_arg / va_end / va_copy + * + * Memory: + * cfree_cg_memcpy / memset / index / field_addr + * + * Inline asm: + * cfree_cg_inline_asm + * ============================================================ */ /* 
============================================================ * Lexer / token iterator @@ -54,16 +117,16 @@ typedef enum ToyTokenKind { typedef struct ToyToken { ToyTokenKind kind; - CfreeSrcLoc loc; /* line/col within the source buffer */ - const uint8_t* text; /* points into source buffer */ + CfreeSrcLoc loc; + const uint8_t* text; size_t text_len; - int64_t int_value; /* valid when kind == TOK_NUMBER */ + int64_t int_value; } ToyToken; typedef struct ToyLexer { const uint8_t* cur; const uint8_t* end; - const uint8_t* bol; /* beginning of current line */ + const uint8_t* bol; uint32_t line; } ToyLexer; @@ -102,7 +165,7 @@ static void toy_skip_ws(ToyLexer* lex) { } static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind, - const uint8_t* start) { + const uint8_t* start) { ToyToken tok; tok.kind = kind; tok.loc.file_id = 0; @@ -114,7 +177,6 @@ static ToyToken toy_lexer_emit(ToyLexer* lex, ToyTokenKind kind, return tok; } -/* Return the next token from the iterator. Always makes forward progress. 
*/ static ToyToken toy_lexer_next(ToyLexer* lex) { const uint8_t* start; ToyToken tok; @@ -134,7 +196,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) { uint8_t c = *lex->cur++; - /* Single-char tokens and multi-char operators */ switch (c) { case '(': return toy_lexer_emit(lex, TOK_LPAREN, start); @@ -208,7 +269,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) { return toy_lexer_emit(lex, TOK_DOT, start); } - /* Number literal */ if (toy_is_digit(c)) { int64_t v = (int64_t)(c - '0'); while (lex->cur < lex->end && toy_is_digit(*lex->cur)) { @@ -220,7 +280,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) { return tok; } - /* Identifier / keyword */ if (toy_is_alpha(c)) { while (lex->cur < lex->end && toy_is_alnum(*lex->cur)) lex->cur++; size_t len = (size_t)(lex->cur - start); @@ -256,7 +315,6 @@ static ToyToken toy_lexer_next(ToyLexer* lex) { return toy_lexer_emit(lex, kind, start); } - /* String literal */ if (c == '"') { while (lex->cur < lex->end && *lex->cur != '"') { if (*lex->cur == '\n') toy_lexer_advance_line(lex); @@ -266,24 +324,81 @@ static ToyToken toy_lexer_next(ToyLexer* lex) { return toy_lexer_emit(lex, TOK_STRING, start); } - /* Unknown character */ return toy_lexer_emit(lex, TOK_EOF, start); } +/* Peek at the next token without consuming. 
*/ +static ToyToken toy_lexer_peek(const ToyLexer* lex) { + ToyLexer tmp = *lex; + return toy_lexer_next(&tmp); +} + /* ============================================================ - * Parser (consumes token iterator) + * Parser (single-pass parse -> codegen) * ============================================================ */ +#define TOY_MAX_VARS 64 +#define TOY_MAX_FNS 32 +#define TOY_MAX_SCOPES 16 +#define TOY_MAX_PARAMS 16 + +typedef struct ToyVar { + CfreeSym name; + CfreeCgTypeId type; + CfreeCgSlot slot; + int is_param; +} ToyVar; + +typedef struct ToyFn { + CfreeSym name; + CfreeCgTypeId type; + CfreeCgTypeId ret; + CfreeCgTypeId params[TOY_MAX_PARAMS]; + size_t nparams; +} ToyFn; + +typedef struct ToyScope { + CfreeCgScope cg_scope; +} ToyScope; + typedef struct ToyParser { ToyLexer lex; - ToyToken cur; /* current token under inspection */ - int64_t value; /* last parsed integer literal */ + ToyToken cur; + CfreeCompiler* c; + CfreeCg* cg; + CfreeCgBuiltinTypes types; + CfreeCgTypeId int_type; + CfreeCgTypeId int_ptr_type; + + ToyVar vars[TOY_MAX_VARS]; + size_t nvars; + + ToyFn fns[TOY_MAX_FNS]; + size_t nfns; + + ToyScope scopes[TOY_MAX_SCOPES]; + size_t nscopes; + + CfreeCgTypeId cur_fn_ret; + CfreeDiagSink* diag; + int has_error; } ToyParser; -static void toy_parser_init(ToyParser* p, const uint8_t* data, size_t len) { +static void toy_parser_init(ToyParser* p, CfreeCompiler* c, CfreeCg* cg, + const uint8_t* data, size_t len) { toy_lexer_init(&p->lex, data, len); p->cur = toy_lexer_next(&p->lex); - p->value = 0; + p->c = c; + p->cg = cg; + p->types = cfree_cg_builtin_types(c); + p->int_type = p->types.isize; + p->int_ptr_type = cfree_cg_type_ptr(c, p->int_type); + p->nvars = 0; + p->nfns = 0; + p->nscopes = 0; + p->cur_fn_ret = p->types.void_; + p->diag = cfree_compiler_diag_sink(c); + p->has_error = 0; } static void toy_parser_advance(ToyParser* p) { @@ -306,69 +421,776 @@ static int toy_parser_expect(ToyParser* p, ToyTokenKind kind) { return 0; } 
-static int toy_parser_at_end(ToyParser* p) { return p->cur.kind == TOK_EOF; } +static void toy_error(ToyParser* p, CfreeSrcLoc loc, const char* fmt, ...) { + va_list ap; + p->has_error = 1; + if (!p->diag) return; + va_start(ap, fmt); + p->diag->emit(p->diag, CFREE_DIAG_ERROR, loc, fmt, ap); + va_end(ap); +} -static int toy_parse_main(ToyParser* p) { - if (!toy_parser_match(p, TOK_FN)) return 0; - if (p->cur.kind != TOK_IDENT || p->cur.text_len != 4 || - p->cur.text[0] != 'm' || p->cur.text[1] != 'a' || p->cur.text[2] != 'i' || - p->cur.text[3] != 'n') +static void toy_set_loc(ToyParser* p) { + if (p->cg) cfree_cg_set_loc(p->cg, p->cur.loc); +} + +static CfreeSym toy_tok_sym(ToyParser* p, ToyToken tok) { + char buf[64]; + if (tok.text_len >= sizeof(buf)) { + toy_error(p, tok.loc, "identifier too long"); + return 0; + } + memcpy(buf, tok.text, tok.text_len); + buf[tok.text_len] = '\0'; + return cfree_sym_intern(p->c, buf); +} + +/* ============================================================ + * Symbol tables + * ============================================================ */ + +static ToyVar* toy_find_var(ToyParser* p, CfreeSym name) { + size_t i; + for (i = p->nvars; i > 0; --i) { + if (p->vars[i - 1].name == name) return &p->vars[i - 1]; + } + return NULL; +} + +static ToyFn* toy_find_fn(ToyParser* p, CfreeSym name) { + size_t i; + for (i = p->nfns; i > 0; --i) { + if (p->fns[i - 1].name == name) return &p->fns[i - 1]; + } + return NULL; +} + +/* ============================================================ + * Type parsing + * ============================================================ */ + +static CfreeCgTypeId toy_parse_type(ToyParser* p) { + if (toy_parser_match(p, TOK_INT)) { + return p->int_type; + } + if (toy_parser_match(p, TOK_STAR)) { + CfreeCgTypeId pointee = toy_parse_type(p); + if (pointee == CFREE_CG_TYPE_NONE) { + toy_error(p, p->cur.loc, "expected type after '*'"); + return CFREE_CG_TYPE_NONE; + } + return cfree_cg_type_ptr(p->c, pointee); + } + 
toy_error(p, p->cur.loc, "expected type"); + return CFREE_CG_TYPE_NONE; +} + +/* ============================================================ + * Expression parsing + * ============================================================ */ + +static CfreeCgTypeId toy_parse_expr(ToyParser* p); + +static CfreeCgTypeId toy_parse_expr_primary(ToyParser* p) { + toy_set_loc(p); + if (p->cur.kind == TOK_NUMBER) { + cfree_cg_push_int(p->cg, p->cur.int_value, p->int_type); + toy_parser_advance(p); + return p->int_type; + } + + if (p->cur.kind == TOK_IDENT) { + CfreeSym name = toy_tok_sym(p, p->cur); + ToyToken ident_tok = p->cur; + toy_parser_advance(p); + + if (p->cur.kind == TOK_LPAREN) { + /* Function call */ + ToyFn* fn = toy_find_fn(p, name); + if (!fn) { + toy_error(p, ident_tok.loc, "undefined function '%s'", + (const char*)ident_tok.text); + return CFREE_CG_TYPE_NONE; + } + toy_parser_advance(p); /* ( */ + + cfree_cg_push_symbol(p->cg, name, fn->type, CFREE_CG_SYMREF_ADDR, 0); + + CfreeCgTypeId arg_types[TOY_MAX_PARAMS]; + size_t nargs = 0; + if (p->cur.kind != TOK_RPAREN) { + for (;;) { + if (nargs >= TOY_MAX_PARAMS) { + toy_error(p, p->cur.loc, "too many arguments"); + return CFREE_CG_TYPE_NONE; + } + CfreeCgTypeId arg_ty = toy_parse_expr(p); + if (arg_ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + arg_types[nargs++] = arg_ty; + if (p->cur.kind == TOK_COMMA) { + toy_parser_advance(p); + } else { + break; + } + } + } + if (!toy_parser_expect(p, TOK_RPAREN)) { + toy_error(p, p->cur.loc, "expected ')' after arguments"); + return CFREE_CG_TYPE_NONE; + } + + /* Verify argument count */ + if (nargs != fn->nparams) { + toy_error(p, ident_tok.loc, + "function '%s' expects %zu arguments, got %zu", + (const char*)ident_tok.text, fn->nparams, nargs); + return CFREE_CG_TYPE_NONE; + } + + cfree_cg_call(p->cg, (uint32_t)nargs, fn->type); + return fn->ret; + } + + /* Variable reference */ + ToyVar* v = toy_find_var(p, name); + if (!v) { + toy_error(p, ident_tok.loc, "undefined 
variable '%s'", + (const char*)ident_tok.text); + return CFREE_CG_TYPE_NONE; + } + cfree_cg_push_local(p->cg, v->slot); + cfree_cg_load(p->cg); + return v->type; + } + + if (p->cur.kind == TOK_LPAREN) { + toy_parser_advance(p); + CfreeCgTypeId ty = toy_parse_expr(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (!toy_parser_expect(p, TOK_RPAREN)) { + toy_error(p, p->cur.loc, "expected ')'"); + return CFREE_CG_TYPE_NONE; + } + return ty; + } + + toy_error(p, p->cur.loc, "expected expression"); + return CFREE_CG_TYPE_NONE; +} + +static CfreeCgTypeId toy_parse_expr_unary(ToyParser* p) { + toy_set_loc(p); + if (toy_parser_match(p, TOK_MINUS)) { + CfreeCgTypeId ty = toy_parse_expr_unary(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != p->int_type) { + toy_error(p, p->cur.loc, "invalid operand for unary '-'"); + return CFREE_CG_TYPE_NONE; + } + cfree_cg_unop(p->cg, CFREE_CG_NEG); + return p->int_type; + } + + if (toy_parser_match(p, TOK_BANG)) { + CfreeCgTypeId ty = toy_parse_expr_unary(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != p->int_type) { + toy_error(p, p->cur.loc, "invalid operand for '!'"); + return CFREE_CG_TYPE_NONE; + } + cfree_cg_unop(p->cg, CFREE_CG_NOT); + return p->int_type; + } + + if (toy_parser_match(p, TOK_AMPERSAND)) { + if (p->cur.kind != TOK_IDENT) { + toy_error(p, p->cur.loc, "expected identifier after '&'"); + return CFREE_CG_TYPE_NONE; + } + CfreeSym name = toy_tok_sym(p, p->cur); + ToyVar* v = toy_find_var(p, name); + if (!v) { + toy_error(p, p->cur.loc, "undefined variable"); + return CFREE_CG_TYPE_NONE; + } + toy_parser_advance(p); + cfree_cg_push_local(p->cg, v->slot); + cfree_cg_addr(p->cg); + return p->int_ptr_type; + } + + if (toy_parser_match(p, TOK_STAR)) { + CfreeCgTypeId ty = toy_parse_expr_unary(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != p->int_ptr_type) { + toy_error(p, p->cur.loc, "cannot dereference non-pointer"); + return 
CFREE_CG_TYPE_NONE; + } + cfree_cg_load(p->cg); + return p->int_type; + } + + return toy_parse_expr_primary(p); +} + +static CfreeCgTypeId toy_parse_expr_mul(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr_unary(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + while (p->cur.kind == TOK_STAR || p->cur.kind == TOK_SLASH || + p->cur.kind == TOK_PERCENT) { + ToyTokenKind op = p->cur.kind; + CfreeCgBinOp binop; + toy_parser_advance(p); + CfreeCgTypeId ty2 = toy_parse_expr_unary(p); + if (ty2 == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != p->int_type || ty2 != p->int_type) { + toy_error(p, p->cur.loc, "arithmetic operands must be int"); + return CFREE_CG_TYPE_NONE; + } + switch (op) { + case TOK_STAR: + binop = CFREE_CG_MUL; + break; + case TOK_SLASH: + binop = CFREE_CG_SDIV; + break; + case TOK_PERCENT: + binop = CFREE_CG_SREM; + break; + default: + return CFREE_CG_TYPE_NONE; + } + cfree_cg_binop(p->cg, binop); + } + return ty; +} + +static CfreeCgTypeId toy_parse_expr_add(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr_mul(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + while (p->cur.kind == TOK_PLUS || p->cur.kind == TOK_MINUS) { + ToyTokenKind op = p->cur.kind; + CfreeCgBinOp binop = (op == TOK_PLUS) ? 
CFREE_CG_ADD : CFREE_CG_SUB; + toy_parser_advance(p); + CfreeCgTypeId ty2 = toy_parse_expr_mul(p); + if (ty2 == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != p->int_type || ty2 != p->int_type) { + toy_error(p, p->cur.loc, "arithmetic operands must be int"); + return CFREE_CG_TYPE_NONE; + } + cfree_cg_binop(p->cg, binop); + } + return ty; +} + +static CfreeCgTypeId toy_parse_expr_cmp(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr_add(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (p->cur.kind == TOK_EQEQ || p->cur.kind == TOK_NE || + p->cur.kind == TOK_LT || p->cur.kind == TOK_GT || + p->cur.kind == TOK_LE || p->cur.kind == TOK_GE) { + ToyTokenKind op = p->cur.kind; + CfreeCgCmpOp cmp; + toy_parser_advance(p); + CfreeCgTypeId ty2 = toy_parse_expr_add(p); + if (ty2 == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + if (ty != ty2) { + toy_error(p, p->cur.loc, "comparison operands must have same type"); + return CFREE_CG_TYPE_NONE; + } + switch (op) { + case TOK_EQEQ: + cmp = CFREE_CG_EQ; + break; + case TOK_NE: + cmp = CFREE_CG_NE; + break; + case TOK_LT: + cmp = CFREE_CG_LT_S; + break; + case TOK_GT: + cmp = CFREE_CG_GT_S; + break; + case TOK_LE: + cmp = CFREE_CG_LE_S; + break; + case TOK_GE: + cmp = CFREE_CG_GE_S; + break; + default: + return CFREE_CG_TYPE_NONE; + } + cfree_cg_cmp(p->cg, cmp); + cfree_cg_convert(p->cg, p->int_type); + ty = p->int_type; + } + return ty; +} + +static CfreeCgTypeId toy_parse_expr_and(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr_cmp(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + while (p->cur.kind == TOK_ANDAND) { + CfreeCgScope scope; + toy_parser_advance(p); + scope = cfree_cg_scope_begin(p->cg, p->int_type); + cfree_cg_push_int(p->cg, 0, p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_false(p->cg, scope); + cfree_cg_drop(p->cg); + ty = toy_parse_expr_cmp(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + cfree_cg_push_int(p->cg, 0, 
p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_false(p->cg, scope); + cfree_cg_drop(p->cg); + cfree_cg_push_int(p->cg, 1, p->int_type); + cfree_cg_scope_end(p->cg, scope); + ty = p->int_type; + } + return ty; +} + +static CfreeCgTypeId toy_parse_expr_or(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr_and(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + while (p->cur.kind == TOK_PIPEPIPE) { + CfreeCgScope scope; + toy_parser_advance(p); + scope = cfree_cg_scope_begin(p->cg, p->int_type); + cfree_cg_push_int(p->cg, 1, p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_true(p->cg, scope); + cfree_cg_drop(p->cg); + ty = toy_parse_expr_and(p); + if (ty == CFREE_CG_TYPE_NONE) return CFREE_CG_TYPE_NONE; + cfree_cg_push_int(p->cg, 1, p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_true(p->cg, scope); + cfree_cg_drop(p->cg); + cfree_cg_push_int(p->cg, 0, p->int_type); + cfree_cg_scope_end(p->cg, scope); + ty = p->int_type; + } + return ty; +} + +static CfreeCgTypeId toy_parse_expr(ToyParser* p) { + return toy_parse_expr_or(p); +} + +/* ============================================================ + * Statement parsing + * ============================================================ */ + +static int toy_parse_stmt(ToyParser* p); + +static int toy_parse_block(ToyParser* p) { + if (!toy_parser_expect(p, TOK_LBRACE)) { + toy_error(p, p->cur.loc, "expected '{'"); + return 0; + } + while (p->cur.kind != TOK_RBRACE && p->cur.kind != TOK_EOF) { + if (!toy_parse_stmt(p)) return 0; + } + if (!toy_parser_expect(p, TOK_RBRACE)) { + toy_error(p, p->cur.loc, "expected '}'"); + return 0; + } + return 1; +} + +static int toy_parse_let_stmt(ToyParser* p) { + CfreeSym name; + CfreeCgTypeId ty; + CfreeCgSlot slot; + toy_parser_advance(p); /* let */ + if (p->cur.kind != TOK_IDENT) { + toy_error(p, p->cur.loc, "expected identifier after 'let'"); return 0; + } + name = toy_tok_sym(p, p->cur); toy_parser_advance(p); - if 
(!toy_parser_expect(p, TOK_LPAREN)) return 0; - if (!toy_parser_expect(p, TOK_RPAREN)) return 0; - if (toy_parser_match(p, TOK_COLON)) { - if (!toy_parser_match(p, TOK_INT)) return 0; + if (!toy_parser_expect(p, TOK_COLON)) { + toy_error(p, p->cur.loc, "expected ':' after identifier"); + return 0; + } + ty = toy_parse_type(p); + if (ty == CFREE_CG_TYPE_NONE) return 0; + slot = cfree_cg_local_slot(p->cg, ty, name); + if (p->nvars >= TOY_MAX_VARS) { + toy_error(p, p->cur.loc, "too many locals"); + return 0; + } + p->vars[p->nvars].name = name; + p->vars[p->nvars].type = ty; + p->vars[p->nvars].slot = slot; + p->vars[p->nvars].is_param = 0; + p->nvars++; + + if (toy_parser_match(p, TOK_EQ)) { + CfreeCgTypeId expr_ty = toy_parse_expr(p); + if (expr_ty == CFREE_CG_TYPE_NONE) return 0; + if (expr_ty != ty) { + toy_error(p, p->cur.loc, "type mismatch in let initializer"); + return 0; + } + cfree_cg_push_local(p->cg, slot); + cfree_cg_store(p->cg); + cfree_cg_drop(p->cg); } - if (!toy_parser_expect(p, TOK_LBRACE)) return 0; - if (!toy_parser_match(p, TOK_RETURN)) return 0; + if (!toy_parser_expect(p, TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after let"); + return 0; + } + return 1; +} + +static int toy_parse_if_stmt(ToyParser* p) { + CfreeCgScope scope; + CfreeCgTypeId cond_ty; + toy_parser_advance(p); /* if */ + cond_ty = toy_parse_expr(p); + if (cond_ty == CFREE_CG_TYPE_NONE) return 0; + if (cond_ty != p->int_type) { + toy_error(p, p->cur.loc, "if condition must be int"); + return 0; + } + + scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE); + cfree_cg_push_int(p->cg, 0, p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_false(p->cg, scope); + + if (!toy_parse_block(p)) return 0; + + if (p->cur.kind == TOK_ELSE) { + cfree_cg_break(p->cg, scope); + toy_parser_advance(p); /* else */ + if (p->cur.kind == TOK_LBRACE) { + if (!toy_parse_block(p)) return 0; + } else { + if (!toy_parse_stmt(p)) return 0; + } + } + cfree_cg_scope_end(p->cg, 
scope); + return 1; +} + +static int toy_parse_while_stmt(ToyParser* p) { + CfreeCgScope scope; + CfreeCgTypeId cond_ty; + + toy_parser_advance(p); /* while */ + + if (p->nscopes >= TOY_MAX_SCOPES) { + toy_error(p, p->cur.loc, "too many nested scopes"); + return 0; + } + scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE); + p->scopes[p->nscopes].cg_scope = scope; + p->nscopes++; + + cond_ty = toy_parse_expr(p); + if (cond_ty == CFREE_CG_TYPE_NONE) { + p->nscopes--; + return 0; + } + if (cond_ty != p->int_type) { + toy_error(p, p->cur.loc, "while condition must be int"); + p->nscopes--; + return 0; + } + + cfree_cg_push_int(p->cg, 0, p->int_type); + cfree_cg_cmp(p->cg, CFREE_CG_NE); + cfree_cg_break_false(p->cg, scope); - /* Optional unary minus for negative return values */ - int neg = 0; - if (p->cur.kind == TOK_MINUS) { - neg = 1; + if (!toy_parse_block(p)) { + p->nscopes--; + return 0; + } + + cfree_cg_continue(p->cg, scope); + cfree_cg_scope_end(p->cg, scope); + p->nscopes--; + return 1; +} + +static int toy_parse_break_stmt(ToyParser* p) { + toy_parser_advance(p); /* break */ + if (p->nscopes == 0) { + toy_error(p, p->cur.loc, "break outside loop"); + return 0; + } + cfree_cg_break(p->cg, p->scopes[p->nscopes - 1].cg_scope); + if (!toy_parser_expect(p, TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after break"); + return 0; + } + return 1; +} + +static int toy_parse_continue_stmt(ToyParser* p) { + toy_parser_advance(p); /* continue */ + if (p->nscopes == 0) { + toy_error(p, p->cur.loc, "continue outside loop"); + return 0; + } + cfree_cg_continue(p->cg, p->scopes[p->nscopes - 1].cg_scope); + if (!toy_parser_expect(p, TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after continue"); + return 0; + } + return 1; +} + +static int toy_parse_return_stmt(ToyParser* p) { + CfreeCgTypeId ty; + toy_parser_advance(p); /* return */ + if (p->cur.kind == TOK_SEMI) { toy_parser_advance(p); + if (p->cur_fn_ret != p->types.void_) { + toy_error(p, p->cur.loc, 
"return without value in non-void function"); + return 0; + } + cfree_cg_ret_void(p->cg); + return 1; + } + ty = toy_parse_expr(p); + if (ty == CFREE_CG_TYPE_NONE) return 0; + if (ty != p->cur_fn_ret) { + toy_error(p, p->cur.loc, "return type mismatch"); + return 0; + } + cfree_cg_ret(p->cg); + if (!toy_parser_expect(p, TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after return"); + return 0; } - if (p->cur.kind != TOK_NUMBER) return 0; - p->value = neg ? -p->cur.int_value : p->cur.int_value; + return 1; +} + +static int toy_parse_expr_stmt(ToyParser* p) { + CfreeCgTypeId ty = toy_parse_expr(p); + if (ty == CFREE_CG_TYPE_NONE) return 0; + if (!toy_parser_expect(p, TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after expression"); + return 0; + } + cfree_cg_drop(p->cg); + return 1; +} + +static int toy_parse_stmt(ToyParser* p) { + toy_set_loc(p); + if (p->cur.kind == TOK_LET) return toy_parse_let_stmt(p); + if (p->cur.kind == TOK_IF) return toy_parse_if_stmt(p); + if (p->cur.kind == TOK_WHILE) return toy_parse_while_stmt(p); + if (p->cur.kind == TOK_BREAK) return toy_parse_break_stmt(p); + if (p->cur.kind == TOK_CONTINUE) return toy_parse_continue_stmt(p); + if (p->cur.kind == TOK_RETURN) return toy_parse_return_stmt(p); + if (p->cur.kind == TOK_LBRACE) return toy_parse_block(p); + + /* Assignment or expression statement */ + if (p->cur.kind == TOK_IDENT && + toy_lexer_peek(&p->lex).kind == TOK_EQ) { + CfreeSym name = toy_tok_sym(p, p->cur); + ToyVar* v = toy_find_var(p, name); + if (!v) { + toy_error(p, p->cur.loc, "undefined variable in assignment"); + return 0; + } + toy_parser_advance(p); /* ident */ + toy_parser_advance(p); /* = */ + CfreeCgTypeId expr_ty = toy_parse_expr(p); + if (expr_ty == CFREE_CG_TYPE_NONE) return 0; + if (expr_ty != v->type) { + toy_error(p, p->cur.loc, "type mismatch in assignment"); + return 0; + } + cfree_cg_push_local(p->cg, v->slot); + cfree_cg_store(p->cg); + cfree_cg_drop(p->cg); + if (!toy_parser_expect(p, 
TOK_SEMI)) { + toy_error(p, p->cur.loc, "expected ';' after assignment"); + return 0; + } + return 1; + } + + return toy_parse_expr_stmt(p); +} + +/* ============================================================ + * Function parsing + * ============================================================ */ + +static int toy_parse_fn(ToyParser* p) { + CfreeSym name; + CfreeCgTypeId ret_type; + CfreeCgTypeId param_types[TOY_MAX_PARAMS]; + CfreeSym param_names[TOY_MAX_PARAMS]; + size_t nparams = 0; + CfreeCgDeclAttrs attrs; + CfreeCgTypeId fn_ty; + ToyFn* fn_entry; + size_t i; + + if (!toy_parser_match(p, TOK_FN)) return 0; + + if (p->cur.kind != TOK_IDENT) { + toy_error(p, p->cur.loc, "expected function name"); + return -1; + } + name = toy_tok_sym(p, p->cur); toy_parser_advance(p); - if (!toy_parser_expect(p, TOK_SEMI)) return 0; - if (!toy_parser_expect(p, TOK_RBRACE)) return 0; - return toy_parser_at_end(p); + if (!toy_parser_expect(p, TOK_LPAREN)) { + toy_error(p, p->cur.loc, "expected '(' after function name"); + return -1; + } + + if (p->cur.kind != TOK_RPAREN) { + for (;;) { + if (p->cur.kind != TOK_IDENT) { + toy_error(p, p->cur.loc, "expected parameter name"); + return -1; + } + if (nparams >= TOY_MAX_PARAMS) { + toy_error(p, p->cur.loc, "too many parameters"); + return -1; + } + param_names[nparams] = toy_tok_sym(p, p->cur); + toy_parser_advance(p); + if (!toy_parser_expect(p, TOK_COLON)) { + toy_error(p, p->cur.loc, "expected ':' after parameter name"); + return -1; + } + param_types[nparams] = toy_parse_type(p); + if (param_types[nparams] == CFREE_CG_TYPE_NONE) return -1; + nparams++; + if (p->cur.kind == TOK_COMMA) { + toy_parser_advance(p); + } else { + break; + } + } + } + if (!toy_parser_expect(p, TOK_RPAREN)) { + toy_error(p, p->cur.loc, "expected ')' after parameters"); + return -1; + } + + if (toy_parser_match(p, TOK_COLON)) { + ret_type = toy_parse_type(p); + if (ret_type == CFREE_CG_TYPE_NONE) return -1; + } else { + ret_type = p->types.void_; + } + + /* 
Build function type */ + fn_ty = cfree_cg_type_func(p->c, ret_type, param_types, (uint32_t)nparams, 0); + if (fn_ty == CFREE_CG_TYPE_NONE) { + toy_error(p, p->cur.loc, "failed to create function type"); + return -1; + } + + /* Register function for recursion and later calls */ + if (p->nfns >= TOY_MAX_FNS) { + toy_error(p, p->cur.loc, "too many functions"); + return -1; + } + fn_entry = &p->fns[p->nfns]; + fn_entry->name = name; + fn_entry->type = fn_ty; + fn_entry->ret = ret_type; + fn_entry->nparams = nparams; + for (i = 0; i < nparams; i++) fn_entry->params[i] = param_types[i]; + p->nfns++; + + /* Emit function */ + memset(&attrs, 0, sizeof(attrs)); + attrs.bind = CFREE_SB_GLOBAL; + attrs.visibility = CFREE_CG_VIS_DEFAULT; + attrs.tls_model = CFREE_CG_TLS_DEFAULT; + attrs.flags = CFREE_CG_DECL_DEFINED; + cfree_cg_func_begin(p->cg, name, fn_ty, attrs); + + /* Setup parameter slots */ + p->nvars = 0; + p->cur_fn_ret = ret_type; + for (i = 0; i < nparams; i++) { + CfreeCgSlot slot = + cfree_cg_param_slot(p->cg, (uint32_t)i, param_types[i], param_names[i]); + if (p->nvars >= TOY_MAX_VARS) { + toy_error(p, p->cur.loc, "too many vars"); + return -1; + } + p->vars[p->nvars].name = param_names[i]; + p->vars[p->nvars].type = param_types[i]; + p->vars[p->nvars].slot = slot; + p->vars[p->nvars].is_param = 1; + p->nvars++; + } + + if (!toy_parse_block(p)) return -1; + + /* Implicit return for void functions */ + if (ret_type == p->types.void_) { + cfree_cg_ret_void(p->cg); + } + + cfree_cg_func_end(p->cg); + p->nvars = 0; + p->cur_fn_ret = p->types.void_; + return 1; +} + +/* ============================================================ + * Program parsing + * ============================================================ */ + +static int toy_parse_program(ToyParser* p) { + while (p->cur.kind != TOK_EOF) { + int r = toy_parse_fn(p); + if (r < 0) return 0; + if (r == 0) { + toy_error(p, p->cur.loc, "expected function declaration"); + return 0; + } + } + return 1; } +/* 
============================================================ + * Public entry point + * ============================================================ */ + int cfree_toy_compile(CfreeCompiler* c, const CfreeCompileOptions* opts, const CfreeBytesInput* input, CfreeObjBuilder* out) { ToyParser p; - const uint8_t* source; CfreeCg* cg; - CfreeCgBuiltinTypes types; - CfreeCgTypeId fn_ty; - CfreeSym main_sym; - CfreeCgDeclAttrs main_attrs; + const uint8_t* source; (void)opts; if (!c || !input || !out) return 1; source = input->data ? input->data : (const uint8_t*)""; - toy_parser_init(&p, source, input->len); - if (!toy_parse_main(&p)) return 1; - - main_attrs.bind = CFREE_SB_GLOBAL; - main_attrs.visibility = CFREE_CG_VIS_DEFAULT; - main_attrs.tls_model = CFREE_CG_TLS_DEFAULT; - main_attrs.section = 0; - main_attrs.align = 0; - main_attrs.flags = CFREE_CG_DECL_DEFINED; - main_sym = cfree_sym_intern(c, "main"); cg = cfree_cg_new(c, out); - types = cfree_cg_builtin_types(c); - fn_ty = cfree_cg_type_func(c, types.i32, NULL, 0, 0); - cfree_cg_func_begin(cg, main_sym, fn_ty, main_attrs); - cfree_cg_push_int(cg, p.value, types.i32); - cfree_cg_ret(cg); - cfree_cg_func_end(cg); + if (!cg) return 1; + + toy_parser_init(&p, c, cg, source, input->len); + if (!toy_parse_program(&p)) { + cfree_cg_free(cg); + return 1; + } + if (p.cur.kind != TOK_EOF) { + toy_error(&p, p.cur.loc, "unexpected token after program end"); + cfree_cg_free(cg); + return 1; + } + cfree_cg_free(cg); return 0; }