kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c1a6bf61d4e24de878d42dd717f69cc7589de591
parent 4a221b1ee6db53d17a49c0af494e10f234502f53
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 15 May 2026 14:12:26 -0700

Implement constfold delayed arithmetic and local shadows

Diffstat:
M doc/CONSTFOLD.md | 56 +++++++++++++++++++++++++++++++++++++++-----------------
M src/api/cg.c | 507 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M test/api/cg_type_test.c | 590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 1122 insertions(+), 31 deletions(-)

diff --git a/doc/CONSTFOLD.md b/doc/CONSTFOLD.md @@ -21,14 +21,16 @@ Implemented: - immediate-only integer and pointer-like compare folding; - scalar immediate return preservation through `cfree_cg_ret`; - CG API shape coverage for O1 literal folds; -- delayed-compare materialization now reuses only owned operand registers. +- delayed-compare materialization now reuses only owned operand registers; +- delayed `SV_ARITH` for unary and binary arithmetic; +- expression-local arithmetic chain folding; +- straight-line local constant shadowing with conservative boundary + invalidation. Remaining: -- delayed `SV_ARITH` for unary and binary arithmetic; -- expression-local arithmetic chain folding; -- straight-line local constant shadowing; -- disassembly and metrics updates after the remaining phases land. +- disassembly and metrics updates after the remaining phases land; +- cleanup/refactor work listed in "Future Refactors". ## Current Shape @@ -196,7 +198,6 @@ typedef struct ApiSValue { UnOp arith_un_op; u8 kind; u8 arith_kind; - u8 arith_has_b; u8 res; u8 pinned; u8 lvalue; @@ -231,8 +232,6 @@ Creation rules: Immediate-only cases fold to `OPK_IMM` instead. - For unary operations, delay only non-immediate sources. Immediate-only cases fold to `OPK_IMM` instead. -- Store unary operations with `arith_has_b == 0`; store binary operations with - `arith_has_b == 1`. - Own registers only when the source `ApiSValue` owns that register, following the existing compare ownership pattern. - Never create delayed arithmetic for lvalues, aggregates, FP values, calls, or @@ -287,7 +286,6 @@ typedef struct ApiSourceLocal { ... i64 const_value; u8 const_valid; - u8 const_type_class; u8 pad[2]; } ApiSourceLocal; ``` @@ -300,6 +298,8 @@ Rules: - Parameters start unknown. - On store of an immediate to an eligible source local, set the shadow value. - On store of a non-immediate to that local, clear the shadow value. 
+- On partial-width or mismatched-type access to that local, clear/do not use the + shadow value. - On load of an eligible source local with a valid shadow, push `OPK_IMM` instead of loading/materializing the local. - On taking a local address, clear the local's shadow. @@ -405,19 +405,19 @@ Final observed parse result: ### Phase 3: Delayed Arithmetic -Add `SV_ARITH`, operation-kind metadata, optional second-operand handling, -ownership helpers, materialization, and release logic. +Done. Added `SV_ARITH`, operation-kind metadata, ownership helpers, +materialization, release logic, and register-pressure materialization. -Add tests for: +Tests cover: - one delayed unary arithmetic value consumed by return; - one delayed binary arithmetic value consumed by return; - delayed arithmetic consumed by compare; - delayed arithmetic consumed by another binop with immediate folding; - delayed unary arithmetic consumed by another unop with chain folding; -- delayed arithmetic forced by store/call path; -- register pressure path materializing delayed arithmetic when no ordinary - victim is available. +- delayed arithmetic forced by store path; +- register pressure path is still best covered indirectly by `test-opt`; add a + direct CG API pressure case if a future regression appears. Run: @@ -428,7 +428,7 @@ make test-opt ### Phase 4: Local Constant Shadow -Add local shadow state to `ApiSourceLocal` and invalidation helpers: +Done. Added local shadow state to `ApiSourceLocal` and invalidation helpers: ```c static void api_local_const_clear(ApiSourceLocal*); @@ -454,7 +454,7 @@ Wire invalidation through: - indirect stores; - local address-taking. 
-Add tests for: +Tests cover: - `int x = 40; x = x + 2; return x;`; - shadow cleared by label/jump; @@ -462,6 +462,7 @@ Add tests for: - shadow cleared by address-taking; - shadow cleared by volatile access; - shadow cleared by indirect store; +- partial-width stores do not preserve a full-local shadow; - parameter locals are initially unknown. Run: @@ -482,6 +483,27 @@ make test-aa64-inline and an RV64 targeted case if available locally. +### Future Refactors + +These are not required for the phase 3/4 patch, but should be considered before +growing the vstack simplifier further: + +- Move `SV_CMP` and `SV_ARITH` payload fields into a small union once the shape + stabilizes. The current flat struct keeps the first implementation simple but + increases every vstack entry. +- Centralize local-shadow invalidation behind named boundary helpers such as + `api_memory_boundary`, `api_control_boundary`, and + `api_local_address_taken`. The first implementation clears at call sites so + correctness is visible, but the repeated calls are easy to miss when adding + new CG API operations. +- Add an explicit CG API register-pressure test that forces delayed arithmetic + materialization when no ordinary spill victim exists. +- Consider a shadow generation counter if whole-function local counts grow + enough for repeated `clear_all` scans to show up in O1 metrics. +- Consider one small builder helper in `test/api/cg_type_test.c` for creating + O1 one-parameter functions; the expanded constfold tests intentionally stayed + direct but now duplicate setup boilerplate. 
+ ### Phase 5: Disassembly And Metrics Re-run the probe cases from `doc/OPT1.md`: diff --git a/src/api/cg.c b/src/api/cg.c @@ -996,21 +996,33 @@ typedef enum SResidency { typedef enum ApiSValueKind { SV_OPERAND, SV_CMP, + SV_ARITH, } ApiSValueKind; +typedef enum ApiDelayedArithKind { + API_DELAYED_UNOP, + API_DELAYED_BINOP, +} ApiDelayedArithKind; + typedef struct ApiSValue { Operand op; Operand cmp_a; Operand cmp_b; + Operand arith_a; + Operand arith_b; CfreeCgTypeId type; CmpOp cmp_op; + BinOp arith_bin_op; + UnOp arith_un_op; u8 kind; + u8 arith_kind; u8 res; u8 pinned; u8 lvalue; u8 cmp_a_owned; u8 cmp_b_owned; - u8 pad[3]; + u8 arith_a_owned; + u8 arith_b_owned; FrameSlot spill_slot; CfreeCgLocal source_local; } ApiSValue; @@ -1042,9 +1054,11 @@ typedef struct ApiSourceLocal { SrcLoc loc; CGLocalDesc desc; CGLocalStorage storage; + i64 const_value; u32 param_index; u8 kind; - u8 pad[3]; + u8 const_valid; + u8 pad[2]; } ApiSourceLocal; struct CfreeCg { @@ -1289,6 +1303,49 @@ static ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, return sv; } +static ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, + int a_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.arith_kind = API_DELAYED_UNOP; + sv.type = ty; + sv.arith_un_op = op; + sv.arith_a = a; + sv.arith_a_owned = a_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +static ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, + CfreeCgTypeId ty, int a_owned, + int b_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.arith_kind = API_DELAYED_BINOP; + sv.type = ty; + sv.arith_bin_op = op; + sv.arith_a = a; + sv.arith_b = b; + sv.arith_a_owned = a_owned ? 1u : 0u; + sv.arith_b_owned = b_owned ? 
1u : 0u; + sv.res = RES_INHERENT; + sv.spill_slot = FRAME_SLOT_NONE; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +static ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, + int owned) { + ApiSValue sv = api_make_sv(op, ty); + if (op.kind == OPK_REG && !owned) + sv.res = RES_FIXED_REG; + return sv; +} + static CfreeCgTypeId api_sv_type(const ApiSValue *sv) { return sv->type ? sv->type : sv->op.type; } @@ -1346,7 +1403,7 @@ static ApiSValue api_pop(CfreeCg *g) { /* ---- register class helpers ---- */ static u8 api_class_of_sv(const ApiSValue *sv) { - if (sv->kind == SV_CMP) + if (sv->kind == SV_CMP || sv->kind == SV_ARITH) return RC_INT; if (sv->op.kind == OPK_INDIRECT) return RC_INT; @@ -1356,6 +1413,8 @@ static u8 api_class_of_sv(const ApiSValue *sv) { } static Reg api_reg_of_sv(const ApiSValue *sv) { + if (sv->kind == SV_ARITH || sv->kind == SV_CMP) + return (Reg)REG_NONE; if (sv->op.kind == OPK_REG) return sv->op.v.reg; if (sv->op.kind == OPK_INDIRECT) @@ -1588,6 +1647,22 @@ static void api_release_cmp(CfreeCg *g, ApiSValue *sv) { sv->kind = SV_OPERAND; } +static void api_release_arith(CfreeCg *g, ApiSValue *sv) { + if (sv->arith_a_owned) + api_release_operand_reg(g, sv->arith_a); + if (sv->arith_b_owned && + (sv->arith_b.kind != OPK_REG || sv->arith_a.kind != OPK_REG || + sv->arith_b.v.reg != sv->arith_a.v.reg || + sv->arith_b.cls != sv->arith_a.cls || !sv->arith_a_owned)) { + api_release_operand_reg(g, sv->arith_b); + } + memset(&sv->arith_a, 0, sizeof sv->arith_a); + memset(&sv->arith_b, 0, sizeof sv->arith_b); + sv->arith_a_owned = 0; + sv->arith_b_owned = 0; + sv->kind = SV_OPERAND; +} + static void api_materialize_cmp_to(CfreeCg *g, ApiSValue *sv, Operand dst) { g->target->cmp(g->target, sv->cmp_op, dst, sv->cmp_a, sv->cmp_b); if (sv->cmp_a_owned && sv->cmp_a.kind == OPK_REG && @@ -1609,6 +1684,47 @@ static void api_materialize_cmp_to(CfreeCg *g, ApiSValue *sv, Operand dst) { sv->lvalue = 0; } +static void 
api_materialize_arith_to(CfreeCg *g, ApiSValue *sv, Operand dst) { + if (sv->arith_kind == API_DELAYED_UNOP) { + g->target->unop(g->target, sv->arith_un_op, dst, sv->arith_a); + } else { + g->target->binop(g->target, sv->arith_bin_op, dst, sv->arith_a, + sv->arith_b); + } + if (sv->arith_a_owned && sv->arith_a.kind == OPK_REG && + (sv->arith_a.v.reg != dst.v.reg || sv->arith_a.cls != dst.cls)) { + api_release_operand_reg(g, sv->arith_a); + } + if (sv->arith_b_owned && sv->arith_b.kind == OPK_REG && + (sv->arith_b.v.reg != dst.v.reg || sv->arith_b.cls != dst.cls)) { + api_release_operand_reg(g, sv->arith_b); + } + memset(&sv->arith_a, 0, sizeof sv->arith_a); + memset(&sv->arith_b, 0, sizeof sv->arith_b); + sv->arith_a_owned = 0; + sv->arith_b_owned = 0; + sv->kind = SV_OPERAND; + sv->op = dst; + sv->type = dst.type; + sv->res = RES_REG; + sv->lvalue = 0; +} + +static int api_arith_rhs_reusable(const ApiSValue *sv) { + if (sv->arith_kind == API_DELAYED_UNOP) + return 0; + switch (sv->arith_bin_op) { + case BO_IADD: + case BO_IMUL: + case BO_AND: + case BO_OR: + case BO_XOR: + return 1; + default: + return 0; + } +} + static int api_materialize_cmp_victim(CfreeCg *g, u8 cls) { if (cls != RC_INT) return 0; @@ -1632,6 +1748,29 @@ static int api_materialize_cmp_victim(CfreeCg *g, u8 cls) { return 0; } +static int api_materialize_arith_victim(CfreeCg *g, u8 cls) { + if (cls != RC_INT) + return 0; + for (u32 i = 0; i < g->sp; ++i) { + ApiSValue *sv = &g->stack[i]; + Operand dst; + if (sv->kind != SV_ARITH || sv->pinned) + continue; + if (sv->arith_a_owned && sv->arith_a.kind == OPK_REG && + sv->arith_a.cls == RC_INT) { + dst = api_op_reg(sv->arith_a.v.reg, api_sv_type(sv)); + } else if (api_arith_rhs_reusable(sv) && sv->arith_b_owned && + sv->arith_b.kind == OPK_REG && sv->arith_b.cls == RC_INT) { + dst = api_op_reg(sv->arith_b.v.reg, api_sv_type(sv)); + } else { + continue; + } + api_materialize_arith_to(g, sv, dst); + return 1; + } + return 0; +} + static Reg 
api_alloc_reg_or_spill(CfreeCg *g, u8 cls, CfreeCgTypeId ty) { CGTarget *T = g->target; Reg r; @@ -1647,6 +1786,12 @@ static Reg api_alloc_reg_or_spill(CfreeCg *g, u8 cls, CfreeCgTypeId ty) { return r; victim = api_pick_victim(g, cls); } + if (!victim && api_materialize_arith_victim(g, cls)) { + r = api_alloc_reg(g, cls); + if (r != (Reg)REG_NONE) + return r; + victim = api_pick_victim(g, cls); + } if (victim) { FrameSlot slot = api_take_spill_slot(g, cls); CfreeCgTypeId rty = api_owned_reg_type(g, victim); @@ -1690,6 +1835,24 @@ static void api_ensure_reg(CfreeCg *g, ApiSValue *sv) { api_materialize_cmp_to(g, sv, dst); return; } + if (sv->kind == SV_ARITH) { + CfreeCgTypeId ty = api_sv_type(sv); + Operand dst; + if (sv->arith_a_owned && sv->arith_a.kind == OPK_REG && + sv->arith_a.cls == RC_INT) { + dst = api_op_reg(sv->arith_a.v.reg, ty); + } else if (api_arith_rhs_reusable(sv) && sv->arith_b_owned && + sv->arith_b.kind == OPK_REG && sv->arith_b.cls == RC_INT) { + dst = api_op_reg(sv->arith_b.v.reg, ty); + } else { + Reg r = + api_alloc_reg_or_spill(g, RC_INT, + ty ? 
ty : builtin_id(CFREE_CG_BUILTIN_I32)); + dst = api_op_reg(r, ty); + } + api_materialize_arith_to(g, sv, dst); + return; + } if (sv->res != RES_SPILLED) return; CGTarget *T = g->target; @@ -1743,6 +1906,8 @@ static Operand api_force_reg_unless_imm(CfreeCg *g, ApiSValue *v, static void api_release(CfreeCg *g, ApiSValue *sv) { if (sv->kind == SV_CMP) { api_release_cmp(g, sv); + } else if (sv->kind == SV_ARITH) { + api_release_arith(g, sv); } else if (sv->res == RES_REG) { api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); } else if (sv->res == RES_SPILLED) { @@ -2159,6 +2324,227 @@ static int api_try_fold_int_cmp(CfreeCg *g, CmpOp op, CfreeCgTypeId ty, i64 a, return 1; } +static int api_source_flags_addr_taken(u32 flags); +static ApiSourceLocal *api_local_from_handle(CfreeCg *g, CfreeCgLocal local); + +static void api_local_const_clear(ApiSourceLocal *rec) { + if (!rec) + return; + rec->const_valid = 0; + rec->const_value = 0; +} + +static void api_local_const_clear_all(CfreeCg *g) { + if (!g) + return; + for (u32 i = 0; i < g->nlocals; ++i) + api_local_const_clear(&g->locals[i]); +} + +static int api_local_const_can_track(CfreeCg *g, const ApiSourceLocal *rec, + CfreeCgMemAccess access) { + u32 width; + CfreeCgTypeId ty; + u64 access_size; + u64 local_size; + if (!g || !rec) + return 0; + if (rec->kind != API_SOURCE_LOCAL_AUTO) + return 0; + if (api_source_flags_addr_taken(rec->attrs.flags)) + return 0; + if (access.flags & CFREE_CG_MEM_VOLATILE) + return 0; + ty = resolve_type(g->c, access.type); + if (!ty) + ty = rec->type; + if (ty != rec->type) + return 0; + access_size = abi_cg_sizeof(g->c->abi, ty); + local_size = abi_cg_sizeof(g->c->abi, rec->type); + if (access_size != local_size) + return 0; + return api_foldable_int_like_type(g->c, ty, &width); +} + +static void api_local_const_store(CfreeCg *g, CfreeCgLocal local, + CfreeCgMemAccess access, i64 value) { + ApiSourceLocal *rec = api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + 
if (!api_local_const_can_track(g, rec, access)) { + api_local_const_clear(rec); + return; + } + ty = resolve_type(g->c, access.type); + if (!ty) + ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) { + api_local_const_clear(rec); + return; + } + rec->const_value = api_fold_result(g->c, ty, (u64)value, width); + rec->const_valid = 1; +} + +static int api_local_const_load(CfreeCg *g, CfreeCgLocal local, + CfreeCgMemAccess access, Operand *out) { + ApiSourceLocal *rec = api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + if (!out || !api_local_const_can_track(g, rec, access)) + return 0; + if (!rec->const_valid) + return 0; + ty = resolve_type(g->c, access.type); + if (!ty) + ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) + return 0; + *out = + api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); + return 1; +} + +static int api_can_delay_int_arith(CfreeCg *g, CfreeCgTypeId ty, u32 flags) { + u32 width; + return g && !flags && api_foldable_int_type(g->c, ty, &width); +} + +static int api_op_is_int_identity(CfreeCg *g, BinOp op, CfreeCgTypeId ty, + i64 imm) { + u32 width; + u64 v; + if (!api_foldable_int_type(g->c, ty, &width)) + return 0; + v = api_mask_width((u64)imm, width); + switch (op) { + case BO_IADD: + case BO_ISUB: + case BO_OR: + case BO_XOR: + return v == 0; + case BO_AND: + return v == api_width_mask(width); + default: + return 0; + } +} + +static int api_try_collapse_binop_identity(CfreeCg *g, BinOp op, + CfreeCgTypeId ty, ApiSValue *a, + ApiSValue *b, ApiSValue *out) { + if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && + a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { + *out = api_make_sv_with_reg_ownership(a->op, ty, + api_sv_owns_operand_reg(a, &a->op)); + a->res = RES_INHERENT; + return 1; + } + if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && + b->op.kind != OPK_IMM && + (op == BO_IADD || 
op == BO_OR || op == BO_XOR || op == BO_AND) && + api_op_is_int_identity(g, op, ty, a->op.v.imm)) { + *out = api_make_sv_with_reg_ownership(b->op, ty, + api_sv_owns_operand_reg(b, &b->op)); + b->res = RES_INHERENT; + return 1; + } + return 0; +} + +static int api_try_fold_arith_chain(CfreeCg *g, BinOp op, CfreeCgTypeId ty, + ApiSValue *a, ApiSValue *b, + ApiSValue *out) { + i64 folded; + BinOp result_op; + if (a->kind != SV_ARITH || a->arith_kind != API_DELAYED_BINOP || + a->arith_a.kind != OPK_REG || a->arith_b.kind != OPK_IMM || + b->kind != SV_OPERAND || b->op.kind != OPK_IMM) { + return 0; + } + result_op = a->arith_bin_op; + switch (a->arith_bin_op) { + case BO_IADD: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->arith_b.v.imm, b->op.v.imm, + &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if (!api_try_fold_int_binop(g, BO_ISUB, ty, a->arith_b.v.imm, b->op.v.imm, + &folded)) + return 0; + result_op = BO_IADD; + } else { + return 0; + } + break; + case BO_ISUB: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, a->arith_b.v.imm, + &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->arith_b.v.imm, b->op.v.imm, + &folded)) + return 0; + result_op = BO_ISUB; + } else { + return 0; + } + break; + case BO_XOR: + if (op != BO_XOR || !api_try_fold_int_binop(g, BO_XOR, ty, a->arith_b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_XOR; + break; + case BO_AND: + if (op != BO_AND || !api_try_fold_int_binop(g, BO_AND, ty, a->arith_b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_AND; + break; + case BO_OR: + if (op != BO_OR || !api_try_fold_int_binop(g, BO_OR, ty, a->arith_b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_OR; + break; + default: + return 0; + } + if (api_op_is_int_identity(g, result_op, ty, folded)) { + *out = api_make_sv_with_reg_ownership(a->arith_a, ty, 
a->arith_a_owned); + a->arith_a_owned = 0; + memset(&a->arith_a, 0, sizeof a->arith_a); + return 1; + } + a->arith_bin_op = result_op; + a->arith_b.v.imm = folded; + *out = *a; + a->arith_a_owned = 0; + a->arith_b_owned = 0; + memset(&a->arith_a, 0, sizeof a->arith_a); + memset(&a->arith_b, 0, sizeof a->arith_b); + return 1; +} + +static int api_try_fold_unary_chain(ApiSValue *a, UnOp op, CfreeCgTypeId ty, + ApiSValue *out) { + if (op != UO_BNOT || a->kind != SV_ARITH || + a->arith_kind != API_DELAYED_UNOP || a->arith_un_op != UO_BNOT || + a->arith_a.kind != OPK_REG) { + return 0; + } + *out = api_make_sv_with_reg_ownership(a->arith_a, ty, a->arith_a_owned); + a->arith_a_owned = 0; + memset(&a->arith_a, 0, sizeof a->arith_a); + return 1; +} + /* ---- C-symbol mangling ---- */ static SymBind api_map_bind(CfreeSymBind b) { @@ -2630,6 +3016,7 @@ CfreeCgLocal cfree_cg_local(CfreeCg *g, CfreeCgTypeId type, storage.v.reg); } rec = &g->locals[g->nlocals++]; + memset(rec, 0, sizeof *rec); rec->type = ty; rec->name = attrs.name; rec->attrs = attrs; @@ -2683,6 +3070,7 @@ CfreeCgLocal cfree_cg_param(CfreeCg *g, uint32_t index, CfreeCgTypeId type, g->target->param(g->target, &pd); rec = &g->locals[g->nlocals++]; + memset(rec, 0, sizeof *rec); rec->type = ty; rec->name = attrs.name; rec->attrs = attrs; @@ -2898,6 +3286,8 @@ void cfree_cg_addr_offset(CfreeCg *g, int64_t byte_offset, if (!rty) return; v = api_pop(g); + if (v.source_local != CFREE_CG_LOCAL_NONE) + api_local_const_clear(api_local_from_handle(g, v.source_local)); api_ensure_reg(g, &v); if (v.op.kind == OPK_GLOBAL) { result = api_op_global(v.op.v.global.sym, @@ -2942,11 +3332,19 @@ void cfree_cg_load(CfreeCg *g, CfreeCgMemAccess access) { Operand dst; if (!g) return; + if (access.flags & CFREE_CG_MEM_VOLATILE) + api_local_const_clear_all(g); v = api_pop(g); if (!api_is_lvalue_sv(&v)) { api_push(g, v); return; } + if (v.source_local != CFREE_CG_LOCAL_NONE && + api_local_const_load(g, v.source_local, access, &dst)) { + 
api_release(g, &v); + api_push(g, api_make_sv(dst, dst.type)); + return; + } api_ensure_reg(g, &v); ty = resolve_type(g->c, access.type); if (!ty) @@ -2994,8 +3392,11 @@ void cfree_cg_addr(CfreeCg *g) { ApiSourceLocal *rec; if (!g) return; + api_local_const_clear_all(g); T = g->target; v = api_pop(g); + if (v.source_local != CFREE_CG_LOCAL_NONE) + api_local_const_clear(api_local_from_handle(g, v.source_local)); api_ensure_reg(g, &v); if (!api_is_lvalue_sv(&v)) { compiler_panic(g->c, g->cur_loc, "CfreeCg: addr operand is not an lvalue"); @@ -3022,6 +3423,8 @@ void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) { Operand src; if (!g) return; + if (access.flags & CFREE_CG_MEM_VOLATILE) + api_local_const_clear_all(g); T = g->target; rv = api_pop(g); lv = api_pop(g); @@ -3040,6 +3443,16 @@ void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) { } else { src = api_force_reg(g, &rv, api_sv_type(&rv)); } + if (lv.source_local != CFREE_CG_LOCAL_NONE) { + if (src.kind == OPK_IMM) { + api_local_const_store(g, lv.source_local, access, src.v.imm); + } else { + api_local_const_clear(api_local_from_handle(g, lv.source_local)); + } + } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_clear_all(g); + } if (lv.source_local != CFREE_CG_LOCAL_NONE && lv.op.kind == OPK_REG) { Operand dst = lv.op; dst.type = ty; @@ -3133,6 +3546,7 @@ static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) { Operand ra, rb; Reg rr; Operand dst; + ApiSValue folded_sv; i64 folded; if (!g) return; @@ -3149,8 +3563,41 @@ static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) { return; } + if (api_can_delay_int_arith(g, ty, flags) && + api_try_fold_arith_chain(g, iop, ty, &a, &b, &folded_sv)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, folded_sv); + return; + } + ra = api_force_reg_unless_imm(g, &a, ty); rb = api_force_reg_unless_imm(g, &b, ty); + + if (api_can_delay_int_arith(g, ty, flags) && + 
api_try_collapse_binop_identity(g, iop, ty, &a, &b, &folded_sv)) { + api_release(g, &a); + api_release(g, &b); + api_push(g, folded_sv); + return; + } + + if (api_can_delay_int_arith(g, ty, flags) && + (ra.kind == OPK_REG || rb.kind == OPK_REG) && + (ra.kind == OPK_REG || ra.kind == OPK_IMM) && + (rb.kind == OPK_REG || rb.kind == OPK_IMM)) { + int a_owned = api_sv_owns_operand_reg(&a, &ra); + int b_owned = api_sv_owns_operand_reg(&b, &rb); + api_push(g, api_make_arith_binop(iop, ra, rb, ty, a_owned, b_owned)); + if (a_owned) + a.res = RES_INHERENT; + if (b_owned) + b.res = RES_INHERENT; + api_release(g, &a); + api_release(g, &b); + return; + } + rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); dst = api_op_reg(rr, ty); T->binop(T, iop, dst, ra, rb); @@ -3166,6 +3613,7 @@ static void api_cg_unop(CfreeCg *g, UnOp iop, u32 flags) { Operand ra; Reg rr; Operand dst; + ApiSValue folded_sv; i64 folded; if (!g) return; @@ -3180,7 +3628,22 @@ static void api_cg_unop(CfreeCg *g, UnOp iop, u32 flags) { return; } + if (api_can_delay_int_arith(g, ty, flags) && + api_try_fold_unary_chain(&a, iop, ty, &folded_sv)) { + api_release(g, &a); + api_push(g, folded_sv); + return; + } + ra = api_force_reg_unless_imm(g, &a, ty); + if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_REG) { + int a_owned = api_sv_owns_operand_reg(&a, &ra); + api_push(g, api_make_arith_unop(iop, ra, ty, a_owned)); + if (a_owned) + a.res = RES_INHERENT; + api_release(g, &a); + return; + } rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); dst = api_op_reg(rr, ty); T->unop(T, iop, dst, ra); @@ -3557,6 +4020,7 @@ void cfree_cg_atomic_load(CfreeCg *g, CfreeCgMemAccess access, Reg rr; if (!g) return; + api_local_const_clear_all(g); ptr = api_pop(g); pty = api_sv_type(&ptr); val_ty = resolve_type(g->c, access.type); @@ -3578,6 +4042,7 @@ void cfree_cg_atomic_store(CfreeCg *g, CfreeCgMemAccess access, Operand addr, src; if (!g) return; + api_local_const_clear_all(g); val = api_pop(g); ptr = 
api_pop(g); pty = api_sv_type(&ptr); @@ -3602,6 +4067,7 @@ void cfree_cg_atomic_rmw(CfreeCg *g, CfreeCgMemAccess access, Reg rr; if (!g) return; + api_local_const_clear_all(g); val = api_pop(g); ptr = api_pop(g); pty = api_sv_type(&ptr); @@ -3609,9 +4075,7 @@ void cfree_cg_atomic_rmw(CfreeCg *g, CfreeCgMemAccess access, if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_rmw"); addr = api_force_reg(g, &ptr, pty); - vop = api_sv_op_is_reg_or_imm(&val) - ? val.op - : api_force_reg(g, &val, val_ty); + vop = api_sv_op_is_reg_or_imm(&val) ? val.op : api_force_reg(g, &val, val_ty); rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); dst = api_op_reg(rr, val_ty); g->target->atomic_rmw(g->target, api_map_atomic_op(op), dst, addr, vop, @@ -3631,6 +4095,7 @@ void cfree_cg_atomic_cmpxchg(CfreeCg *g, CfreeCgMemAccess access, Reg pr, kr; if (!g) return; + api_local_const_clear_all(g); (void)weak; desired = api_pop(g); expected = api_pop(g); @@ -3664,6 +4129,7 @@ void cfree_cg_atomic_cmpxchg(CfreeCg *g, CfreeCgMemAccess access, void cfree_cg_atomic_fence(CfreeCg *g, CfreeCgMemOrder order) { if (!g) return; + api_local_const_clear_all(g); g->target->fence(g->target, api_map_mem_order(order)); } @@ -3748,6 +4214,7 @@ void cfree_cg_inline_asm(CfreeCg *g, CfreeCgInlineAsm asm_block) { (void)asm_block.clobber_abi_sets; if (!g) return; + api_local_const_clear_all(g); T = g->target; h = g->c->env->heap; fallback_ty = builtin_id(CFREE_CG_BUILTIN_I64); @@ -3826,8 +4293,7 @@ void cfree_cg_inline_asm(CfreeCg *g, CfreeCgInlineAsm asm_block) { if (nclobbers) { clobs = (Sym *)h->alloc(h, sizeof(*clobs) * nclobbers, _Alignof(Sym)); - for (u32 i = 0; i < nclobbers; ++i) - clobs[i] = (Sym)clobbers[i]; + for (u32 i = 0; i < nclobbers; ++i) clobs[i] = (Sym)clobbers[i]; } for (u32 i = 0; i < noutputs; ++i) { @@ -3979,8 +4445,7 @@ void cfree_cg_inline_asm(CfreeCg *g, CfreeCgInlineAsm asm_block) { T->asm_block(T, tmpl_str, outs, noutputs, out_ops, ins, total_inputs, in_ops, 
clobs, nclobbers); - for (u32 i = 0; i < total_inputs; ++i) - api_release(g, &in_svs[i]); + for (u32 i = 0; i < total_inputs; ++i) api_release(g, &in_svs[i]); for (u32 i = 0; i < noutputs; ++i) { CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; ApiSValue sv = api_make_sv(out_ops[i], oty); @@ -4018,12 +4483,14 @@ CfreeCgLabel cfree_cg_label_new(CfreeCg *g) { void cfree_cg_label_place(CfreeCg *g, CfreeCgLabel label) { if (!g) return; + api_local_const_clear_all(g); g->target->label_place(g->target, (Label)label); } void cfree_cg_jump(CfreeCg *g, CfreeCgLabel label) { if (!g) return; + api_local_const_clear_all(g); g->target->jump(g->target, (Label)label); } @@ -4033,6 +4500,7 @@ static void api_branch_if(CfreeCg *g, ApiSValue *v, int branch_when_true, CfreeCgTypeId ty; if (!g) return; + api_local_const_clear_all(g); T = g->target; ty = v->type ? v->type : builtin_id(CFREE_CG_BUILTIN_I32); if (v->op.kind == OPK_IMM && v->kind == SV_OPERAND) { @@ -4079,6 +4547,7 @@ void cfree_cg_switch(CfreeCg *g, CfreeCgSwitch sw) { return; if (g->sp == 0) return; + api_local_const_clear_all(g); selector = api_pop(g); ty = resolve_type(g->c, sw.selector_type); if (!ty) @@ -4112,6 +4581,7 @@ void cfree_cg_computed_goto(CfreeCg *g, const CfreeCgLabel *valid_targets, (void)ntargets; if (!g) return; + api_local_const_clear_all(g); target = api_pop(g); api_release(g, &target); compiler_panic(g->c, g->cur_loc, @@ -4121,6 +4591,7 @@ void cfree_cg_computed_goto(CfreeCg *g, const CfreeCgLabel *valid_targets, void cfree_cg_unreachable(CfreeCg *g) { if (!g) return; + api_local_const_clear_all(g); g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); } @@ -4205,6 +4676,7 @@ CfreeCgScope cfree_cg_scope_begin(CfreeCg *g, CfreeCgTypeId result_type) { return 0; break_lbl = g->target->label_new(g->target); cont_lbl = g->target->label_new(g->target); + api_local_const_clear_all(g); g->target->label_place(g->target, cont_lbl); if (g->nscopes >= API_CG_MAX_SCOPES) { @@ 
-4251,6 +4723,7 @@ void cfree_cg_scope_end(CfreeCg *g, CfreeCgScope scope) { ApiSValue result = api_pop(g); api_scope_store_result(g, s, &result); } + api_local_const_clear_all(g); g->target->label_place(g->target, s->break_lbl); g->target->scope_end(g->target, s->target_scope); api_scope_push_result(g, s); @@ -4266,6 +4739,7 @@ void cfree_cg_break(CfreeCg *g, CfreeCgScope scope) { ApiSValue result = api_pop(g); api_scope_store_result(g, s, &result); } + api_local_const_clear_all(g); g->target->jump(g->target, s->break_lbl); } @@ -4284,6 +4758,7 @@ void cfree_cg_break_true(CfreeCg *g, CfreeCgScope scope) { if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { if (cond.op.v.imm != 0) { api_scope_store_result(g, s, &result); + api_local_const_clear_all(g); g->target->jump(g->target, s->break_lbl); } else { api_release(g, &result); @@ -4293,7 +4768,9 @@ void cfree_cg_break_true(CfreeCg *g, CfreeCgScope scope) { Label skip = g->target->label_new(g->target); api_branch_if(g, &cond, 0, skip); api_scope_store_result(g, s, &result); + api_local_const_clear_all(g); g->target->jump(g->target, s->break_lbl); + api_local_const_clear_all(g); g->target->label_place(g->target, skip); } } else { @@ -4316,6 +4793,7 @@ void cfree_cg_break_false(CfreeCg *g, CfreeCgScope scope) { if (cond.kind == SV_OPERAND && cond.op.kind == OPK_IMM) { if (cond.op.v.imm == 0) { api_scope_store_result(g, s, &result); + api_local_const_clear_all(g); g->target->jump(g->target, s->break_lbl); } else { api_release(g, &result); @@ -4325,7 +4803,9 @@ void cfree_cg_break_false(CfreeCg *g, CfreeCgScope scope) { Label skip = g->target->label_new(g->target); api_branch_if(g, &cond, 1, skip); api_scope_store_result(g, s, &result); + api_local_const_clear_all(g); g->target->jump(g->target, s->break_lbl); + api_local_const_clear_all(g); g->target->label_place(g->target, skip); } } else { @@ -4337,6 +4817,7 @@ void cfree_cg_continue(CfreeCg *g, CfreeCgScope scope) { ApiCgScope *s = api_scope_from_handle(g, 
scope, 0, "CfreeCg: continue"); if (!s) return; + api_local_const_clear_all(g); g->target->jump(g->target, s->continue_lbl); } @@ -4468,6 +4949,7 @@ void cfree_cg_memcpy(CfreeCg *g, uint64_t size, CfreeCgMemAccess dst_access, Operand dst_op, src_op; if (!g) return; + api_local_const_clear_all(g); (void)src_access; if (size > UINT32_MAX) { compiler_panic(g->c, g->cur_loc, "CfreeCg: memcpy size exceeds CGTarget"); @@ -4492,6 +4974,7 @@ void cfree_cg_memmove(CfreeCg *g, uint64_t size, CfreeCgMemAccess dst_access, Operand args[3]; if (!g) return; + api_local_const_clear_all(g); (void)dst_access; (void)src_access; if (size > INT64_MAX) { @@ -4516,6 +4999,7 @@ void cfree_cg_memset(CfreeCg *g, uint8_t val, uint64_t size, Operand dst_op, byte_val; if (!g) return; + api_local_const_clear_all(g); if (size > UINT32_MAX) { compiler_panic(g->c, g->cur_loc, "CfreeCg: memset size exceeds CGTarget"); return; @@ -4684,6 +5168,7 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, int tail; if (!g) return; + api_local_const_clear_all(g); tail = attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; T = g->target; @@ -4804,6 +5289,7 @@ static void api_cg_tail_call(CfreeCg *g, uint32_t nargs, ApiSValue callee; if (!g) return; + api_local_const_clear_all(g); T = g->target; fty = resolve_type(g->c, fn_type); if (!fty) @@ -4862,6 +5348,7 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, Operand callee_op; if (!g) return; + api_local_const_clear_all(g); int tail = attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; T = g->target; diff --git a/test/api/cg_type_test.c b/test/api/cg_type_test.c @@ -353,12 +353,593 @@ static void exercise_cg_literal_folds(CfreeCompiler* c, CfreeCgTypeId i32_ty) { obj_free((ObjBuilder*)ob); } +static uint32_t cg_emit_delayed_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + 
CfreeCgFuncParam param_desc; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal param; + CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "delayed chain obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "delayed chain cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&param_desc, 0, sizeof param_desc); + param_desc.type = i32_ty; + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.params = &param_desc; + sig.nparams = 1; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "delayed chain decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "p"); + param = cfree_cg_param(cg, 0, i32_ty, attrs); + EXPECT(param != CFREE_CG_LOCAL_NONE, "delayed chain param failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + cfree_cg_push_local(cg, param); + cfree_cg_load(cg, mem); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static uint32_t cg_emit_unary_chain(CfreeCompiler* c, CfreeCgTypeId i32_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + 
CfreeCgFuncParam param_desc; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal param; + CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "unary chain obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "unary chain cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&param_desc, 0, sizeof param_desc); + param_desc.type = i32_ty; + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.params = &param_desc; + sig.nparams = 1; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "unary chain decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "p"); + param = cfree_cg_param(cg, 0, i32_ty, attrs); + EXPECT(param != CFREE_CG_LOCAL_NONE, "unary chain param failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + cfree_cg_push_local(cg, param); + cfree_cg_load(cg, mem); + cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0); + cfree_cg_int_unop(cg, CFREE_CG_INT_BNOT, 0); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static uint32_t cg_emit_local_shadow(CfreeCompiler* c, CfreeCgTypeId i32_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; 
+ CfreeCgLocal local; + CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "local shadow obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "local shadow cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "local shadow decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "x"); + local = cfree_cg_local(cg, i32_ty, attrs); + EXPECT(local != CFREE_CG_LOCAL_NONE, "local shadow local failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + cfree_cg_push_local(cg, local); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_store(cg, mem); + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, mem); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static uint32_t cg_emit_delayed_cmp(CfreeCompiler* c, CfreeCgTypeId i32_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + CfreeCgFuncParam param_desc; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal param; + CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 
1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "delayed cmp obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "delayed cmp cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&param_desc, 0, sizeof param_desc); + param_desc.type = i32_ty; + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.params = &param_desc; + sig.nparams = 1; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "delayed cmp decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "p"); + param = cfree_cg_param(cg, 0, i32_ty, attrs); + EXPECT(param != CFREE_CG_LOCAL_NONE, "delayed cmp param failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + cfree_cg_push_local(cg, param); + cfree_cg_load(cg, mem); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_push_int(cg, 42, i32_ty); + cfree_cg_int_cmp(cg, CFREE_CG_INT_EQ); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static uint32_t cg_emit_delayed_store(CfreeCompiler* c, CfreeCgTypeId i32_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + CfreeCgFuncParam param_desc; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal param; + CfreeCgLocal local; + 
CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "delayed store obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "delayed store cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&param_desc, 0, sizeof param_desc); + param_desc.type = i32_ty; + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.params = &param_desc; + sig.nparams = 1; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "delayed store decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "p"); + param = cfree_cg_param(cg, 0, i32_ty, attrs); + attrs.name = cfree_sym_intern(c, "x"); + local = cfree_cg_local(cg, i32_ty, attrs); + EXPECT(param != CFREE_CG_LOCAL_NONE, "delayed store param failed"); + EXPECT(local != CFREE_CG_LOCAL_NONE, "delayed store local failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + cfree_cg_push_local(cg, local); + cfree_cg_push_local(cg, param); + cfree_cg_load(cg, mem); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_store(cg, mem); + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, mem); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +typedef enum CgShadowBoundary { + 
CG_SHADOW_LABEL, + CG_SHADOW_BRANCH, + CG_SHADOW_ADDR, + CG_SHADOW_VOLATILE, + CG_SHADOW_INDIRECT_STORE, +} CgShadowBoundary; + +static uint32_t cg_emit_local_shadow_boundary(CfreeCompiler* c, + CfreeCgTypeId i32_ty, + const char* name, + CgShadowBoundary boundary) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal local; + CfreeCgMemAccess mem; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "local shadow boundary obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "local shadow boundary cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + EXPECT(sym != CFREE_CG_SYM_NONE, "local shadow boundary decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "x"); + local = cfree_cg_local(cg, i32_ty, attrs); + EXPECT(local != CFREE_CG_LOCAL_NONE, "local shadow boundary local failed"); + memset(&mem, 0, sizeof mem); + mem.type = i32_ty; + mem.align = cfree_cg_type_align(c, i32_ty); + + cfree_cg_push_local(cg, local); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_store(cg, mem); + + switch (boundary) { + case CG_SHADOW_LABEL: { + CfreeCgLabel label = cfree_cg_label_new(cg); + cfree_cg_label_place(cg, label); + break; + } + case CG_SHADOW_BRANCH: { + CfreeCgLabel label = cfree_cg_label_new(cg); + 
cfree_cg_push_int(cg, 0, i32_ty); + cfree_cg_branch_true(cg, label); + cfree_cg_label_place(cg, label); + break; + } + case CG_SHADOW_ADDR: + cfree_cg_push_local_addr(cg, local); + cfree_cg_drop(cg); + break; + case CG_SHADOW_VOLATILE: + mem.flags = CFREE_CG_MEM_VOLATILE; + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, mem); + cfree_cg_drop(cg); + mem.flags = 0; + break; + case CG_SHADOW_INDIRECT_STORE: + cfree_cg_push_local_addr(cg, local); + cfree_cg_indirect(cg); + cfree_cg_push_int(cg, 41, i32_ty); + cfree_cg_store(cg, mem); + break; + } + + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, mem); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static uint32_t cg_emit_local_shadow_partial_store(CfreeCompiler* c, + CfreeCgTypeId i32_ty, + CfreeCgTypeId i8_ty, + const char* name) { + CfreeCompileOptions opts; + CfreeObjBuilder* ob; + CfreeCg* cg; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + CfreeCgSym sym; + CfreeCgLocalAttrs attrs; + CfreeCgLocal local; + CfreeCgMemAccess mem_i32; + CfreeCgMemAccess mem_i8; + uint32_t size; + + memset(&opts, 0, sizeof opts); + opts.opt_level = 1; + ob = (CfreeObjBuilder*)obj_new((Compiler*)c); + EXPECT(ob != NULL, "partial shadow obj builder allocation failed"); + if (!ob) return 0; + cg = cfree_cg_new(c, ob, &opts); + EXPECT(cg != NULL, "partial shadow cg allocation failed"); + if (!cg) { + obj_free((ObjBuilder*)ob); + return 0; + } + + memset(&sig, 0, sizeof sig); + sig.ret = i32_ty; + sig.call_conv = CFREE_CG_CC_TARGET_C; + + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = cfree_sym_intern(c, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func(c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + sym = cfree_cg_decl(cg, decl); + 
EXPECT(sym != CFREE_CG_SYM_NONE, "partial shadow decl failed"); + + cfree_cg_func_begin(cg, sym); + memset(&attrs, 0, sizeof attrs); + attrs.name = cfree_sym_intern(c, "x"); + local = cfree_cg_local(cg, i32_ty, attrs); + EXPECT(local != CFREE_CG_LOCAL_NONE, "partial shadow local failed"); + memset(&mem_i32, 0, sizeof mem_i32); + mem_i32.type = i32_ty; + mem_i32.align = cfree_cg_type_align(c, i32_ty); + memset(&mem_i8, 0, sizeof mem_i8); + mem_i8.type = i8_ty; + mem_i8.align = cfree_cg_type_align(c, i8_ty); + + cfree_cg_push_local(cg, local); + cfree_cg_push_int(cg, 40, i32_ty); + cfree_cg_store(cg, mem_i32); + cfree_cg_push_local(cg, local); + cfree_cg_push_int(cg, 7, i8_ty); + cfree_cg_store(cg, mem_i8); + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, mem_i32); + cfree_cg_push_int(cg, 2, i32_ty); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + + cfree_cg_free(cg); + size = text_size((ObjBuilder*)ob); + obj_free((ObjBuilder*)ob); + return size; +} + +static void exercise_cg_constfold_phases(CfreeCompiler* c, + CfreeCgTypeId i32_ty, + CfreeCgTypeId i8_ty) { + uint32_t delayed_size = + cg_emit_delayed_chain(c, i32_ty, "cg_delayed_chain_o1"); + uint32_t unary_size = cg_emit_unary_chain(c, i32_ty, "cg_unary_chain_o1"); + uint32_t local_size = cg_emit_local_shadow(c, i32_ty, "cg_local_shadow_o1"); + uint32_t delayed_cmp_size = + cg_emit_delayed_cmp(c, i32_ty, "cg_delayed_cmp_o1"); + uint32_t delayed_store_size = + cg_emit_delayed_store(c, i32_ty, "cg_delayed_store_o1"); + uint32_t label_size = cg_emit_local_shadow_boundary( + c, i32_ty, "cg_shadow_label_o1", CG_SHADOW_LABEL); + uint32_t branch_size = cg_emit_local_shadow_boundary( + c, i32_ty, "cg_shadow_branch_o1", CG_SHADOW_BRANCH); + uint32_t addr_size = cg_emit_local_shadow_boundary( + c, i32_ty, "cg_shadow_addr_o1", CG_SHADOW_ADDR); + uint32_t volatile_size = cg_emit_local_shadow_boundary( + c, i32_ty, "cg_shadow_volatile_o1", CG_SHADOW_VOLATILE); + uint32_t 
indirect_size = cg_emit_local_shadow_boundary( + c, i32_ty, "cg_shadow_indirect_o1", CG_SHADOW_INDIRECT_STORE); + uint32_t partial_size = cg_emit_local_shadow_partial_store( + c, i32_ty, i8_ty, "cg_shadow_partial_o1"); + + EXPECT(delayed_size <= 52, + "delayed arithmetic chain should materialize as one add, text " + "size=%u", + delayed_size); + EXPECT(unary_size <= 48, + "delayed unary chain should collapse before return, text size=%u", + unary_size); + EXPECT(local_size <= 32, + "local constant shadow should fold x=40; return x+2, text size=%u", + local_size); + EXPECT(delayed_cmp_size <= 60, + "delayed arithmetic consumed by compare should stay compact, text " + "size=%u", + delayed_cmp_size); + EXPECT(delayed_store_size <= 64, + "delayed arithmetic forced by store should stay compact, text size=%u", + delayed_store_size); + EXPECT(label_size > local_size, + "label should clear local shadow, label=%u folded=%u", label_size, + local_size); + EXPECT(branch_size > local_size, + "branch should clear local shadow, branch=%u folded=%u", branch_size, + local_size); + EXPECT(addr_size > local_size, + "address-taking should clear local shadow, addr=%u folded=%u", + addr_size, local_size); + EXPECT(volatile_size > local_size, + "volatile access should clear local shadow, volatile=%u folded=%u", + volatile_size, local_size); + EXPECT(indirect_size > local_size, + "indirect store should clear local shadow, indirect=%u folded=%u", + indirect_size, local_size); + EXPECT(partial_size > local_size, + "partial-width store should clear local shadow, partial=%u folded=%u", + partial_size, local_size); +} + int main(void) { CfreeTarget target; CfreeEnv env; CfreeCompiler* c; CfreeCgBuiltinTypes bi; CfreeCgTypeId void_ty; + CfreeCgTypeId i8_ty; CfreeCgTypeId i32_ty; CfreeCgTypeId i64_ty; CfreeCgTypeId f64_ty; @@ -402,11 +983,13 @@ int main(void) { bi = cfree_cg_builtin_types(c); void_ty = bi.id[CFREE_CG_BUILTIN_VOID]; + i8_ty = bi.id[CFREE_CG_BUILTIN_I8]; i32_ty = 
bi.id[CFREE_CG_BUILTIN_I32]; i64_ty = bi.id[CFREE_CG_BUILTIN_I64]; f64_ty = bi.id[CFREE_CG_BUILTIN_F64]; va_list_ty = bi.id[CFREE_CG_BUILTIN_VARARG_STATE]; EXPECT(void_ty != CFREE_CG_TYPE_NONE, "void builtin id is none"); + EXPECT(i8_ty != CFREE_CG_TYPE_NONE, "i8 builtin id is none"); EXPECT(i32_ty != CFREE_CG_TYPE_NONE, "i32 builtin id is none"); EXPECT(f64_ty != CFREE_CG_TYPE_NONE, "f64 builtin id is none"); EXPECT(va_list_ty != CFREE_CG_TYPE_NONE, "va_list builtin id is none"); @@ -428,8 +1011,7 @@ int main(void) { "ptr pointee mismatch"); EXPECT(cfree_cg_type_array_elem(c, array_i32) == i32_ty, "array elem mismatch"); - EXPECT(cfree_cg_type_array_count(c, array_i32) == 4, - "array count mismatch"); + EXPECT(cfree_cg_type_array_count(c, array_i32) == 4, "array count mismatch"); alias = cfree_cg_type_alias(c, cfree_sym_intern(c, "I"), i32_ty); EXPECT(alias != CFREE_CG_TYPE_NONE && alias != i32_ty, @@ -470,8 +1052,7 @@ int main(void) { EXPECT(rec_ex != CFREE_CG_TYPE_NONE, "record desc type failed"); EXPECT(cfree_cg_type_size(c, rec_ex) == 16, "record desc size mismatch"); EXPECT(cfree_cg_type_align(c, rec_ex) == 16, "record desc align mismatch"); - EXPECT(cfree_cg_type_record_field(c, rec_ex, 1, &field_out, &field_off) == - 0, + EXPECT(cfree_cg_type_record_field(c, rec_ex, 1, &field_out, &field_off) == 0, "record desc field query failed"); EXPECT(field_off == 0, "union field offset mismatch"); @@ -512,6 +1093,7 @@ int main(void) { exercise_cg_late_local_addr(c, i32_ty, 0); exercise_cg_late_local_addr(c, i32_ty, 1); exercise_cg_literal_folds(c, i32_ty); + exercise_cg_constfold_phases(c, i32_ty, i8_ty); cfree_compiler_free(c); return g_fail ? 1 : 0;