kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b25fcf61b2c648bb5c0376b5d6d7d04976239499
parent 679337e910a27991bdf2665e6986afe130c607a0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 18 May 2026 12:22:43 -0700

Support stack args in tail calls

Diffstat:
M src/arch/aa64/ops.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M src/arch/arch.h | 3 +++
M src/arch/rv64/ops.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M src/arch/x64/ops.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M src/opt/opt.c | 23 +++++++++++++++++------
M src/opt/pass_lower.c | 3 ++-
6 files changed, 220 insertions(+), 69 deletions(-)

diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -764,9 +764,62 @@ static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { * Calls * ============================================================ */ +static Operand aa_call_stack_arg_addr(CGTarget* t, u32 stack_offset, + int tail) { + AAImpl* a = impl_of(t); + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_INDIRECT; + addr.cls = RC_INT; + addr.v.ind.base = tail && !a->omit_frame ? 29u : 31u; + addr.v.ind.ofs = (i32)stack_offset; + if (tail && !a->omit_frame) addr.v.ind.ofs += 16; + return addr; +} + +static void aa_check_tail_stack_args(CGTarget* t, u32 stack_size) { + AAImpl* a = impl_of(t); + if (stack_size > a->next_param_stack) { + compiler_panic(t->c, a->loc, + "aarch64 tail call: stack argument area too small"); + } +} + +static u32 aa_call_plan_stack_raw_size(const CGCallPlan* p) { + u32 size = 0; + for (u32 i = 0; i < p->nargs; ++i) { + const CGCallPlanMove* m = &p->args[i]; + if (m->dst_kind == CG_CALL_PLAN_STACK || + m->dst_kind == CG_CALL_PLAN_TAIL_STACK) { + u32 end = m->stack_offset + 8u; + if (end > size) size = end; + } + } + return size; +} + +static void aa_store_stack_reg(CGTarget* t, u32 reg, RegClass cls, + CfreeCgTypeId type, u32 size, + u32 stack_offset, int tail) { + Operand addr = aa_call_stack_arg_addr(t, stack_offset, tail); + Operand src; + MemAccess ma; + memset(&src, 0, sizeof src); + memset(&ma, 0, sizeof ma); + src.kind = OPK_REG; + src.cls = (u8)cls; + src.type = type; + src.v.reg = reg; + addr.type = type; + ma.type = type; + ma.size = size; + ma.align = size ? 
size : 1u; + aa_store(t, addr, src, ma); +} + static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, const CGABIValue* av, u32* next_int, u32* next_fp, - u32* stack_off) { + u32* stack_off, int tail) { AAImpl* a = impl_of(t); ABIArgInfo va_ai; ABIArgPart va_pt; @@ -810,7 +863,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, (int)av->storage.kind); } if (to_stack) { - aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); + aa_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail); *stack_off += 8; } return; @@ -859,7 +912,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, (int)av->storage.kind); } if (to_stack) { - aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off)); + aa_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail); *stack_off += 8; } } else if (pt->cls == ABI_CLASS_FP) { @@ -891,8 +944,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, } else { switch (av->storage.kind) { case OPK_REG: - aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31, - (i32)*stack_off)); + aa_store_stack_reg(t, reg_num(av->storage), RC_FP, av->type, sz, + *stack_off, tail); break; case OPK_INDIRECT: { Operand src; @@ -903,7 +956,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, i32 off; u32 base = addr_base(t, src, &off, AA_TMP0); aa64_emit32(t->mc, aa64_ldur_fp(sidx, AA_FP_TMP0, base, off)); - aa64_emit32(t->mc, aa64_stur_fp(sidx, AA_FP_TMP0, 31, (i32)*stack_off)); + aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz, + *stack_off, tail); break; } default: @@ -1063,11 +1117,12 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { } for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off); + emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off, + (d->flags & CG_CALL_TAIL) != 0); } u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) { + if ((d->flags & 
CG_CALL_TAIL) == 0 && needed > a->max_outgoing) { if (a->known_frame) { compiler_panic(t->c, a->loc, "aarch64 call: known frame outgoing area too small"); @@ -1078,9 +1133,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { if (d->flags & CG_CALL_TAIL) { if (d->abi && d->abi->has_sret) compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported"); - if (needed) - compiler_panic(t->c, a->loc, - "aarch64 tail call: stack arguments unsupported"); + aa_check_tail_stack_args(t, stack_off); aa_tail_branch(t, d->callee); return; } @@ -1166,9 +1219,7 @@ static void aa_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (p->flags & CG_CALL_TAIL) { if (p->has_sret) compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported"); - if (p->stack_arg_size) - compiler_panic(t->c, a->loc, - "aarch64 tail call: stack arguments unsupported"); + aa_check_tail_stack_args(t, aa_call_plan_stack_raw_size(p)); aa_tail_branch(t, p->callee); return; } @@ -1225,12 +1276,9 @@ static void aa_store_call_ret(CGTarget* t, const CGCallPlanRet* r, static void aa_store_call_arg(CGTarget* t, const CGCallPlanMove* m) { Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_INDIRECT; - addr.cls = RC_INT; + addr = aa_call_stack_arg_addr(t, m->stack_offset, + m->dst_kind == CG_CALL_PLAN_TAIL_STACK); addr.type = m->mem.type; - addr.v.ind.base = 31; - addr.v.ind.ofs = (i32)m->stack_offset; if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) { Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type}; diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -422,6 +422,9 @@ typedef struct CGPhysRegInfo { typedef enum CGCallPlanLocKind { CG_CALL_PLAN_REG, CG_CALL_PLAN_STACK, + /* Stack argument for a sibling call. The slot is addressed where the + * caller's stack pointer will be after this frame is restored. 
*/ + CG_CALL_PLAN_TAIL_STACK, CG_CALL_PLAN_IGNORE, } CGCallPlanLocKind; diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -749,8 +749,63 @@ static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { /* ---- calls / return ---- */ +static Operand rv_call_stack_arg_addr(CGTarget* t, u32 stack_offset, + int tail) { + RImpl* a = impl_of(t); + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_INDIRECT; + addr.cls = RC_INT; + addr.v.ind.base = tail && !a->omit_frame ? RV_S0 : RV_SP; + addr.v.ind.ofs = (i32)stack_offset; + if (tail && !a->omit_frame) { + addr.v.ind.ofs += 16 + (a->is_variadic ? 64 : 0); + } + return addr; +} + +static void rv_check_tail_stack_args(CGTarget* t, u32 stack_size) { + RImpl* a = impl_of(t); + if (stack_size > a->next_param_stack) { + compiler_panic(t->c, a->loc, + "rv64 tail call: stack argument area too small"); + } +} + +static u32 rv_call_plan_stack_raw_size(const CGCallPlan* p) { + u32 size = 0; + for (u32 i = 0; i < p->nargs; ++i) { + const CGCallPlanMove* m = &p->args[i]; + if (m->dst_kind == CG_CALL_PLAN_STACK || + m->dst_kind == CG_CALL_PLAN_TAIL_STACK) { + u32 end = m->stack_offset + 8u; + if (end > size) size = end; + } + } + return size; +} + +static void rv_store_stack_reg(CGTarget* t, u32 reg, RegClass cls, + CfreeCgTypeId type, u32 size, + u32 stack_offset, int tail) { + Operand addr = rv_call_stack_arg_addr(t, stack_offset, tail); + Operand src; + MemAccess ma; + memset(&src, 0, sizeof src); + memset(&ma, 0, sizeof ma); + src.kind = OPK_REG; + src.cls = (u8)cls; + src.type = type; + src.v.reg = reg; + addr.type = type; + ma.type = type; + ma.size = size; + ma.align = size ? 
size : 1u; + rv_store(t, addr, src, ma); +} + static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, - u32* next_fp, u32* stack_off) { + u32* next_fp, u32* stack_off, int tail) { RImpl* a = impl_of(t); MCEmitter* mc = t->mc; @@ -804,7 +859,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, (int)av->storage.kind); } if (to_stack) { - rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail); *stack_off += 8; } return; @@ -855,7 +910,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, (int)av->storage.kind); } if (to_stack) { - rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail); *stack_off += 8; } } else if (pt->cls == ABI_CLASS_FP) { @@ -891,8 +946,8 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } else { switch (av->storage.kind) { case OPK_REG: - if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(av->storage), RV_SP, (i32)*stack_off)); - else rv64_emit32(mc, rv_fsw(reg_num(av->storage), RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, reg_num(av->storage), RC_FP, av->type, sz, + *stack_off, tail); break; case OPK_LOCAL: { RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot); @@ -900,10 +955,12 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, i32 off = -(i32)s->off + (i32)pt->src_offset; if (sz == 8) { rv64_emit32(mc, rv_fld(/*ft0=*/0u, RV_S0, off)); - rv64_emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz, + *stack_off, tail); } else { rv64_emit32(mc, rv_flw(/*ft0=*/0u, RV_S0, off)); - rv64_emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz, + *stack_off, tail); } break; } @@ -914,10 +971,12 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, 
u32* next_int, i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; if (sz == 8) { rv64_emit32(mc, rv_fld(/*ft0=*/0u, base, off)); - rv64_emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz, + *stack_off, tail); } else { rv64_emit32(mc, rv_flw(/*ft0=*/0u, base, off)); - rv64_emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz, + *stack_off, tail); } break; } @@ -1092,10 +1151,11 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) { } for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); + emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off, + (d->flags & CG_CALL_TAIL) != 0); } u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) { + if ((d->flags & CG_CALL_TAIL) == 0 && needed > a->max_outgoing) { if (a->known_frame) compiler_panic(t->c, a->loc, "rv64 call: known frame outgoing area too small"); @@ -1105,9 +1165,7 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) { if (d->flags & CG_CALL_TAIL) { if (d->abi && d->abi->has_sret) compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported"); - if (needed) - compiler_panic(t->c, a->loc, - "rv64 tail call: stack arguments unsupported"); + rv_check_tail_stack_args(t, stack_off); rv_tail_branch(t, d->callee); return; } @@ -1184,9 +1242,7 @@ static void rv_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (p->flags & CG_CALL_TAIL) { if (p->has_sret) compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported"); - if (p->stack_arg_size) - compiler_panic(t->c, a->loc, - "rv64 tail call: stack arguments unsupported"); + rv_check_tail_stack_args(t, rv_call_plan_stack_raw_size(p)); rv_tail_branch(t, p->callee); return; } @@ -1245,12 +1301,9 @@ static void rv_store_call_ret(CGTarget* t, const CGCallPlanRet* r, static void rv_store_call_arg(CGTarget* t, const CGCallPlanMove* m) { Operand addr; - memset(&addr, 
0, sizeof addr); - addr.kind = OPK_INDIRECT; - addr.cls = RC_INT; + addr = rv_call_stack_arg_addr(t, m->stack_offset, + m->dst_kind == CG_CALL_PLAN_TAIL_STACK); addr.type = m->mem.type; - addr.v.ind.base = RV_SP; - addr.v.ind.ofs = (i32)m->stack_offset; if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) { Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type}; diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -700,8 +700,42 @@ static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { /* ============================================================ * Calls / return */ +static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset, + int tail) { + XImpl* a = impl_of(t); + Operand addr; + memset(&addr, 0, sizeof addr); + addr.kind = OPK_INDIRECT; + addr.cls = RC_INT; + addr.v.ind.base = tail && !a->omit_frame ? X64_RBP : X64_RSP; + addr.v.ind.ofs = (i32)stack_offset + (tail ? 8 : 0); + if (tail && !a->omit_frame) addr.v.ind.ofs = 16 + (i32)stack_offset; + return addr; +} + +static void x_check_tail_stack_args(CGTarget* t, u32 stack_size) { + XImpl* a = impl_of(t); + if (stack_size > a->next_param_stack) { + compiler_panic(t->c, a->loc, + "x64 tail call: stack argument area too small"); + } +} + +static u32 x_call_plan_stack_raw_size(const CGCallPlan* p) { + u32 size = 0; + for (u32 i = 0; i < p->nargs; ++i) { + const CGCallPlanMove* m = &p->args[i]; + if (m->dst_kind == CG_CALL_PLAN_STACK || + m->dst_kind == CG_CALL_PLAN_TAIL_STACK) { + u32 end = m->stack_offset + 8u; + if (end > size) size = end; + } + } + return size; +} + static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, - u32* next_fp, u32* stack_off) { + u32* next_fp, u32* stack_off, int tail) { XImpl* a = impl_of(t); /* Synthesize one-part DIRECT for variadic args (av->abi NULL). 
*/ ABIArgInfo va_ai; @@ -743,7 +777,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, (int)av->storage.kind); } if (to_stack) { - emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); + Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); + emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu, + addr.v.ind.ofs); *stack_off += 8; } return; @@ -788,7 +824,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, (int)av->storage.kind); } if (to_stack) { - emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off); + Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); + emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu, + addr.v.ind.ofs); *stack_off += 8; } } else if (pt->cls == ABI_CLASS_FP) { @@ -815,23 +853,26 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } } else { if (av->storage.kind == OPK_REG) { + Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu, - X64_RSP, (i32)*stack_off); + addr.v.ind.base & 0xFu, addr.v.ind.ofs); } else if (av->storage.kind == OPK_LOCAL) { + Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); XSlot* s = x64_slot_get(a, av->storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "x64 call: bad FP arg slot"); emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, X64_RBP, -(i32)s->off + (i32)pt->src_offset); - emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP, - (i32)*stack_off); + emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, + addr.v.ind.base & 0xFu, addr.v.ind.ofs); } else if (av->storage.kind == OPK_INDIRECT) { + Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); /* Load through xmm15 (scratch — last in g_fp_order so cg won't * have it live mid-call) then store. 
*/ emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, av->storage.v.ind.base & 0xFu, av->storage.v.ind.ofs + (i32)pt->src_offset); - emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP, - (i32)*stack_off); + emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, + addr.v.ind.base & 0xFu, addr.v.ind.ofs); } else { compiler_panic(t->c, a->loc, "x64 call: FP stack-arg storage kind %d unsupported", @@ -967,10 +1008,11 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { next_int = 1; } for (u32 i = 0; i < d->nargs; ++i) { - emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off); + emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off, + (d->flags & CG_CALL_TAIL) != 0); } u32 needed = (stack_off + 15u) & ~15u; - if (needed > a->max_outgoing) { + if ((d->flags & CG_CALL_TAIL) == 0 && needed > a->max_outgoing) { if (a->known_frame) compiler_panic(t->c, a->loc, "x64 call: known frame outgoing area too small"); @@ -985,9 +1027,7 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { if (d->flags & CG_CALL_TAIL) { if (d->abi && d->abi->has_sret) compiler_panic(t->c, a->loc, "x64 tail call: sret unsupported"); - if (needed) - compiler_panic(t->c, a->loc, - "x64 tail call: stack arguments unsupported"); + x_check_tail_stack_args(t, stack_off); x_tail_branch(t, d->callee); return; } @@ -1081,9 +1121,7 @@ static void x_emit_call_plan(CGTarget* t, const CGCallPlan* p) { if (p->has_sret) compiler_panic(t->c, impl_of(t)->loc, "x64 tail call: sret unsupported"); - if (p->stack_arg_size) - compiler_panic(t->c, impl_of(t)->loc, - "x64 tail call: stack arguments unsupported"); + x_check_tail_stack_args(t, x_call_plan_stack_raw_size(p)); x_tail_branch(t, p->callee); return; } @@ -1166,12 +1204,9 @@ static void x_store_call_ret(CGTarget* t, const CGCallPlanRet* r, static void x_store_call_arg(CGTarget* t, const CGCallPlanMove* m) { Operand addr; - memset(&addr, 0, sizeof addr); - addr.kind = OPK_INDIRECT; - addr.cls = RC_INT; + addr = x_call_stack_arg_addr(t, 
m->stack_offset, + m->dst_kind == CG_CALL_PLAN_TAIL_STACK); addr.type = m->mem.type; - addr.v.ind.base = X64_RSP; - addr.v.ind.ofs = (i32)m->stack_offset; if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) { Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type}; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1359,12 +1359,13 @@ static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) { Operand dst = move->dst; Operand src = move->src; MemAccess mem = move->mem; - if (move->dst_kind == CG_CALL_PLAN_STACK) { + if (move->dst_kind == CG_CALL_PLAN_STACK || + move->dst_kind == CG_CALL_PLAN_TAIL_STACK) { CGCallPlanMove m; memset(&m, 0, sizeof m); m.src = src; m.src_kind = move->src_kind; - m.dst_kind = CG_CALL_PLAN_STACK; + m.dst_kind = move->dst_kind; m.cls = dst.cls; m.src_offset = move->src_offset; m.stack_offset = move->stack_offset; @@ -1481,7 +1482,9 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves, static int replay_plan_supported(CGTarget* w, const CGCallPlan* p) { if (!p) return 0; for (u32 i = 0; i < p->nargs; ++i) { - if (p->args[i].dst_kind == CG_CALL_PLAN_STACK && !w->store_call_arg) + if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK || + p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && + !w->store_call_arg) return 0; if (p->args[i].dst_kind == CG_CALL_PLAN_REG && (p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR || @@ -1521,6 +1524,10 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) { for (u32 i = 0; i < src_plan->nargs; ++i) { plan.args[i] = src_plan->args[i]; plan.args[i].src = xlat_op(r, src_plan->args[i].src); + if ((src_plan->flags & CG_CALL_TAIL) && + plan.args[i].dst_kind == CG_CALL_PLAN_STACK) { + plan.args[i].dst_kind = CG_CALL_PLAN_TAIL_STACK; + } Operand dst; if (plan.args[i].dst_kind == CG_CALL_PLAN_REG) { dst = phys_reg_operand(plan.args[i].dst_reg, @@ -2057,10 +2064,14 @@ static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) { if ((IROp)in->op == 
IR_ALLOCA) { out->has_alloca = 1; } else if ((IROp)in->op == IR_CALL) { - out->has_call = 1; - if (!w->call_stack_size) continue; IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (!aux) continue; + if (!aux) { + out->has_call = 1; + continue; + } + if ((aux->desc.flags & CG_CALL_TAIL) == 0) out->has_call = 1; + if ((aux->desc.flags & CG_CALL_TAIL) != 0) continue; + if (!w->call_stack_size) continue; u32 need = w->call_stack_size(w, &aux->desc); if (need > out->max_outgoing) out->max_outgoing = need; } diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -566,7 +566,8 @@ static int call_plan_replay_supported(const IRCallAux* aux, const CGTarget* target) { if (!aux || !aux->plan_valid || !target || !target->emit_call_plan) return 0; for (u32 i = 0; i < aux->plan.nargs; ++i) { - if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK && + if ((aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK || + aux->plan.args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) && !target->store_call_arg) return 0; if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG &&