commit b25fcf61b2c648bb5c0376b5d6d7d04976239499
parent 679337e910a27991bdf2665e6986afe130c607a0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 18 May 2026 12:22:43 -0700
Support stack args in tail calls
Diffstat:
6 files changed, 220 insertions(+), 69 deletions(-)
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -764,9 +764,62 @@ static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
* Calls
* ============================================================ */
+/* Build the OPK_INDIRECT operand that addresses the stack-argument slot at
+ * `stack_offset` for a call.
+ *
+ * Ordinary calls (tail == 0), and tail calls made while omit_frame is set,
+ * use base register 31 with `stack_offset` unchanged. A tail call from a
+ * function that keeps its frame uses base register 29 and adds 16 to the
+ * offset.
+ * NOTE(review): 29/31 look like x29 (frame pointer) and sp, and the 16 bytes
+ * like the saved x29/x30 pair between x29 and the incoming arguments --
+ * confirm against the prologue, which is not visible here.
+ *
+ * The returned operand has cls RC_INT and a zeroed `type`; the caller is
+ * expected to fill in the type. */
+static Operand aa_call_stack_arg_addr(CGTarget* t, u32 stack_offset,
+ int tail) {
+ AAImpl* a = impl_of(t);
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_INDIRECT;
+ addr.cls = RC_INT;
+ addr.v.ind.base = tail && !a->omit_frame ? 29u : 31u;
+ addr.v.ind.ofs = (i32)stack_offset;
+ if (tail && !a->omit_frame) addr.v.ind.ofs += 16;
+ return addr;
+}
+
+/* Report a compiler_panic when a tail call needs `stack_size` bytes of stack
+ * arguments but that exceeds a->next_param_stack.
+ * NOTE(review): next_param_stack is presumably the size of the current
+ * function's own incoming stack-argument area, which the tail call reuses --
+ * confirm where that field is maintained. */
+static void aa_check_tail_stack_args(CGTarget* t, u32 stack_size) {
+ AAImpl* a = impl_of(t);
+ if (stack_size > a->next_param_stack) {
+ compiler_panic(t->c, a->loc,
+ "aarch64 tail call: stack argument area too small");
+ }
+}
+
+/* Return the unrounded number of stack-argument bytes a call plan touches:
+ * the largest `stack_offset + 8` over all moves whose destination is
+ * CG_CALL_PLAN_STACK or CG_CALL_PLAN_TAIL_STACK, or 0 when there are none.
+ * NOTE(review): every stack move is counted as exactly 8 bytes regardless of
+ * its access size; this assumes plan moves never write more than 8 bytes per
+ * slot -- confirm against the plan builder. */
+static u32 aa_call_plan_stack_raw_size(const CGCallPlan* p) {
+ u32 size = 0;
+ for (u32 i = 0; i < p->nargs; ++i) {
+ const CGCallPlanMove* m = &p->args[i];
+ if (m->dst_kind == CG_CALL_PLAN_STACK ||
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
+ u32 end = m->stack_offset + 8u;
+ if (end > size) size = end;
+ }
+ }
+ return size;
+}
+
+/* Store register `reg` (register class `cls`, value type `type`) into the
+ * stack-argument slot at `stack_offset`, going through aa_store.
+ *
+ * The slot address comes from aa_call_stack_arg_addr, so `tail` selects
+ * between the ordinary and the tail-call addressing. `size` is the access
+ * size in bytes and is also used as the access alignment, with 1 substituted
+ * when `size` is 0. */
+static void aa_store_stack_reg(CGTarget* t, u32 reg, RegClass cls,
+ CfreeCgTypeId type, u32 size,
+ u32 stack_offset, int tail) {
+ Operand addr = aa_call_stack_arg_addr(t, stack_offset, tail);
+ Operand src;
+ MemAccess ma;
+ memset(&src, 0, sizeof src);
+ memset(&ma, 0, sizeof ma);
+ src.kind = OPK_REG;
+ src.cls = (u8)cls;
+ src.type = type;
+ src.v.reg = reg;
+ addr.type = type;
+ ma.type = type;
+ ma.size = size;
+ ma.align = size ? size : 1u;
+ aa_store(t, addr, src, ma);
+}
+
static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
const CGABIValue* av, u32* next_int, u32* next_fp,
- u32* stack_off) {
+ u32* stack_off, int tail) {
AAImpl* a = impl_of(t);
ABIArgInfo va_ai;
ABIArgPart va_pt;
@@ -810,7 +863,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
(int)av->storage.kind);
}
if (to_stack) {
- aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
+ aa_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail);
*stack_off += 8;
}
return;
@@ -859,7 +912,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
(int)av->storage.kind);
}
if (to_stack) {
- aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
+ aa_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail);
*stack_off += 8;
}
} else if (pt->cls == ABI_CLASS_FP) {
@@ -891,8 +944,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
} else {
switch (av->storage.kind) {
case OPK_REG:
- aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31,
- (i32)*stack_off));
+ aa_store_stack_reg(t, reg_num(av->storage), RC_FP, av->type, sz,
+ *stack_off, tail);
break;
case OPK_INDIRECT: {
Operand src;
@@ -903,7 +956,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
i32 off;
u32 base = addr_base(t, src, &off, AA_TMP0);
aa64_emit32(t->mc, aa64_ldur_fp(sidx, AA_FP_TMP0, base, off));
- aa64_emit32(t->mc, aa64_stur_fp(sidx, AA_FP_TMP0, 31, (i32)*stack_off));
+ aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz,
+ *stack_off, tail);
break;
}
default:
@@ -1063,11 +1117,12 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) {
}
for (u32 i = 0; i < d->nargs; ++i) {
- emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
+ emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off,
+ (d->flags & CG_CALL_TAIL) != 0);
}
u32 needed = (stack_off + 15u) & ~15u;
- if (needed > a->max_outgoing) {
+ if ((d->flags & CG_CALL_TAIL) == 0 && needed > a->max_outgoing) {
if (a->known_frame) {
compiler_panic(t->c, a->loc,
"aarch64 call: known frame outgoing area too small");
@@ -1078,9 +1133,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) {
if (d->flags & CG_CALL_TAIL) {
if (d->abi && d->abi->has_sret)
compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported");
- if (needed)
- compiler_panic(t->c, a->loc,
- "aarch64 tail call: stack arguments unsupported");
+ aa_check_tail_stack_args(t, stack_off);
aa_tail_branch(t, d->callee);
return;
}
@@ -1166,9 +1219,7 @@ static void aa_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
if (p->flags & CG_CALL_TAIL) {
if (p->has_sret)
compiler_panic(t->c, a->loc, "aarch64 tail call: sret unsupported");
- if (p->stack_arg_size)
- compiler_panic(t->c, a->loc,
- "aarch64 tail call: stack arguments unsupported");
+ aa_check_tail_stack_args(t, aa_call_plan_stack_raw_size(p));
aa_tail_branch(t, p->callee);
return;
}
@@ -1225,12 +1276,9 @@ static void aa_store_call_ret(CGTarget* t, const CGCallPlanRet* r,
static void aa_store_call_arg(CGTarget* t, const CGCallPlanMove* m) {
Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_INDIRECT;
- addr.cls = RC_INT;
+ addr = aa_call_stack_arg_addr(t, m->stack_offset,
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK);
addr.type = m->mem.type;
- addr.v.ind.base = 31;
- addr.v.ind.ofs = (i32)m->stack_offset;
if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) {
Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type};
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -422,6 +422,9 @@ typedef struct CGPhysRegInfo {
typedef enum CGCallPlanLocKind {
CG_CALL_PLAN_REG,
CG_CALL_PLAN_STACK,
+ /* Stack argument for a sibling (tail) call. The slot is addressed where the
+ * caller's stack pointer will be after this frame is restored, rather than
+ * relative to the current outgoing-argument area. Backends resolve the
+ * address through their *_call_stack_arg_addr helper with `tail` set. */
+ CG_CALL_PLAN_TAIL_STACK,
CG_CALL_PLAN_IGNORE,
} CGCallPlanLocKind;
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -749,8 +749,63 @@ static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
/* ---- calls / return ---- */
+/* Build the OPK_INDIRECT operand that addresses the stack-argument slot at
+ * `stack_offset` for a call.
+ *
+ * Ordinary calls (tail == 0), and tail calls made while omit_frame is set,
+ * are addressed from RV_SP at `stack_offset`. A tail call from a function
+ * that keeps its frame is addressed from RV_S0 at `stack_offset` + 16, plus
+ * a further 64 when the current function is variadic.
+ * NOTE(review): the 16 presumably covers the saved ra/s0 pair and the 64 an
+ * eight-register variadic save area above it -- confirm against the
+ * prologue, which is not visible here.
+ *
+ * The returned operand has cls RC_INT and a zeroed `type`; the caller is
+ * expected to fill in the type. */
+static Operand rv_call_stack_arg_addr(CGTarget* t, u32 stack_offset,
+ int tail) {
+ RImpl* a = impl_of(t);
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_INDIRECT;
+ addr.cls = RC_INT;
+ addr.v.ind.base = tail && !a->omit_frame ? RV_S0 : RV_SP;
+ addr.v.ind.ofs = (i32)stack_offset;
+ if (tail && !a->omit_frame) {
+ addr.v.ind.ofs += 16 + (a->is_variadic ? 64 : 0);
+ }
+ return addr;
+}
+
+/* Report a compiler_panic when a tail call needs `stack_size` bytes of stack
+ * arguments but that exceeds a->next_param_stack.
+ * NOTE(review): next_param_stack is presumably the size of the current
+ * function's own incoming stack-argument area, which the tail call reuses --
+ * confirm where that field is maintained. */
+static void rv_check_tail_stack_args(CGTarget* t, u32 stack_size) {
+ RImpl* a = impl_of(t);
+ if (stack_size > a->next_param_stack) {
+ compiler_panic(t->c, a->loc,
+ "rv64 tail call: stack argument area too small");
+ }
+}
+
+/* Return the unrounded number of stack-argument bytes a call plan touches:
+ * the largest `stack_offset + 8` over all moves whose destination is
+ * CG_CALL_PLAN_STACK or CG_CALL_PLAN_TAIL_STACK, or 0 when there are none.
+ * NOTE(review): every stack move is counted as exactly 8 bytes regardless of
+ * its access size; this assumes plan moves never write more than 8 bytes per
+ * slot -- confirm against the plan builder. */
+static u32 rv_call_plan_stack_raw_size(const CGCallPlan* p) {
+ u32 size = 0;
+ for (u32 i = 0; i < p->nargs; ++i) {
+ const CGCallPlanMove* m = &p->args[i];
+ if (m->dst_kind == CG_CALL_PLAN_STACK ||
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
+ u32 end = m->stack_offset + 8u;
+ if (end > size) size = end;
+ }
+ }
+ return size;
+}
+
+/* Store register `reg` (register class `cls`, value type `type`) into the
+ * stack-argument slot at `stack_offset`, going through rv_store.
+ *
+ * The slot address comes from rv_call_stack_arg_addr, so `tail` selects
+ * between the ordinary and the tail-call addressing. `size` is the access
+ * size in bytes and is also used as the access alignment, with 1 substituted
+ * when `size` is 0. */
+static void rv_store_stack_reg(CGTarget* t, u32 reg, RegClass cls,
+ CfreeCgTypeId type, u32 size,
+ u32 stack_offset, int tail) {
+ Operand addr = rv_call_stack_arg_addr(t, stack_offset, tail);
+ Operand src;
+ MemAccess ma;
+ memset(&src, 0, sizeof src);
+ memset(&ma, 0, sizeof ma);
+ src.kind = OPK_REG;
+ src.cls = (u8)cls;
+ src.type = type;
+ src.v.reg = reg;
+ addr.type = type;
+ ma.type = type;
+ ma.size = size;
+ ma.align = size ? size : 1u;
+ rv_store(t, addr, src, ma);
+}
+
static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
- u32* next_fp, u32* stack_off) {
+ u32* next_fp, u32* stack_off, int tail) {
RImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
@@ -804,7 +859,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
(int)av->storage.kind);
}
if (to_stack) {
- rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail);
*stack_off += 8;
}
return;
@@ -855,7 +910,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
(int)av->storage.kind);
}
if (to_stack) {
- rv64_emit32(mc, rv_sd(dst_reg, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, dst_reg, RC_INT, av->type, 8, *stack_off, tail);
*stack_off += 8;
}
} else if (pt->cls == ABI_CLASS_FP) {
@@ -891,8 +946,8 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
} else {
switch (av->storage.kind) {
case OPK_REG:
- if (sz == 8) rv64_emit32(mc, rv_fsd(reg_num(av->storage), RV_SP, (i32)*stack_off));
- else rv64_emit32(mc, rv_fsw(reg_num(av->storage), RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, reg_num(av->storage), RC_FP, av->type, sz,
+ *stack_off, tail);
break;
case OPK_LOCAL: {
RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot);
@@ -900,10 +955,12 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
i32 off = -(i32)s->off + (i32)pt->src_offset;
if (sz == 8) {
rv64_emit32(mc, rv_fld(/*ft0=*/0u, RV_S0, off));
- rv64_emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
+ *stack_off, tail);
} else {
rv64_emit32(mc, rv_flw(/*ft0=*/0u, RV_S0, off));
- rv64_emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
+ *stack_off, tail);
}
break;
}
@@ -914,10 +971,12 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset;
if (sz == 8) {
rv64_emit32(mc, rv_fld(/*ft0=*/0u, base, off));
- rv64_emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
+ *stack_off, tail);
} else {
rv64_emit32(mc, rv_flw(/*ft0=*/0u, base, off));
- rv64_emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off));
+ rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
+ *stack_off, tail);
}
break;
}
@@ -1092,10 +1151,11 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) {
}
for (u32 i = 0; i < d->nargs; ++i) {
- emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+ emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off,
+ (d->flags & CG_CALL_TAIL) != 0);
}
u32 needed = (stack_off + 15u) & ~15u;
- if (needed > a->max_outgoing) {
+ if ((d->flags & CG_CALL_TAIL) == 0 && needed > a->max_outgoing) {
if (a->known_frame)
compiler_panic(t->c, a->loc,
"rv64 call: known frame outgoing area too small");
@@ -1105,9 +1165,7 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) {
if (d->flags & CG_CALL_TAIL) {
if (d->abi && d->abi->has_sret)
compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported");
- if (needed)
- compiler_panic(t->c, a->loc,
- "rv64 tail call: stack arguments unsupported");
+ rv_check_tail_stack_args(t, stack_off);
rv_tail_branch(t, d->callee);
return;
}
@@ -1184,9 +1242,7 @@ static void rv_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
if (p->flags & CG_CALL_TAIL) {
if (p->has_sret)
compiler_panic(t->c, a->loc, "rv64 tail call: sret unsupported");
- if (p->stack_arg_size)
- compiler_panic(t->c, a->loc,
- "rv64 tail call: stack arguments unsupported");
+ rv_check_tail_stack_args(t, rv_call_plan_stack_raw_size(p));
rv_tail_branch(t, p->callee);
return;
}
@@ -1245,12 +1301,9 @@ static void rv_store_call_ret(CGTarget* t, const CGCallPlanRet* r,
static void rv_store_call_arg(CGTarget* t, const CGCallPlanMove* m) {
Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_INDIRECT;
- addr.cls = RC_INT;
+ addr = rv_call_stack_arg_addr(t, m->stack_offset,
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK);
addr.type = m->mem.type;
- addr.v.ind.base = RV_SP;
- addr.v.ind.ofs = (i32)m->stack_offset;
if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) {
Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type};
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -700,8 +700,42 @@ static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
/* ============================================================
* Calls / return */
+/* Build the OPK_INDIRECT operand that addresses the stack-argument slot at
+ * `stack_offset` for a call. The three cases resolve to:
+ *
+ *   ordinary call (tail == 0):       X64_RSP + stack_offset
+ *   tail call with omit_frame set:   X64_RSP + stack_offset + 8
+ *   tail call with a frame kept:     X64_RBP + stack_offset + 16
+ *
+ * NOTE(review): the 8 presumably skips the return address and the 16 the
+ * saved rbp plus return address -- confirm against the prologue, which is
+ * not visible here.
+ *
+ * The returned operand has cls RC_INT and a zeroed `type`; the caller is
+ * expected to fill in the type. */
+static Operand x_call_stack_arg_addr(CGTarget* t, u32 stack_offset,
+ int tail) {
+ XImpl* a = impl_of(t);
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_INDIRECT;
+ addr.cls = RC_INT;
+ addr.v.ind.base = tail && !a->omit_frame ? X64_RBP : X64_RSP;
+ addr.v.ind.ofs = (i32)stack_offset + (tail ? 8 : 0);
+ /* Framed tail call: replaces (does not add to) the +8 computed above. */
+ if (tail && !a->omit_frame) addr.v.ind.ofs = 16 + (i32)stack_offset;
+ return addr;
+}
+
+/* Report a compiler_panic when a tail call needs `stack_size` bytes of stack
+ * arguments but that exceeds a->next_param_stack.
+ * NOTE(review): next_param_stack is presumably the size of the current
+ * function's own incoming stack-argument area, which the tail call reuses --
+ * confirm where that field is maintained. */
+static void x_check_tail_stack_args(CGTarget* t, u32 stack_size) {
+ XImpl* a = impl_of(t);
+ if (stack_size > a->next_param_stack) {
+ compiler_panic(t->c, a->loc,
+ "x64 tail call: stack argument area too small");
+ }
+}
+
+/* Return the unrounded number of stack-argument bytes a call plan touches:
+ * the largest `stack_offset + 8` over all moves whose destination is
+ * CG_CALL_PLAN_STACK or CG_CALL_PLAN_TAIL_STACK, or 0 when there are none.
+ * NOTE(review): every stack move is counted as exactly 8 bytes regardless of
+ * its access size; this assumes plan moves never write more than 8 bytes per
+ * slot -- confirm against the plan builder. */
+static u32 x_call_plan_stack_raw_size(const CGCallPlan* p) {
+ u32 size = 0;
+ for (u32 i = 0; i < p->nargs; ++i) {
+ const CGCallPlanMove* m = &p->args[i];
+ if (m->dst_kind == CG_CALL_PLAN_STACK ||
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
+ u32 end = m->stack_offset + 8u;
+ if (end > size) size = end;
+ }
+ }
+ return size;
+}
+
static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
- u32* next_fp, u32* stack_off) {
+ u32* next_fp, u32* stack_off, int tail) {
XImpl* a = impl_of(t);
/* Synthesize one-part DIRECT for variadic args (av->abi NULL). */
ABIArgInfo va_ai;
@@ -743,7 +777,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
(int)av->storage.kind);
}
if (to_stack) {
- emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+ Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
+ emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu,
+ addr.v.ind.ofs);
*stack_off += 8;
}
return;
@@ -788,7 +824,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
(int)av->storage.kind);
}
if (to_stack) {
- emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+ Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
+ emit_mov_store(t->mc, 8, dst_reg, addr.v.ind.base & 0xFu,
+ addr.v.ind.ofs);
*stack_off += 8;
}
} else if (pt->cls == ABI_CLASS_FP) {
@@ -815,23 +853,26 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
}
} else {
if (av->storage.kind == OPK_REG) {
+ Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu,
- X64_RSP, (i32)*stack_off);
+ addr.v.ind.base & 0xFu, addr.v.ind.ofs);
} else if (av->storage.kind == OPK_LOCAL) {
+ Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
XSlot* s = x64_slot_get(a, av->storage.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "x64 call: bad FP arg slot");
emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, X64_RBP,
-(i32)s->off + (i32)pt->src_offset);
- emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP,
- (i32)*stack_off);
+ emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15,
+ addr.v.ind.base & 0xFu, addr.v.ind.ofs);
} else if (av->storage.kind == OPK_INDIRECT) {
+ Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
/* Load through xmm15 (scratch — last in g_fp_order so cg won't
* have it live mid-call) then store. */
emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15,
av->storage.v.ind.base & 0xFu,
av->storage.v.ind.ofs + (i32)pt->src_offset);
- emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP,
- (i32)*stack_off);
+ emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15,
+ addr.v.ind.base & 0xFu, addr.v.ind.ofs);
} else {
compiler_panic(t->c, a->loc,
"x64 call: FP stack-arg storage kind %d unsupported",
@@ -967,10 +1008,11 @@ static void x_call(CGTarget* t, const CGCallDesc* d) {
next_int = 1;
}
for (u32 i = 0; i < d->nargs; ++i) {
- emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+ emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off,
+ (d->flags & CG_CALL_TAIL) != 0);
}
u32 needed = (stack_off + 15u) & ~15u;
- if (needed > a->max_outgoing) {
+ if ((d->flags & CG_CALL_TAIL) == 0 && needed > a->max_outgoing) {
if (a->known_frame)
compiler_panic(t->c, a->loc,
"x64 call: known frame outgoing area too small");
@@ -985,9 +1027,7 @@ static void x_call(CGTarget* t, const CGCallDesc* d) {
if (d->flags & CG_CALL_TAIL) {
if (d->abi && d->abi->has_sret)
compiler_panic(t->c, a->loc, "x64 tail call: sret unsupported");
- if (needed)
- compiler_panic(t->c, a->loc,
- "x64 tail call: stack arguments unsupported");
+ x_check_tail_stack_args(t, stack_off);
x_tail_branch(t, d->callee);
return;
}
@@ -1081,9 +1121,7 @@ static void x_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
if (p->has_sret)
compiler_panic(t->c, impl_of(t)->loc,
"x64 tail call: sret unsupported");
- if (p->stack_arg_size)
- compiler_panic(t->c, impl_of(t)->loc,
- "x64 tail call: stack arguments unsupported");
+ x_check_tail_stack_args(t, x_call_plan_stack_raw_size(p));
x_tail_branch(t, p->callee);
return;
}
@@ -1166,12 +1204,9 @@ static void x_store_call_ret(CGTarget* t, const CGCallPlanRet* r,
static void x_store_call_arg(CGTarget* t, const CGCallPlanMove* m) {
Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_INDIRECT;
- addr.cls = RC_INT;
+ addr = x_call_stack_arg_addr(t, m->stack_offset,
+ m->dst_kind == CG_CALL_PLAN_TAIL_STACK);
addr.type = m->mem.type;
- addr.v.ind.base = X64_RSP;
- addr.v.ind.ofs = (i32)m->stack_offset;
if (m->src_kind == CG_CALL_PLAN_SRC_ADDR) {
Operand tmp = {.kind = OPK_REG, .cls = RC_INT, .type = m->mem.type};
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1359,12 +1359,13 @@ static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) {
Operand dst = move->dst;
Operand src = move->src;
MemAccess mem = move->mem;
- if (move->dst_kind == CG_CALL_PLAN_STACK) {
+ if (move->dst_kind == CG_CALL_PLAN_STACK ||
+ move->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
CGCallPlanMove m;
memset(&m, 0, sizeof m);
m.src = src;
m.src_kind = move->src_kind;
- m.dst_kind = CG_CALL_PLAN_STACK;
+ m.dst_kind = move->dst_kind;
m.cls = dst.cls;
m.src_offset = move->src_offset;
m.stack_offset = move->stack_offset;
@@ -1481,7 +1482,9 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
static int replay_plan_supported(CGTarget* w, const CGCallPlan* p) {
if (!p) return 0;
for (u32 i = 0; i < p->nargs; ++i) {
- if (p->args[i].dst_kind == CG_CALL_PLAN_STACK && !w->store_call_arg)
+ if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK ||
+ p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) &&
+ !w->store_call_arg)
return 0;
if (p->args[i].dst_kind == CG_CALL_PLAN_REG &&
(p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR ||
@@ -1521,6 +1524,10 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
for (u32 i = 0; i < src_plan->nargs; ++i) {
plan.args[i] = src_plan->args[i];
plan.args[i].src = xlat_op(r, src_plan->args[i].src);
+ if ((src_plan->flags & CG_CALL_TAIL) &&
+ plan.args[i].dst_kind == CG_CALL_PLAN_STACK) {
+ plan.args[i].dst_kind = CG_CALL_PLAN_TAIL_STACK;
+ }
Operand dst;
if (plan.args[i].dst_kind == CG_CALL_PLAN_REG) {
dst = phys_reg_operand(plan.args[i].dst_reg,
@@ -2057,10 +2064,14 @@ static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) {
if ((IROp)in->op == IR_ALLOCA) {
out->has_alloca = 1;
} else if ((IROp)in->op == IR_CALL) {
- out->has_call = 1;
- if (!w->call_stack_size) continue;
IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) continue;
+ if (!aux) {
+ out->has_call = 1;
+ continue;
+ }
+ if ((aux->desc.flags & CG_CALL_TAIL) == 0) out->has_call = 1;
+ if ((aux->desc.flags & CG_CALL_TAIL) != 0) continue;
+ if (!w->call_stack_size) continue;
u32 need = w->call_stack_size(w, &aux->desc);
if (need > out->max_outgoing) out->max_outgoing = need;
}
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -566,7 +566,8 @@ static int call_plan_replay_supported(const IRCallAux* aux,
const CGTarget* target) {
if (!aux || !aux->plan_valid || !target || !target->emit_call_plan) return 0;
for (u32 i = 0; i < aux->plan.nargs; ++i) {
- if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK &&
+ if ((aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK ||
+ aux->plan.args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) &&
!target->store_call_arg)
return 0;
if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG &&