commit 9c1a093280492ba7866eb3cebbb445c716da8ecb
parent 8dd63074edda0ee085f6ebe758e9f0e7a9739594
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 7 May 2026 14:10:48 -0700
stdcoro asymmetric coro_t
Diffstat:
14 files changed, 403 insertions(+), 160 deletions(-)
diff --git a/doc/builtins.md b/doc/builtins.md
@@ -150,22 +150,35 @@ Always:
- Compare: `__eq`, `__ne`, `__lt`, `__le`, `__gt`, `__ge`, `__unord` × `sf2`/`df2`/`tf2`
### Nonlocal jumps + stackful coroutines (per-arch, always shipped)
-The `<setjmp.h>` and `<stdcoro.h>` primitives share one per-target context
-struct: callee-saved GPRs + callee-saved FPRs + sp + return address. The
-`jmp_buf` and `coro_ctx` typedefs are 256-byte aligned-16 storage; the
-runtime reinterprets them as the per-arch struct.
-- `setjmp`, `longjmp` — `<setjmp.h>` (C11 7.13). cfree extension: this
- header is *not* in the C11 freestanding subset.
-- `coro_init`, `coro_switch`, `__cfree_coro_trampoline` — `<stdcoro.h>`
- (cfree-specific). `coro_switch(from, to, value) → uintptr_t` is the
- one universal primitive; `setjmp` = save-and-return-0,
- `longjmp` = restore-and-deliver-val.
-- Implementations live one master `.c` per arch under `lib/coro/`
- (file-scope asm + tiny C `coro_init`). ARM has two: `arm32.c`
- (Thumb-2, ARMv7+, may use VFP `d8-d15`) and `arm32_thumb1.c`
- (ARMv6-M, no IT blocks / no VFP / data-processing limited to
- r0-r7). Not provided for: WASM (would need an Asyncify-fiber
- port).
+`<setjmp.h>` and `<stdcoro.h>` share one per-target context payload
+(256 bytes, 16-byte aligned): callee-saved GPRs + callee-saved FPRs
++ sp + return address. `jmp_buf` and `coro_ctx` are both opaque
+typedefs over that payload; the runtime reinterprets them as the
+per-arch struct.
+
+- `setjmp`, `longjmp` — `<setjmp.h>` (C11 7.13). cfree extension:
+ this header is *not* in the C11 freestanding subset.
+- `coro_init`, `coro_resume`, `coro_yield`, `coro_self` — public
+ asymmetric API in `<stdcoro.h>`. Resume drives a coroutine
+ forward; yield suspends back to the most recent resumer; resumes
+ nest like function calls. Status (`CORO_INIT` / `RUNNING` /
+ `SUSPENDED` / `DEAD`) is tracked on the `coro_t` and propagates
+ through `coro_resume`'s result.
+- `__cfree_coro_switch(from, to, value) -> uintptr_t` — the symmetric
+ primitive. `coro_resume` / `coro_yield` are built on it; setjmp =
+ save+return-0, longjmp = restore+deliver-val. Exposed (with the
+ `__cfree_` prefix to signal "compiler-builtin-style") for
+ schedulers that don't fit the asymmetric resume-chain model.
+- `__cfree_coro_ctx_init`, `__cfree_coro_trampoline` — internal,
+ used only by `lib/coro/coro.c`'s asymmetric layer.
+
+Implementation: one master `.c` per arch under `lib/coro/` (file-scope
+asm + tiny C `__cfree_coro_ctx_init`), plus one arch-agnostic
+`coro/coro.c` for the public asymmetric layer. ARM has two arch
+masters: `arm32.c` (Thumb-2, ARMv7+, may use VFP `d8-d15`) and
+`arm32_thumb1.c` (ARMv6-M, no IT blocks / no VFP / data-processing
+limited to r0-r7). Not provided for: WASM (would need an
+Asyncify-fiber port).
### Atomic fallbacks (only when target lacks native atomics for that width)
- Generic: `__atomic_load`, `__atomic_store`, `__atomic_exchange`, `__atomic_compare_exchange`
diff --git a/include/stdcoro.h b/include/stdcoro.h
@@ -1,25 +1,40 @@
-/* stdcoro.h -- cfree extension -- stackful symmetric coroutines
+/* stdcoro.h -- cfree extension -- stackful asymmetric coroutines
*
* stdcoro.h is non-standard: C11 has no stackful-coroutine facility.
* cfree ships it as a native counterpart to <setjmp.h>: the underlying
- * per-target context struct, save sequence, and restore sequence are
- * literally shared with setjmp/longjmp -- only the entry shapes differ
- * (setjmp = save+return-0; longjmp = restore+return-val; coro_switch =
- * save(from)+restore(to)+deliver-value). Implementations live in
- * libcfree_rt.a -- see doc/builtins.md.
- *
- * Programming model
- * 1. Allocate a coro_ctx and a stack region.
- * 2. coro_init(&ctx, stack_base, stack_len, entry).
- * 3. coro_switch(&caller, &ctx, value) -- delivers `value` to entry's
- * uintptr_t argument on first switch in.
- * 4. Inside the coroutine, coro_switch(&ctx, &caller, value) yields
- * back, with `value` becoming the caller's coro_switch return.
- * 5. entry must NOT return; the trampoline traps if it does.
- *
- * coro_ctx is sized conservatively -- large enough for every cfree
- * target's callee-saved registers + sp + ip + (where applicable)
- * callee-saved FP regs. Layout is internal to the runtime.
+ * per-target context payload is literally shared with setjmp/longjmp
+ * (256 bytes, see doc/builtins.md), and the runtime is target-specific
+ * assembly in libcfree_rt.a.
+ *
+ * Two layers in this header:
+ *
+ * coro_ctx Raw register-context buffer used by the symmetric
+ * primitive __cfree_coro_switch. Most code does not
+ * touch it -- it is exposed for advanced schedulers
+ * (M:N, custom dispatch) that want the bare switch.
+ *
+ * coro_t Asymmetric coroutine handle. Resume drives forward,
+ * yield suspends back to the most recent resumer.
+ * Resumes nest like function calls. status is
+ * publicly readable; the rest is private storage.
+ *
+ * Programming model (asymmetric):
+ * 1. Allocate a coro_t and a stack region.
+ * 2. coro_init(&c, fn, stack_base, stack_len).
+ * 3. coro_resume(&c, value) drives c forward.
+ * 4. From inside fn, coro_yield(value) suspends back to the resumer.
+ * 5. fn's return value becomes the final coro_resume payload, with
+ * status CORO_DEAD; fn may simply return -- the runtime switches back.
+ *
+ * Threading. The runtime's "current coroutine" pointer and "main"
+ * register save slot are _Thread_local, so each thread has its own
+ * resume chain. A coroutine itself is still tied to the thread that
+ * drives it: errno, _Thread_local user state, and thread-affine OS
+ * handles silently rebind if a coroutine is resumed on a different
+ * thread, so don't migrate a suspended coroutine across threads.
+ * cfree's contract defines __STDC_NO_THREADS__ (no <threads.h>) --
+ * _Thread_local is a separate C11 language feature and works
+ * independently.
*/
#ifndef CFREE_STDCORO_H
#define CFREE_STDCORO_H
@@ -32,25 +47,84 @@
but 16 covers it). Caller stacks must be aligned to this. */
#define CORO_STACK_ALIGN 16
-/* 256 bytes is the largest per-target context across cfree's targets
- (x86_64 Windows: 12 GPR slots + xmm6-15). Same byte payload as
- <setjmp.h>'s jmp_buf -- the per-arch runtime reinterprets either
- as the same internal struct. */
+/* Raw register-context buffer. 256 bytes, alignof 16. The runtime
+ reinterprets this as a per-target struct of callee-saved GPRs +
+ callee-saved FPRs + sp + return address. Exposed because the
+ symmetric __cfree_coro_switch primitive at the bottom of this header
+ takes it as an argument type. coro_t below embeds one of these as
+ the first member of its private storage. */
typedef struct coro_ctx {
_Alignas(16) unsigned char __cfree_storage[256];
} coro_ctx;
-typedef void (*coro_entry_fn)(uintptr_t value);
+/* ====================================================================
+ * Asymmetric coroutine API.
+ * ==================================================================== */
+
+typedef enum {
+ CORO_INIT, /* never resumed */
+ CORO_RUNNING, /* on the live resume chain */
+ CORO_SUSPENDED, /* yielded; resumable */
+ CORO_DEAD, /* entry returned */
+} coro_status_t;
+
+typedef struct {
+ uintptr_t value;
+ coro_status_t status;
+} coro_result_t;
+
+/* Coroutine entry point. The first coro_resume's value is passed as
+ `arg`. The return value is delivered as the final coro_resume's
+ payload, with status CORO_DEAD. */
+typedef uintptr_t (*coro_fn)(uintptr_t arg);
-/* Initialize *ctx to begin executing entry(value) on first switch in,
- using the stack region [stack_base, stack_base + stack_len). The
- stack base must be CORO_STACK_ALIGN-aligned. entry must not return. */
-void coro_init(coro_ctx *ctx,
- void *stack_base, size_t stack_len,
- coro_entry_fn entry);
+/* Coroutine handle. status is publicly readable; the private blob
+ carries the register context (256 B), a resumer pointer, and the
+ user-supplied entry fn. 288 B is comfortable headroom on both LP64
+ and ILP32 (lib/coro/coro.c verifies the fit with a _Static_assert). */
+typedef struct coro {
+ coro_status_t status;
+ _Alignas(16) unsigned char __cfree_priv[288];
+} coro_t;
-/* Save callee-saved state into *from, restore it from *to, deliver
- `value` to *to. Returns the value passed by the next switch back. */
-uintptr_t coro_switch(coro_ctx *from, coro_ctx *to, uintptr_t value);
+/* Initialize *c to run fn on [stack_base, stack_base + stack_len).
+ stack_base must be CORO_STACK_ALIGN-aligned. status becomes
+ CORO_INIT. The first coro_resume delivers its value as fn's arg. */
+void coro_init(coro_t *c, coro_fn fn, void *stack_base, size_t stack_len);
+
+/* Drive c forward. If c is INIT, calls fn(value) on c's stack. If
+ SUSPENDED, c's matching coro_yield call returns value. coro_resume
+ itself returns when c yields or its fn returns; the result carries
+ c's new status (SUSPENDED or DEAD) and the value c delivered.
+ UB if c is RUNNING or DEAD. */
+coro_result_t coro_resume(coro_t *c, uintptr_t value);
+
+/* Suspend the current coroutine, returning value to its resumer (the
+ matching coro_resume call returns this value). coro_yield itself
+ returns the value the next resumer passes. UB outside a coroutine. */
+uintptr_t coro_yield(uintptr_t value);
+
+/* The currently running coroutine, or NULL if not in one. */
+coro_t *coro_self(void);
+
+static inline coro_status_t coro_status(const coro_t *c) { return c->status; }
+
+/* ====================================================================
+ * Symmetric primitive (compiler-builtin-style; for advanced schedulers).
+ *
+ * Saves callee-saved state into *from, restores it from *to, and
+ * delivers `value` to *to as the return of its prior switch (or as
+ * the first-arg register of *to's trampoline on a fresh context).
+ * Returns the value passed by the next switch back to *from.
+ *
+ * coro_resume / coro_yield are built on this. Most code should not
+ * call it directly; it is exposed for schedulers that don't fit the
+ * asymmetric resume-chain model (M:N runtimes, work-stealing, etc.).
+ *
+ * Bypassing the asymmetric layer means losing coro_self / status
+ * tracking / DEAD propagation -- the symmetric primitive is purely
+ * a register-shuffle and knows nothing about coro_t.
+ * ==================================================================== */
+uintptr_t __cfree_coro_switch(coro_ctx *from, coro_ctx *to, uintptr_t value);
#endif
diff --git a/lib/README.md b/lib/README.md
@@ -33,7 +33,8 @@ hand-written `mem/mem.c` is 0BSD; relicense as desired.
| `riscv/rv64.S` | `__riscv_save_*` + `__riscv_restore_*` (rv64) | RISC-V rv64 with `-msave-restore` |
| `mem/mem.c` | `memcpy` / `memmove` / `memset` / `memcmp` (weak) | All; user libc overrides |
| `atomic/atomic_freestanding.c` | `__atomic_*` fallback shim | All |
-| `coro/<arch>.c` | `setjmp` / `longjmp` (`<setjmp.h>`) + `coro_init` / `coro_switch` / `__cfree_coro_trampoline` (`<stdcoro.h>`) | One of `aarch64`, `arm32`, `arm32_thumb1`, `i386`, `riscv32`, `riscv64`, `x86_64`, `x86_64_win`. Not built for `wasm32`. |
+| `coro/<arch>.c` | Per-arch primitives: `setjmp` / `longjmp` (`<setjmp.h>`) + `__cfree_coro_ctx_init` / `__cfree_coro_switch` / `__cfree_coro_trampoline` (internal; the public `<stdcoro.h>` API sits on top via `coro/coro.c`) | One of `aarch64`, `arm32`, `arm32_thumb1`, `i386`, `riscv32`, `riscv64`, `x86_64`, `x86_64_win`. Not built for `wasm32`. |
+| `coro/coro.c` | Arch-agnostic asymmetric layer: `coro_init` / `coro_resume` / `coro_yield` / `coro_self` (`<stdcoro.h>`) | All variants that ship a `coro/<arch>.c`. |
### Build-time include dirs (consumed by the masters; nothing here lands in `libcfree_rt.a`)
@@ -142,27 +143,43 @@ Hand-written portable C (not from compiler-rt). All four functions are weak
so a user libc, or a tuned arch-specific replacement, wins at link time.
`arm/aeabi_thumb{1,2}.S`'s `aeabi_mem*` symbols forward to these.
-### `coro/<arch>.c`
-One master `.c` per arch that supplies both `<setjmp.h>` (`setjmp`,
-`longjmp`) and `<stdcoro.h>` (`coro_init`, `coro_switch`,
-`__cfree_coro_trampoline`). The setjmp/longjmp/coro_switch primitives
-share a per-arch struct (callee-saved GPRs + callee-saved FPRs + sp +
-return address) and one pair of C string-concat macros
-`SAVE_INTO(reg)` / `RESTORE_FROM(reg)` so the same instruction bytes
-are emitted in all three places. Written as file-scope `__asm__`
-inside a `.c` file (not a separate `.S`) so the asm and the tiny
-`coro_init` C function stay co-located. Symbol naming uses
-`__USER_LABEL_PREFIX__` so the same source compiles for ELF / Mach-O /
-COFF.
+### `coro/<arch>.c` + `coro/coro.c`
+The coro module ships in two layers:
+
+**`coro/<arch>.c`** (one per arch) — per-target primitives, file-scope
+`__asm__` inside a `.c` file (not a separate `.S`) so the tiny C
+`__cfree_coro_ctx_init` and the asm save/restore stay co-located.
+Provides:
+
+- `setjmp` / `longjmp` (public, `<setjmp.h>`).
+- `__cfree_coro_switch(from, to, value)` — symmetric register switch,
+ exposed in `<stdcoro.h>` as a compiler-builtin-style primitive for
+ advanced schedulers; the asymmetric layer below also uses it.
+- `__cfree_coro_ctx_init` / `__cfree_coro_trampoline` — internal.
+
+The three primitives that need register save/restore (setjmp,
+longjmp, `__cfree_coro_switch`) share one pair of C string-concat
+macros `SAVE_INTO(reg)` / `RESTORE_FROM(reg)` so the same instruction
+bytes are emitted in all three places. Symbol naming uses
+`__USER_LABEL_PREFIX__` so the same source compiles for ELF / Mach-O
+/ COFF.
ARM ships two variants: `arm32.c` (Thumb-2, ARMv7+, optional VFP
`d8-d15` gated on `__ARM_FP`) and `arm32_thumb1.c` (ARMv6-M /
Cortex-M0/M0+; no IT blocks, no VFP, data-processing restricted to
-r0-r7, no `str sp` / `str rN, [sp,...]` -- the asm sequences don't
+r0-r7, no `str sp` / `str rN, [sp,...]` — the asm sequences don't
share with arm32.c so it's a separate file).
Not provided for `wasm32` (would need an Asyncify-fiber port).
+**`coro/coro.c`** (arch-agnostic) — the public asymmetric API:
+`coro_init` / `coro_resume` / `coro_yield` / `coro_self`. Tracks the
+current coroutine in a `_Thread_local` static, links each `coro_t`'s resumer slot
+through the resume chain, and dispatches the per-arch trampoline via
+a thunk that runs the user's `coro_fn`, marks the coroutine
+`CORO_DEAD`, and switches back to the resumer. Built once per coro
+variant and linked alongside the per-arch master.
+
### `atomic/atomic_freestanding.c`
Defines a pointer-sized `_Atomic(uintptr_t)` spinlock as the lock primitive
(no OS dependency) then `#include`s `atomic_common.inc`, which contains the
diff --git a/lib/build.sh b/lib/build.sh
@@ -93,7 +93,14 @@ ARM_AEABI_THUMB1="arm/aeabi_thumb1.S arm/aeabi.c"
RV32_SR="riscv/rv32.S"
RV64_SR="riscv/rv64.S"
-# Coro + setjmp/longjmp: one master .c per arch, file-scope asm inside.
+# Coro + setjmp/longjmp:
+# coro/coro.c -- arch-agnostic asymmetric layer (coro_init,
+# coro_resume, coro_yield, coro_self).
+# coro/<arch>.c -- per-arch primitives (setjmp / longjmp /
+# __cfree_coro_ctx_init / __cfree_coro_switch /
+# __cfree_coro_trampoline).
+# Every variant that ships coro picks one <arch>.c plus the common file.
+CORO_COMMON="coro/coro.c"
CORO_X86_64="coro/x86_64.c"
CORO_X86_64_WIN="coro/x86_64_win.c"
CORO_I386="coro/i386.c"
@@ -118,11 +125,11 @@ CORO_INC="-I../include"
build_variant x86_64-linux \
"--target=x86_64-linux-gnu -Iinclude/lp64_le -DHAS_INT128=1 $CORO_INC" \
- "$LP64_BASE $CORO_X86_64"
+ "$LP64_BASE $CORO_X86_64 $CORO_COMMON"
build_variant x86_64-apple-darwin \
"--target=x86_64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1 $CORO_INC" \
- "$LP64_BASE $CORO_X86_64"
+ "$LP64_BASE $CORO_X86_64 $CORO_COMMON"
# aarch64-linux: long double is binary128; needs fp_tf + fp_ti and the
# tf_supplement.h pre-include.
@@ -130,33 +137,33 @@ build_variant aarch64-linux \
"--target=aarch64-linux-gnu \
-Iinclude/lp64_le_ldbl128 -Iinclude/lp64_le -DHAS_INT128=1 \
-include include/lp64_le_ldbl128/tf_supplement.h $CORO_INC" \
- "$INT_C $INT64_C $FP_C $FP_TF_C $FP_TI_C $MEM_C $ATOMIC_C $CORO_AARCH64"
+ "$INT_C $INT64_C $FP_C $FP_TF_C $FP_TI_C $MEM_C $ATOMIC_C $CORO_AARCH64 $CORO_COMMON"
build_variant aarch64-apple-darwin \
"--target=aarch64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1 $CORO_INC" \
- "$LP64_BASE $CORO_AARCH64"
+ "$LP64_BASE $CORO_AARCH64 $CORO_COMMON"
build_variant riscv64-elf \
"--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd \
-Iinclude/lp64_le -DHAS_INT128=1 $CORO_INC" \
- "$LP64_BASE $CORO_RV64"
+ "$LP64_BASE $CORO_RV64 $CORO_COMMON"
build_variant riscv64-elf-save-restore \
"--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd -msave-restore \
-Iinclude/lp64_le -DHAS_INT128=1 $CORO_INC" \
- "$LP64_BASE $RV64_SR $CORO_RV64"
+ "$LP64_BASE $RV64_SR $CORO_RV64 $CORO_COMMON"
# ---- LLP64 little-endian (Win64) --------------------------------------------
build_variant x86_64-pc-windows \
"--target=x86_64-pc-windows-msvc -Iinclude/llp64_le -DHAS_INT128=1 $CORO_INC" \
- "$INT_C $INT64_C $FP_C $MEM_C $ATOMIC_C $CORO_X86_64_WIN"
+ "$INT_C $INT64_C $FP_C $MEM_C $ATOMIC_C $CORO_X86_64_WIN $CORO_COMMON"
# ---- ILP32 little-endian -----------------------------------------------------
ILP32_BASE="$INT_C $INT32_C $FP_C $MEM_C $ATOMIC_C"
build_variant i386-linux \
"--target=i386-linux-gnu -Iinclude/ilp32_le -DHAS_INT128=0 $CORO_INC" \
- "$ILP32_BASE $CORO_I386"
+ "$ILP32_BASE $CORO_I386 $CORO_COMMON"
# wasm32: no setjmp/coro impl yet -- Emscripten fibers / sjlj are a
# separate runtime model that hasn't been ported to cfree.
@@ -167,17 +174,17 @@ build_variant wasm32 \
build_variant riscv32-elf \
"--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd \
-Iinclude/ilp32_le -DHAS_INT128=0 $CORO_INC" \
- "$ILP32_BASE $CORO_RV32"
+ "$ILP32_BASE $CORO_RV32 $CORO_COMMON"
build_variant riscv32-elf-save-restore \
"--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd -msave-restore \
-Iinclude/ilp32_le -DHAS_INT128=0 $CORO_INC" \
- "$ILP32_BASE $RV32_SR $CORO_RV32"
+ "$ILP32_BASE $RV32_SR $CORO_RV32 $CORO_COMMON"
build_variant arm-eabi-thumb2 \
"--target=arm-none-eabi -march=armv7-a -mthumb -mfloat-abi=soft \
-Iinclude/ilp32_le -DHAS_INT128=0 $CORO_INC" \
- "$ILP32_BASE $ARM_AEABI_THUMB2 $CORO_ARM32"
+ "$ILP32_BASE $ARM_AEABI_THUMB2 $CORO_ARM32 $CORO_COMMON"
# arm-eabi-thumb1 (Cortex-M0/M0+, ARMv6-M): Thumb-1 ISA, no IT blocks,
# data-processing ops restricted to r0-r7, no VFP. Coro impl is a
@@ -185,7 +192,7 @@ build_variant arm-eabi-thumb2 \
build_variant arm-eabi-thumb1 \
"--target=arm-none-eabi -march=armv6-m -mthumb -mfloat-abi=soft \
-Iinclude/ilp32_le -DHAS_INT128=0 $CORO_INC" \
- "$ILP32_BASE $ARM_AEABI_THUMB1 $CORO_ARM32_THUMB1"
+ "$ILP32_BASE $ARM_AEABI_THUMB1 $CORO_ARM32_THUMB1 $CORO_COMMON"
#-------------------------------------------------------------------------------
echo
diff --git a/lib/coro/aarch64.c b/lib/coro/aarch64.c
@@ -1,7 +1,7 @@
/*
* lib/coro/aarch64.c -- AArch64 (AAPCS) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* All three primitives sit on one per-target context layout:
*
@@ -15,7 +15,7 @@
* 256-byte storage carved out by jmp_buf and coro_ctx.
*
* SAVE_/RESTORE_ are C string-concat macros so the same byte
- * sequence is emitted in setjmp, longjmp, and coro_switch without
+ * sequence is emitted in setjmp, longjmp, and __cfree_coro_switch without
* any duplication or gas-specific .macro tricks.
*
* Symbol naming uses __USER_LABEL_PREFIX__ so labels match the C
@@ -42,9 +42,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_arm64_ctx), "align
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_arm64_ctx *c = (struct __cfree_arm64_ctx *)ctx;
/* AArch64 stacks grow down; align top to 16. */
@@ -116,19 +116,19 @@ __asm__ (
" csinc x0, x1, xzr, ne\n"
" ret\n"
- /* coro_switch(from, to, value) -- x0, x1, x2. Save into [x0],
+ /* __cfree_coro_switch(from, to, value) -- x0, x1, x2. Save into [x0],
restore from [x1], deliver x2 in x0 (which is both the return
register here and the first-arg register the trampoline reads
on a fresh context's first run). */
- ".globl " SYM(coro_switch) "\n"
- SYM(coro_switch) ":\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("x0")
RESTORE_FROM("x1")
" mov x0, x2\n"
" ret\n"
/* __cfree_coro_trampoline -- on first entry x0 = value (delivered),
- x19 = entry fn (set by coro_init), sp aligned to 16. brk if entry
+ x19 = entry fn (set by __cfree_coro_ctx_init), sp aligned to 16. brk if entry
returns. */
".globl " SYM(__cfree_coro_trampoline) "\n"
SYM(__cfree_coro_trampoline) ":\n"
diff --git a/lib/coro/arm32.c b/lib/coro/arm32.c
@@ -1,7 +1,7 @@
/*
* lib/coro/arm32.c -- ARM32 Thumb-2 (AAPCS) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* All three primitives sit on one per-target context layout:
*
@@ -20,7 +20,7 @@
* storage carved out by jmp_buf and coro_ctx.
*
* SAVE_/RESTORE_ are C string-concat macros so the same byte sequence
- * is emitted in setjmp, longjmp, and coro_switch. The VFP half is
+ * is emitted in setjmp, longjmp, and __cfree_coro_switch. The VFP half is
* gated by a C-level #ifdef on __ARM_FP -- the cpp pass picks one
* macro body before the assembler sees anything, so we can't hide
* `#ifdef` inside the asm string.
@@ -49,9 +49,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_arm32_ctx), "align
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_arm32_ctx *c = (struct __cfree_arm32_ctx *)ctx;
/* ARM32 stacks grow down; align top to 16 (AAPCS public-boundary
@@ -173,23 +173,23 @@ __asm__ (
" mov r0, r1\n"
" bx lr\n"
- /* coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
+ /* __cfree_coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
Save into [r0], restore from [r1], deliver r2 in r0. The lr
loaded by RESTORE_FROM is either a real return address (a
previously-suspended coro) or __cfree_coro_trampoline (a fresh
- coro initialized by coro_init). Either way `bx lr` lands there
+ coro initialized by __cfree_coro_ctx_init). Either way `bx lr` lands there
with r0 holding `value`. */
- ".globl " SYM(coro_switch) "\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
".thumb_func\n"
- ".type " SYM(coro_switch) ", %function\n"
- SYM(coro_switch) ":\n"
+ ".type " SYM(__cfree_coro_switch) ", %function\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("r0")
RESTORE_FROM("r1")
" mov r0, r2\n"
" bx lr\n"
/* __cfree_coro_trampoline -- on first entry r0 = value (delivered
- by coro_switch's `mov r0, r2`), r4 = entry fn (set by coro_init),
+ by __cfree_coro_switch's `mov r0, r2`), r4 = entry fn (set by __cfree_coro_ctx_init),
sp aligned to 16. udf if entry returns. */
".globl " SYM(__cfree_coro_trampoline) "\n"
".thumb_func\n"
diff --git a/lib/coro/arm32_thumb1.c b/lib/coro/arm32_thumb1.c
@@ -1,7 +1,7 @@
/*
* lib/coro/arm32_thumb1.c -- ARMv6-M (Cortex-M0 / M0+, Thumb-1) impls of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* Thumb-1 / ARMv6-M is a strict subset of the Thumb-2 ISA used by the
* sibling arm32.c, and several conveniences disappear:
@@ -21,7 +21,7 @@
* jmp_buf and coro_ctx.
*
* SAVE_INTO uses r4-r7 as scratches *after* they have themselves been
- * stored, so r0-r3 are never clobbered. That matters for coro_switch:
+ * stored, so r0-r3 are never clobbered. That matters for __cfree_coro_switch:
* `to` (r1) and `value` (r2) survive across the save half and are still
* live for the restore half / value delivery.
*/
@@ -43,9 +43,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_arm32_thumb1_ctx),"
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_arm32_thumb1_ctx *c = (struct __cfree_arm32_thumb1_ctx *)ctx;
/* ARM stacks grow down; align top to 16 (AAPCS public-boundary
@@ -144,24 +144,24 @@ __asm__ (
" movs r0, r1\n"
" bx lr\n"
- /* coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
+ /* __cfree_coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
SAVE_INTO leaves r0-r3 untouched, so r1 (to) and r2 (value) are
still live. RESTORE_FROM clobbers r4-r7 freely (they belong to
the resumed coro). The lr loaded by RESTORE_FROM is either a
real return address (a previously-suspended coro) or
- __cfree_coro_trampoline (a fresh coro initialized by coro_init);
+ __cfree_coro_trampoline (a fresh coro initialized by __cfree_coro_ctx_init);
either way `bx lr` lands there with r0 holding `value`. */
- ".globl " SYM(coro_switch) "\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
".thumb_func\n"
- ".type " SYM(coro_switch) ", %function\n"
- SYM(coro_switch) ":\n"
+ ".type " SYM(__cfree_coro_switch) ", %function\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("r0")
RESTORE_FROM("r1")
" movs r0, r2\n"
" bx lr\n"
/* __cfree_coro_trampoline -- on first entry r0 = value (delivered
- by coro_switch's `movs r0, r2`), r4 = entry fn (set by coro_init),
+ by __cfree_coro_switch's `movs r0, r2`), r4 = entry fn (set by __cfree_coro_ctx_init),
sp aligned to 16. UDF #0 (T1, ARMv6-M) traps if entry returns. */
".globl " SYM(__cfree_coro_trampoline) "\n"
".thumb_func\n"
diff --git a/lib/coro/coro.c b/lib/coro/coro.c
@@ -0,0 +1,124 @@
+/*
+ * lib/coro/coro.c -- asymmetric coroutine layer for <stdcoro.h>.
+ *
+ * Sits on top of the per-arch __cfree_coro_switch / __cfree_coro_ctx_init
+ * primitives (one of lib/coro/<arch>.c) and supplies the public
+ * coro_init / coro_resume / coro_yield / coro_self surface.
+ *
+ * Layout of coro_t.__cfree_priv:
+ *
+ * offset 0: coro_ctx ctx
+ * offset 256: coro_t *resumer
+ * offset 256 + sizeof(void*): coro_fn user_fn
+ *
+ * Payload = 256 + 2 * sizeof(void*) bytes (272 LP64 / 264 ILP32); the
+ * 16-byte alignment pads sizeof to 272 on both. The header reserves
+ * 288 -- comfortable headroom either way. The _Static_assert below pins the fit.
+ *
+ * Resume chain. coro_resume saves the previous "current coroutine"
+ * pointer (NULL means "no coroutine; main thread") into the resumed
+ * coroutine's resumer slot, switches in, and on return flips the
+ * pointer back. coro_yield reads its own resumer slot and switches
+ * there. The result is a stack of resumers; resumes nest like calls.
+ *
+ * Per-thread scheduler state. __cfree_current and __cfree_main_ctx
+ * are _Thread_local: each thread that drives coroutines gets its own
+ * resume chain and its own "main" save slot. cfree's contract still
+ * defines __STDC_NO_THREADS__ (no <threads.h>), but _Thread_local is
+ * a C11 language feature independent of that library, so this is
+ * fine -- on hosted targets it Just Works, and bare-metal toolchains
+ * that don't link a TLS runtime fall through to per-image storage,
+ * which collapses to single-thread semantics.
+ *
+ * "Main" thread context. coro_resume needs a coro_ctx to save the
+ * caller's regs into; if the caller is itself a coroutine we use its
+ * ctx, otherwise the per-thread __cfree_main_ctx. The "main" slot is
+ * only ever touched on the resume/yield boundary -- it lives outside
+ * any coroutine's lifecycle.
+ */
+
+#include <stdcoro.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+ coro_ctx ctx;
+ coro_t *resumer;
+ coro_fn user_fn;
+} __cfree_coro_priv_t;
+
+_Static_assert(sizeof(__cfree_coro_priv_t) <= sizeof(((coro_t *)0)->__cfree_priv),
+ "priv blob fits in coro_t reservation");
+_Static_assert(_Alignof(__cfree_coro_priv_t) <= _Alignof(coro_t),
+ "priv blob alignment fits coro_t");
+
+/* Per-arch primitives (declared here, defined in lib/coro/<arch>.c). */
+extern uintptr_t __cfree_coro_switch(coro_ctx *from, coro_ctx *to, uintptr_t value);
+extern void __cfree_coro_ctx_init(coro_ctx *ctx,
+ void *stack_base, size_t stack_len,
+ void (*entry)(uintptr_t));
+
+/* Per-thread scheduler state. */
+static _Thread_local coro_t *__cfree_current = NULL;
+static _Thread_local coro_ctx __cfree_main_ctx;
+
+static inline __cfree_coro_priv_t *__priv(coro_t *c) {
+ return (__cfree_coro_priv_t *)c->__cfree_priv;
+}
+static inline coro_ctx *__ctx_of(coro_t *c) {
+ return &__priv(c)->ctx;
+}
+static inline coro_ctx *__resumer_ctx(coro_t *c) {
+ coro_t *r = __priv(c)->resumer;
+ return r ? __ctx_of(r) : &__cfree_main_ctx;
+}
+
+/* Trampoline-side thunk. Each per-arch trampoline calls this with the
+ uintptr_t delivered by the first __cfree_coro_switch into the fresh
+ context. The thunk dispatches to user_fn, then performs the
+ "DEAD + switch back to resumer" handoff so the symmetric primitive
+ doesn't need to know about coro_t lifecycle. */
+static void __cfree_coro_thunk(uintptr_t value) {
+ coro_t *self = __cfree_current;
+ uintptr_t retval = __priv(self)->user_fn(value);
+
+ self->status = CORO_DEAD;
+ __cfree_coro_switch(__ctx_of(self), __resumer_ctx(self), retval);
+ __builtin_unreachable();
+}
+
+void coro_init(coro_t *c, coro_fn fn, void *stack_base, size_t stack_len) {
+ __cfree_coro_priv_t *p = __priv(c);
+ c->status = CORO_INIT;
+ p->resumer = NULL;
+ p->user_fn = fn;
+ __cfree_coro_ctx_init(&p->ctx, stack_base, stack_len, __cfree_coro_thunk);
+}
+
+coro_result_t coro_resume(coro_t *c, uintptr_t value) {
+ coro_t *prev = __cfree_current;
+ coro_ctx *prev_ctx = prev ? __ctx_of(prev) : &__cfree_main_ctx;
+
+ __priv(c)->resumer = prev;
+ __cfree_current = c;
+ c->status = CORO_RUNNING;
+
+ uintptr_t v = __cfree_coro_switch(prev_ctx, __ctx_of(c), value);
+
+ /* c either yielded (status set to CORO_SUSPENDED by coro_yield)
+ or finished (status set to CORO_DEAD by the thunk). */
+ __cfree_current = prev;
+ return (coro_result_t){ .value = v, .status = c->status };
+}
+
+uintptr_t coro_yield(uintptr_t value) {
+ coro_t *self = __cfree_current;
+ self->status = CORO_SUSPENDED;
+ /* When the resumer next coro_resumes us, it sets status back to
+ CORO_RUNNING before the switch -- so our caller sees RUNNING. */
+ return __cfree_coro_switch(__ctx_of(self), __resumer_ctx(self), value);
+}
+
+coro_t *coro_self(void) {
+ return __cfree_current;
+}
diff --git a/lib/coro/i386.c b/lib/coro/i386.c
@@ -1,7 +1,7 @@
/*
* lib/coro/i386.c -- i386 System V (cdecl, ILP32) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* cdecl callee-saved set: ebx, esi, edi, ebp, esp. Args are pushed
* right-to-left on the stack: at function entry, 4(%esp)=arg0,
@@ -20,11 +20,11 @@
*
* setjmp(env) 4(%esp)=env
* longjmp(env, val) 4(%esp)=env, 8(%esp)=val
- * coro_switch(f, t, val) 4(%esp)=from, 8(%esp)=to, 12(%esp)=value
+ * __cfree_coro_switch(f, t, val) 4(%esp)=from, 8(%esp)=to, 12(%esp)=value
*
* The "save esp/eip" trick: at function entry, (%esp) holds the caller's
* return address (just pushed by `call`); 4(%esp) is the caller's
- * pre-call esp. Saving those two lets longjmp/coro_switch "land" at the
+ * pre-call esp. Saving those two lets longjmp/__cfree_coro_switch "land" at the
* call site exactly as if the function had returned.
*
* Modern SysV i386 (ABI rev 1.1+) requires 16-byte stack alignment
@@ -49,9 +49,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_i386_ctx), "align c
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_i386_ctx *c = (struct __cfree_i386_ctx *)ctx;
/* i386 stacks grow down; align top to 16. */
@@ -117,10 +117,10 @@ __asm__ (
RESTORE_FROM("%edx")
" jmp *%ecx\n"
- /* coro_switch(from, to, value) -- 4(%esp)=from, 8(%esp)=to, 12(%esp)=value.
+ /* __cfree_coro_switch(from, to, value) -- 4(%esp)=from, 8(%esp)=to, 12(%esp)=value.
Read all three args before SAVE_INTO clobbers the stack frame. */
- ".globl " SYM(coro_switch) "\n"
- SYM(coro_switch) ":\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ SYM(__cfree_coro_switch) ":\n"
" movl 4(%esp), %edx\n" /* from */
SAVE_INTO("%edx")
" movl 8(%esp), %edx\n" /* to (re-read; SAVE clobbered %eax not stack) */
@@ -129,7 +129,7 @@ __asm__ (
" jmp *%ecx\n"
/* __cfree_coro_trampoline -- on first entry: %eax=value, %ebx=entry,
- %esp=stack_top (no return addr pushed -- coro_switch reaches here
+ %esp=stack_top (no return addr pushed -- __cfree_coro_switch reaches here
via jmp). cdecl needs the arg pushed; align defensively, then
reserve 12 bytes + push value so that after the upcoming `call`
pushes the 4-byte return addr, the callee sees %esp+4 16-aligned. */
diff --git a/lib/coro/riscv32.c b/lib/coro/riscv32.c
@@ -1,7 +1,7 @@
/*
* lib/coro/riscv32.c -- RISC-V 32-bit (ILP32/ILP32F/ILP32D) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* Per-target context layout (matches xOS rv32 tick_coro_ctx):
*
@@ -22,7 +22,7 @@
* and coro_ctx.
*
* SAVE_/RESTORE_ are C string-concat macros so the same byte sequence
- * is emitted in setjmp, longjmp, and coro_switch without duplication.
+ * is emitted in setjmp, longjmp, and __cfree_coro_switch without duplication.
*/
#include <setjmp.h>
@@ -44,9 +44,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_riscv32_ctx), "alig
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_riscv32_ctx *c = (struct __cfree_riscv32_ctx *)ctx;
/* RISC-V stacks grow down; align top to 16. */
@@ -192,21 +192,21 @@ __asm__ (
" ret\n"
".size " SYM(longjmp) ", .-" SYM(longjmp) "\n"
- /* coro_switch(from, to, value) -- a0=from, a1=to, a2=value.
+ /* __cfree_coro_switch(from, to, value) -- a0=from, a1=to, a2=value.
Save into [a0], restore from [a1], deliver a2 in a0 (which is
both the return register and the trampoline's first-arg reg
on a fresh context's first run). */
- ".globl " SYM(coro_switch) "\n"
- ".type " SYM(coro_switch) ", @function\n"
- SYM(coro_switch) ":\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ ".type " SYM(__cfree_coro_switch) ", @function\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("a0")
RESTORE_FROM("a1")
" mv a0, a2\n"
" ret\n"
- ".size " SYM(coro_switch) ", .-" SYM(coro_switch) "\n"
+ ".size " SYM(__cfree_coro_switch) ", .-" SYM(__cfree_coro_switch) "\n"
/* __cfree_coro_trampoline -- on first entry: a0=value (delivered
- by coro_switch's `mv a0, a2`), s0=entry (set by coro_init via
+ by __cfree_coro_switch's `mv a0, a2`), s0=entry (set by __cfree_coro_ctx_init via
regs[2]), sp=stack_top. ebreak if entry returns. */
".globl " SYM(__cfree_coro_trampoline) "\n"
".type " SYM(__cfree_coro_trampoline) ", @function\n"
diff --git a/lib/coro/riscv64.c b/lib/coro/riscv64.c
@@ -1,10 +1,10 @@
/*
* lib/coro/riscv64.c -- RISC-V 64-bit (LP64D) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* RISC-V LP64D callee-saved set:
- * ra (x1) -- saved manually so longjmp/coro_switch can
+ * ra (x1) -- saved manually so longjmp/__cfree_coro_switch can
* "return" to the original call site
* sp (x2)
* s0-s11 (x8-x9, x18-x27)
@@ -22,17 +22,17 @@
*
* setjmp(env) a0=env
* longjmp(env, val) a0=env, a1=val
- * coro_switch(f, t, val) a0=from, a1=to, a2=val
+ * __cfree_coro_switch(f, t, val) a0=from, a1=to, a2=val
*
* Value-passing trick: the destination context "returns" via
* ld ra, 0(a1); ... ret
* where `ret` is `jalr x0, 0(ra)`. By moving the value into a0 just
* before `ret`, both a fresh trampoline (entry(value)) and a previously
- * suspended coro_switch (= the value its switch call returned) see it
+ * suspended __cfree_coro_switch (= the value its switch call returned) see it
* as the a0 return register.
*
* SAVE_/RESTORE_ are C string-concat macros so the same byte sequence
- * is emitted in setjmp, longjmp, and coro_switch without duplication.
+ * is emitted in setjmp, longjmp, and __cfree_coro_switch without duplication.
*
* Symbol naming uses __USER_LABEL_PREFIX__ so labels match the C
* compiler's call-site mangling (empty on RISC-V ELF).
@@ -57,9 +57,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_riscv64_ctx), "alig
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_riscv64_ctx *c = (struct __cfree_riscv64_ctx *)ctx;
/* RISC-V stacks grow down; align top to 16. */
@@ -166,21 +166,21 @@ __asm__ (
" ret\n"
".size " SYM(longjmp) ", .-" SYM(longjmp) "\n"
- /* coro_switch(from, to, value) -- a0=from, a1=to, a2=value.
+ /* __cfree_coro_switch(from, to, value) -- a0=from, a1=to, a2=value.
Save into [a0], restore from [a1] (which clobbers a0 and a1's
roles -- ra/sp/s* are loaded from the to-context), then deliver
value in a0 just before ret. */
- ".globl " SYM(coro_switch) "\n"
- ".type " SYM(coro_switch) ", @function\n"
- SYM(coro_switch) ":\n"
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ ".type " SYM(__cfree_coro_switch) ", @function\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("a0")
RESTORE_FROM("a1")
" mv a0, a2\n"
" ret\n"
- ".size " SYM(coro_switch) ", .-" SYM(coro_switch) "\n"
+ ".size " SYM(__cfree_coro_switch) ", .-" SYM(__cfree_coro_switch) "\n"
/* __cfree_coro_trampoline -- on first entry: a0=value (delivered),
- s0=entry fn (set by coro_init), sp aligned to 16. ebreak if entry
+ s0=entry fn (set by __cfree_coro_ctx_init), sp aligned to 16. ebreak if entry
returns. */
".globl " SYM(__cfree_coro_trampoline) "\n"
".type " SYM(__cfree_coro_trampoline) ", @function\n"
diff --git a/lib/coro/x86_64.c b/lib/coro/x86_64.c
@@ -1,7 +1,7 @@
/*
* lib/coro/x86_64.c -- x86_64 System V ABI implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* Callee-saved set on SysV: rbx, rbp, r12-r15. (No callee-saved xmm
* regs -- those are MS-ABI specific; see x86_64_win.c.)
@@ -15,11 +15,11 @@
*
* setjmp(env) %rdi=env
* longjmp(env, val) %rdi=env, %esi=val
- * coro_switch(f, t, val) %rdi=from, %rsi=to, %rdx=val
+ * __cfree_coro_switch(f, t, val) %rdi=from, %rsi=to, %rdx=val
*
* The "save rsp/rip" trick: at function entry, (%rsp) holds the
* caller's return address (just pushed by `call`); 8(%rsp) is the
- * caller's pre-call rsp. Saving those two lets longjmp/coro_switch
+ * caller's pre-call rsp. Saving those two lets longjmp/__cfree_coro_switch
* "land" at the call site exactly as if the function had returned.
*/
@@ -40,9 +40,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_x86_64_ctx), "align
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_x86_64_ctx *c = (struct __cfree_x86_64_ctx *)ctx;
/* x86_64 stacks grow down; align top to 16. */
@@ -110,16 +110,16 @@ __asm__ (
RESTORE_FROM("%rdi")
" jmpq *%rcx\n"
- /* coro_switch(from, to, value) -- from=%rdi, to=%rsi, value=%rdx. */
- ".globl " SYM(coro_switch) "\n"
- SYM(coro_switch) ":\n"
+ /* __cfree_coro_switch(from, to, value) -- from=%rdi, to=%rsi, value=%rdx. */
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("%rdi")
" movq %rdx, %rax\n" /* deliver value as return reg */
RESTORE_FROM("%rsi")
" jmpq *%rcx\n"
/* __cfree_coro_trampoline -- on first entry: %rax=value,
- %r13=entry, %rsp=stack_top (no return addr pushed -- coro_switch
+ %r13=entry, %rsp=stack_top (no return addr pushed -- __cfree_coro_switch
reaches here via jmp). System V wants %rsp+8 ≡ 16 (mod 16) at
function entry; the andq below makes that hold defensively. */
".globl " SYM(__cfree_coro_trampoline) "\n"
diff --git a/lib/coro/x86_64_win.c b/lib/coro/x86_64_win.c
@@ -1,7 +1,7 @@
/*
* lib/coro/x86_64_win.c -- x86_64 Windows (MS x64 ABI) implementations of
* setjmp / longjmp (<setjmp.h>)
- * coro_init / coro_switch / trampoline (<stdcoro.h>)
+ * __cfree_coro_ctx_init / __cfree_coro_switch / trampoline (<stdcoro.h>)
*
* MS x64 callee-saved set: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm15.
* (Compare with x86_64.c -- SysV doesn't preserve rdi/rsi or any xmm.)
@@ -21,7 +21,7 @@
*
* setjmp(env) %rcx=env
* longjmp(env, val) %rcx=env, %edx=val
- * coro_switch(f, t, val) %rcx=from, %rdx=to, %r8=value
+ * __cfree_coro_switch(f, t, val) %rcx=from, %rdx=to, %r8=value
*
* The "save rsp/rip" trick mirrors x86_64.c: at function entry,
* (%rsp) holds the caller's return address, 8(%rsp) is the caller's
@@ -47,9 +47,9 @@ _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __cfree_x86_64_win_ctx), "a
extern void __cfree_coro_trampoline(void);
-void coro_init(coro_ctx *ctx,
+void __cfree_coro_ctx_init(coro_ctx *ctx,
void *stack_base, size_t stack_len,
- coro_entry_fn entry) {
+ void (*entry)(uintptr_t)) {
struct __cfree_x86_64_win_ctx *c = (struct __cfree_x86_64_win_ctx *)ctx;
/* x86_64 stacks grow down; align top to 16. */
@@ -153,17 +153,17 @@ __asm__ (
RESTORE_FROM("%rcx")
" jmpq *%r10\n"
- /* coro_switch(from, to, value) -- from=%rcx, to=%rdx, value=%r8. */
- ".globl " SYM(coro_switch) "\n"
- SYM(coro_switch) ":\n"
+ /* __cfree_coro_switch(from, to, value) -- from=%rcx, to=%rdx, value=%r8. */
+ ".globl " SYM(__cfree_coro_switch) "\n"
+ SYM(__cfree_coro_switch) ":\n"
SAVE_INTO("%rcx")
" movq %r8, %rax\n" /* deliver value as return reg */
RESTORE_FROM("%rdx")
" jmpq *%r10\n"
/* __cfree_coro_trampoline -- on first entry: %rax=value (delivered
- by coro_switch), %r12=entry (set by coro_init), %rsp=stack_top
- (no return addr pushed -- coro_switch reaches here via jmp). MS
+ by __cfree_coro_switch), %r12=entry (set by __cfree_coro_ctx_init), %rsp=stack_top
+ (no return addr pushed -- __cfree_coro_switch reaches here via jmp). MS
x64 wants %rsp 16-byte aligned at call sites with 32 bytes of
shadow space reserved by the caller. */
".globl " SYM(__cfree_coro_trampoline) "\n"
diff --git a/test/smoke.c b/test/smoke.c
@@ -145,17 +145,25 @@ static int cfree_setjmp_compiles(int x) {
return 0;
}
-/* stdcoro: coro_ctx storage exists, the API surface compiles and
- resolves; same compile-only caveat as setjmp. */
+/* stdcoro: coro_ctx and coro_t storage exists; the asymmetric API
+ surface compiles and resolves. Compile-only -- smoke.c never links
+ against a libcfree_rt. */
_Static_assert(sizeof(coro_ctx) >= 64, "coro_ctx room for regs");
_Static_assert(_Alignof(coro_ctx) >= 16, "coro_ctx 16-byte aligned");
+_Static_assert(sizeof(coro_t) >= sizeof(coro_ctx) + 2 * sizeof(void *),
+ "coro_t room for ctx + resumer + user_fn");
+_Static_assert(_Alignof(coro_t) >= 16, "coro_t 16-byte aligned");
_Static_assert(CORO_STACK_ALIGN >= 8, "stack align reasonable");
-static coro_ctx cfree_co_a, cfree_co_b;
+_Static_assert(CORO_INIT != CORO_DEAD, "status enum distinct");
+
+static coro_t cfree_co;
static _Alignas(16) unsigned char cfree_co_stack[4096];
-static void cfree_co_entry(uintptr_t v) { (void)v; for (;;) {} }
+static uintptr_t cfree_co_body(uintptr_t v) { return coro_yield(v + 1); } /* yields v + 1 once, then returns whatever that coro_yield call returns */
static uintptr_t cfree_coro_compiles(void) {
- coro_init(&cfree_co_b, cfree_co_stack, sizeof(cfree_co_stack), cfree_co_entry);
- return coro_switch(&cfree_co_a, &cfree_co_b, 0xC0FFEEu);
+ coro_init(&cfree_co, cfree_co_body, cfree_co_stack, sizeof(cfree_co_stack)); /* args: coroutine, body fn, stack base, stack length */
+ coro_result_t r = coro_resume(&cfree_co, 0xC0FFEEu);
+ coro_t *me = coro_self();
+ return r.value + (uintptr_t)me + (uintptr_t)coro_status(&cfree_co); /* references every result so each call above is used */
}
/* stdatomic: types, memory_order, lock-free macros, plus a runtime