From 01b4e5dfdc100ea3fadf2cd05e82ad85be8758e1 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 12:44:47 +0100
Subject: [PATCH 01/14] Use loop from sniffio

---
 wgpu/_async.py | 111 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 75 insertions(+), 36 deletions(-)
diff --git a/wgpu/_async.py b/wgpu/_async.py
index 81e08f29..b47646dd 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -13,6 +13,33 @@
 logger = logging.getLogger("wgpu")
 
 
+def get_call_soon_threadsafe():
+    """Get the call_soon_threadsafe() function for the currently running event loop.
+
+    Sniffio is used for this, and it supports asyncio, trio, and rendercanvas.utils.asyncadapter.
+    If this function returns None, it means that the GPUPromise will not support ``await`` and ``.then()``.
+
+    It's relatively easy to register a custom loop to sniffio so that this code works on it.
+    """
+
+    try:
+        name = sniffio.current_async_library()
+    except sniffio.AsyncLibraryNotFoundError:
+        return None
+
+    if name == "trio":
+        trio = sys.modules[name]
+        token = trio.lowlevel.current_trio_token()
+        return token.run_sync_soon
+    else:  # asyncio, rendercanvas.utils.asyncadapter, and easy to mimic for custom loops
+        try:
+            mod = sys.modules[name]
+            loop = mod.get_running_loop()
+            return loop.call_soon_threadsafe
+        except Exception:
+            return None
+
+
 # The async_sleep and AsyncEvent are a copy of the implementation in rendercanvas.asyncs
 
 
@@ -45,14 +72,14 @@ def call_soon(self, callback: Callable, *args: object):
         raise NotImplementedError()
 
 
-def get_backoff_time_generator() -> Generator[float, None, None]:
-    """Generates sleep-times, start at 0 then increasing to 100Hz and sticking there."""
-    for _ in range(5):
-        yield 0
-    for i in range(1, 20):
-        yield i / 2000.0  # ramp up from 0ms to 10ms
-    while True:
-        yield 0.01
+# def get_backoff_time_generator() -> Generator[float, None, None]:
+#     """Generates sleep-times, start at 0 then increasing to 100Hz and sticking there."""
+#     for _ in range(5):
+#         yield 0
+#     for i in range(1, 20):
+#         yield i / 2000.0  # ramp up from 0ms to 10ms
+#     while True:
+#         yield 0.01
 
 
 class GPUPromise(Awaitable[AwaitedType], Generic[AwaitedType]):
@@ -88,24 +115,21 @@ def __init__(
         title: str,
         handler: Callable | None,
         *,
-        loop: LoopInterface | None = None,
+        loop=None,
         keepalive: object = None,
+        _call_soon_threadsafe: Callable | None = None,  # passed internally
     ):
         """
         Arguments:
             title (str): The title of this promise, mostly for debugging purposes.
             handler (callable, optional): The function to turn promise input into the result. If None,
                 the result will simply be the input.
-            loop (LoopInterface, optional): A loop object that at least has a ``call_soon()`` method.
-                If not given, this promise does not support .then() or promise-chaining.
             keepalive (object, optional): Pass any data via this arg who's lifetime must be bound to the
                 resolving of this promise.
 
         """
         self._title = str(title)  # title for debugging
         self._handler = handler  # function to turn input into the result
-
-        self._loop = loop  # Event loop instance, can be None
         self._keepalive = keepalive  # just to keep something alive
 
         self._state = "pending"  # "pending", "pending-rejected", "pending-fulfilled", "rejected", "fulfilled"
@@ -117,6 +141,8 @@ def __init__(
         self._error_callbacks = []
         self._UNRESOLVED.add(self)
 
+        self._call_soon_threadsafe = _call_soon_threadsafe or get_call_soon_threadsafe()
+
     def __repr__(self):
         return f"<GPUPromise '{self._title}' {self._state} at {hex(id(self))}>"
 
@@ -139,8 +165,10 @@ def _set_input(self, result: object, *, resolve_now=True) -> None:
 
         # If the input is a promise, we need to wait for it, i.e. chain to self.
         if isinstance(result, GPUPromise):
-            if self._loop is None:
-                self._set_error("Cannot chain GPUPromise if the loop is not set.")
+            if self._call_soon_threadsafe is None:
+                self._set_error(
+                    "Cannot chain GPUPromise because no running loop could be detected."
+                )
             else:
                 result._chain(self)
             return
@@ -188,8 +216,8 @@ def _set_pending_resolved(self, *, resolve_now=False):
             self._resolve_callback()
             if self._async_event is not None:
                 self._async_event.set()
-        elif self._loop is not None:
-            self._loop.call_soon_threadsafe(self._resolve_callback)
+        elif self._call_soon_threadsafe is not None:
+            self._call_soon_threadsafe(self._resolve_callback)
 
     def _resolve_callback(self):
         # This should only be called in the main/reference thread.
@@ -218,11 +246,11 @@ def _resolve(self):
         if self._state.endswith("rejected"):
             error = self._value
             for cb in self._error_callbacks:
-                self._loop.call_soon_threadsafe(cb, error)
+                self._call_soon_threadsafe(cb, error)
         elif self._state.endswith("fulfilled"):
             result = self._value
             for cb in self._done_callbacks:
-                self._loop.call_soon_threadsafe(cb, result)
+                self._call_soon_threadsafe(cb, result)
         # New state
         self._state = self._state.replace("pending-", "")
         # Clean up
@@ -253,7 +281,7 @@ def sync_wait(self) -> AwaitedType:
 
     def _sync_wait(self):
         # Each subclass may implement this in its own way. E.g. it may wait for
-        # the _thread_event, it may poll the device in a loop while checking the
+        # the _thread_event, it may poll the device in a while-loop while checking the
         # status, and Pyodide may use its special logic to sync wait the JS
         # promise.
         raise NotImplementedError()
@@ -275,8 +303,10 @@ def then(
 
         The callback will receive one argument: the result of the promise.
         """
-        if self._loop is None:
-            raise RuntimeError("Cannot use GPUPromise.then() if the loop is not set.")
+        if self._call_soon_threadsafe is None:
+            raise RuntimeError(
+                "Cannot use GPUPromise.then() because no running loop could be detected."
+            )
         if not callable(callback):
             raise TypeError(
                 f"GPUPromise.then() got a callback that is not callable: {callback!r}"
@@ -293,7 +323,9 @@ def then(
             title = self._title + " -> " + callback_name
 
         # Create new promise
-        new_promise = self.__class__(title, callback, loop=self._loop)
+        new_promise = self.__class__(
+            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
+        )
         self._chain(new_promise)
 
         if error_callback is not None:
@@ -306,8 +338,10 @@ def catch(self, callback: Callable[[Exception], None] | None):
 
         The callback will receive one argument: the error object.
         """
-        if self._loop is None:
-            raise RuntimeError("Cannot use GPUPromise.catch() if the loop is not set.")
+        if self._call_soon_threadsafe is None:
+            raise RuntimeError(
+                "Cannot use GPUPromise.catch() because not running loop could be detected."
+            )
         if not callable(callback):
             raise TypeError(
                 f"GPUPromise.catch() got a callback that is not callable: {callback!r}"
@@ -317,7 +351,9 @@ def catch(self, callback: Callable[[Exception], None] | None):
         title = "Catcher for " + self._title
 
         # Create new promise
-        new_promise = self.__class__(title, callback, loop=self._loop)
+        new_promise = self.__class__(
+            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
+        )
 
         # Custom chain
         with self._lock:
@@ -328,16 +364,19 @@ def catch(self, callback: Callable[[Exception], None] | None):
         return new_promise
 
     def __await__(self):
-        if self._loop is None:
-            # An async busy loop
-            async def awaiter():
-                if self._state == "pending":
-                    # Do small incremental async naps. Other tasks and threads can run.
-                    # Note that async sleep, with sleep_time > 0, is inaccurate on Windows.
-                    sleep_gen = get_backoff_time_generator()
-                    while self._state == "pending":
-                        await async_sleep(next(sleep_gen))
-                return self._resolve()
+        if self._call_soon_threadsafe is None:
+            raise RuntimeError(
+                "Cannot await GPUPromise because no running loop could be detected."
+            )
+            # # An async busy loop
+            # async def awaiter():
+            #     if self._state == "pending":
+            #         # Do small incremental async naps. Other tasks and threads can run.
+            #         # Note that async sleep, with sleep_time > 0, is inaccurate on Windows.
+            #         sleep_gen = get_backoff_time_generator()
+            #         while self._state == "pending":
+            #             await async_sleep(next(sleep_gen))
+            #     return self._resolve()
 
         else:
             # Using an async Event.

From c7338c61ba9c79df7543a4bba027afdb67eb397f Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 13:34:38 +0100
Subject: [PATCH 02/14] tweak

---
 wgpu/_async.py | 54 +++++++++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 31 deletions(-)

diff --git a/wgpu/_async.py b/wgpu/_async.py
index b47646dd..d45f0458 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -13,8 +13,13 @@
 logger = logging.getLogger("wgpu")
 
 
-def get_call_soon_threadsafe():
-    """Get the call_soon_threadsafe() function for the currently running event loop.
+class StubLoop:
+    def __init__(self, call_soon_threadsafe):
+        self.call_soon_threadsafe = call_soon_threadsafe
+
+
+def get_running_loop():
+    """Get an object with a call_soon_threadsafe() method.
 
     Sniffio is used for this, and it supports asyncio, trio, and rendercanvas.utils.asyncadapter.
     If this function returns None, it means that the GPUPromise will not support ``await`` and ``.then()``.
@@ -30,12 +35,13 @@ def get_call_soon_threadsafe():
     if name == "trio":
         trio = sys.modules[name]
         token = trio.lowlevel.current_trio_token()
-        return token.run_sync_soon
+        return StubLoop(token.run_sync_soon)
     else:  # asyncio, rendercanvas.utils.asyncadapter, and easy to mimic for custom loops
         try:
             mod = sys.modules[name]
             loop = mod.get_running_loop()
-            return loop.call_soon_threadsafe
+            loop.call_soon_threadsafe  # noqa: access to make sure it exists
+            return loop
         except Exception:
             return None
 
@@ -62,16 +68,6 @@ def __new__(cls):
 AwaitedType = TypeVar("AwaitedType")
 
 
-class LoopInterface:
-    """A loop object must have (at least) this API.
-
-    Rendercanvas loop objects do, asyncio.loop does too.
-    """
-
-    def call_soon(self, callback: Callable, *args: object):
-        raise NotImplementedError()
-
-
 # def get_backoff_time_generator() -> Generator[float, None, None]:
 #     """Generates sleep-times, start at 0 then increasing to 100Hz and sticking there."""
 #     for _ in range(5):
@@ -115,9 +111,8 @@ def __init__(
         title: str,
         handler: Callable | None,
         *,
-        loop=None,
         keepalive: object = None,
-        _call_soon_threadsafe: Callable | None = None,  # passed internally
+        _loop: object = None,  # for testing and chaining
     ):
         """
         Arguments:
@@ -141,7 +136,8 @@ def __init__(
         self._error_callbacks = []
         self._UNRESOLVED.add(self)
 
-        self._call_soon_threadsafe = _call_soon_threadsafe or get_call_soon_threadsafe()
+        # we only care about call_soon_threadsafe, but clearer to just have a loop object
+        self._loop = _loop or get_running_loop()
 
     def __repr__(self):
         return f"<GPUPromise '{self._title}' {self._state} at {hex(id(self))}>"
@@ -165,7 +161,7 @@ def _set_input(self, result: object, *, resolve_now=True) -> None:
 
         # If the input is a promise, we need to wait for it, i.e. chain to self.
         if isinstance(result, GPUPromise):
-            if self._call_soon_threadsafe is None:
+            if self._loop is None:
                 self._set_error(
                     "Cannot chain GPUPromise because no running loop could be detected."
                 )
@@ -216,8 +212,8 @@ def _set_pending_resolved(self, *, resolve_now=False):
             self._resolve_callback()
             if self._async_event is not None:
                 self._async_event.set()
-        elif self._call_soon_threadsafe is not None:
-            self._call_soon_threadsafe(self._resolve_callback)
+        elif self._loop is not None:
+            self._loop.call_soon_threadsafe(self._resolve_callback)
 
     def _resolve_callback(self):
         # This should only be called in the main/reference thread.
@@ -246,11 +242,11 @@ def _resolve(self):
         if self._state.endswith("rejected"):
             error = self._value
             for cb in self._error_callbacks:
-                self._call_soon_threadsafe(cb, error)
+                self._loop.call_soon_threadsafe(cb, error)
         elif self._state.endswith("fulfilled"):
             result = self._value
             for cb in self._done_callbacks:
-                self._call_soon_threadsafe(cb, result)
+                self._loop.call_soon_threadsafe(cb, result)
         # New state
         self._state = self._state.replace("pending-", "")
         # Clean up
@@ -303,7 +299,7 @@ def then(
 
         The callback will receive one argument: the result of the promise.
         """
-        if self._call_soon_threadsafe is None:
+        if self._loop is None:
             raise RuntimeError(
                 "Cannot use GPUPromise.then() because no running loop could be detected."
             )
@@ -323,9 +319,7 @@ def then(
             title = self._title + " -> " + callback_name
 
         # Create new promise
-        new_promise = self.__class__(
-            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
-        )
+        new_promise = self.__class__(title, callback, _loop=self._loop)
         self._chain(new_promise)
 
         if error_callback is not None:
@@ -338,7 +332,7 @@ def catch(self, callback: Callable[[Exception], None] | None):
 
         The callback will receive one argument: the error object.
         """
-        if self._call_soon_threadsafe is None:
+        if self._loop is None:
             raise RuntimeError(
                 "Cannot use GPUPromise.catch() because not running loop could be detected."
             )
@@ -351,9 +345,7 @@ def catch(self, callback: Callable[[Exception], None] | None):
         title = "Catcher for " + self._title
 
         # Create new promise
-        new_promise = self.__class__(
-            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
-        )
+        new_promise = self.__class__(title, callback, _loop=self._loop)
 
         # Custom chain
         with self._lock:
@@ -364,7 +356,7 @@ def catch(self, callback: Callable[[Exception], None] | None):
         return new_promise
 
     def __await__(self):
-        if self._call_soon_threadsafe is None:
+        if self._loop is None:
             raise RuntimeError(
                 "Cannot await GPUPromise because no running loop could be detected."
             )

From 6ae18e391fec7ed7b84322c662b44e93df539b58 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 13:35:05 +0100
Subject: [PATCH 03/14] Remove loop logic from main api code

---
 tests/test_api.py                 |  2 +-
 wgpu/_classes.py                  | 16 ++-----
 wgpu/backends/wgpu_native/_api.py | 72 +++++++++----------------------
 3 files changed, 25 insertions(+), 65 deletions(-)

diff --git a/tests/test_api.py b/tests/test_api.py
index cd572ccf..0c32e6b9 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -100,7 +100,7 @@ def test_enums_and_flags_and_structs():
 
 def test_base_wgpu_api():
     # Fake a device and an adapter
-    adapter = wgpu.GPUAdapter(None, set(), {}, wgpu.GPUAdapterInfo({}), None)
+    adapter = wgpu.GPUAdapter(None, set(), {}, wgpu.GPUAdapterInfo({}))
     queue = wgpu.GPUQueue("", None, None)
     device = wgpu.GPUDevice("device08", -1, adapter, {42, 43}, {}, queue)
 
diff --git a/wgpu/_classes.py b/wgpu/_classes.py
index e1b41259..83e263a9 100644
--- a/wgpu/_classes.py
+++ b/wgpu/_classes.py
@@ -14,7 +14,7 @@
 import logging
 from typing import Sequence
 
-from ._async import GPUPromise as BaseGPUPromise, LoopInterface
+from ._async import GPUPromise as BaseGPUPromise
 from ._coreutils import ApiDiff, str_flag_to_int, ArrayLike, CanvasLike
 from ._diagnostics import diagnostics, texture_format_to_bpp
 from . import flags, enums, structs
@@ -119,7 +119,6 @@ def request_adapter_async(
         power_preference: enums.PowerPreferenceEnum | None = None,
         force_fallback_adapter: bool = False,
         canvas: CanvasLike | None = None,
-        loop: LoopInterface | None = None,
     ) -> GPUPromise[GPUAdapter]:
         """Create a `GPUAdapter`, the object that represents an abstract wgpu
         implementation, from which one can request a `GPUDevice`.
@@ -132,8 +131,6 @@ def request_adapter_async(
                 fallback adapter.
             canvas : The canvas or context that the adapter should be able to render to. This can typically
                  be left to None. If given, it must be a ``GPUCanvasContext`` or ``RenderCanvas``.
-            loop : the loop object for async support. Must have at least ``call_soon(f, *args)``.
-                The loop object is required for asynchrouns use with ``promise.then()``. EXPERIMENTAL.
         """
         # If this method gets called, no backend has been loaded yet, let's do that now!
         from .backends.auto import gpu
@@ -145,7 +142,6 @@ def request_adapter_async(
             power_preference=power_preference,
             force_fallback_adapter=force_fallback_adapter,
             canvas=canvas,
-            loop=loop,
         )
 
     @apidiff.add("Method useful for multi-gpu environments")
@@ -158,9 +154,7 @@ def enumerate_adapters_sync(self) -> list[GPUAdapter]:
         return promise.sync_wait()
 
     @apidiff.add("Method useful for multi-gpu environments")
-    def enumerate_adapters_async(
-        self, *, loop: LoopInterface | None = None
-    ) -> GPUPromise[list[GPUAdapter]]:
+    def enumerate_adapters_async(self) -> GPUPromise[list[GPUAdapter]]:
         """Get a list of adapter objects available on the current system.
 
         An adapter can then be selected (e.g. using its summary), and a device
@@ -187,7 +181,7 @@ def enumerate_adapters_async(
         # If this method gets called, no backend has been loaded yet, let's do that now!
         from .backends.auto import gpu
 
-        return gpu.enumerate_adapters_async(loop=loop)
+        return gpu.enumerate_adapters_async()
 
     # IDL: GPUTextureFormat getPreferredCanvasFormat();
     @apidiff.change("Disabled because we put it on the canvas context")
@@ -544,10 +538,9 @@ class GPUAdapter:
 
     _ot = object_tracker
 
-    def __init__(self, internal, features, limits, adapter_info, loop):
+    def __init__(self, internal, features, limits, adapter_info):
         self._ot.increase(self.__class__.__name__)
         self._internal = internal
-        self._loop = loop
 
         assert isinstance(features, set)
         assert isinstance(limits, dict)
@@ -693,7 +686,6 @@ def __init__(self, label, internal, adapter, features, limits, queue):
         self._adapter = adapter
         self._features = features
         self._limits = limits
-        self._loop = adapter._loop
         self._queue = queue
         queue._device = self  # because it could not be set earlier
 
diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
index 9202f6d0..c17c39ae 100644
--- a/wgpu/backends/wgpu_native/_api.py
+++ b/wgpu/backends/wgpu_native/_api.py
@@ -23,7 +23,6 @@
 from weakref import WeakKeyDictionary
 from typing import NoReturn, Sequence
 
-from ..._async import LoopInterface
 from ..._coreutils import str_flag_to_int, ArrayLike, CanvasLike
 from ... import classes, flags, enums, structs
 
@@ -462,7 +461,6 @@ def request_adapter_async(
         power_preference: enums.PowerPreferenceEnum | None = None,
         force_fallback_adapter: bool = False,
         canvas: CanvasLike | None = None,
-        loop: LoopInterface | None = None,
     ) -> GPUPromise[GPUAdapter]:
         """Create a `GPUAdapter`, the object that represents an abstract wgpu
         implementation, from which one can request a `GPUDevice`.
@@ -486,11 +484,11 @@ def request_adapter_async(
         # We chose the variable name WGPUPY_WGPU_ADAPTER_NAME instead WGPU_ADAPTER_NAME
         # to avoid a clash
         if adapter_name := os.getenv(("WGPUPY_WGPU_ADAPTER_NAME")):
-            adapters = self._enumerate_adapters(loop)
+            adapters = self._enumerate_adapters()
             adapters = [a for a in adapters if adapter_name in a.summary]
             if not adapters:
                 raise ValueError(f"Adapter with name '{adapter_name}' not found.")
-            promise = GPUPromise("adapter by name", None, loop=loop)
+            promise = GPUPromise("adapter by name", None)
             promise._wgpu_set_input(adapters[0])
 
             return promise
@@ -563,13 +561,10 @@ def request_adapter_callback(status, result, c_message, _userdata1, _userdata2):
         )
 
         def handler(adapter_id):
-            return self._create_adapter(adapter_id, loop)
+            return self._create_adapter(adapter_id)
 
         promise = GPUPromise(
-            "request_adapter",
-            handler,
-            loop=loop,
-            keepalive=request_adapter_callback,
+            "request_adapter", handler, keepalive=request_adapter_callback
         )
 
         instance = get_wgpu_instance()
@@ -587,20 +582,18 @@ def handler(adapter_id):
 
         return promise
 
-    def enumerate_adapters_async(
-        self, *, loop: LoopInterface | None = None
-    ) -> GPUPromise[list[GPUAdapter]]:
+    def enumerate_adapters_async(self) -> GPUPromise[list[GPUAdapter]]:
         """Get a list of adapter objects available on the current system.
         This is the implementation based on wgpu-native.
         """
-        result = self._enumerate_adapters(loop)
+        result = self._enumerate_adapters()
         # We already have the result, so we return a resolved promise.
         # The reason this is async is to allow this to work on backends where we cannot actually enumerate adapters.
-        promise = GPUPromise("enumerate_adapters", None, loop=loop)
+        promise = GPUPromise("enumerate_adapters", None)
         promise._wgpu_set_input(result)
         return promise
 
-    def _enumerate_adapters(self, loop) -> list[GPUAdapter]:
+    def _enumerate_adapters(self) -> list[GPUAdapter]:
         # The first call is to get the number of adapters, and the second call
         # is to get the actual adapters. Note that the second arg (now NULL) can
         # be a `WGPUInstanceEnumerateAdapterOptions` to filter by backend.
@@ -610,9 +603,9 @@ def _enumerate_adapters(self, loop) -> list[GPUAdapter]:
         adapters = new_array("WGPUAdapter[]", count)
         # H: size_t f(WGPUInstance instance, WGPUInstanceEnumerateAdapterOptions const * options, WGPUAdapter * adapters)
         libf.wgpuInstanceEnumerateAdapters(instance, ffi.NULL, adapters)
-        return [self._create_adapter(adapter, loop) for adapter in adapters]
+        return [self._create_adapter(adapter) for adapter in adapters]
 
-    def _create_adapter(self, adapter_id, loop):
+    def _create_adapter(self, adapter_id):
         # ----- Get adapter info
 
         # H: nextInChain: WGPUChainedStructOut *, vendor: WGPUStringView, architecture: WGPUStringView, device: WGPUStringView, description: WGPUStringView, backendType: WGPUBackendType, adapterType: WGPUAdapterType, vendorID: int, deviceID: int
@@ -671,7 +664,7 @@ def to_py_str(key):
         features = _get_features(adapter_id, adapter=True)
 
         # ----- Done
-        return GPUAdapter(adapter_id, features, limits, adapter_info, loop)
+        return GPUAdapter(adapter_id, features, limits, adapter_info)
 
     def get_canvas_context(self, present_info: dict) -> GPUCanvasContext:
         """Get the GPUCanvasContext object for the appropriate backend.
@@ -1386,10 +1379,7 @@ def handler(device_id):
             return device
 
         promise = GPUPromise(
-            "request_device",
-            handler,
-            loop=self._loop,
-            keepalive=request_device_callback,
+            "request_device", handler, keepalive=request_device_callback
         )
 
         # H: WGPUFuture f(WGPUAdapter adapter, WGPUDeviceDescriptor const * descriptor, WGPURequestDeviceCallbackInfo callbackInfo)
@@ -1968,9 +1958,7 @@ def create_compute_pipeline_async(
             # H: WGPUComputePipeline f(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor)
             id = libf.wgpuDeviceCreateComputePipeline(self._internal, descriptor)
             result = GPUComputePipeline(label, id, self)
-            promise = GPUPromise(
-                "create_compute_pipeline_async", None, loop=self._device._loop
-            )
+            promise = GPUPromise("create_compute_pipeline_async", None)
             promise._wgpu_set_input(result)
             return promise
 
@@ -2002,12 +1990,7 @@ def callback(status, result, c_message, _userdata1, _userdata2):
         def handler(id):
             return GPUComputePipeline(label, id, self)
 
-        promise = GPUPromise(
-            "create_compute_pipeline",
-            handler,
-            loop=self._loop,
-            keepalive=callback,
-        )
+        promise = GPUPromise("create_compute_pipeline", handler, keepalive=callback)
 
         token = self._device._poller.get_token()
 
@@ -2097,9 +2080,7 @@ def create_render_pipeline_async(
             # H: WGPURenderPipeline f(WGPUDevice device, WGPURenderPipelineDescriptor const * descriptor)
             id = libf.wgpuDeviceCreateRenderPipeline(self._internal, descriptor)
             result = GPURenderPipeline(label, id, self)
-            promise = GPUPromise(
-                "create_render_pipeline_async", None, loop=self._device._loop
-            )
+            promise = GPUPromise("create_render_pipeline_async", None)
             promise._wgpu_set_input(result)
             return promise
 
@@ -2129,12 +2110,7 @@ def callback(status, result, c_message, _userdata1, _userdata2):
         def handler(id):
             return GPURenderPipeline(label, id, self)
 
-        promise = GPUPromise(
-            "create_render_pipeline",
-            handler,
-            loop=self._loop,
-            keepalive=callback,
-        )
+        promise = GPUPromise("create_render_pipeline", handler, keepalive=callback)
 
         token = self._device._poller.get_token()
 
@@ -2565,7 +2541,7 @@ def map_async(
 
         # Can we even map?
         if self._map_state != enums.BufferMapState.unmapped:
-            promise = GPUPromise("buffer.map", None, loop=self._device._loop)
+            promise = GPUPromise("buffer.map", None)
             promise._wgpu_set_error(
                 RuntimeError(
                     f"Can only map a buffer if its currently unmapped, not {self._map_state!r}"
@@ -2609,12 +2585,7 @@ def handler(_status):
             self._mapped_status = offset, offset + size, mode
             self._mapped_memoryviews = []
 
-        promise = GPUPromise(
-            "buffer.map",
-            handler,
-            loop=self._device._loop,
-            keepalive=buffer_map_callback,
-        )
+        promise = GPUPromise("buffer.map", handler, keepalive=buffer_map_callback)
 
         token = self._device._poller.get_token()
 
@@ -2883,7 +2854,7 @@ def get_compilation_info_async(self) -> GPUPromise[GPUCompilationInfo]:
         result = []
 
         # Return a resolved promise
-        promise = GPUPromise("get_compilation_info", None, loop=self._device._loop)
+        promise = GPUPromise("get_compilation_info", None)
         promise._wgpu_set_input(result)
         return promise
 
@@ -4185,10 +4156,7 @@ def handler(_value):
             return None
 
         promise = GPUPromise(
-            "on_submitted_work_done",
-            handler,
-            loop=self._device._loop,
-            keepalive=work_done_callback,
+            "on_submitted_work_done", handler, keepalive=work_done_callback
         )
 
         token = self._device._poller.get_token()

From dab454a7f1b47f809ad7d75b381452aa9681bde7 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 13:35:33 +0100
Subject: [PATCH 04/14] Update tests

---
 tests/test_async.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/test_async.py b/tests/test_async.py
index f145e3cb..6cada415 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -212,7 +212,7 @@ def poller():
 async def test_promise_async_loop_simple():
     loop = SillyLoop()
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     loop.process_events()
     result = await promise
@@ -226,7 +226,7 @@ async def test_promise_async_loop_normal():
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, loop=loop)
+    promise = GPUPromise("test", handler, _loop=loop)
 
     loop.process_events()
     result = await promise
@@ -240,7 +240,7 @@ async def test_promise_async_loop_fail2():
     def handler(input):
         return input / 0
 
-    promise = GPUPromise("test", handler, loop=loop)
+    promise = GPUPromise("test", handler, _loop=loop)
 
     loop.process_events()
     with raises(ZeroDivisionError):
@@ -272,7 +272,7 @@ def callback(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     promise.then(callback)
     loop.process_events()
@@ -291,7 +291,7 @@ def callback(r):
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, loop=loop)
+    promise = GPUPromise("test", handler, _loop=loop)
 
     promise.then(callback)
     loop.process_events()
@@ -315,7 +315,7 @@ def err_callback(e):
     def handler(input):
         return input / 0
 
-    promise = GPUPromise("test", handler, loop=loop)
+    promise = GPUPromise("test", handler, _loop=loop)
 
     promise.then(callback, err_callback)
     loop.process_events()
@@ -338,7 +338,7 @@ def callback1(r):
         nonlocal result
         result = r
 
-    promise = MyPromise("test", None, loop=loop)
+    promise = MyPromise("test", None, _loop=loop)
 
     p = promise.then(callback1)
     loop.process_events()
@@ -371,7 +371,7 @@ def callback3(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     p = promise.then(callback1).then(callback2).then(callback3)
     assert isinstance(p, GPUPromise)
@@ -400,7 +400,7 @@ def err_callback(e):
         nonlocal error
         error = e
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     p = promise.then(callback1).then(callback2).then(callback3, err_callback)
     assert isinstance(p, GPUPromise)
@@ -430,7 +430,7 @@ def err_callback(e):
         nonlocal error
         error = e
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     p = promise.then(callback1).then(callback2).then(callback3, err_callback)
     assert isinstance(p, GPUPromise)
@@ -454,7 +454,7 @@ def callback2(r):
     def callback3(r):
         results.append(r * 3)
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     promise.then(callback1)
     promise.then(callback2)
@@ -473,7 +473,7 @@ def test_promise_chaining_after_resolve():
     def callback1(r):
         results.append(r)
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     # Adding handler has no result, because promise is not yet resolved.
     promise.then(callback1)
@@ -503,16 +503,16 @@ def test_promise_chaining_with_promises():
     result = None
 
     def callback1(r):
-        return GPUPromise("test", lambda _: r * 3, loop=loop)
+        return GPUPromise("test", lambda _: r * 3, _loop=loop)
 
     def callback2(r):
-        return GPUPromise("test", lambda _: r + 2, loop=loop)
+        return GPUPromise("test", lambda _: r + 2, _loop=loop)
 
     def callback3(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, loop=loop)
+    promise = GPUPromise("test", None, _loop=loop)
 
     p = promise.then(callback1).then(callback2).then(callback3)
     assert isinstance(p, GPUPromise)
@@ -535,7 +535,7 @@ def test_promise_decorator():
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, loop=loop)
+    promise = GPUPromise("test", handler, _loop=loop)
 
     @promise
     def decorated(r):

From 3e823950a44ca72be18de20dd1cf0832e2103bf5 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 13:51:39 +0100
Subject: [PATCH 05/14] small tweak

---
 wgpu/_async.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/wgpu/_async.py b/wgpu/_async.py
index d45f0458..fa630f48 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -14,9 +14,13 @@
 
 
 class StubLoop:
-    def __init__(self, call_soon_threadsafe):
+    def __init__(self, name, call_soon_threadsafe):
+        self.name = name
         self.call_soon_threadsafe = call_soon_threadsafe
 
+    def __repr__(self):
+        return f"<StubLoop for {self.name} at {hex(id(self))}>"
+
 
 def get_running_loop():
     """Get an object with a call_soon_threadsafe() method.
@@ -35,7 +39,7 @@ def get_running_loop():
     if name == "trio":
         trio = sys.modules[name]
         token = trio.lowlevel.current_trio_token()
-        return StubLoop(token.run_sync_soon)
+        return StubLoop("trio", token.run_sync_soon)
     else:  # asyncio, rendercanvas.utils.asyncadapter, and easy to mimic for custom loops
         try:
             mod = sys.modules[name]

From f7f1ed809e923d43402801ed3ab35d0d518b30a5 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 14:34:21 +0100
Subject: [PATCH 06/14] fixes

---
 wgpu/_async.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/wgpu/_async.py b/wgpu/_async.py
index fa630f48..78a5aee9 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -44,7 +44,7 @@ def get_running_loop():
         try:
             mod = sys.modules[name]
             loop = mod.get_running_loop()
-            loop.call_soon_threadsafe  # noqa: access to make sure it exists
+            loop.call_soon_threadsafe  # noqa: B018 - access to make sure it exists
             return loop
         except Exception:
             return None
@@ -227,7 +227,10 @@ def _resolve_callback(self):
             self._async_event.set()
         # The callback may already be resolved
         if self._state.startswith("pending-"):
-            self._resolve()
+            try:
+                self._resolve()
+            except Exception:
+                pass
 
     def _resolve(self):
         """Finalize the promise, by calling the handler to get the result, and then invoking callbacks."""

From 083fbf9563812b85fcf0f45c9b9e571f7f3edefa Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 15:12:43 +0100
Subject: [PATCH 07/14] Restore await even if no loop available

---
 examples/tests/test_examples.py |  5 +++--
 wgpu/_async.py                  | 38 ++++++++++++++++-----------------
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/examples/tests/test_examples.py b/examples/tests/test_examples.py
index 8f248b24..2cffb502 100644
--- a/examples/tests/test_examples.py
+++ b/examples/tests/test_examples.py
@@ -79,7 +79,8 @@ def test_examples_screenshots(
     def unload_module():
         del sys.modules[module_name]
 
-    request.addfinalizer(unload_module)
+    if request:
+        request.addfinalizer(unload_module)
 
     if not hasattr(example, "canvas"):
         # some examples we screenshot test don't have a canvas as a global variable when imported,
@@ -188,4 +189,4 @@ def test_examples_run(module, force_offscreen):
     os.environ["RENDERCANVAS_FORCE_OFFSCREEN"] = "true"
     pytest.getoption = lambda x: False
     is_lavapipe = True
-    test_examples_screenshots("validate_volume", pytest, None, None)
+    test_examples_screenshots("cube", pytest, mock_time, None, None)
diff --git a/wgpu/_async.py b/wgpu/_async.py
index 78a5aee9..3ea2ee6e 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -72,14 +72,14 @@ def __new__(cls):
 AwaitedType = TypeVar("AwaitedType")
 
 
-# def get_backoff_time_generator() -> Generator[float, None, None]:
-#     """Generates sleep-times, start at 0 then increasing to 100Hz and sticking there."""
-#     for _ in range(5):
-#         yield 0
-#     for i in range(1, 20):
-#         yield i / 2000.0  # ramp up from 0ms to 10ms
-#     while True:
-#         yield 0.01
+def get_backoff_time_generator() -> Generator[float, None, None]:
+    """Generates sleep-times, start at 0 then increasing to 100Hz and sticking there."""
+    for _ in range(5):
+        yield 0
+    for i in range(1, 20):
+        yield i / 2000.0  # ramp up from 0ms to 10ms
+    while True:
+        yield 0.01
 
 
 class GPUPromise(Awaitable[AwaitedType], Generic[AwaitedType]):
@@ -364,18 +364,16 @@ def catch(self, callback: Callable[[Exception], None] | None):
 
     def __await__(self):
         if self._loop is None:
-            raise RuntimeError(
-                "Cannot await GPUPromise because no running loop could be detected."
-            )
-            # # An async busy loop
-            # async def awaiter():
-            #     if self._state == "pending":
-            #         # Do small incremental async naps. Other tasks and threads can run.
-            #         # Note that async sleep, with sleep_time > 0, is inaccurate on Windows.
-            #         sleep_gen = get_backoff_time_generator()
-            #         while self._state == "pending":
-            #             await async_sleep(next(sleep_gen))
-            #     return self._resolve()
+            # An async busy loop. In theory we should be able to remove this code, but it helps make the transition
+            # simpler, since then we depend less on https://github.com/pygfx/rendercanvas/pull/151
+            async def awaiter():
+                if self._state == "pending":
+                    # Do small incremental async naps. Other tasks and threads can run.
+                    # Note that async sleep, with sleep_time > 0, is inaccurate on Windows.
+                    sleep_gen = get_backoff_time_generator()
+                    while self._state == "pending":
+                        await async_sleep(next(sleep_gen))
+                return self._resolve()
 
         else:
             # Using an async Event.

From b2b037f54f90b55f1edf8b1593e194d2463f6457 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 25 Nov 2025 15:22:23 +0100
Subject: [PATCH 08/14] clearer comment

---
 wgpu/_async.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu/_async.py b/wgpu/_async.py
index 3ea2ee6e..a87fc851 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -225,7 +225,7 @@ def _resolve_callback(self):
         # Allow tasks that await this promise to continue.
         if self._async_event is not None:
             self._async_event.set()
-        # The callback may already be resolved
+        # If the value is set, let's resolve it so the handlers get called. But swallow the promise's value/failure.
         if self._state.startswith("pending-"):
             try:
                 self._resolve()

From 515bf4254db1234678b1be60958ec23a53598379 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 16 Dec 2025 10:43:30 +0100
Subject: [PATCH 09/14] Use asyncgen

---
 wgpu/_async.py | 87 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/wgpu/_async.py b/wgpu/_async.py
index a87fc851..a3d9c711 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -7,8 +7,6 @@
 import threading
 from typing import Callable, Awaitable, Generator, Generic, TypeVar
 
-import sniffio
-
 
 logger = logging.getLogger("wgpu")
 
@@ -22,49 +20,72 @@ def __repr__(self):
         return f"<StubLoop for {self.name} at {hex(id(self))}>"
 
 
-def get_running_loop():
-    """Get an object with a call_soon_threadsafe() method.
-
-    Sniffio is used for this, and it supports asyncio, trio, and rendercanvas.utils.asyncadapter.
-    If this function returns None, it means that the GPUPromise will not support ``await`` and ``.then()``.
-
-    It's relatively easy to register a custom loop to sniffio so that this code works on it.
-    """
-
-    try:
-        name = sniffio.current_async_library()
-    except sniffio.AsyncLibraryNotFoundError:
-        return None
-
-    if name == "trio":
-        trio = sys.modules[name]
-        token = trio.lowlevel.current_trio_token()
-        return StubLoop("trio", token.run_sync_soon)
-    else:  # asyncio, rendercanvas.utils.asyncadapter, and easy to mimic for custom loops
+def detect_current_async_lib():
+    """Get the lib name of the currently active async lib, or None."""
+    ob = sys.get_asyncgen_hooks()[0]
+    if ob is not None:
         try:
-            mod = sys.modules[name]
-            loop = mod.get_running_loop()
-            loop.call_soon_threadsafe  # noqa: B018 - access to make sure it exists
-            return loop
-        except Exception:
+            libname = ob.__module__.partition(".")[0]
+        except AttributeError:
             return None
+        if libname == "rendercanvas":
+            libname = "rendercanvas.utils.asyncadapter"
+        elif libname == "pyodide":
+            libname = "asyncio"
+        return libname
+
+
+def detect_current_async_loop():
+    """Get a loop object (that has call_soon_threadsafe) or None"""
+    ob = sys.get_asyncgen_hooks()[0]
+    loop = None
+    if ob is not None:
+        try:
+            loop = ob.__self__
+            _ = loop.call_soon_thread_safe
+        except AttributeError:
+            loop = None
+        if loop is None:
+            try:
+                libname = ob.__module__.partition(".")[0]
+            except AttributeError:
+                libname = None
 
-
-# The async_sleep and AsyncEvent are a copy of the implementation in rendercanvas.asyncs
+            if libname is None:
+                pass
+            elif libname == "trio":
+                trio = sys.modules[libname]
+                token = trio.lowlevel.current_trio_token()
+                loop = StubLoop("trio", token.run_sync_soon)
+            elif libname == "pyodide" or libname == "asyncio":
+                # Backup - asyncio has ob.__self__
+                mod = sys.modules["asyncio"]
+                loop = mod._get_running_loop()
+            else:
+                # Generic try, maybe we get lucky
+                try:
+                    mod = sys.modules[libname]
+                    loop = mod.get_running_loop()
+                    _ = loop.call_soon_threadsafe
+                except Exception:
+                    loop = None
+        return loop
 
 
 async def async_sleep(delay):
-    """Async sleep that uses sniffio to be compatible with asyncio, trio, rendercanvas.utils.asyncadapter, and possibly more."""
-    libname = sniffio.current_async_library()
+    """Async sleep that works with asyncio, trio, and rendercanvas' asyncadapter."""
+    # Note that we get the regular lib's sleep(), not the high-precision sleep from rendercanvas.asyncs.sleep
+    # Anyway, we can remove this once we can assume we have rendercanvas with https://github.com/pygfx/rendercanvas/pull/151
+    libname = detect_current_async_lib()
     sleep = sys.modules[libname].sleep
     await sleep(delay)
 
 
 class AsyncEvent:
-    """Generic async event object using sniffio. Works with trio, asyncio and rendercanvas-native."""
+    """Async Event object that works with asyncio, trio, and rendercanvas' asyncadapter."""
 
     def __new__(cls):
-        libname = sniffio.current_async_library()
+        libname = detect_current_async_lib()
         Event = sys.modules[libname].Event  # noqa
         return Event()
 
@@ -141,7 +162,7 @@ def __init__(
         self._UNRESOLVED.add(self)
 
         # we only care about call_soon_threadsafe, but clearer to just have a loop object
-        self._loop = _loop or get_running_loop()
+        self._loop = _loop or detect_current_async_loop()
 
     def __repr__(self):
         return f"<GPUPromise '{self._title}' {self._state} at {hex(id(self))}>"

From 3ab44094713324a15f7a2684f70493daeed0fd89 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 16 Dec 2025 11:07:42 +0100
Subject: [PATCH 10/14] Use the function directly instead of loop object

---
 tests/test_async.py | 34 ++++++++--------
 wgpu/_async.py      | 98 +++++++++++++++++++++++++--------------------
 2 files changed, 72 insertions(+), 60 deletions(-)

diff --git a/tests/test_async.py b/tests/test_async.py
index 6cada415..d589b1d4 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -27,7 +27,7 @@ def __init__(self):
         self._pending_calls = []
         self.errors = []
 
-    def call_soon_threadsafe(self, f, *args):
+    def call_soon(self, f, *args):  # and it is thread-safe
         self._pending_calls.append((f, args))
 
     def process_events(self):
@@ -212,7 +212,7 @@ def poller():
 async def test_promise_async_loop_simple():
     loop = SillyLoop()
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     loop.process_events()
     result = await promise
@@ -226,7 +226,7 @@ async def test_promise_async_loop_normal():
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, _loop=loop)
+    promise = GPUPromise("test", handler, _call_soon_threadsafe=loop.call_soon)
 
     loop.process_events()
     result = await promise
@@ -240,7 +240,7 @@ async def test_promise_async_loop_fail2():
     def handler(input):
         return input / 0
 
-    promise = GPUPromise("test", handler, _loop=loop)
+    promise = GPUPromise("test", handler, _call_soon_threadsafe=loop.call_soon)
 
     loop.process_events()
     with raises(ZeroDivisionError):
@@ -272,7 +272,7 @@ def callback(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     promise.then(callback)
     loop.process_events()
@@ -291,7 +291,7 @@ def callback(r):
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, _loop=loop)
+    promise = GPUPromise("test", handler, _call_soon_threadsafe=loop.call_soon)
 
     promise.then(callback)
     loop.process_events()
@@ -315,7 +315,7 @@ def err_callback(e):
     def handler(input):
         return input / 0
 
-    promise = GPUPromise("test", handler, _loop=loop)
+    promise = GPUPromise("test", handler, _call_soon_threadsafe=loop.call_soon)
 
     promise.then(callback, err_callback)
     loop.process_events()
@@ -338,7 +338,7 @@ def callback1(r):
         nonlocal result
         result = r
 
-    promise = MyPromise("test", None, _loop=loop)
+    promise = MyPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     p = promise.then(callback1)
     loop.process_events()
@@ -371,7 +371,7 @@ def callback3(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     p = promise.then(callback1).then(callback2).then(callback3)
     assert isinstance(p, GPUPromise)
@@ -400,7 +400,7 @@ def err_callback(e):
         nonlocal error
         error = e
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     p = promise.then(callback1).then(callback2).then(callback3, err_callback)
     assert isinstance(p, GPUPromise)
@@ -430,7 +430,7 @@ def err_callback(e):
         nonlocal error
         error = e
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     p = promise.then(callback1).then(callback2).then(callback3, err_callback)
     assert isinstance(p, GPUPromise)
@@ -454,7 +454,7 @@ def callback2(r):
     def callback3(r):
         results.append(r * 3)
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     promise.then(callback1)
     promise.then(callback2)
@@ -473,7 +473,7 @@ def test_promise_chaining_after_resolve():
     def callback1(r):
         results.append(r)
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     # Adding handler has no result, because promise is not yet resolved.
     promise.then(callback1)
@@ -503,16 +503,16 @@ def test_promise_chaining_with_promises():
     result = None
 
     def callback1(r):
-        return GPUPromise("test", lambda _: r * 3, _loop=loop)
+        return GPUPromise("test", lambda _: r * 3, _call_soon_threadsafe=loop.call_soon)
 
     def callback2(r):
-        return GPUPromise("test", lambda _: r + 2, _loop=loop)
+        return GPUPromise("test", lambda _: r + 2, _call_soon_threadsafe=loop.call_soon)
 
     def callback3(r):
         nonlocal result
         result = r
 
-    promise = GPUPromise("test", None, _loop=loop)
+    promise = GPUPromise("test", None, _call_soon_threadsafe=loop.call_soon)
 
     p = promise.then(callback1).then(callback2).then(callback3)
     assert isinstance(p, GPUPromise)
@@ -535,7 +535,7 @@ def test_promise_decorator():
     def handler(input):
         return input * 2
 
-    promise = GPUPromise("test", handler, _loop=loop)
+    promise = GPUPromise("test", handler, _call_soon_threadsafe=loop.call_soon)
 
     @promise
     def decorated(r):
diff --git a/wgpu/_async.py b/wgpu/_async.py
index a3d9c711..4e7f6be6 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -35,41 +35,47 @@ def detect_current_async_lib():
         return libname
 
 
-def detect_current_async_loop():
+def detect_current_loops_call_soon_threadsafe():
     """Get a loop object (that has call_soon_threadsafe) or None"""
+
+    # Get asyncgen hook func, return fast when no async loop active
     ob = sys.get_asyncgen_hooks()[0]
-    loop = None
-    if ob is not None:
+    if ob is None:
+        return None
+
+    # For asyncio and rendercanvas's asyncadapter, this works and is super-fast
+    try:
+        return ob.__self__.call_soon_thread_safe
+    except AttributeError:
+        pass
+
+    # Otherwise, check out the module name
+    try:
+        libname = ob.__module__.partition(".")[0]
+    except AttributeError:
+        return None
+
+    if libname == "trio":
+        # Still pretty fast for trio
+        trio = sys.modules[libname]
+        token = trio.lowlevel.current_trio_token()
+        return token.run_sync_soon
+    else:
+        # Ok, it looks like there is an async loop, try to get the func.
+        # This is also a fallback for asyncio (in case the ob.__self__ stops working)
+        if libname == "pyodide":
+            libname = "asyncio"
+        mod = sys.modules.get(libname, None)
+        if mod is None:
+            return None
         try:
-            loop = ob.__self__
-            _ = loop.call_soon_thread_safe
+            return mod.call_soon_threadsafe
         except AttributeError:
-            loop = None
-        if loop is None:
-            try:
-                libname = ob.__module__.partition(".")[0]
-            except AttributeError:
-                libname = None
-
-            if libname is None:
-                pass
-            elif libname == "trio":
-                trio = sys.modules[libname]
-                token = trio.lowlevel.current_trio_token()
-                loop = StubLoop("trio", token.run_sync_soon)
-            elif libname == "pyodide" or libname == "asyncio":
-                # Backup - asyncio has ob.__self__
-                mod = sys.modules["asyncio"]
-                loop = mod._get_running_loop()
-            else:
-                # Generic try, maybe we get lucky
-                try:
-                    mod = sys.modules[libname]
-                    loop = mod.get_running_loop()
-                    _ = loop.call_soon_threadsafe
-                except Exception:
-                    loop = None
-        return loop
+            pass
+        try:
+            return mod.get_running_loop().call_soon_threadsafe
+        except Exception:  # (RuntimeError, AttributeError) but accept any error
+            pass
 
 
 async def async_sleep(delay):
@@ -137,7 +143,7 @@ def __init__(
         handler: Callable | None,
         *,
         keepalive: object = None,
-        _loop: object = None,  # for testing and chaining
+        _call_soon_threadsafe: object = None,  # for testing and chaining
     ):
         """
         Arguments:
@@ -162,7 +168,9 @@ def __init__(
         self._UNRESOLVED.add(self)
 
         # we only care about call_soon_threadsafe, but clearer to just have a loop object
-        self._loop = _loop or detect_current_async_loop()
+        self._call_soon_threadsafe = (
+            _call_soon_threadsafe or detect_current_loops_call_soon_threadsafe()
+        )
 
     def __repr__(self):
         return f"<GPUPromise '{self._title}' {self._state} at {hex(id(self))}>"
@@ -186,7 +194,7 @@ def _set_input(self, result: object, *, resolve_now=True) -> None:
 
         # If the input is a promise, we need to wait for it, i.e. chain to self.
         if isinstance(result, GPUPromise):
-            if self._loop is None:
+            if self._call_soon_threadsafe is None:
                 self._set_error(
                     "Cannot chain GPUPromise because no running loop could be detected."
                 )
@@ -237,8 +245,8 @@ def _set_pending_resolved(self, *, resolve_now=False):
             self._resolve_callback()
             if self._async_event is not None:
                 self._async_event.set()
-        elif self._loop is not None:
-            self._loop.call_soon_threadsafe(self._resolve_callback)
+        elif self._call_soon_threadsafe is not None:
+            self._call_soon_threadsafe(self._resolve_callback)
 
     def _resolve_callback(self):
         # This should only be called in the main/reference thread.
@@ -270,11 +278,11 @@ def _resolve(self):
         if self._state.endswith("rejected"):
             error = self._value
             for cb in self._error_callbacks:
-                self._loop.call_soon_threadsafe(cb, error)
+                self._call_soon_threadsafe(cb, error)
         elif self._state.endswith("fulfilled"):
             result = self._value
             for cb in self._done_callbacks:
-                self._loop.call_soon_threadsafe(cb, result)
+                self._call_soon_threadsafe(cb, result)
         # New state
         self._state = self._state.replace("pending-", "")
         # Clean up
@@ -327,7 +335,7 @@ def then(
 
         The callback will receive one argument: the result of the promise.
         """
-        if self._loop is None:
+        if self._call_soon_threadsafe is None:
             raise RuntimeError(
                 "Cannot use GPUPromise.then() because no running loop could be detected."
             )
@@ -347,7 +355,9 @@ def then(
             title = self._title + " -> " + callback_name
 
         # Create new promise
-        new_promise = self.__class__(title, callback, _loop=self._loop)
+        new_promise = self.__class__(
+            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
+        )
         self._chain(new_promise)
 
         if error_callback is not None:
@@ -360,7 +370,7 @@ def catch(self, callback: Callable[[Exception], None] | None):
 
         The callback will receive one argument: the error object.
         """
-        if self._loop is None:
+        if self._call_soon_threadsafe is None:
             raise RuntimeError(
                 "Cannot use GPUPromise.catch() because not running loop could be detected."
             )
@@ -373,7 +383,9 @@ def catch(self, callback: Callable[[Exception], None] | None):
         title = "Catcher for " + self._title
 
         # Create new promise
-        new_promise = self.__class__(title, callback, _loop=self._loop)
+        new_promise = self.__class__(
+            title, callback, _call_soon_threadsafe=self._call_soon_threadsafe
+        )
 
         # Custom chain
         with self._lock:
@@ -384,7 +396,7 @@ def catch(self, callback: Callable[[Exception], None] | None):
         return new_promise
 
     def __await__(self):
-        if self._loop is None:
+        if self._call_soon_threadsafe is None:
             # An async busy loop. In theory we should be able to remove this code, but it helps make the transition
             # simpler, since then we depend less on https://github.com/pygfx/rendercanvas/pull/151
             async def awaiter():

From 762ae968aef5d318de9a24c4942a2b1c0948c5e1 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 16 Dec 2025 11:50:20 +0100
Subject: [PATCH 11/14] More tests

---
 pyproject.toml      |   1 -
 tests/test_async.py | 179 +++++++++++++++++++++++++++++++++++++++++++-
 wgpu/_async.py      |   2 +-
 3 files changed, 177 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3c6ec884..dfc43d3c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,6 @@ requires-python = ">= 3.10"
 dependencies = [
     "cffi>=1.15.0",
     "rubicon-objc>=0.4.1; sys_platform == 'darwin'",
-    "sniffio",
     "rendercanvas >=2.4",                            # Temporarily depend on rendercanvas because we re-aligned apis. Remove in a few months
 ]
 
diff --git a/tests/test_async.py b/tests/test_async.py
index d589b1d4..fc7b015c 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -1,14 +1,22 @@
+import sys
 import time
+import types
+import asyncio
 import threading
 
+import trio
 import anyio
-
 from pytest import mark, raises
 
+from rendercanvas.raw import RawLoop
 import wgpu.utils
 from testutils import can_use_wgpu_lib, run_tests
 from wgpu import GPUDevice, MapMode, TextureFormat
-from wgpu._async import GPUPromise as BaseGPUPromise
+from wgpu._async import (
+    GPUPromise as BaseGPUPromise,
+    detect_current_loops_call_soon_threadsafe,
+    detect_current_async_lib,
+)
 
 
 class GPUPromise(BaseGPUPromise):
@@ -79,6 +87,171 @@ def test_promise_basics():
     assert "rejected" in repr(promise)
 
 
+# %%%%% Low level
+
+
+def test_async_low_level_none():
+    flag = []
+
+    flag.append(detect_current_async_lib())
+    flag.append(detect_current_loops_call_soon_threadsafe())
+
+    assert flag[0] is None
+    assert flag[1] is None
+
+
+def test_async_low_level_rendercanvas_asyncadapter():
+    loop = RawLoop()
+
+    flag = []
+
+    async def task():
+        # Our methods
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+        # Test that the fast-path works
+        flag.append(sys.get_asyncgen_hooks()[0].__self__.call_soon_threadsafe)
+        loop.stop()
+
+    loop.add_task(task)
+    loop.run()
+
+    assert flag[0] == "rendercanvas.utils.asyncadapter"
+    assert callable(flag[1])
+    assert flag[1].__name__ == "call_soon_threadsafe"
+    assert flag[1].__func__ is flag[2].__func__
+
+
+def test_async_low_level_asyncio():
+    flag = []
+
+    async def task():
+        # Our methods
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+        # Test that the fast-path works
+        flag.append(sys.get_asyncgen_hooks()[0].__self__.call_soon_threadsafe)
+
+    asyncio.run(task())
+
+    assert flag[0] == "asyncio"
+    assert callable(flag[1])
+    assert flag[1].__name__ == "call_soon_threadsafe"
+    assert flag[1].__func__ is flag[2].__func__
+
+
+def test_async_low_level_trio():
+    flag = []
+
+    async def task():
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+
+    trio.run(task)
+
+    assert flag[0] == "trio"
+    assert callable(flag[1])
+    assert flag[1].__name__ == "run_sync_soon"
+
+
+def test_async_low_level_custom1():
+    # Simplest custom approach. Detection at module level.
+
+    mod = types.ModuleType("wgpu_async_test_module")
+    sys.modules[mod.__name__] = mod
+    code = """if True:
+
+    def call_soon_threadsafe(callbacl):
+        pass
+
+    def fake_asyncgen_hook(agen):
+        pass
+    """
+    exec(code, mod.__dict__)
+
+    flag = []
+
+    old_hooks = sys.get_asyncgen_hooks()
+    sys.set_asyncgen_hooks(mod.fake_asyncgen_hook)
+
+    try:
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+    finally:
+        sys.set_asyncgen_hooks(*old_hooks)
+
+    assert flag[0] == mod.__name__
+    assert flag[1] is mod.call_soon_threadsafe
+
+
+def test_async_low_level_custom2():
+    # Even better: call_soon_threadsafe is an attribute of the same object that the asyncgen hook is a method of.
+    # This takes the fast path!
+
+    mod = types.ModuleType("wgpu_async_test_module")
+    sys.modules[mod.__name__] = mod
+    code = """if True:
+
+    class Loop:
+        def call_soon_threadsafe(callbacl):
+            pass
+
+        def fake_asyncgen_hook(agen):
+            pass
+    loop = Loop()
+    """
+    exec(code, mod.__dict__)
+
+    flag = []
+
+    old_hooks = sys.get_asyncgen_hooks()
+    sys.set_asyncgen_hooks(mod.loop.fake_asyncgen_hook)
+
+    try:
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+    finally:
+        sys.set_asyncgen_hooks(*old_hooks)
+
+    assert flag[0] == mod.__name__
+    assert flag[1].__func__ is mod.loop.call_soon_threadsafe.__func__
+
+
+def test_async_low_level_custom3():
+    # The somewhat longer route. This is also the fallback for asyncio,
+    # in case they change something that kills the fast-path for asyncio.
+    # (the fast path being sys.get_asyncgen_hooks()[0].__self__.call_soon_threadsafe)
+
+    mod = types.ModuleType("wgpu_async_test_module")
+    sys.modules[mod.__name__] = mod
+    code = """if True:
+
+    def fake_asyncgen_hook(agen):
+        pass
+    def get_running_loop():
+        return loop
+    class Loop:
+        def call_soon_threadsafe(callbacl):
+            pass
+    loop = Loop()
+    """
+    exec(code, mod.__dict__)
+
+    flag = []
+
+    old_hooks = sys.get_asyncgen_hooks()
+    sys.set_asyncgen_hooks(mod.fake_asyncgen_hook)
+
+    try:
+        flag.append(detect_current_async_lib())
+        flag.append(detect_current_loops_call_soon_threadsafe())
+    finally:
+        sys.set_asyncgen_hooks(*old_hooks)
+
+    assert flag[0] == mod.__name__
+    assert flag[1].__func__ is mod.loop.call_soon_threadsafe.__func__
+
+
 # %%%%% Promise using sync_wait
 
 
@@ -323,7 +496,7 @@ def handler(input):
     assert isinstance(error, ZeroDivisionError)
 
 
-# %%%%% Chainging
+# %%%%% Chaining
 
 
 def test_promise_chaining_basic():
diff --git a/wgpu/_async.py b/wgpu/_async.py
index 4e7f6be6..ef405b7b 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -45,7 +45,7 @@ def detect_current_loops_call_soon_threadsafe():
 
     # For asyncio and rendercanvas's asyncadapter, this works and is super-fast
     try:
-        return ob.__self__.call_soon_thread_safe
+        return ob.__self__.call_soon_threadsafe
     except AttributeError:
         pass
 

From 9879bbdcd2ef391ef689cb2691688e1d3aa16a2c Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 16 Dec 2025 12:12:45 +0100
Subject: [PATCH 12/14] Add async support to direct example

---
 examples/gui_direct.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/examples/gui_direct.py b/examples/gui_direct.py
index 6a90aa58..b3783991 100644
--- a/examples/gui_direct.py
+++ b/examples/gui_direct.py
@@ -7,6 +7,7 @@
 
 # run_example = false
 
+import sys
 import time
 import atexit
 
@@ -36,6 +37,15 @@
 context.set_physical_size(*glfw.get_framebuffer_size(window))
 
 
+# Setup async callbacks. This is optional, but it enables code using promise.then().
+# The asyncgen hook is a stub for the system to detect the call_soon_threadsafe function.
+# This works if both are defined on the same class or in the same module.
+to_call_soon = []
+call_soon_threadsafe = to_call_soon.append
+stub_asynchen_hook = lambda agen: None
+sys.set_asyncgen_hooks(stub_asynchen_hook)
+
+
 def main():
     draw_frame = setup_drawing_sync(context)
 
@@ -50,6 +60,15 @@ def main():
         # resize handling
         context.set_physical_size(*glfw.get_framebuffer_size(window))
 
+        # Call async callbacks (optional, see above)
+        callbacks = to_call_soon.copy()
+        to_call_soon.clear()
+        for cb in callbacks:
+            try:
+                cb()
+            except Exception as err:
+                print(err)
+
         # draw a frame
         draw_frame()
         # present the frame to the screen

From 238101d6472f4b8457ff4fc1231d57a53e873f80 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 16 Dec 2025 12:31:34 +0100
Subject: [PATCH 13/14] docs and some name tweaks

---
 tests/test_async.py | 16 ++++++++--------
 wgpu/_async.py      | 23 +++++++++--------------
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/tests/test_async.py b/tests/test_async.py
index fc7b015c..efc99307 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -14,7 +14,7 @@
 from wgpu import GPUDevice, MapMode, TextureFormat
 from wgpu._async import (
     GPUPromise as BaseGPUPromise,
-    detect_current_loops_call_soon_threadsafe,
+    detect_current_call_soon_threadsafe,
     detect_current_async_lib,
 )
 
@@ -94,7 +94,7 @@ def test_async_low_level_none():
     flag = []
 
     flag.append(detect_current_async_lib())
-    flag.append(detect_current_loops_call_soon_threadsafe())
+    flag.append(detect_current_call_soon_threadsafe())
 
     assert flag[0] is None
     assert flag[1] is None
@@ -108,7 +108,7 @@ def test_async_low_level_rendercanvas_asyncadapter():
     async def task():
         # Our methods
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
         # Test that the fast-path works
         flag.append(sys.get_asyncgen_hooks()[0].__self__.call_soon_threadsafe)
         loop.stop()
@@ -128,7 +128,7 @@ def test_async_low_level_asyncio():
     async def task():
         # Our methods
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
         # Test that the fast-path works
         flag.append(sys.get_asyncgen_hooks()[0].__self__.call_soon_threadsafe)
 
@@ -145,7 +145,7 @@ def test_async_low_level_trio():
 
     async def task():
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
 
     trio.run(task)
 
@@ -176,7 +176,7 @@ def fake_asyncgen_hook(agen):
 
     try:
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
     finally:
         sys.set_asyncgen_hooks(*old_hooks)
 
@@ -209,7 +209,7 @@ def fake_asyncgen_hook(agen):
 
     try:
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
     finally:
         sys.set_asyncgen_hooks(*old_hooks)
 
@@ -244,7 +244,7 @@ def call_soon_threadsafe(callbacl):
 
     try:
         flag.append(detect_current_async_lib())
-        flag.append(detect_current_loops_call_soon_threadsafe())
+        flag.append(detect_current_call_soon_threadsafe())
     finally:
         sys.set_asyncgen_hooks(*old_hooks)
 
diff --git a/wgpu/_async.py b/wgpu/_async.py
index ef405b7b..488920e1 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -11,15 +11,6 @@
 logger = logging.getLogger("wgpu")
 
 
-class StubLoop:
-    def __init__(self, name, call_soon_threadsafe):
-        self.name = name
-        self.call_soon_threadsafe = call_soon_threadsafe
-
-    def __repr__(self):
-        return f"<StubLoop for {self.name} at {hex(id(self))}>"
-
-
 def detect_current_async_lib():
     """Get the lib name of the currently active async lib, or None."""
     ob = sys.get_asyncgen_hooks()[0]
@@ -35,15 +26,17 @@ def detect_current_async_lib():
         return libname
 
 
-def detect_current_loops_call_soon_threadsafe():
-    """Get a loop object (that has call_soon_threadsafe) or None"""
+def detect_current_call_soon_threadsafe():
+    """Get the currently applicable call_soon_threadsafe function, or None"""
 
     # Get asyncgen hook func, return fast when no async loop active
     ob = sys.get_asyncgen_hooks()[0]
     if ob is None:
         return None
 
-    # For asyncio and rendercanvas's asyncadapter, this works and is super-fast
+    # Super-fast path that works for loop objects that have call_soon_threadsafe()
+    # and use sys.set_asyncgen_hooks() on a method of the same loop object.
+    # Works with asyncio, rendercanvas' asyncadapter, and also custom (direct) loops.
     try:
         return ob.__self__.call_soon_threadsafe
     except AttributeError:
@@ -63,6 +56,8 @@ def detect_current_loops_call_soon_threadsafe():
     else:
         # Ok, it looks like there is an async loop, try to get the func.
         # This is also a fallback for asyncio (in case the ob.__self__ stops working)
+        # Note: we have a unit test for the asyncio fast-path, so we will know when we need to update,
+        # but the code below makes sure that it keeps working regardless (just a tiiiny bit slower).
         if libname == "pyodide":
             libname = "asyncio"
         mod = sys.modules.get(libname, None)
@@ -167,9 +162,9 @@ def __init__(
         self._error_callbacks = []
         self._UNRESOLVED.add(self)
 
-        # we only care about call_soon_threadsafe, but clearer to just have a loop object
+        # Set call_soon_threadsafe function, may be None, in which case we cannot do then() or await.
         self._call_soon_threadsafe = (
-            _call_soon_threadsafe or detect_current_loops_call_soon_threadsafe()
+            _call_soon_threadsafe or detect_current_call_soon_threadsafe()
         )
 
     def __repr__(self):

From 71d69d30e7e07828c273e7561def36d0f56c4def Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Wed, 17 Dec 2025 10:01:26 +0100
Subject: [PATCH 14/14] add example

---
 examples/gui_direct.py  |  19 -------
 examples/gui_direct2.py | 115 ++++++++++++++++++++++++++++++++++++++++
 wgpu/_async.py          |   2 +-
 3 files changed, 116 insertions(+), 20 deletions(-)
 create mode 100644 examples/gui_direct2.py

diff --git a/examples/gui_direct.py b/examples/gui_direct.py
index b3783991..6a90aa58 100644
--- a/examples/gui_direct.py
+++ b/examples/gui_direct.py
@@ -7,7 +7,6 @@
 
 # run_example = false
 
-import sys
 import time
 import atexit
 
@@ -37,15 +36,6 @@
 context.set_physical_size(*glfw.get_framebuffer_size(window))
 
 
-# Setup async callbacks. This is optional, but it enables code using promise.then().
-# The asyncgen hook is a stub for the system to detect the call_soon_threadsafe function.
-# This works if both are defined on the same class or in the same module.
-to_call_soon = []
-call_soon_threadsafe = to_call_soon.append
-stub_asynchen_hook = lambda agen: None
-sys.set_asyncgen_hooks(stub_asynchen_hook)
-
-
 def main():
     draw_frame = setup_drawing_sync(context)
 
@@ -60,15 +50,6 @@ def main():
         # resize handling
         context.set_physical_size(*glfw.get_framebuffer_size(window))
 
-        # Call async callbacks (optional, see above)
-        callbacks = to_call_soon.copy()
-        to_call_soon.clear()
-        for cb in callbacks:
-            try:
-                cb()
-            except Exception as err:
-                print(err)
-
         # draw a frame
         draw_frame()
         # present the frame to the screen
diff --git a/examples/gui_direct2.py b/examples/gui_direct2.py
new file mode 100644
index 00000000..fd08442c
--- /dev/null
+++ b/examples/gui_direct2.py
@@ -0,0 +1,115 @@
+"""
+Direct integration of glfw and wgpu-py without using the RenderCanvas library.
+
+Demonstration for hardcore users that need total low-level control.
+
+This version is a bit more elaborate, using a loop object, and allowing
+async code that uses `promise.then()`. For this to work, wgpu needs
+access to the running loop. In particular, it needs a
+call_soon_threadsafe() function, allowing wgpu to resolve a promise from
+its internal thread.
+"""
+
+# run_example = false
+
+import sys
+import time
+import atexit
+
+import glfw
+import wgpu
+from wgpu.utils.glfw_present_info import get_glfw_present_info
+
+# from triangle import setup_drawing_sync
+from cube import setup_drawing_sync
+
+# Setup glfw
+glfw.init()
+atexit.register(glfw.terminate)
+
+# disable automatic API selection, we are not using opengl
+glfw.window_hint(glfw.CLIENT_API, glfw.NO_API)
+glfw.window_hint(glfw.RESIZABLE, True)
+
+
+title = "wgpu glfw direct"
+window = glfw.create_window(640, 480, title, None, None)
+present_info = get_glfw_present_info(window)
+
+context = wgpu.gpu.get_canvas_context(present_info)
+
+# Initialize physical size once. For robust apps update this on resize events.
+context.set_physical_size(*glfw.get_framebuffer_size(window))
+
+
+# Setup async callbacks. This is optional, but it enables code using promise.then().
+# The asyncgen hook is a stub for the system to detect the call_soon_threadsafe function.
+# This works if both are defined on the same class or in the same module.
+to_call_soon = []
+call_soon_threadsafe = to_call_soon.append
+stub_asynchen_hook = lambda agen: None
+sys.set_asyncgen_hooks(stub_asynchen_hook)
+
+
+class Loop:
+    def __init__(self):
+        self._pending_callbacks = []
+
+    def _asynchen_hook(self, agen):
+        pass
+
+    def call_soon_threadsafe(self, callback):
+        self._pending_callbacks.append(callback)
+
+    def run(self):
+        # Set up the asyncgen hook. Our stub hook does nothing itself, but it lets
+        # wgpu promises detect this loop and its call_soon_threadsafe() method.
+        sys.set_asyncgen_hooks(self._asynchen_hook)
+
+        # Setup
+        draw_frame = setup_drawing_sync(context)
+        last_frame_time = time.perf_counter()
+        frame_count = 0
+
+        # render loop
+        while not glfw.window_should_close(window):
+            # process inputs
+            glfw.poll_events()
+
+            # resize handling
+            context.set_physical_size(*glfw.get_framebuffer_size(window))
+
+            # call async callbacks (optional, see above)
+            while self._pending_callbacks:
+                callback = self._pending_callbacks.pop(0)
+                print("Callback:", callback)
+                try:
+                    callback()
+                except Exception as err:
+                    print(err)
+
+            # draw a frame
+            draw_frame()
+            # present the frame to the screen
+            context.present()
+            # stats
+            frame_count += 1
+            etime = time.perf_counter() - last_frame_time
+            if etime > 1:
+                print(f"{frame_count / etime:0.1f} FPS")
+                last_frame_time, frame_count = time.perf_counter(), 0
+
+        # dispose resources
+        sys.set_asyncgen_hooks(None, None)
+        context.unconfigure()
+        glfw.destroy_window(window)
+
+        # allow proper cleanup (workaround for glfw bug)
+        end_time = time.perf_counter() + 0.1
+        while time.perf_counter() < end_time:
+            glfw.wait_events_timeout(end_time - time.perf_counter())
+
+
+if __name__ == "__main__":
+    loop = Loop()
+    loop.run()
diff --git a/wgpu/_async.py b/wgpu/_async.py
index 488920e1..b02e804c 100644
--- a/wgpu/_async.py
+++ b/wgpu/_async.py
@@ -54,7 +54,7 @@ def detect_current_call_soon_threadsafe():
         token = trio.lowlevel.current_trio_token()
         return token.run_sync_soon
     else:
-        # Ok, it looks like there is an async loop, try to get the func.
+        # Ok, it looks like there is an async loop that we don't know. Try harder to get the func.
         # This is also a fallback for asyncio (in case the ob.__self__ stops working)
         # Note: we have a unit test for the asyncio fast-path, so we will know when we need to update,
         # but the code below makes sure that it keeps working regardless (just a tiiiny bit slower).