From 07453164e6f834d8344a9be5d749f30716a50128 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 3 Oct 2025 03:44:52 -0700 Subject: [PATCH 01/11] [SYCL] Fix asynchronous exception behavior This commit makes the following changes to the behavior of asynchronous exception handling: 1. The death of a queue should not consume asynchronous exceptions. 2. Calling wait_and_throw on an event after the associated queue has died should still consume exceptions that were originally associated with the queue. This should respect the async_handler priority to the best of its ability. 3. Calling wait_and_throw or throw_asynchronous on a queue without an async_handler should fall back to using the async_handler of the associated context, then the default async_handler if none were attached to the context. Additionally, this lays the ground work for https://github.com/intel/llvm/pull/20266 by moving the tracking of unconsumed asynchronous exception to the devices. Signed-off-by: Larsen, Steffen --- sycl/include/sycl/exception_list.hpp | 2 + sycl/source/detail/device_impl.hpp | 32 +++ sycl/source/detail/event_impl.cpp | 27 ++- sycl/source/detail/event_impl.hpp | 5 +- sycl/source/detail/queue_impl.hpp | 37 +-- sycl/source/detail/scheduler/commands.cpp | 28 ++- sycl/source/detail/scheduler/scheduler.cpp | 3 +- sycl/source/handler.cpp | 2 +- .../AsyncHandler/custom_async_handler.cpp | 217 ++++++++++++++++++ .../AsyncHandler/default_async_handler.cpp | 75 +++++- 10 files changed, 374 insertions(+), 54 deletions(-) create mode 100644 sycl/test-e2e/AsyncHandler/custom_async_handler.cpp diff --git a/sycl/include/sycl/exception_list.hpp b/sycl/include/sycl/exception_list.hpp index 2a0202fa020a9..4e6140d8389c4 100644 --- a/sycl/include/sycl/exception_list.hpp +++ b/sycl/include/sycl/exception_list.hpp @@ -24,6 +24,7 @@ inline namespace _V1 { // Forward declaration namespace detail { class queue_impl; +class device_impl; } /// A list of asynchronous exceptions. @@ -46,6 +47,7 @@ class __SYCL_EXPORT exception_list { private: friend class detail::queue_impl; + friend class detail::device_impl; void PushBack(const_reference Value); void PushBack(value_type &&Value); void Clear() noexcept; diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 7958f384c09f3..3984a1ae6b724 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -2262,6 +2262,31 @@ class device_impl : public std::enable_shared_from_this { return {}; } + /// Puts exception to the list of asynchronous ecxeptions. + /// + /// \param QueueWeakPtr is a weak pointer referring to the queue to report + /// the asynchronous exceptions for. + /// \param ExceptionPtr is a pointer to exception to be put. + void reportAsyncException(std::weak_ptr QueueWeakPtr, + const std::exception_ptr &ExceptionPtr) { + std::lock_guard Lock(MAsyncExceptionsMutex); + MAsyncExceptions[QueueWeakPtr].PushBack(ExceptionPtr); + } + + /// Extracts all unconsumed asynchronous exceptions for a given queue. + /// + /// \param QueueWeakPtr is a weak pointer referring to the queue to extract + /// unconsumed asynchronous exceptions for. + exception_list flushAsyncExceptions(std::weak_ptr QueueWeakPtr) { + std::lock_guard Lock(MAsyncExceptionsMutex); + auto ExceptionsEntryIt = MAsyncExceptions.find(QueueWeakPtr); + if (ExceptionsEntryIt == MAsyncExceptions.end()) + return exception_list{}; + exception_list Exceptions = std::move(ExceptionsEntryIt->second); + MAsyncExceptions.erase(ExceptionsEntryIt); + return Exceptions; + } + private: ur_device_handle_t MDevice = 0; // This is used for getAdapter so should be above other properties. @@ -2272,6 +2297,13 @@ class device_impl : public std::enable_shared_from_this { const ur_device_handle_t MRootDevice; + // Asynchronous exceptions are captured at device-level until flushed, either + // by queues, events or a synchronization on the device itself. + std::mutex MAsyncExceptionsMutex; + std::map, exception_list, + std::owner_less>> + MAsyncExceptions; + // Order of caches matters! UR must come before SYCL info descriptors (because // get_info calls get_info_impl but the opposite never happens) and both // should come before aspects. diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index b0c838cdd890c..30950824e6bf0 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -211,8 +211,9 @@ void event_impl::initHostProfilingInfo() { MHostProfilingInfo->setDevice(&Device); } -void event_impl::setSubmittedQueue(std::weak_ptr SubmittedQueue) { - MSubmittedQueue = std::move(SubmittedQueue); +void event_impl::setSubmittedQueue(queue_impl *SubmittedQueue) { + MSubmittedQueue = SubmittedQueue->weak_from_this(); + MSubmittedDevice = &SubmittedQueue->getDeviceImpl(); } #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -308,8 +309,28 @@ void event_impl::wait(bool *Success) { void event_impl::wait_and_throw() { wait(); - if (std::shared_ptr SubmittedQueue = MSubmittedQueue.lock()) + if (std::shared_ptr SubmittedQueue = MSubmittedQueue.lock()) { SubmittedQueue->throw_asynchronous(); + return; + } + + // If the queue has died, we rely on finding its exceptions through the + // device. + if (MSubmittedDevice == nullptr) + return; + + // If MSubmittedQueue has died, get flush any exceptions associated with it + // still, then user either the context async_handler or the default + // async_handler. + exception_list Exceptions = + MSubmittedDevice->flushAsyncExceptions(MSubmittedQueue); + if (Exceptions.size() == 0) + return; + + if (MContext && MContext->get_async_handler()) + MContext->get_async_handler()(std::move(Exceptions)); + else + defaultAsyncHandler(std::move(Exceptions)); } void event_impl::checkProfilingPreconditions() const { diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 0ca4aa6d49a9f..05c7b87c89c04 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -264,10 +264,10 @@ class event_impl { MWorkerQueue = std::move(WorkerQueue); }; - /// Sets original queue used for submission. + /// Sets original queue and device used for submission. /// /// @return - void setSubmittedQueue(std::weak_ptr SubmittedQueue); + void setSubmittedQueue(queue_impl *SubmittedQueue); /// Indicates if this event is not associated with any command and doesn't /// have native handle. @@ -394,6 +394,7 @@ class event_impl { std::weak_ptr MWorkerQueue; std::weak_ptr MSubmittedQueue; + device_impl *MSubmittedDevice = nullptr; /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 0f6348a3a1444..7bd5c709904bc 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -253,7 +253,6 @@ class queue_impl : public std::enable_shared_from_this { // notification and destroy the trace event for this queue. destructorNotification(); #endif - throw_asynchronous(); auto status = getAdapter().call_nocheck(MQueue); // If loader is already closed, it'll return a not-initialized status @@ -393,9 +392,6 @@ class queue_impl : public std::enable_shared_from_this { /// @param Loc is the code location of the submit call (default argument) void wait(const detail::code_location &Loc = {}); - /// \return list of asynchronous exceptions occurred during execution. - exception_list getExceptionList() const { return MExceptions; } - /// @param Loc is the code location of the submit call (default argument) void wait_and_throw(const detail::code_location &Loc = {}) { wait(Loc); @@ -408,21 +404,20 @@ class queue_impl : public std::enable_shared_from_this { /// Synchronous errors will be reported through SYCL exceptions. /// Asynchronous errors will be passed to the async_handler passed to the /// queue on construction. If no async_handler was provided then - /// asynchronous exceptions will be lost. + /// asynchronous exceptions will be passed to the default async_handler. void throw_asynchronous() { - if (!MAsyncHandler) + exception_list Exceptions = + getDeviceImpl().flushAsyncExceptions(weak_from_this()); + if (Exceptions.size() == 0) return; - exception_list Exceptions; - { - std::lock_guard Lock(MMutex); - std::swap(Exceptions, MExceptions); - } - // Unlock the mutex before calling user-provided handler to avoid - // potential deadlock if the same queue is somehow referenced in the - // handler. - if (Exceptions.size()) + if (MAsyncHandler) MAsyncHandler(std::move(Exceptions)); + else if (const async_handler &CtxAsyncHandler = + getContextImpl().get_async_handler()) + CtxAsyncHandler(std::move(Exceptions)); + else + defaultAsyncHandler(std::move(Exceptions)); } /// Creates UR properties array. @@ -570,14 +565,6 @@ class queue_impl : public std::enable_shared_from_this { event mem_advise(const void *Ptr, size_t Length, ur_usm_advice_flags_t Advice, const std::vector &DepEvents, bool CallerNeedsEvent); - /// Puts exception to the list of asynchronous ecxeptions. - /// - /// \param ExceptionPtr is a pointer to exception to be put. - void reportAsyncException(const std::exception_ptr &ExceptionPtr) { - std::lock_guard Lock(MMutex); - MExceptions.PushBack(ExceptionPtr); - } - static ThreadPool &getThreadPool() { return GlobalHandler::instance().getHostTaskThreadPool(); } @@ -979,10 +966,6 @@ class queue_impl : public std::enable_shared_from_this { /// These events are tracked, but not owned, by the queue. std::vector> MEventsWeak; - /// Events without data dependencies (such as USM) need an owner, - /// additionally, USM operations are not added to the scheduler command graph, - /// queue is the only owner on the runtime side. - exception_list MExceptions; const async_handler MAsyncHandler; const property_list MPropList; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 142d8f958f1ea..14f4cb4f416b1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -359,12 +359,14 @@ class DispatchHostTask { AdapterWithEvents.first->call(RawEvents.size(), RawEvents.data()); } catch (const sycl::exception &) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( - std::current_exception()); + auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); + QueuePtr->getDeviceImpl().reportAsyncException( + QueuePtr, std::current_exception()); return false; } catch (...) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( - std::current_exception()); + auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); + QueuePtr->getDeviceImpl().reportAsyncException( + QueuePtr, std::current_exception()); return false; } } @@ -407,7 +409,8 @@ class DispatchHostTask { make_error_code(errc::runtime), std::string("Couldn't wait for host-task's dependencies"))); - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(EPtr); + auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); + QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); @@ -469,8 +472,9 @@ class DispatchHostTask { } } #endif - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( - CurrentException); + auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); + QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, + CurrentException); } HostTask.MHostTask.reset(); @@ -487,8 +491,9 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { auto CurrentException = std::current_exception(); - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( - CurrentException); + auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); + QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, + CurrentException); } } }; @@ -563,7 +568,8 @@ Command::Command( MCommandBuffer(CommandBuffer), MSyncPointDeps(SyncPoints) { MWorkerQueue = MQueue; MEvent->setWorkerQueue(MWorkerQueue); - MEvent->setSubmittedQueue(MWorkerQueue); + if (Queue) + MEvent->setSubmittedQueue(Queue); MEvent->setCommand(this); if (MQueue) MEvent->setContextImpl(MQueue->getContextImpl()); @@ -1958,7 +1964,7 @@ ExecCGCommand::ExecCGCommand( assert(SubmitQueue && "Host task command group must have a valid submit queue"); - MEvent->setSubmittedQueue(SubmitQueue->weak_from_this()); + MEvent->setSubmittedQueue(SubmitQueue); // Initialize host profiling info if the queue has profiling enabled. if (SubmitQueue->MIsProfilingEnabled) MEvent->initHostProfilingInfo(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c6a821cdf1e0f..dedefd7a70b81 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -260,7 +260,8 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); - WorkerQueue->reportAsyncException(std::current_exception()); + WorkerQueue->getDeviceImpl().reportAsyncException( + WorkerQueue, std::current_exception()); } } EventImplPtr NewEvent = NewCmd->getEvent(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c881d4da9efb9..f379a4ffacfe4 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -956,7 +956,7 @@ event handler::finalize() { // it to the graph to create a node, rather than submit it to the scheduler. if (auto GraphImpl = Queue->getCommandGraph(); GraphImpl) { auto EventImpl = detail::event_impl::create_completed_host_event(); - EventImpl->setSubmittedQueue(Queue->weak_from_this()); + EventImpl->setSubmittedQueue(Queue); ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr; // GraphImpl is read and written in this scope so we lock this graph diff --git a/sycl/test-e2e/AsyncHandler/custom_async_handler.cpp b/sycl/test-e2e/AsyncHandler/custom_async_handler.cpp new file mode 100644 index 0000000000000..113f67a54d3db --- /dev/null +++ b/sycl/test-e2e/AsyncHandler/custom_async_handler.cpp @@ -0,0 +1,217 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +int main() { + int Failures = 0; + + // Case 1 - Event wait_and_throw with custom handler on queue. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 1 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::queue Q{sycl::default_selector_v, AHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 1"); }); + }).wait_and_throw(); + if (!Caught) { + std::cout << "Case 1 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 2 - Event wait_and_throw with custom handler on context. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 2 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::context Ctx{AHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 2"); }); + }).wait_and_throw(); + if (!Caught) { + std::cout << "Case 2 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 3 - Event wait_and_throw with custom handler on both queue and + // context. + { + bool Caught = false; + auto CtxAHandler = [&](sycl::exception_list EL) { + std::cout << "Case 3 - Unexpected handler used." << std::endl; + ++Failures; + }; + auto QAHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 3 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::context Ctx{CtxAHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v, QAHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 3"); }); + }).wait_and_throw(); + if (!Caught) { + std::cout << "Case 3 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 4 - Queue wait_and_throw with custom handler on queue. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 1 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::queue Q{sycl::default_selector_v, AHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 4"); }); + }); + Q.wait_and_throw(); + if (!Caught) { + std::cout << "Case 4 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 5 - Queue wait and throw_asynchronous with custom handler on queue. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 5 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::queue Q{sycl::default_selector_v, AHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 5"); }); + }); + Q.wait(); + Q.throw_asynchronous(); + if (!Caught) { + std::cout << "Case 5 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 6 - Queue wait_and_throw with custom handler on context. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 6 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::context Ctx{AHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 6"); }); + }); + Q.wait_and_throw(); + if (!Caught) { + std::cout << "Case 6 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 7 - Queue wait and throw_asynchronous with custom handler on context. + { + bool Caught = false; + auto AHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 7 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::context Ctx{AHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 7"); }); + }); + Q.wait(); + Q.throw_asynchronous(); + if (!Caught) { + std::cout << "Case 7 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 8 - Queue wait_and_throw with custom handler on both queue and + // context. + { + bool Caught = false; + auto CtxAHandler = [&](sycl::exception_list EL) { + std::cout << "Case 8 - Unexpected handler used." << std::endl; + ++Failures; + }; + auto QAHandler = [&](sycl::exception_list EL) { + Caught = true; + if (EL.size() != 1) { + std::cout << "Case 8 - Unexpected number of exceptions." << std::endl; + ++Failures; + } + }; + sycl::context Ctx{CtxAHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v, QAHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 8"); }); + }); + Q.wait_and_throw(); + if (!Caught) { + std::cout << "Case 8 - Async exception not caught by handler." + << std::endl; + ++Failures; + } + } + + // Case 9 - Queue dying without having consumed its asynchronous exceptions. + { + auto CtxAHandler = [&](sycl::exception_list EL) { + std::cout << "Case 9 - Unexpected context handler used." << std::endl; + ++Failures; + }; + auto QAHandler = [&](sycl::exception_list EL) { + std::cout << "Case 9 - Unexpected queue handler used." << std::endl; + ++Failures; + }; + sycl::context Ctx{CtxAHandler}; + sycl::queue Q{Ctx, sycl::default_selector_v, QAHandler}; + Q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { throw std::runtime_error("Case 9"); }); + }); + } + + return Failures; +} diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index 7e6a333e305d7..d41b9d47cc359 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -1,19 +1,76 @@ // RUN: %{build} -o %t.out -// RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// RUN: %{run} %t.out 0 | FileCheck %s --check-prefix CHECK-0 +// RUN: %{run} %t.out 1 | FileCheck %s --check-prefix CHECK-1 +// RUN: %{run} %t.out 2 | FileCheck %s --check-prefix CHECK-2 +// RUN: %{run} %t.out 3 | FileCheck %s --check-prefix CHECK-3 +#include #include using namespace sycl; -int main() { +int main(int argc, char *argv[]) { + assert(argc == 2); + int TestRun = std::stoi(argv[1]); + assert(TestRun >= 0 && TestRun <= 3); + queue Q; - Q.submit([&](handler &CGH) { - CGH.host_task([=]() { - throw std::runtime_error("Exception thrown from host_task."); - }); - }).wait_and_throw(); + if (TestRun == 0) { + Q.submit([&](handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error("Exception thrown from host_task through " + "event::wait_and_throw()."); + }); + }).wait_and_throw(); + } else if (TestRun == 1) { + Q.submit([&](handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error( + "Exception thrown from host_task through queue::wait_and_throw()."); + }); + }); + Q.wait_and_throw(); + } else if (TestRun == 2) { + Q.submit([&](handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error( + "Exception thrown from host_task through queue::wait() and " + "queue::throw_asynchronous()."); + }); + }); + Q.wait(); + Q.throw_asynchronous(); + } else if (TestRun == 3) { + auto TmpQAsyncHandler = [](exception_list) { + std::cout << "Custom queue async handler was called!" << std::endl; + }; + std::optional TmpQ = queue{default_selector_v, TmpQAsyncHandler}; + event E = TmpQ->submit([&](handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error("Exception thrown from host_task through " + "event::wait_and_throw() after queue death."); + }); + }); + // Ensure all work on the queue has finished so it isn't being kept + // artificially alive. Then kill the queue. + TmpQ->wait(); + TmpQ.reset(); + // Waiting on the event should no longer be able to reach the queue's + // handler. + E.wait_and_throw(); + } return 0; } -// CHECK: Default async_handler caught exceptions: -// CHECK-NEXT: Exception thrown from host_task. +// CHECK-0: Default async_handler caught exceptions: +// CHECK-0-NEXT: Exception thrown from host_task through event::wait_and_throw(). + +// CHECK-1: Default async_handler caught exceptions: +// CHECK-1-NEXT: Exception thrown from host_task through queue::wait_and_throw(). + +// CHECK-2: Default async_handler caught exceptions: +// CHECK-2-NEXT: Exception thrown from host_task through queue::wait() and queue::throw_asynchronous(). + +// CHECK-3: Default async_handler caught exceptions: +// CHECK-3-NEXT: Exception thrown from host_task through event::wait_and_throw() after queue death. +// CHECK-3-NOT: Custom queue async handler was called! From b1dc7359ce32a5b6887f68c2aa92c4dda62a34ab Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 3 Oct 2025 08:03:40 -0700 Subject: [PATCH 02/11] Reintroduce the pipe to file Signed-off-by: Larsen, Steffen --- sycl/test-e2e/AsyncHandler/default_async_handler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index d41b9d47cc359..fbac855b9c928 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -1,8 +1,8 @@ // RUN: %{build} -o %t.out -// RUN: %{run} %t.out 0 | FileCheck %s --check-prefix CHECK-0 -// RUN: %{run} %t.out 1 | FileCheck %s --check-prefix CHECK-1 -// RUN: %{run} %t.out 2 | FileCheck %s --check-prefix CHECK-2 -// RUN: %{run} %t.out 3 | FileCheck %s --check-prefix CHECK-3 +// RUN: %{run} %t.out 0 &> %t_0.txt ; FileCheck %s --input-file %t_0.txt --check-prefix CHECK-0 +// RUN: %{run} %t.out 1 &> %t_1.txt ; FileCheck %s --input-file %t_1.txt --check-prefix CHECK-1 +// RUN: %{run} %t.out 2 &> %t_2.txt ; FileCheck %s --input-file %t_2.txt --check-prefix CHECK-2 +// RUN: %{run} %t.out 3 &> %t_3.txt ; FileCheck %s --input-file %t_3.txt --check-prefix CHECK-3 #include #include From ab2e8d1f10d7049daa6c9ee6ed5c4d6e12753a5a Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 13 Oct 2025 00:53:10 -0700 Subject: [PATCH 03/11] Fix new failure Signed-off-by: Larsen, Steffen --- sycl/source/detail/queue_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2b18a4fb6e28f..06b6494e439d6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -514,7 +514,7 @@ EventImplPtr queue_impl::submit_command_to_graph( std::unique_ptr CommandGroup, sycl::detail::CGType CGType, sycl::ext::oneapi::experimental::node_type UserFacingNodeType) { auto EventImpl = detail::event_impl::create_completed_host_event(); - EventImpl->setSubmittedQueue(weak_from_this()); + EventImpl->setSubmittedQueue(this); ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr; // GraphImpl is read and written in this scope so we lock this graph From f8237d4d3f25af53b85f642753cdcd54ef02da43 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 14 Oct 2025 04:54:51 -0700 Subject: [PATCH 04/11] Fix comment Signed-off-by: Larsen, Steffen --- sycl/source/detail/queue_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 9b5c445785084..247e875aaab1a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -422,7 +422,8 @@ class queue_impl : public std::enable_shared_from_this { /// Synchronous errors will be reported through SYCL exceptions. /// Asynchronous errors will be passed to the async_handler passed to the /// queue on construction. If no async_handler was provided then - /// asynchronous exceptions will be passed to the default async_handler. + /// asynchronous exceptions will be passed to the async_handler associated + /// with the context if present, or the default async_handler otherwise. void throw_asynchronous() { exception_list Exceptions = getDeviceImpl().flushAsyncExceptions(weak_from_this()); From 8813acec38a4b5b10484937335dc367e2d959225 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 14 Oct 2025 06:51:55 -0700 Subject: [PATCH 05/11] Remove unrealiable test case This commit removes a test case where the default handler would be used if the queue dies. This test case was unreliable as there was no way to ensure that the runtime was not keeping the queue alive for internal reasons. Signed-off-by: Larsen, Steffen --- .../AsyncHandler/default_async_handler.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index fbac855b9c928..da659eabd681e 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -40,24 +40,6 @@ int main(int argc, char *argv[]) { }); Q.wait(); Q.throw_asynchronous(); - } else if (TestRun == 3) { - auto TmpQAsyncHandler = [](exception_list) { - std::cout << "Custom queue async handler was called!" << std::endl; - }; - std::optional TmpQ = queue{default_selector_v, TmpQAsyncHandler}; - event E = TmpQ->submit([&](handler &CGH) { - CGH.host_task([=]() { - throw std::runtime_error("Exception thrown from host_task through " - "event::wait_and_throw() after queue death."); - }); - }); - // Ensure all work on the queue has finished so it isn't being kept - // artificially alive. Then kill the queue. - TmpQ->wait(); - TmpQ.reset(); - // Waiting on the event should no longer be able to reach the queue's - // handler. - E.wait_and_throw(); } return 0; } From e76da364ed0a2a4addbcdc04ee6cadbfd082fdbd Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 16 Oct 2025 03:56:57 -0700 Subject: [PATCH 06/11] Fix removed test case run Signed-off-by: Larsen, Steffen --- sycl/test-e2e/AsyncHandler/default_async_handler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index da659eabd681e..de713859e31ff 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -2,7 +2,6 @@ // RUN: %{run} %t.out 0 &> %t_0.txt ; FileCheck %s --input-file %t_0.txt --check-prefix CHECK-0 // RUN: %{run} %t.out 1 &> %t_1.txt ; FileCheck %s --input-file %t_1.txt --check-prefix CHECK-1 // RUN: %{run} %t.out 2 &> %t_2.txt ; FileCheck %s --input-file %t_2.txt --check-prefix CHECK-2 -// RUN: %{run} %t.out 3 &> %t_3.txt ; FileCheck %s --input-file %t_3.txt --check-prefix CHECK-3 #include #include From 5e43acc923cfec1e3dfe58613c02d08edbce7d54 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 28 Oct 2025 01:05:10 -0700 Subject: [PATCH 07/11] Switch to global async flushing Signed-off-by: Larsen, Steffen --- sycl/include/sycl/exception_list.hpp | 6 ++-- sycl/source/detail/device_impl.hpp | 32 ---------------------- sycl/source/detail/event_impl.cpp | 24 +--------------- sycl/source/detail/queue_impl.hpp | 22 +++++---------- sycl/source/detail/scheduler/commands.cpp | 19 ++++++------- sycl/source/detail/scheduler/scheduler.cpp | 28 +++++++++++++++++-- sycl/source/detail/scheduler/scheduler.hpp | 22 +++++++++++++++ 7 files changed, 67 insertions(+), 86 deletions(-) diff --git a/sycl/include/sycl/exception_list.hpp b/sycl/include/sycl/exception_list.hpp index 4e6140d8389c4..882a09587d72e 100644 --- a/sycl/include/sycl/exception_list.hpp +++ b/sycl/include/sycl/exception_list.hpp @@ -23,8 +23,7 @@ inline namespace _V1 { // Forward declaration namespace detail { -class queue_impl; -class device_impl; +class Scheduler; } /// A list of asynchronous exceptions. @@ -46,8 +45,7 @@ class __SYCL_EXPORT exception_list { iterator end() const; private: - friend class detail::queue_impl; - friend class detail::device_impl; + friend class detail::Scheduler; void PushBack(const_reference Value); void PushBack(value_type &&Value); void Clear() noexcept; diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index c90650c819d59..13c5f5c16ce3c 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -2267,31 +2267,6 @@ class device_impl : public std::enable_shared_from_this { return {}; } - /// Puts exception to the list of asynchronous ecxeptions. - /// - /// \param QueueWeakPtr is a weak pointer referring to the queue to report - /// the asynchronous exceptions for. - /// \param ExceptionPtr is a pointer to exception to be put. - void reportAsyncException(std::weak_ptr QueueWeakPtr, - const std::exception_ptr &ExceptionPtr) { - std::lock_guard Lock(MAsyncExceptionsMutex); - MAsyncExceptions[QueueWeakPtr].PushBack(ExceptionPtr); - } - - /// Extracts all unconsumed asynchronous exceptions for a given queue. - /// - /// \param QueueWeakPtr is a weak pointer referring to the queue to extract - /// unconsumed asynchronous exceptions for. - exception_list flushAsyncExceptions(std::weak_ptr QueueWeakPtr) { - std::lock_guard Lock(MAsyncExceptionsMutex); - auto ExceptionsEntryIt = MAsyncExceptions.find(QueueWeakPtr); - if (ExceptionsEntryIt == MAsyncExceptions.end()) - return exception_list{}; - exception_list Exceptions = std::move(ExceptionsEntryIt->second); - MAsyncExceptions.erase(ExceptionsEntryIt); - return Exceptions; - } - private: ur_device_handle_t MDevice = 0; // This is used for getAdapter so should be above other properties. @@ -2302,13 +2277,6 @@ class device_impl : public std::enable_shared_from_this { const ur_device_handle_t MRootDevice; - // Asynchronous exceptions are captured at device-level until flushed, either - // by queues, events or a synchronization on the device itself. - std::mutex MAsyncExceptionsMutex; - std::map, exception_list, - std::owner_less>> - MAsyncExceptions; - // Order of caches matters! UR must come before SYCL info descriptors (because // get_info calls get_info_impl but the opposite never happens) and both // should come before aspects. diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 30950824e6bf0..9166afac4186c 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -308,29 +308,7 @@ void event_impl::wait(bool *Success) { void event_impl::wait_and_throw() { wait(); - - if (std::shared_ptr SubmittedQueue = MSubmittedQueue.lock()) { - SubmittedQueue->throw_asynchronous(); - return; - } - - // If the queue has died, we rely on finding its exceptions through the - // device. - if (MSubmittedDevice == nullptr) - return; - - // If MSubmittedQueue has died, get flush any exceptions associated with it - // still, then user either the context async_handler or the default - // async_handler. - exception_list Exceptions = - MSubmittedDevice->flushAsyncExceptions(MSubmittedQueue); - if (Exceptions.size() == 0) - return; - - if (MContext && MContext->get_async_handler()) - MContext->get_async_handler()(std::move(Exceptions)); - else - defaultAsyncHandler(std::move(Exceptions)); + Scheduler::getInstance().flushAsyncExceptions(); } void event_impl::checkProfilingPreconditions() const { diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 247e875aaab1a..73b32a1cb667a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -416,27 +416,14 @@ class queue_impl : public std::enable_shared_from_this { throw_asynchronous(); } - /// Performs a blocking wait for the completion of all enqueued tasks in the - /// queue. - /// + /// Synchronous errors will be reported through SYCL exceptions. /// Asynchronous errors will be passed to the async_handler passed to the /// queue on construction. If no async_handler was provided then /// asynchronous exceptions will be passed to the async_handler associated /// with the context if present, or the default async_handler otherwise. void throw_asynchronous() { - exception_list Exceptions = - getDeviceImpl().flushAsyncExceptions(weak_from_this()); - if (Exceptions.size() == 0) - return; - - if (MAsyncHandler) - MAsyncHandler(std::move(Exceptions)); - else if (const async_handler &CtxAsyncHandler = - getContextImpl().get_async_handler()) - CtxAsyncHandler(std::move(Exceptions)); - else - defaultAsyncHandler(std::move(Exceptions)); + Scheduler::getInstance().flushAsyncExceptions(); } /// Creates UR properties array. @@ -704,6 +691,11 @@ class queue_impl : public std::enable_shared_from_this { } #endif + /// Returns the async_handler associated with the queue. + const async_handler &getAsynchHandler() const noexcept { + return MAsyncHandler; + } + protected: template EventImplPtr insertHelperBarrier(const HandlerType &Handler) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8de3a1784513d..a2b01d5615fb2 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -360,13 +360,13 @@ class DispatchHostTask { RawEvents.data()); } catch (const sycl::exception &) { auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); - QueuePtr->getDeviceImpl().reportAsyncException( - QueuePtr, std::current_exception()); + Scheduler::getInstance().reportAsyncException(QueuePtr, + std::current_exception()); return false; } catch (...) { auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); - QueuePtr->getDeviceImpl().reportAsyncException( - QueuePtr, std::current_exception()); + Scheduler::getInstance().reportAsyncException(QueuePtr, + std::current_exception()); return false; } } @@ -410,10 +410,11 @@ class DispatchHostTask { std::string("Couldn't wait for host-task's dependencies"))); auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); - QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, EPtr); + auto &SchedulerInst = Scheduler::getInstance(); + SchedulerInst.reportAsyncException(QueuePtr, EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); - Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); + SchedulerInst.NotifyHostTaskCompletion(MThisCmd); return; } @@ -473,8 +474,7 @@ class DispatchHostTask { } #endif auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); - QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, - CurrentException); + Scheduler::getInstance().reportAsyncException(QueuePtr, CurrentException); } HostTask.MHostTask.reset(); @@ -492,8 +492,7 @@ class DispatchHostTask { } catch (...) { auto CurrentException = std::current_exception(); auto QueuePtr = MThisCmd->MEvent->getSubmittedQueue(); - QueuePtr->getDeviceImpl().reportAsyncException(QueuePtr, - CurrentException); + Scheduler::getInstance().reportAsyncException(QueuePtr, CurrentException); } } }; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index dedefd7a70b81..78db72012443b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -260,8 +260,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); - WorkerQueue->getDeviceImpl().reportAsyncException( - WorkerQueue, std::current_exception()); + reportAsyncException(WorkerQueue, std::current_exception()); } } EventImplPtr NewEvent = NewCmd->getEvent(); @@ -707,6 +706,31 @@ bool Scheduler::areEventsSafeForSchedulerBypass(events_range DepEvents, return Event.getHandle() != nullptr; }); } + +void Scheduler::flushAsyncExceptions() { + decltype(MAsyncExceptions) AsyncExceptions; + { + std::lock_guard Lock(MAsyncExceptionsMutex); + std::swap(AsyncExceptions, MAsyncExceptions); + } + for (auto &ExceptionsEntryIt : AsyncExceptions) { + exception_list Exceptions = std::move(ExceptionsEntryIt.second); + + if (Exceptions.size() == 0) + continue; + + std::shared_ptr Queue = ExceptionsEntryIt.first.lock(); + if (Queue && Queue->getAsynchHandler()) { + Queue->getAsynchHandler()(std::move(Exceptions)); + } else if (Queue && Queue->getContextImpl().get_async_handler()) { + Queue->getContextImpl().get_async_handler()(std::move(Exceptions)); + } else { + // If the queue is dead, use the default handler. + defaultAsyncHandler(std::move(Exceptions)); + } + } +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index eb843a11b84e6..1005ceb606312 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -485,6 +485,21 @@ class Scheduler { static bool areEventsSafeForSchedulerBypass(events_range DepEvents, context_impl &Context); + /// Puts exception to the list of asynchronous ecxeptions. + /// + /// \param QueueWeakPtr is a weak pointer referring to the queue to report + /// the asynchronous exceptions for. + /// \param ExceptionPtr is a pointer to exception to be put. + void reportAsyncException(std::weak_ptr QueueWeakPtr, + const std::exception_ptr &ExceptionPtr) { + std::lock_guard Lock(MAsyncExceptionsMutex); + MAsyncExceptions[QueueWeakPtr].PushBack(ExceptionPtr); + } + + /// Reports all unconsumed asynchronous exceptions to either the queue's + /// async_handler, the context's async_handler or the default async_handler. + void flushAsyncExceptions(); + protected: using RWLockT = std::shared_timed_mutex; using ReadLockT = std::shared_lock; @@ -872,6 +887,13 @@ class Scheduler { MAuxiliaryResources; std::mutex MAuxiliaryResourcesMutex; + // Asynchronous exceptions are captured at device-level until flushed, either + // by queues, events or a synchronization on the device itself. + std::mutex MAsyncExceptionsMutex; + std::map, exception_list, + std::owner_less>> + MAsyncExceptions; + friend class Command; friend class DispatchHostTask; friend class queue_impl; From c395f87e9d883f76f30cae7c94d82831e36fec7e Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 28 Oct 2025 03:50:05 -0700 Subject: [PATCH 08/11] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/queue_impl.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 73b32a1cb667a..db7fd470a718d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -416,15 +416,12 @@ class queue_impl : public std::enable_shared_from_this { throw_asynchronous(); } - /// Synchronous errors will be reported through SYCL exceptions. /// Asynchronous errors will be passed to the async_handler passed to the /// queue on construction. If no async_handler was provided then /// asynchronous exceptions will be passed to the async_handler associated /// with the context if present, or the default async_handler otherwise. - void throw_asynchronous() { - Scheduler::getInstance().flushAsyncExceptions(); - } + void throw_asynchronous() { Scheduler::getInstance().flushAsyncExceptions(); } /// Creates UR properties array. /// From 4a24036d8f2c6247360f568196a60973746337f0 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 28 Oct 2025 04:21:26 -0700 Subject: [PATCH 09/11] Fix unittest failures Signed-off-by: Larsen, Steffen --- sycl/unittests/xpti_trace/QueueApiFailures.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 1e88143774e21..d419bb4eb3802 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -378,7 +378,7 @@ TEST_F(QueueApiFailures, QueueHostTaskWaitFail) { ExceptionCaught = true; } EXPECT_FALSE(ExceptionCaught); - Q.wait(); + Q.wait_and_throw(); uint16_t TraceType = 0; std::string Message; @@ -422,7 +422,7 @@ TEST_F(QueueApiFailures, QueueHostTaskFail) { ExceptionCaught = true; } EXPECT_FALSE(ExceptionCaught); - Q.wait(); + Q.wait_and_throw(); uint16_t TraceType = 0; std::string Message; @@ -499,7 +499,7 @@ TEST_F(QueueApiFailures, QueueKernelAsync) { } try { - Q.wait(); + Q.wait_and_throw(); } catch (...) { // kernel enqueue leads to exception throw } From 0e37140bfaec8c4f728c81ed40615641183ef30e Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Sun, 2 Nov 2025 23:18:28 -0800 Subject: [PATCH 10/11] Remove leftovers Signed-off-by: Larsen, Steffen --- sycl/source/detail/event_impl.cpp | 1 - sycl/source/detail/event_impl.hpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 9166afac4186c..2e0363a661f33 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -213,7 +213,6 @@ void event_impl::initHostProfilingInfo() { void event_impl::setSubmittedQueue(queue_impl *SubmittedQueue) { MSubmittedQueue = SubmittedQueue->weak_from_this(); - MSubmittedDevice = &SubmittedQueue->getDeviceImpl(); } #ifdef XPTI_ENABLE_INSTRUMENTATION diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 05c7b87c89c04..53727aa0505ba 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -264,7 +264,7 @@ class event_impl { MWorkerQueue = std::move(WorkerQueue); }; - /// Sets original queue and device used for submission. + /// Sets original queue used for submission. /// /// @return void setSubmittedQueue(queue_impl *SubmittedQueue); @@ -394,7 +394,6 @@ class event_impl { std::weak_ptr MWorkerQueue; std::weak_ptr MSubmittedQueue; - device_impl *MSubmittedDevice = nullptr; /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; From 72136b63cb8b3d2a9cb14113ef4b057fa910d8d4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Sun, 2 Nov 2025 23:55:55 -0800 Subject: [PATCH 11/11] Add context to the async exception key Signed-off-by: Larsen, Steffen --- sycl/source/detail/queue_impl.hpp | 2 ++ sycl/source/detail/scheduler/scheduler.cpp | 20 ++++++++++++++--- sycl/source/detail/scheduler/scheduler.hpp | 25 ++++++++++++++-------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index db7fd470a718d..ae3975203e754 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -295,6 +295,8 @@ class queue_impl : public std::enable_shared_from_this { context_impl &getContextImpl() const { return *MContext; } + std::weak_ptr getContextImplWeakPtr() const { return MContext; } + device_impl &getDeviceImpl() const { return MDevice; } /// \return an associated SYCL device. diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 78db72012443b..58c815a828cb2 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -707,6 +707,15 @@ bool Scheduler::areEventsSafeForSchedulerBypass(events_range DepEvents, }); } +void Scheduler::reportAsyncException( + const std::shared_ptr &QueuePtr, + const std::exception_ptr &ExceptionPtr) { + std::lock_guard Lock(MAsyncExceptionsMutex); + MAsyncExceptions[AsyncExceptionKey{QueuePtr, + QueuePtr->getContextImplWeakPtr()}] + .PushBack(ExceptionPtr); +} + void Scheduler::flushAsyncExceptions() { decltype(MAsyncExceptions) AsyncExceptions; { @@ -719,11 +728,16 @@ void Scheduler::flushAsyncExceptions() { if (Exceptions.size() == 0) continue; - std::shared_ptr Queue = ExceptionsEntryIt.first.lock(); + std::shared_ptr Queue = ExceptionsEntryIt.first.first.lock(); if (Queue && Queue->getAsynchHandler()) { Queue->getAsynchHandler()(std::move(Exceptions)); - } else if (Queue && Queue->getContextImpl().get_async_handler()) { - Queue->getContextImpl().get_async_handler()(std::move(Exceptions)); + continue; + } + + std::shared_ptr Context = + ExceptionsEntryIt.first.second.lock(); + if (Context && Context->get_async_handler()) { + Context->get_async_handler()(std::move(Exceptions)); } else { // If the queue is dead, use the default handler. defaultAsyncHandler(std::move(Exceptions)); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 1005ceb606312..1ca796ee20efb 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -487,14 +487,11 @@ class Scheduler { /// Puts exception to the list of asynchronous ecxeptions. /// - /// \param QueueWeakPtr is a weak pointer referring to the queue to report - /// the asynchronous exceptions for. + /// \param QueuePtr is a pointer referring to the queue to report the + /// asynchronous exceptions for. /// \param ExceptionPtr is a pointer to exception to be put. - void reportAsyncException(std::weak_ptr QueueWeakPtr, - const std::exception_ptr &ExceptionPtr) { - std::lock_guard Lock(MAsyncExceptionsMutex); - MAsyncExceptions[QueueWeakPtr].PushBack(ExceptionPtr); - } + void reportAsyncException(const std::shared_ptr &QueuePtr, + const std::exception_ptr &ExceptionPtr); /// Reports all unconsumed asynchronous exceptions to either the queue's /// async_handler, the context's async_handler or the default async_handler. @@ -890,8 +887,18 @@ class Scheduler { // Asynchronous exceptions are captured at device-level until flushed, either // by queues, events or a synchronization on the device itself. std::mutex MAsyncExceptionsMutex; - std::map, exception_list, - std::owner_less>> + using AsyncExceptionKey = + std::pair, std::weak_ptr>; + struct AsyncExceptionKeyOwnerLess { + bool operator()(const AsyncExceptionKey &LHS, + const AsyncExceptionKey &RHS) const noexcept { + return std::owner_less>{}(LHS.first, + RHS.first) || + std::owner_less>{}(LHS.second, + RHS.second); + } + }; + std::map MAsyncExceptions; friend class Command;