Skip to content

Commit f21034e

Browse files
[SYCL] Check that some device global is uninitialized before locking (#18041)
Count the device global initializers that have not finished initializing, and take the lock only if at least one of them is not yet initialized.
1 parent daf1fa6 commit f21034e

File tree

2 files changed

+26
-6
lines changed

2 files changed

+26
-6
lines changed

sycl/source/detail/context_impl.cpp

+13-4
Original file line numberDiff line numberDiff line change
@@ -338,16 +338,23 @@ void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
338338
void context_impl::addDeviceGlobalInitializer(
339339
ur_program_handle_t Program, const std::vector<device> &Devs,
340340
const RTDeviceBinaryImage *BinImage) {
341+
if (BinImage->getDeviceGlobals().empty())
342+
return;
341343
std::lock_guard<std::mutex> Lock(MDeviceGlobalInitializersMutex);
342344
for (const device &Dev : Devs) {
343345
auto Key = std::make_pair(Program, getSyclObjImpl(Dev)->getHandleRef());
344-
MDeviceGlobalInitializers.emplace(Key, BinImage);
346+
auto [Iter, Inserted] = MDeviceGlobalInitializers.emplace(Key, BinImage);
347+
if (Inserted && !Iter->second.MDeviceGlobalsFullyInitialized)
348+
++MDeviceGlobalNotInitializedCnt;
345349
}
346350
}
347351

348352
std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
349353
ur_program_handle_t NativePrg,
350354
const std::shared_ptr<queue_impl> &QueueImpl) {
355+
if (!MDeviceGlobalNotInitializedCnt.load(std::memory_order_acquire))
356+
return {};
357+
351358
const AdapterPtr &Adapter = getAdapter();
352359
const DeviceImplPtr &DeviceImpl = QueueImpl->getDeviceImplPtr();
353360
std::lock_guard<std::mutex> NativeProgramLock(MDeviceGlobalInitializersMutex);
@@ -369,16 +376,17 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
369376
[&Adapter](const ur_event_handle_t &Event) {
370377
return get_event_info<info::event::command_execution_status>(
371378
Event, Adapter) == info::event_command_status::complete;
372-
return false;
373379
});
374380
// Release the removed events.
375381
for (auto EventIt = NewEnd; EventIt != InitEventsRef.end(); ++EventIt)
376382
Adapter->call<UrApiKind::urEventRelease>(*EventIt);
377383
// Remove them from the collection.
378384
InitEventsRef.erase(NewEnd, InitEventsRef.end());
379385
// If there are no more events, we can mark it as fully initialized.
380-
if (InitEventsRef.empty())
386+
if (InitEventsRef.empty()) {
381387
InitRef.MDeviceGlobalsFullyInitialized = true;
388+
--MDeviceGlobalNotInitializedCnt;
389+
}
382390
return InitEventsRef;
383391
} else if (InitRef.MDeviceGlobalsFullyInitialized) {
384392
// MDeviceGlobalsFullyInitialized could have been set while we were
@@ -387,7 +395,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
387395
}
388396

389397
// There were no events and it was not set as fully initialized, so this is
390-
// responsible for intializing the device globals.
398+
// responsible for initializing the device globals.
391399
auto DeviceGlobals = InitRef.MBinImage->getDeviceGlobals();
392400
std::vector<std::string> DeviceGlobalIds;
393401
DeviceGlobalIds.reserve(DeviceGlobals.size());
@@ -402,6 +410,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
402410
// globals are trivially fully initialized and we can end early.
403411
if (DeviceGlobalEntries.empty()) {
404412
InitRef.MDeviceGlobalsFullyInitialized = true;
413+
--MDeviceGlobalNotInitializedCnt;
405414
return {};
406415
}
407416

sycl/source/detail/context_impl.hpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -307,10 +307,21 @@ class context_impl {
307307
std::vector<ur_event_handle_t> MDeviceGlobalInitEvents;
308308
};
309309

310-
std::map<std::pair<ur_program_handle_t, ur_device_handle_t>,
311-
DeviceGlobalInitializer>
310+
using HandleDevicePair = std::pair<ur_program_handle_t, ur_device_handle_t>;
311+
312+
struct HandleDevicePairHash {
313+
std::size_t operator()(const HandleDevicePair &Key) const {
314+
return std::hash<ur_program_handle_t>{}(Key.first) ^
315+
std::hash<ur_device_handle_t>{}(Key.second);
316+
}
317+
};
318+
319+
std::unordered_map<HandleDevicePair, DeviceGlobalInitializer,
320+
HandleDevicePairHash>
312321
MDeviceGlobalInitializers;
313322
std::mutex MDeviceGlobalInitializersMutex;
323+
// The number of registered device global initializers (one per program/device
// pair) that have not been marked as fully initialized yet.
324+
std::atomic<size_t> MDeviceGlobalNotInitializedCnt = 0;
314325

315326
// For device_global variables that are not used in any kernel code we still
316327
// allow copy operations on them. MDeviceGlobalUnregisteredData stores the

0 commit comments

Comments
 (0)