14
14
15
15
namespace v2 {
16
16
17
- template <size_t N>
18
- std::array<ur_command_list_manager, N> createCommandListManagers (
19
- ur_context_handle_t hContext, ur_device_handle_t hDevice, uint32_t ordinal,
20
- ze_command_queue_priority_t priority, std::optional<int32_t > index) {
21
- return createArrayOf<ur_command_list_manager, N>([&](size_t ) {
22
- return ur_command_list_manager (
23
- hContext, hDevice,
24
- hContext->getCommandListCache ().getImmediateCommandList (
25
- hDevice->ZeDevice ,
26
- {true , ordinal, true /* always enable copy offload */ },
27
- ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, priority, index));
28
- });
29
- }
30
-
31
17
ur_queue_immediate_out_of_order_t::ur_queue_immediate_out_of_order_t (
32
18
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint32_t ordinal,
33
19
ze_command_queue_priority_t priority, std::optional<int32_t > index,
34
20
event_flags_t eventFlags, ur_queue_flags_t flags)
35
21
: hContext(hContext), hDevice(hDevice),
36
22
eventPool (hContext->getEventPoolCache (PoolCacheType::Immediate)
37
23
.borrow(hDevice->Id.value(), eventFlags)),
38
- commandListManagers(createCommandListManagers<numCommandLists>(
39
- hContext, hDevice, ordinal, priority, index)),
40
- flags(flags) {
41
- for (size_t i = 0 ; i < numCommandLists; i++) {
42
- barrierEvents[i] = eventPool->allocate ();
43
- }
44
- }
24
+ commandListManager(
25
+ hContext, hDevice,
26
+ hContext->getCommandListCache ().getImmediateCommandList(
27
+ hDevice->ZeDevice,
28
+ {false , ordinal, true /* always enable copy offload */ },
29
+ ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, priority, index)),
30
+ flags(flags) {}
31
+
32
+ ur_queue_immediate_out_of_order_t ::ur_queue_immediate_out_of_order_t (
33
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
34
+ raii::command_list_unique_handle commandListHandle,
35
+ event_flags_t eventFlags, ur_queue_flags_t flags)
36
+ : hContext(hContext), hDevice(hDevice),
37
+ eventPool(hContext->getEventPoolCache (PoolCacheType::Immediate)
38
+ .borrow(hDevice->Id.value(), eventFlags)),
39
+ commandListManager(hContext, hDevice, std::move(commandListHandle)),
40
+ flags(flags) {}
45
41
46
42
ur_result_t ur_queue_immediate_out_of_order_t::queueGetInfo (
47
43
ur_queue_info_t propName, size_t propSize, void *pPropValue,
@@ -72,13 +68,7 @@ ur_result_t ur_queue_immediate_out_of_order_t::queueGetInfo(
72
68
}
73
69
};
74
70
75
- auto commandListManagersLocked = commandListManagers.lock ();
76
-
77
- bool empty = std::all_of (
78
- commandListManagersLocked->begin (), commandListManagersLocked->end (),
79
- [&](auto &cmdListManager) {
80
- return isCmdListEmpty (cmdListManager.getZeCommandList ());
81
- });
71
+ bool empty = isCmdListEmpty (commandListManager.lock ()->getZeCommandList ());
82
72
83
73
return ReturnValue (empty);
84
74
}
@@ -96,8 +86,7 @@ ur_result_t ur_queue_immediate_out_of_order_t::queueGetInfo(
96
86
ur_result_t ur_queue_immediate_out_of_order_t::queueGetNativeHandle (
97
87
ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) {
98
88
*phNativeQueue = reinterpret_cast <ur_native_handle_t >(
99
- (*commandListManagers.get_no_lock ())[getNextCommandListId ()]
100
- .getZeCommandList ());
89
+ commandListManager.get_no_lock ()->getZeCommandList ());
101
90
if (pDesc && pDesc->pNativeData ) {
102
91
// pNativeData == isImmediateQueue
103
92
*(reinterpret_cast <int32_t *>(pDesc->pNativeData )) = 1 ;
@@ -108,13 +97,15 @@ ur_result_t ur_queue_immediate_out_of_order_t::queueGetNativeHandle(
108
97
ur_result_t ur_queue_immediate_out_of_order_t::queueFinish () {
109
98
TRACK_SCOPE_LATENCY (" ur_queue_immediate_out_of_order_t::queueFinish" );
110
99
111
- auto commandListManagersLocked = commandListManagers.lock ();
100
+ auto commandListManagerLocked = commandListManager.lock ();
101
+
102
+ ZE2UR_CALL (zeCommandListHostSynchronize,
103
+ (commandListManagerLocked->getZeCommandList (), UINT64_MAX));
112
104
113
- for (size_t i = 0 ; i < numCommandLists; i++) {
114
- ZE2UR_CALL (zeCommandListHostSynchronize,
115
- (commandListManagersLocked[i].getZeCommandList (), UINT64_MAX));
116
- UR_CALL (commandListManagersLocked[i].releaseSubmittedKernels ());
105
+ for (auto &event : pendingEvents) {
106
+ event->release ();
117
107
}
108
+ pendingEvents.clear ();
118
109
119
110
hContext->getAsyncPool ()->cleanupPoolsForQueue (this );
120
111
hContext->forEachUsmPool ([this ](ur_usm_pool_handle_t hPool) {
@@ -132,10 +123,6 @@ ur_result_t ur_queue_immediate_out_of_order_t::queueFlush() {
132
123
ur_queue_immediate_out_of_order_t ::~ur_queue_immediate_out_of_order_t () {
133
124
try {
134
125
UR_CALL_THROWS (queueFinish ());
135
-
136
- for (size_t i = 0 ; i < numCommandLists; i++) {
137
- barrierEvents[i]->release ();
138
- }
139
126
} catch (...) {
140
127
// Ignore errors during destruction
141
128
}
@@ -146,43 +133,9 @@ ur_result_t ur_queue_immediate_out_of_order_t::enqueueEventsWaitWithBarrier(
146
133
ur_event_handle_t *phEvent) {
147
134
TRACK_SCOPE_LATENCY (
148
135
" ur_queue_immediate_out_of_order_t::enqueueEventsWaitWithBarrier" );
149
- // Since we use L0 in-order command lists, we don't need a real L0 barrier,
150
- // just wait for requested events in potentially different queues and add a
151
- // "barrier" event signal because it is already guaranteed that previous
152
- // commands in this queue are completed when the signal is started. However,
153
- // we do need to use barrier if profiling is enabled: see
154
- // zeCommandListAppendWaitOnEvents
155
- bool needsRealBarrier = (flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0 ;
156
- auto barrierFn = needsRealBarrier
157
- ? &ur_command_list_manager::appendEventsWaitWithBarrier
158
- : &ur_command_list_manager::appendEventsWait;
159
-
160
- auto commandListManagersLocked = commandListManagers.lock ();
161
-
162
- // Enqueue wait for the user-provider events on the first command list.
163
- UR_CALL (commandListManagersLocked[0 ].appendEventsWait (
164
- numEventsInWaitList, phEventWaitList, barrierEvents[0 ]));
165
-
166
- // Request barrierEvents[id] to be signaled on remaining command lists.
167
- for (size_t id = 1 ; id < numCommandLists; id++) {
168
- UR_CALL (commandListManagersLocked[id].appendEventsWait (0 , nullptr ,
169
- barrierEvents[id]));
170
- }
171
-
172
- // Enqueue barriers on all command lists by waiting on barrierEvents.
173
-
174
- if (phEvent) {
175
- UR_CALL (
176
- std::invoke (barrierFn, commandListManagersLocked[0 ], numCommandLists,
177
- barrierEvents.data (),
178
- createEventIfRequested (eventPool.get (), phEvent, this )));
179
- }
180
-
181
- for (size_t id = phEvent ? 1 : 0 ; id < numCommandLists; id++) {
182
- UR_CALL (std::invoke (barrierFn, commandListManagersLocked[0 ],
183
- numCommandLists, barrierEvents.data (), nullptr ));
184
- }
185
-
136
+ UR_CALL (commandListManager.lock ()->appendEventsWaitWithBarrier (
137
+ numEventsInWaitList, phEventWaitList,
138
+ createEventAndStoreIfRequested (phEvent)));
186
139
return UR_RESULT_SUCCESS;
187
140
}
188
141
0 commit comments