@@ -513,22 +513,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
513
513
514
514
UR_CALL (getAsanInterceptor ()->preLaunchKernel (hKernel, hQueue, LaunchInfo));
515
515
516
- ur_event_handle_t hEvent{};
517
- ur_result_t result =
518
- pfnKernelLaunch (hQueue, hKernel, workDim, pGlobalWorkOffset,
519
- pGlobalWorkSize, LaunchInfo.LocalWorkSize .data (),
520
- numEventsInWaitList, phEventWaitList, &hEvent);
521
-
522
- if (result == UR_RESULT_SUCCESS) {
523
- UR_CALL (
524
- getAsanInterceptor ()->postLaunchKernel (hKernel, hQueue, LaunchInfo));
525
- }
516
+ UR_CALL (getContext ()->urDdiTable .Enqueue .pfnKernelLaunch (
517
+ hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
518
+ LaunchInfo.LocalWorkSize .data (), numEventsInWaitList, phEventWaitList,
519
+ phEvent));
526
520
527
- if (phEvent) {
528
- *phEvent = hEvent;
529
- }
521
+ UR_CALL (getAsanInterceptor ()->postLaunchKernel (hKernel, hQueue, LaunchInfo));
530
522
531
- return result ;
523
+ return UR_RESULT_SUCCESS ;
532
524
}
533
525
534
526
// /////////////////////////////////////////////////////////////////////////////
@@ -1410,6 +1402,57 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap(
1410
1402
return UR_RESULT_SUCCESS;
1411
1403
}
1412
1404
1405
+ ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp (
1406
+ // / [in] handle of the queue object
1407
+ ur_queue_handle_t hQueue,
1408
+ // / [in] handle of the kernel object
1409
+ ur_kernel_handle_t hKernel,
1410
+ // / [in] number of dimensions, from 1 to 3, to specify the global and
1411
+ // / work-group work-items
1412
+ uint32_t workDim,
1413
+ // / [in] pointer to an array of workDim unsigned values that specify the
1414
+ // / offset used to calculate the global ID of a work-item
1415
+ const size_t *pGlobalWorkOffset,
1416
+ // / [in] pointer to an array of workDim unsigned values that specify the
1417
+ // / number of global work-items in workDim that will execute the kernel
1418
+ // / function
1419
+ const size_t *pGlobalWorkSize,
1420
+ // / [in][optional] pointer to an array of workDim unsigned values that
1421
+ // / specify the number of local work-items forming a work-group that will
1422
+ // / execute the kernel function.
1423
+ // / If nullptr, the runtime implementation will choose the work-group size.
1424
+ const size_t *pLocalWorkSize,
1425
+ // / [in] size of the event wait list
1426
+ uint32_t numEventsInWaitList,
1427
+ // / [in][optional][range(0, numEventsInWaitList)] pointer to a list of
1428
+ // / events that must be complete before the kernel execution.
1429
+ // / If nullptr, the numEventsInWaitList must be 0, indicating that no wait
1430
+ // / event.
1431
+ const ur_event_handle_t *phEventWaitList,
1432
+ // / [out][optional][alloc] return an event object that identifies this
1433
+ // / particular kernel execution instance. If phEventWaitList and phEvent
1434
+ // / are not NULL, phEvent must not refer to an element of the
1435
+ // / phEventWaitList array.
1436
+ ur_event_handle_t *phEvent) {
1437
+
1438
+ getContext ()->logger .debug (" ==== urEnqueueCooperativeKernelLaunchExp" );
1439
+
1440
+ LaunchInfo LaunchInfo (GetContext (hQueue), GetDevice (hQueue), pGlobalWorkSize,
1441
+ pLocalWorkSize, pGlobalWorkOffset, workDim);
1442
+ UR_CALL (LaunchInfo.Data .syncToDevice (hQueue));
1443
+
1444
+ UR_CALL (getAsanInterceptor ()->preLaunchKernel (hKernel, hQueue, LaunchInfo));
1445
+
1446
+ UR_CALL (getContext ()->urDdiTable .EnqueueExp .pfnCooperativeKernelLaunchExp (
1447
+ hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
1448
+ LaunchInfo.LocalWorkSize .data (), numEventsInWaitList, phEventWaitList,
1449
+ phEvent));
1450
+
1451
+ UR_CALL (getAsanInterceptor ()->postLaunchKernel (hKernel, hQueue, LaunchInfo));
1452
+
1453
+ return UR_RESULT_SUCCESS;
1454
+ }
1455
+
1413
1456
// /////////////////////////////////////////////////////////////////////////////
1414
1457
// / @brief Intercept function for urKernelRetain
1415
1458
__urdlllocal ur_result_t UR_APICALL urKernelRetain (
@@ -1952,6 +1995,25 @@ __urdlllocal ur_result_t UR_APICALL urGetDeviceProcAddrTable(
1952
1995
return result;
1953
1996
}
1954
1997
1998
+ // /////////////////////////////////////////////////////////////////////////////
1999
+ // / @brief Exported function for filling application's EnqueueExp table
2000
+ // / with current process' addresses
2001
+ // /
2002
+ // / @returns
2003
+ // / - ::UR_RESULT_SUCCESS
2004
+ // / - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
2005
+ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable (
2006
+ // / [in,out] pointer to table of DDI function pointers
2007
+ ur_enqueue_exp_dditable_t *pDdiTable) {
2008
+ if (nullptr == pDdiTable) {
2009
+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
2010
+ }
2011
+
2012
+ pDdiTable->pfnCooperativeKernelLaunchExp =
2013
+ ur_sanitizer_layer::asan::urEnqueueCooperativeKernelLaunchExp;
2014
+ return UR_RESULT_SUCCESS;
2015
+ }
2016
+
1955
2017
template <class A , class B > struct NotSupportedApi ;
1956
2018
1957
2019
template <class MsgType , class R , class ... A>
@@ -2147,6 +2209,11 @@ ur_result_t initAsanDDITable(ur_dditable_t *dditable) {
2147
2209
UR_API_VERSION_CURRENT, &dditable->VirtualMem );
2148
2210
}
2149
2211
2212
+ if (UR_RESULT_SUCCESS == result) {
2213
+ result = ur_sanitizer_layer::asan::urGetEnqueueExpProcAddrTable (
2214
+ &dditable->EnqueueExp );
2215
+ }
2216
+
2150
2217
if (result != UR_RESULT_SUCCESS) {
2151
2218
getContext ()->logger .error (" Initialize ASAN DDI table failed: {}" , result);
2152
2219
}
0 commit comments