From 5f06c131a779f561fcac749d34b13e6022ba8baa Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 24 Apr 2025 13:56:14 +0100 Subject: [PATCH] [SYCL][Graph][OpenCL] Map copy/fill to SVM There is currently no cl_khr_command_buffer or cl_intel_unified_shared_memory entry-point for appending USM copy or fill commands to a command-buffer. This prevents these commands from being added to a graph for the OpenCL backend. The long term solution to this is the OpenCL USVM extension which will align USM and SVM, allowing the existing SVM entry-points to be used. To prepare for this, map the UR entry-points to the cl_khr_command_buffer SVM copy/fill commands. This will work on OpenCL implementations that share an implementation for USM and SVM. Signed-off-by: Ewan Crawford --- sycl/doc/design/CommandGraph.md | 18 +++-- sycl/source/detail/memory_manager.cpp | 14 +++- sycl/test-e2e/Graph/Explicit/usm_copy.cpp | 4 - sycl/test-e2e/Graph/Explicit/usm_fill.cpp | 3 - .../test-e2e/Graph/Explicit/usm_fill_host.cpp | 3 - .../Graph/Explicit/usm_fill_shared.cpp | 3 - sycl/test-e2e/Graph/Explicit/usm_memset.cpp | 3 - .../RecordReplay/barrier_multi_graph.cpp | 3 - .../RecordReplay/barrier_multi_queue.cpp | 3 - .../ext_oneapi_enqueue_functions.cpp | 3 - ...pi_enqueue_functions_submit_with_event.cpp | 3 - .../RecordReplay/transitive_queue_barrier.cpp | 3 - sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp | 3 - .../Graph/RecordReplay/usm_copy_in_order.cpp | 4 - sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp | 3 - .../Graph/RecordReplay/usm_fill_host.cpp | 3 - .../Graph/RecordReplay/usm_fill_shared.cpp | 3 - .../Graph/RecordReplay/usm_memset.cpp | 3 - .../RecordReplay/usm_memset_shortcut.cpp | 3 - .../Graph/ValidUsage/linear_graph_copy.cpp | 3 - .../ValidUsage/linear_graph_copy_D2H.cpp | 2 - .../no-unsupported-without-info.cpp | 14 +--- .../source/adapters/opencl/command_buffer.cpp | 74 +++++++++++++------ .../source/adapters/opencl/common.hpp | 17 +++++ .../adapters/opencl/extension_functions.def | 2 + .../exp_command_buffer/commands.cpp | 6 -- .../conformance/exp_command_buffer/fill.cpp | 7 +- 27 files changed, 96 insertions(+), 114 deletions(-) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 1b81e68e408a..4e5b80b593e4 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -626,8 +626,8 @@ adapter where there is matching support for each function in the list. | urCommandBufferReleaseExp | clReleaseCommandBufferKHR | Yes | | urCommandBufferFinalizeExp | clFinalizeCommandBufferKHR | Yes | | urCommandBufferAppendKernelLaunchExp | clCommandNDRangeKernelKHR | Yes | -| urCommandBufferAppendUSMMemcpyExp | | No | -| urCommandBufferAppendUSMFillExp | | No | +| urCommandBufferAppendUSMMemcpyExp | clCommandSVMMemcpyKHR | Partial, [see below](#unsupported-command-types) | +| urCommandBufferAppendUSMFillExp | clCommandSVMMemFillKHR | Partial, [see below](#unsupported-command-types) | | urCommandBufferAppendMembufferCopyExp | clCommandCopyBufferKHR | Yes | | urCommandBufferAppendMemBufferWriteExp | | No | | urCommandBufferAppendMemBufferReadExp | | No | @@ -643,8 +643,6 @@ adapter where there is matching support for each function in the list. | | clCommandCopyImageToBufferKHR | No | | | clCommandFillImageKHR | No | | | clGetCommandBufferInfoKHR | No | -| | clCommandSVMMemcpyKHR | No | -| | clCommandSVMMemFillKHR | No | | urCommandBufferUpdateKernelLaunchExp | clUpdateMutableCommandsKHR | Partial [See Update Section](#update-support) | We are looking to address these gaps in the future so that SYCL-Graph can be @@ -664,17 +662,21 @@ terminate called after throwing an instance of 'sycl::_V1::exception' what(): USM copy command not supported by graph backend ``` -The types of commands which are unsupported, and lead to this exception are: +The types of commands which are unsupported, and may lead to this exception are: * `handler::copy(src, dest)` - Where `src` is an accessor and `dest` is a pointer. This corresponds to a memory buffer read command. * `handler::copy(src, dest)` - Where `src` is an pointer and `dest` is an accessor. This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and - `dest` are USM pointers. This corresponds to a USM copy command. + `dest` are USM pointers. This corresponds to a USM copy command that is + mapped to `clCommandSVMMemcpyKHR`, which will only work on OpenCL devices + which don't differentiate between USM and SVM. * `handler::fill(ptr, pattern, count)` - This corresponds to a USM memory - fill command. + fill command that is mapped to `clCommandSVMMemFillKHR`, which will only work + on OpenCL devices which don't differentiate between USM and SVM. * `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory - fill command. + fill command that is mapped to `clCommandSVMMemFillKHR`, which will only work + on OpenCL devices which don't differentiate between USM and SVM. * `handler::prefetch()`. * `handler::mem_advise()`. diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3d8b53c454a3..d48c09a4809e 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1567,9 +1567,17 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( "NULL pointer argument in memory fill operation."); const AdapterPtr &Adapter = Context->getAdapter(); - Adapter->call( - CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), - Deps.data(), 0, nullptr, OutSyncPoint, nullptr, nullptr); + ur_result_t Result = + Adapter->call_nocheck( + CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, + Deps.size(), Deps.data(), 0, nullptr, OutSyncPoint, nullptr, nullptr); + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "USM fill command not supported by graph backend"); + } else { + Adapter->checkUrResult(Result); + } } void MemoryManager::ext_oneapi_fill_cmd_buffer( diff --git a/sycl/test-e2e/Graph/Explicit/usm_copy.cpp b/sycl/test-e2e/Graph/Explicit/usm_copy.cpp index 442659c9712c..01e16533bd04 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_copy.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_copy.cpp @@ -4,10 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// -// Intended - USM copy command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp index 0d45fa44a38c..0059161fa7f3 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp @@ -4,9 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp index 7b86c4cc4796..acadbf01e0f9 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_host_allocations -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp index e9848da8dbd1..4752b1fdbbfc 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_shared_allocations -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_memset.cpp b/sycl/test-e2e/Graph/Explicit/usm_memset.cpp index 6c15036bb0b3..00f07258ee47 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_memset.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_memset.cpp @@ -5,9 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Intended - USM memset command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_memset.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp index c21905c2ae3b..21837671866d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp @@ -4,9 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp index 99b0dc6c57fd..675ea8105447 100644 --- a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp @@ -4,9 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp index e60ee6db3c29..6c6fe20337da 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp @@ -7,9 +7,6 @@ // Tests the enqueue free function kernel shortcuts. -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL - #include "../graph_common.hpp" #include #include diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp index 3dc3090acb72..4b8294be7e98 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp @@ -8,9 +8,6 @@ // Tests the enqueue free function using USM and submit_with_event for // dependencies -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL - #include "../graph_common.hpp" #include diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue_barrier.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue_barrier.cpp index 6aa32b4c42cc..efa1ea3a9afa 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue_barrier.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue_barrier.cpp @@ -9,9 +9,6 @@ // event is passed as a dependency to a barrier operation in a different // in-order queue. -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM fill command not supported for OpenCL - #include "../graph_common.hpp" #include diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp index ddbbea1c1182..8666fb4d8f82 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp @@ -4,9 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Intended - USM copy command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp index c519a820a39b..10e4427fa877 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp @@ -4,10 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// -// Intended - USM copy command not supported for OpenCL -// UNSUPPORTED: opencl // Tests memcpy operation using device USM and an in-order queue. diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp index 2d2139991211..bdc116f11112 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp @@ -4,9 +4,6 @@ // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp index 34a43e14ac49..07acad25287a 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_host_allocations -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp index ca0f4925bf5d..7afbf67ebe78 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_shared_allocations -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp index b3963296a90d..ab848c52bc9d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp @@ -5,9 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Intended - USM memset command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_memset.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp index 486a674b3ad1..9506e99cd73a 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -3,9 +3,6 @@ // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{ %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // -// Intended - USM fill command not supported for OpenCL -// UNSUPPORTED: opencl -// // Tests adding a USM memset queue shortcut operation as a graph node. #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp b/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp index 8644addc04bf..d10fafe54bc4 100644 --- a/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp +++ b/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp @@ -12,9 +12,6 @@ // Tests that the optimization to use the L0 Copy Engine for memory commands // does not interfere with the linear graph optimization -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL - #include "../graph_common.hpp" #include diff --git a/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy_D2H.cpp b/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy_D2H.cpp index 4bfc9c40d547..d5ad10257d23 100644 --- a/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy_D2H.cpp +++ b/sycl/test-e2e/Graph/ValidUsage/linear_graph_copy_D2H.cpp @@ -13,8 +13,6 @@ // does not interfere with the linear graph optimization // // REQUIRES: aspect-usm_host_allocations -// UNSUPPORTED: opencl -// UNSUPPORTED-INTENDED: USM memcpy command not supported for OpenCL #include "../graph_common.hpp" diff --git a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp index b7a3bd1abf0e..b5240a486d4d 100644 --- a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp +++ b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp @@ -54,7 +54,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 273 +// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 261 // // List of improperly UNSUPPORTED tests. // Remove the CHECK once the test has been properly UNSUPPORTED. @@ -165,11 +165,6 @@ // CHECK-NEXT: Graph/Explicit/prefetch.cpp // CHECK-NEXT: Graph/Explicit/spec_constants_handler_api.cpp // CHECK-NEXT: Graph/Explicit/spec_constants_kernel_bundle_api.cpp -// CHECK-NEXT: Graph/Explicit/usm_copy.cpp -// CHECK-NEXT: Graph/Explicit/usm_fill.cpp -// CHECK-NEXT: Graph/Explicit/usm_fill_host.cpp -// CHECK-NEXT: Graph/Explicit/usm_fill_shared.cpp -// CHECK-NEXT: Graph/Explicit/usm_memset.cpp // CHECK-NEXT: Graph/Explicit/work_group_size_prop.cpp // CHECK-NEXT: Graph/RecordReplay/buffer_copy_host2target.cpp // CHECK-NEXT: Graph/RecordReplay/buffer_copy_host2target_2d.cpp @@ -185,13 +180,6 @@ // CHECK-NEXT: Graph/RecordReplay/prefetch.cpp // CHECK-NEXT: Graph/RecordReplay/spec_constants_handler_api.cpp // CHECK-NEXT: Graph/RecordReplay/spec_constants_kernel_bundle_api.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_copy.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_copy_in_order.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_fill.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_fill_host.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_fill_shared.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_memset.cpp -// CHECK-NEXT: Graph/RecordReplay/usm_memset_shortcut.cpp // CHECK-NEXT: Graph/RecordReplay/work_group_size_prop.cpp // CHECK-NEXT: Graph/UnsupportedDevice/device_query.cpp // CHECK-NEXT: GroupAlgorithm/SYCL2020/reduce_over_group_size.cpp diff --git a/unified-runtime/source/adapters/opencl/command_buffer.cpp b/unified-runtime/source/adapters/opencl/command_buffer.cpp index 41b0706a99c3..9c8c941ab4de 100644 --- a/unified-runtime/source/adapters/opencl/command_buffer.cpp +++ b/unified-runtime/source/adapters/opencl/command_buffer.cpp @@ -204,33 +204,59 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, - [[maybe_unused]] size_t size, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, - [[maybe_unused]] ur_event_handle_t *phEvent, - [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { + // No extension entry-point exists for USM memcpy, use SVM memcpy in + // preparation for USVM. + cl_context CLContext = hCommandBuffer->hContext->CLContext; + cl_ext::clCommandSVMMemcpyKHR_fn clCommandSVMMemcpyKHR = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, ur::cl::getAdapter()->fnCache.clCommandSVMMemcpyKHRCache, + cl_ext::CommandSVMMemcpyName, &clCommandSVMMemcpyKHR)); + + const bool IsInOrder = hCommandBuffer->IsInOrder; + cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint; + const cl_sync_point_khr *SyncPointWaitList = + IsInOrder ? nullptr : pSyncPointWaitList; + uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList; + CL_RETURN_ON_FAILURE(clCommandSVMMemcpyKHR( + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, pDst, pSrc, size, + WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] void *pMemory, [[maybe_unused]] const void *pPattern, - [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, - [[maybe_unused]] ur_event_handle_t *phEvent, - [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { + // No extension entry-point exists for USM fill, use SVM fill in preparation + // for USVM. + cl_context CLContext = hCommandBuffer->hContext->CLContext; + cl_ext::clCommandSVMMemFillKHR_fn clCommandSVMMemFillKHR = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, ur::cl::getAdapter()->fnCache.clCommandSVMMemFillKHRCache, + cl_ext::CommandSVMMemFillName, &clCommandSVMMemFillKHR)); + + const bool IsInOrder = hCommandBuffer->IsInOrder; + cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint; + const cl_sync_point_khr *SyncPointWaitList = + IsInOrder ? nullptr : pSyncPointWaitList; + uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList; + CL_RETURN_ON_FAILURE( + clCommandSVMMemFillKHR(hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + pMemory, pPattern, patternSize, size, WaitListSize, + SyncPointWaitList, RetSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( diff --git a/unified-runtime/source/adapters/opencl/common.hpp b/unified-runtime/source/adapters/opencl/common.hpp index 0be4851cc785..a42297b43b4c 100644 --- a/unified-runtime/source/adapters/opencl/common.hpp +++ b/unified-runtime/source/adapters/opencl/common.hpp @@ -201,6 +201,8 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char CommandBarrierWithWaitListName[] = "clCommandBarrierWithWaitListKHR"; +CONSTFIX char CommandSVMMemcpyName[] = "clCommandSVMMemcpyKHR"; +CONSTFIX char CommandSVMMemFillName[] = "clCommandSVMMemFillKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; @@ -296,6 +298,21 @@ using clCommandBarrierWithWaitListKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( const cl_sync_point_khr *sync_point_wait_list, cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); +using clCommandSVMMemcpyKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_command_properties_khr *properties, void *dst_ptr, + const void *src_ptr, size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandSVMMemFillKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_command_properties_khr *properties, void *svm_ptr, + const void *pattern, size_t pattern_size, size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, cl_command_buffer_khr command_buffer, diff --git a/unified-runtime/source/adapters/opencl/extension_functions.def b/unified-runtime/source/adapters/opencl/extension_functions.def index ecef132bcf42..c7b4861807d9 100644 --- a/unified-runtime/source/adapters/opencl/extension_functions.def +++ b/unified-runtime/source/adapters/opencl/extension_functions.def @@ -22,6 +22,8 @@ CL_EXTENSION_FUNC(clCommandCopyBufferKHR) CL_EXTENSION_FUNC(clCommandCopyBufferRectKHR) CL_EXTENSION_FUNC(clCommandFillBufferKHR) CL_EXTENSION_FUNC(clCommandBarrierWithWaitListKHR) +CL_EXTENSION_FUNC(clCommandSVMMemFillKHR) +CL_EXTENSION_FUNC(clCommandSVMMemcpyKHR) CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR) CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR) CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR) diff --git a/unified-runtime/test/conformance/exp_command_buffer/commands.cpp b/unified-runtime/test/conformance/exp_command_buffer/commands.cpp index 85b0ba0a7143..74db0f4fe42d 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/commands.cpp +++ b/unified-runtime/test/conformance/exp_command_buffer/commands.cpp @@ -56,18 +56,12 @@ struct urCommandBufferCommandsTest UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCommandBufferCommandsTest); TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMMemcpyExp) { - // No USM memcpy command in cl_khr_command_buffer - UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); - ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMFillExp) { - // No USM fill command in cl_khr_command_buffer - UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); - uint32_t pattern = 42; ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), diff --git a/unified-runtime/test/conformance/exp_command_buffer/fill.cpp b/unified-runtime/test/conformance/exp_command_buffer/fill.cpp index 1b22a9bba572..fd2bda518da7 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/fill.cpp +++ b/unified-runtime/test/conformance/exp_command_buffer/fill.cpp @@ -155,8 +155,11 @@ TEST_P(urCommandBufferFillCommandsTest, ExecuteTwice) { } TEST_P(urCommandBufferFillCommandsTest, USM) { - // No USM fill command in cl_khr_command_buffer - UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); + if (pattern_size > 128) { + // clCommandSVMMemFillKHR returns an error according to the spec if pattern + // size is not one of {1, 2, 4, 8, 16, 32, 64, 128} + UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); + } ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( cmd_buf_handle, device_ptr, pattern.data(), pattern_size, size, 0,