Skip to content

Commit 07649c3

Browse files
authored
[UR][Graph] in-order USM Memcpy regression CTS test (#18241)
Create a UR CTS test based on the E2E test [Graph/RecordReplay/usm_copy_in_order.cpp](https://github.com/intel/llvm/blob/sycl/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp) to help debug #18169. Disabled for the Level-Zero v1 adapter (passes on v2) as the [test fails with](https://github.com/intel/llvm/actions/runs/14733668954/job/41355110585?pr=18241): ``` [ RUN ] urCommandBufferUSMCopyInOrderTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero__Intel_R__Data_Center_GPU_Max_1100_ID0ID____________________ /home/test-user/actions-runner/sycl-ur-01/_work/llvm/llvm/unified-runtime/test/conformance/exp_command_buffer/regression/usm_copy.cpp:140: Failure Expected equality of these values: result3 Which is: 3656 output[i] Which is: 170 ``` This fails both with and without driver in-order command-lists in use, indicating a separate problem in the command-buffer L0 v1 adapter code that needs to be investigated and debugged before this test can be re-enabled.
1 parent f3b040c commit 07649c3

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed

unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_conformance_test_with_kernels_environment(exp_command_buffer
2121
update/event_sync.cpp
2222
update/kernel_event_sync.cpp
2323
update/local_memory_update.cpp
24+
regression/usm_copy.cpp
2425
)
2526

2627
add_subdirectory(native-command)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "../fixtures.h"
8+
9+
// UR reproducer for SYCL-Graph E2E test "RecordReplay/usm_copy_in_order.cpp"
10+
// Note that the kernel code is different, in that this test uses the
11+
// saxpy_usm kernel, but the sequence of operations is the same.
12+
struct urCommandBufferUSMCopyInOrderTest
13+
: uur::command_buffer::urCommandBufferExpExecutionTest {
14+
virtual void SetUp() override {
15+
program_name = "saxpy_usm";
16+
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp());
17+
18+
// See URLZA-521
19+
UUR_KNOWN_FAILURE_ON(uur::LevelZero{});
20+
21+
// Create in-order command-buffer
22+
ur_exp_command_buffer_desc_t desc{
23+
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
24+
nullptr, // pNext
25+
false, // isUpdatable
26+
true, // isInOrder
27+
false // enableProfiling
28+
};
29+
ASSERT_SUCCESS(
30+
urCommandBufferCreateExp(context, device, &desc, &in_order_cmd_buf));
31+
ASSERT_NE(in_order_cmd_buf, nullptr);
32+
33+
// Create 4 device USM allocations and initialize elements to list index
34+
for (unsigned i = 0; i < device_ptrs.size(); i++) {
35+
auto &device_ptr = device_ptrs[i];
36+
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
37+
allocation_size, &device_ptr));
38+
ASSERT_NE(device_ptr, nullptr);
39+
40+
uint32_t pattern = i;
41+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptr, sizeof(pattern),
42+
&pattern, allocation_size, 0, nullptr,
43+
nullptr));
44+
}
45+
ASSERT_SUCCESS(urQueueFinish(queue));
46+
47+
// Index 0 is output
48+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[0]));
49+
// Index 1 is A
50+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A));
51+
// Index 2 is X
52+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 2, nullptr, device_ptrs[1]));
53+
// Index 3 is Y
54+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[2]));
55+
}
56+
57+
virtual void TearDown() override {
58+
for (auto &device_ptr : device_ptrs) {
59+
if (device_ptr) {
60+
EXPECT_SUCCESS(urUSMFree(context, device_ptr));
61+
}
62+
}
63+
if (in_order_cmd_buf) {
64+
EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cmd_buf));
65+
}
66+
67+
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::TearDown());
68+
}
69+
70+
ur_exp_command_buffer_handle_t in_order_cmd_buf = nullptr;
71+
static constexpr size_t global_size = 10;
72+
static constexpr size_t global_offset = 0;
73+
static constexpr size_t n_dimensions = 1;
74+
static constexpr size_t allocation_size = sizeof(uint32_t) * global_size;
75+
static constexpr uint32_t A = 42;
76+
std::array<void *, 4> device_ptrs = {nullptr, nullptr, nullptr, nullptr};
77+
};
78+
79+
// Register the fixture's parameterized tests with the conformance harness.
// NOTE(review): presumably instantiates the suite once per device under test;
// confirm against the UUR_INSTANTIATE_DEVICE_TEST_SUITE definition.
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCommandBufferUSMCopyInOrderTest);
80+
// Runs one eager SAXPY launch followed by an in-order command-buffer made of
// kernel -> memcpy -> kernel -> memcpy -> memcpy-to-host, then checks the
// values copied back to the host. D[i] below denotes device_ptrs[i], whose
// elements the fixture initialized to i.
TEST_P(urCommandBufferUSMCopyInOrderTest, Success) {
  // Step 0 (eager, not waited on): launch the kernel directly on the queue
  // using the arguments bound by the fixture.
  //   D[0] = A * D[1] + D[2] = 42 * 1 + 2 = 44
  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                       &global_offset, &global_size, nullptr, 0,
                                       nullptr, nullptr));

  // Sync point returned by each appended command; the later commands pass it
  // back in as their wait list so the dependencies form a linear chain.
  ur_exp_command_buffer_sync_point_t last_sync_point;

  // Step 1 (command-buffer): SAXPY kernel node with Y rebound to D[0] and
  // the output rebound to D[3].
  //   D[3] = A * D[1] + D[0] = 42 * 1 + 44 = 86
  ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[0]));
  ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[3]));
  ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
      in_order_cmd_buf, kernel, n_dimensions, &global_offset, &global_size,
      nullptr, 0, nullptr, 0, nullptr, 0, nullptr, &last_sync_point, nullptr,
      nullptr));

  // Step 2 (command-buffer): device-to-device copy of the previous output
  // into the X operand.
  //   D[1] = D[3] = 86
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, device_ptrs[1], device_ptrs[3], allocation_size, 0,
      nullptr, 0, nullptr, &last_sync_point, nullptr, nullptr));

  // Step 3 (command-buffer): second SAXPY kernel node, same argument
  // bindings as step 1, waiting on the sync point from step 2.
  //   D[3] = A * D[1] + D[0] = 42 * 86 + 44 = 3656
  ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
      in_order_cmd_buf, kernel, n_dimensions, &global_offset, &global_size,
      nullptr, 0, nullptr, 1, &last_sync_point, 0, nullptr, &last_sync_point,
      nullptr, nullptr));

  // Step 4 (command-buffer): device-to-device copy of the kernel output into
  // the allocation no longer used as a kernel operand.
  //   D[2] = D[3] = 3656
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, device_ptrs[2], device_ptrs[3], allocation_size, 1,
      &last_sync_point, 0, nullptr, &last_sync_point, nullptr, nullptr));

  // Step 5 (command-buffer): copy D[2] back into host memory for checking.
  std::vector<uint32_t> host_result(global_size);
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, host_result.data(), device_ptrs[2], allocation_size, 1,
      &last_sync_point, 0, nullptr, &last_sync_point, nullptr, nullptr));

  // Finalize the command-buffer, submit it once, and wait for the queue to
  // drain.
  ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cmd_buf));
  ASSERT_SUCCESS(
      urEnqueueCommandBufferExp(queue, in_order_cmd_buf, 0, nullptr, nullptr));
  ASSERT_SUCCESS(urQueueFinish(queue));

  // Recompute the expected chain of values on the host and compare every
  // element that was copied back.
  constexpr uint32_t eager_value = A * 1 + 2;                    // step 0
  constexpr uint32_t first_node_value = A * 1 + eager_value;     // step 1
  constexpr uint32_t second_node_value =
      A * first_node_value + eager_value;                        // step 3
  for (const uint32_t element : host_result) {
    ASSERT_EQ(second_node_value, element);
  }
}

0 commit comments

Comments
 (0)