From 7fee22d715e900ab3ac81420053f8cc81a2dc484 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:28:17 +0200 Subject: [PATCH 01/14] Remove _ONEDPL_NO_INIT_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 83c44a8a07d..07d4031c90c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_NO_INIT_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) @@ -97,12 +96,7 @@ namespace __dpl_sycl { -using __no_init = -#if _ONEDPL_NO_INIT_PRESENT - sycl::property::no_init; -#else - sycl::property::noinit; -#endif +using __no_init = sycl::property::no_init; #if _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT template From f852d8185d3d3a7adb3f73c8f3dc0a5751cb7fc2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:31:51 +0200 Subject: [PATCH 02/14] Remove _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) Signed-off-by: Sergey Kopienko --- .../dpl/internal/reduce_by_segment_impl.h | 23 ++++--------------- .../dpl/internal/scan_by_segment_impl.h | 10 ++------ .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 10 ++------ .../dpcpp/parallel_backend_sycl_radix_sort.h | 15 +++--------- .../dpcpp/parallel_backend_sycl_reduce.h | 5 +--- .../dpcpp/parallel_backend_sycl_utils.h | 14 +---------- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 1 - 7 files changed, 14 insertions(+), 64 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index d0979554786..62acc9dc83d 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -281,15 +281,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) { oneapi::dpl::__ranges::__require_access(__cgh, __keys); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceCountKernel>( - sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=]( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_count_kernel, -#endif - sycl::nd_item<1> __item) { + sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::size_t __group_id = __item.get_group(0); ::std::size_t __local_id = __item.get_local_id(0); @@ -319,13 +315,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __cgh.depends_on(__seg_end_identification); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceOffsetKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_offset_kernel, -#endif sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_acc); auto __out_beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_scan_acc); @@ -342,13 +335,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __partials_acc = __partials.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_wg_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { ::std::array<__val_type, __vals_per_item> __loc_partials; @@ -465,13 +455,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh); __cgh.depends_on(__wg_reduce); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReducePrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_prefix_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::int64_t __group_id = __item.get_group(0); diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index b895561baeb..dbf3807bc7b 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -164,13 +164,10 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_scan_wg_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { __val_type __accumulator = __identity; @@ -268,13 +265,10 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh); __dpl_sycl::__local_accessor __loc_seg_ends_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanPrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_scan_prefix_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::size_t __group_id = __item.get_group(0); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index c0f3e9ad2d6..3f72a42e9f0 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -321,13 +321,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle()); #endif __cgh.parallel_for<_LocalScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel_1, -#endif sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); __local_scan(__item, __n, __local_acc, __rng1, __rng2, __temp_ptr, __size_per_wg, __wgroup_size, @@ -342,13 +339,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name __cgh.depends_on(__submit_event); auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle()); #endif __cgh.parallel_for<_GroupScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel_2, -#endif // TODO: try to balance work between several workgroups instead of one sycl::nd_range<1>(__wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index b6ee2c4f3b9..b5fa7aa2bbc 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -198,13 +198,10 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, : oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng); // an accessor per work-group with value counters from each work-item auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) { // item info const ::std::size_t __self_lidx = __self_item.get_local_id(0); @@ -299,13 +296,10 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size __hdl.depends_on(__dependency_event); // access the counters for all work groups oneapi::dpl::__ranges::__require_access(__hdl, __count_rng); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) { // find borders of a region with a specific bucket id sycl::global_ptr<_CountT> __begin = __count_rng.begin() + __scan_size * __self_item.get_group(0); @@ -544,13 +538,10 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif //Each SYCL work group processes one data segment. sycl::nd_range<1>(__segments * __sg_size, __sg_size), [=](sycl::nd_item<1> __self_item) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index ca776e94dce..4c9974417b7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -365,13 +365,10 @@ struct __parallel_transform_reduce_impl oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); __dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_ReduceKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 9bd195a80a9..39ca5402cfc 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -267,7 +267,7 @@ class __kernel_compiler static_assert(__kernel_count > 0, "At least one kernel name should be provided"); public: -#if _ONEDPL_KERNEL_BUNDLE_PRESENT + template static auto __compile(_Exec&& __exec) @@ -290,18 +290,6 @@ class __kernel_compiler { return __kernel_array_type{__kernel_bundle.get_kernel(__kernel_ids[_Ip])...}; } -#else - template - static auto - __compile(_Exec&& __exec) - { - sycl::program __program(__exec.queue().get_context()); - - using __return_type = std::conditional_t<(__kernel_count > 1), __kernel_array_type, sycl::kernel>; - return __return_type{ - (__program.build_with_kernel_type<_KernelNames>(), __program.get_kernel<_KernelNames>())...}; - } -#endif }; #if _ONEDPL_DEBUG_SYCL diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 07d4031c90c..0b3213dfb37 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) From 18a5e5efea37cd08c267ea8bde70d4dbd2805141 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:33:12 +0200 Subject: [PATCH 03/14] Remove _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) Signed-off-by: Sergey Kopienko --- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 0b3213dfb37..f5732e4e49a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) @@ -181,143 +180,91 @@ template constexpr auto __group_broadcast(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::group_broadcast(__args...); -#else - return sycl::ONEAPI::broadcast(__args...); -#endif } template constexpr auto __exclusive_scan_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::exclusive_scan_over_group(__args...); -#else - return sycl::ONEAPI::exclusive_scan(__args...); -#endif } template constexpr auto __inclusive_scan_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::inclusive_scan_over_group(__args...); -#else - return sycl::ONEAPI::inclusive_scan(__args...); -#endif } template constexpr auto __reduce_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::reduce_over_group(__args...); -#else - return sycl::ONEAPI::reduce(__args...); -#endif } template constexpr auto __any_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::any_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __all_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::all_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __none_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::none_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_exclusive_scan(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_exclusive_scan(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::exclusive_scan(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_inclusive_scan(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_inclusive_scan(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::inclusive_scan(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_reduce(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_reduce(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::reduce(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_any_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_any_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_all_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_all_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_none_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_none_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); -#endif } #if _ONEDPL_FPGA_DEVICE From 36e1759c964beb23ad51c7f60e2f0bcf98bf3e1b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:34:00 +0200 Subject: [PATCH 04/14] Remove _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index f5732e4e49a..efa65eb3572 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) #define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) @@ -96,21 +95,12 @@ namespace __dpl_sycl using __no_init = sycl::property::no_init; -#if _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT template using __known_identity = sycl::known_identity<_BinaryOp, _T>; template using __has_known_identity = sycl::has_known_identity<_BinaryOp, _T>; -#elif _ONEDPL_LIBSYCL_VERSION == 50200 -template -using __known_identity = sycl::ONEAPI::known_identity<_BinaryOp, _T>; - -template -using __has_known_identity = sycl::ONEAPI::has_known_identity<_BinaryOp, _T>; -#endif // _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT - template inline constexpr auto __known_identity_v = __known_identity<_BinaryOp, _T>::value; From 4974b185d9184b22b5165cb9967b57e683377042 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:34:59 +0200 Subject: [PATCH 05/14] Remove _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index efa65eb3572..b61b0249128 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) #define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) #define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) #define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) @@ -104,7 +103,6 @@ using __has_known_identity = sycl::has_known_identity<_BinaryOp, _T>; template inline constexpr auto __known_identity_v = __known_identity<_BinaryOp, _T>::value; -#if _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT template using __plus = sycl::plus<_T>; @@ -113,16 +111,6 @@ using __maximum = sycl::maximum<_T>; template using __minimum = sycl::minimum<_T>; -#else // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT -template -using __plus = sycl::ONEAPI::plus<_T>; - -template -using __maximum = sycl::ONEAPI::maximum<_T>; - -template -using __minimum = sycl::ONEAPI::minimum<_T>; -#endif // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT #if _ONEDPL_SYCL_SUB_GROUP_PRESENT using __sub_group = sycl::sub_group; From 719ba6a6f89e416a3e0df4b6ad51d970bfc87932 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:37:02 +0200 Subject: [PATCH 06/14] Remove _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index b61b0249128..520f98d989d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -45,7 +45,6 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) #define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) #define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) #define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN \ @@ -307,15 +306,7 @@ using __buffer_allocator = #endif template -#if _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT using __atomic_ref = sycl::atomic_ref<_AtomicType, sycl::memory_order::relaxed, sycl::memory_scope::work_group, _Space>; -#else -struct __atomic_ref : sycl::atomic<_AtomicType, _Space> -{ - explicit __atomic_ref(_AtomicType& ref) - : sycl::atomic<_AtomicType, _Space>(sycl::multi_ptr<_AtomicType, _Space>(&ref)){}; -}; -#endif // _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT template using __local_accessor = From 4e9e6d47d96e4f9b0c50882fb39fb4ad8baa7b48 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 13:28:19 +0200 Subject: [PATCH 07/14] Remove _ONEDPL_LIBSYCL_VERSION >= 50200 --- .../dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index d180e00fc27..f555de19dd1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -49,7 +49,6 @@ inline constexpr bool __can_use_known_identity = template using __has_known_identity = ::std::conditional_t< __can_use_known_identity<_Tp>, -# if _ONEDPL_LIBSYCL_VERSION >= 50200 typename ::std::disjunction< __dpl_sycl::__has_known_identity<_BinaryOp, _Tp>, ::std::conjunction<::std::is_arithmetic<_Tp>, @@ -61,14 +60,6 @@ using __has_known_identity = ::std::conditional_t< ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum>>>>, -# else //_ONEDPL_LIBSYCL_VERSION >= 50200 - typename ::std::conjunction< - ::std::is_arithmetic<_Tp>, - ::std::disjunction<::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<_Tp>>, - ::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus>, - ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<_Tp>>, - ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus>>>, -# endif //_ONEDPL_LIBSYCL_VERSION >= 50200 ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false #else //_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL) @@ -89,12 +80,7 @@ struct __known_identity_for_plus }; template -inline constexpr _Tp __known_identity = -#if _ONEDPL_LIBSYCL_VERSION >= 50200 - __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; -#else //_ONEDPL_LIBSYCL_VERSION >= 50200 - __known_identity_for_plus<_BinaryOp, _Tp>::value; //for plus only -#endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +inline constexpr _Tp __known_identity = __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; template struct walk_n From 820623edb75f7d0c958315ca1debab4527da7b3f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:39:38 +0200 Subject: [PATCH 08/14] Remove _ONEDPL_LIBSYCL_VERSION >= 50300 --- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 25 ++----------------- include/oneapi/dpl/pstl/utils.h | 2 +- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 520f98d989d..aae08fad7d7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -65,11 +65,7 @@ # endif #endif // _ONEDPL_DETECT_SPIRV_COMPILATION -#if _ONEDPL_LIBSYCL_VERSION >= 50300 -# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) -#else -# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) intel::reqd_sub_group_size(SIZE) -#endif +#define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) // This macro is intended to be used for specifying a subgroup size as a SYCL kernel attribute for SPIR-V targets // only. For non-SPIR-V targets, it will be empty. This macro should only be used in device code and may lead @@ -121,22 +117,14 @@ template constexpr auto __get_buffer_size(const _Buffer& __buffer) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 return __buffer.size(); -#else - return __buffer.get_count(); -#endif } template constexpr auto __get_accessor_size(const _Accessor& __accessor) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 return __accessor.size(); -#else - return __accessor.get_count(); -#endif } template @@ -255,7 +243,7 @@ inline auto __fpga_selector() return sycl::ext::intel::fpga_selector_v; } -# elif _ONEDPL_LIBSYCL_VERSION >= 50300 +# else inline auto __fpga_emulator_selector() { return sycl::ext::intel::fpga_emulator_selector{}; @@ -264,15 +252,6 @@ inline auto __fpga_selector() { return sycl::ext::intel::fpga_selector{}; } -# else -inline auto __fpga_emulator_selector() -{ - return sycl::INTEL::fpga_emulator_selector{}; -} -inline auto __fpga_selector() -{ - return sycl::INTEL::fpga_selector{}; -} # endif #endif // _ONEDPL_FPGA_DEVICE diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index 8a8dfdae1bc..0901fb46e34 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -505,7 +505,7 @@ __dpl_bit_cast(const _Src& __src) noexcept { #if __cpp_lib_bit_cast >= 201806L return ::std::bit_cast<_Dst>(__src); -#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_LIBSYCL_VERSION >= 50300 +#elif _ONEDPL_BACKEND_SYCL return sycl::bit_cast<_Dst>(__src); #elif __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(_Dst, __src); From cf2d8801e233433f7771f5a89b65cbc2767802c6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:50:24 +0200 Subject: [PATCH 09/14] Remove _ONEDPL_LIBSYCL_VERSION >= 50400 --- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index aae08fad7d7..a44ad7cda36 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -37,11 +37,7 @@ #endif #if _ONEDPL_FPGA_DEVICE -# if _ONEDPL_LIBSYCL_VERSION >= 50400 -# include -# else -# include -# endif +# include #endif // Macros to check the new SYCL features @@ -255,19 +251,9 @@ inline auto __fpga_selector() # endif #endif // _ONEDPL_FPGA_DEVICE -using __target = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 - sycl::target; -#else - sycl::access::target; -#endif +using __target = sycl::target; -constexpr __target __target_device = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 - __target::device; -#else - __target::global_buffer; -#endif +constexpr __target __target_device = __target::device; constexpr __target __host_target = #if _ONEDPL_LIBSYCL_VERSION >= 60200 From ef5fa29e57dab737fe2e4ac2a3a50d37a28aff90 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 6 Sep 2024 16:39:35 +0200 Subject: [PATCH 10/14] Remove _ONEDPL_LIBSYCL_VERSION >= 50700 Signed-off-by: Sergey Kopienko --- .../pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h | 6 +----- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 8 +------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index b5fa7aa2bbc..5ed7a019af1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -340,7 +340,6 @@ enum class __peer_prefix_algo template struct __peer_prefix_helper; -#if (_ONEDPL_LIBSYCL_VERSION >= 50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic_fetch_or> { @@ -384,7 +383,6 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic return __offset; } }; -#endif // (_ONEDPL_LIBSYCL_VERSION >= 50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::scan_then_broadcast> @@ -719,10 +717,8 @@ struct __parallel_radix_sort_iteration { #if _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT constexpr auto __peer_algorithm = __peer_prefix_algo::subgroup_ballot; -#elif _ONEDPL_LIBSYCL_VERSION >= 50700 - constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #else - constexpr auto __peer_algorithm = __peer_prefix_algo::scan_then_broadcast; + constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #endif // _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT __reorder_event = diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index a44ad7cda36..48cddc56283 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -41,7 +41,7 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) +#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) #define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) #define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN \ (_ONEDPL_LIBSYCL_VERSION < 70100) && (_ONEDPL_LIBSYCL_VERSION != 0) @@ -49,8 +49,6 @@ // TODO: determine which compiler configurations provide subgroup load/store #define _ONEDPL_SYCL_SUB_GROUP_LOAD_STORE_PRESENT false -#define _ONEDPL_SYCL_SUB_GROUP_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50700) - // Macro to check if we are compiling for SPIR-V devices. This macro must only be used within // SYCL kernels for determining SPIR-V compilation. Using this macro on the host may lead to incorrect behavior. #ifndef _ONEDPL_DETECT_SPIRV_COMPILATION // Check if overridden for testing @@ -103,11 +101,7 @@ using __maximum = sycl::maximum<_T>; template using __minimum = sycl::minimum<_T>; -#if _ONEDPL_SYCL_SUB_GROUP_PRESENT using __sub_group = sycl::sub_group; -#else -using __sub_group = sycl::ONEAPI::sub_group; -#endif template constexpr auto From d458d481310c72c23b2e9bfebf4563858d5e5ae6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 12:57:34 +0200 Subject: [PATCH 11/14] include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h - remove _ONEDPL_LIBSYCL_VERSION >= 50300 in __group_barrier(_Item __item) Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 48cddc56283..dbee1b33bac 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -121,7 +121,7 @@ template constexpr void __group_barrier(_Item __item) { -#if 0 //_ONEDPL_LIBSYCL_VERSION >= 50300 +#if 0 //TODO: usage of sycl::group_barrier: probably, we have to revise SYCL parallel patterns which use a group_barrier. // 1) sycl::group_barrier() implementation is not ready // 2) sycl::group_barrier and sycl::item::group_barrier are not quite equivalent From 994d03c8feff8b24a693f5b5c784452a13a099fb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 13:09:31 +0200 Subject: [PATCH 12/14] include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h - add minimal compiler version check to _ONEDPL_LIBSYCL_VERSION >= 57000 Signed-off-by: Sergey Kopienko --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index dbee1b33bac..bbfa1c1576b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -32,6 +32,9 @@ #if defined(__LIBSYCL_MAJOR_VERSION) && defined(__LIBSYCL_MINOR_VERSION) && defined(__LIBSYCL_PATCH_VERSION) # define _ONEDPL_LIBSYCL_VERSION \ (__LIBSYCL_MAJOR_VERSION * 10000 + __LIBSYCL_MINOR_VERSION * 100 + __LIBSYCL_PATCH_VERSION) +# if _ONEDPL_LIBSYCL_VERSION < 57000 +# error "oneDPL requires Intel(R) oneAPI DPC++/C++ Compiler 2022.2.0 or newer" +# endif #else # define _ONEDPL_LIBSYCL_VERSION 0 #endif From d4901546a52fcc470c6673c4015569f53b08a7b2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Mon, 8 Jul 2024 13:45:06 +0200 Subject: [PATCH 13/14] Apply GitHUB CLang format Signed-off-by: Sergey Kopienko --- .../oneapi/dpl/internal/reduce_by_segment_impl.h | 16 ++++++++-------- .../oneapi/dpl/internal/scan_by_segment_impl.h | 8 ++++---- .../dpcpp/parallel_backend_sycl_radix_sort.h | 6 +++++- .../hetero/dpcpp/parallel_backend_sycl_utils.h | 1 - .../dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h | 2 +- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 62acc9dc83d..053fed2a0d1 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -281,9 +281,9 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) { oneapi::dpl::__ranges::__require_access(__cgh, __keys); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceCountKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); @@ -315,9 +315,9 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __cgh.depends_on(__seg_end_identification); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceOffsetKernel>( sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_acc); @@ -335,9 +335,9 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __partials_acc = __partials.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceWgKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { ::std::array<__val_type, __vals_per_item> __loc_partials; @@ -455,9 +455,9 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh); __cgh.depends_on(__wg_reduce); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReducePrefixKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index dbf3807bc7b..224c6744ff6 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -164,9 +164,9 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegScanWgKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { __val_type __accumulator = __identity; @@ -265,9 +265,9 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh); __dpl_sycl::__local_accessor __loc_seg_ends_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegScanPrefixKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 5ed7a019af1..e821a7b795f 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -596,7 +596,11 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, if (__residual > 0) { //_ValueT may not have a default constructor, so we create just a storage via union type - union __storage { _ValueT __v; __storage(){} } __in_val; + union __storage + { + _ValueT __v; + __storage() {} + } __in_val; ::std::uint32_t __bucket = __radix_states; // greater than any actual radix state if (__self_lidx < __residual) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 39ca5402cfc..ae7a2b16213 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -267,7 +267,6 @@ class __kernel_compiler static_assert(__kernel_count > 0, "At least one kernel name should be provided"); public: - template static auto __compile(_Exec&& __exec) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index f555de19dd1..b764f2bc10a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -60,7 +60,7 @@ using __has_known_identity = ::std::conditional_t< ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum>>>>, - ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false + ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false #else //_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL) From 35dd1fbb6fda65b84c0d10d650898e7e1b9903c1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 6 Sep 2024 17:48:42 +0200 Subject: [PATCH 14/14] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - remove _USE_KERNEL_DEVICE_SPECIFIC_API due _ONEDPL_LIBSYCL_VERSION >= 50700 is always true Signed-off-by: Sergey Kopienko --- .../pstl/hetero/dpcpp/parallel_backend_sycl_utils.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index ae7a2b16213..519cd42c513 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -108,19 +108,12 @@ __supports_sub_group_size(const _ExecutionPolicy& __exec, std::size_t __target_s // Kernel run-time information helpers //----------------------------------------------------------------------------- -// 20201214 value corresponds to Intel(R) oneAPI C++ Compiler Classic 2021.1.2 Patch release -#define _USE_KERNEL_DEVICE_SPECIFIC_API (__SYCL_COMPILER_VERSION > 20201214) || (_ONEDPL_LIBSYCL_VERSION >= 50700) - template ::std::size_t __kernel_work_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __kernel) { const sycl::device& __device = __policy.queue().get_device(); -#if _USE_KERNEL_DEVICE_SPECIFIC_API return __kernel.template get_info(__device); -#else - return __kernel.template get_work_group_info(__device); -#endif } template @@ -130,7 +123,6 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ const sycl::device& __device = __policy.queue().get_device(); [[maybe_unused]] const ::std::size_t __wg_size = __kernel_work_group_size(__policy, __kernel); const ::std::uint32_t __sg_size = -#if _USE_KERNEL_DEVICE_SPECIFIC_API __kernel.template get_info( __device # if _ONEDPL_LIBSYCL_VERSION < 60000 @@ -138,9 +130,6 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ sycl::range<3> { __wg_size, 1, 1 } # endif ); -#else - __kernel.template get_sub_group_info( - __device, sycl::range<3>{__wg_size, 1, 1}); #endif return __sg_size; }