Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove support of old compilers from oneDPL code #1678

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300)
Signed-off-by: Sergey Kopienko <[email protected]>
  • Loading branch information
SergeyKopienko committed Sep 25, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit f852d8185d3d3a7adb3f73c8f3dc0a5751cb7fc2
23 changes: 5 additions & 18 deletions include/oneapi/dpl/internal/reduce_by_segment_impl.h
Original file line number Diff line number Diff line change
@@ -281,15 +281,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __keys);
auto __seg_ends_acc = __seg_ends.template get_access<sycl::access_mode::write>(__cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceCountKernel>(
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_reduce_count_kernel,
#endif
sycl::nd_item<1> __item) {
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
auto __group = __item.get_group();
::std::size_t __group_id = __item.get_group(0);
::std::size_t __local_id = __item.get_local_id(0);
@@ -319,13 +315,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
__cgh.depends_on(__seg_end_identification);
auto __seg_ends_acc = __seg_ends.template get_access<sycl::access_mode::read>(__cgh);
auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access<sycl::access_mode::read_write>(__cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceOffsetKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_reduce_offset_kernel,
#endif
sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
auto __beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_acc);
auto __out_beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_scan_acc);
@@ -342,13 +335,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
auto __partials_acc = __partials.template get_access<sycl::access_mode::read_write>(__cgh);
auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access<sycl::access_mode::read>(__cgh);
__dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceWgKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_reduce_wg_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
::std::array<__val_type, __vals_per_item> __loc_partials;

@@ -465,13 +455,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
__dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh);

__cgh.depends_on(__wg_reduce);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReducePrefixKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_reduce_prefix_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
auto __group = __item.get_group();
::std::int64_t __group_id = __item.get_group(0);
10 changes: 2 additions & 8 deletions include/oneapi/dpl/internal/scan_by_segment_impl.h
Original file line number Diff line number Diff line change
@@ -164,13 +164,10 @@ struct __sycl_scan_by_segment_impl

__dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh);

#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegScanWgKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_scan_wg_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
__val_type __accumulator = __identity;

@@ -268,13 +265,10 @@ struct __sycl_scan_by_segment_impl
__dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh);

__dpl_sycl::__local_accessor<bool> __loc_seg_ends_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegScanPrefixKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__seg_scan_prefix_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
auto __group = __item.get_group();
::std::size_t __group_id = __item.get_group(0);
10 changes: 2 additions & 8 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
@@ -321,13 +321,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle());
#endif
__cgh.parallel_for<_LocalScanKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel_1,
#endif
sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) {
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc);
__local_scan(__item, __n, __local_acc, __rng1, __rng2, __temp_ptr, __size_per_wg, __wgroup_size,
@@ -342,13 +339,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
__cgh.depends_on(__submit_event);
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle());
#endif
__cgh.parallel_for<_GroupScanKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel_2,
#endif
// TODO: try to balance work between several workgroups instead of one
sycl::nd_range<1>(__wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) {
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc);
Original file line number Diff line number Diff line change
@@ -198,13 +198,10 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, :
oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng);
// an accessor per work-group with value counters from each work-item
auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) {
// item info
const ::std::size_t __self_lidx = __self_item.get_local_id(0);
@@ -299,13 +296,10 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size
__hdl.depends_on(__dependency_event);
// access the counters for all work groups
oneapi::dpl::__ranges::__require_access(__hdl, __count_rng);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) {
// find borders of a region with a specific bucket id
sycl::global_ptr<_CountT> __begin = __count_rng.begin() + __scan_size * __self_item.get_group(0);
@@ -544,13 +538,10 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments,

typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl);

#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
//Each SYCL work group processes one data segment.
sycl::nd_range<1>(__segments * __sg_size, __sg_size), [=](sycl::nd_item<1> __self_item) {

Original file line number Diff line number Diff line change
@@ -365,13 +365,10 @@ struct __parallel_transform_reduce_impl
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
#if _ONEDPL_COMPILE_KERNEL
__cgh.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_ReduceKernel>(
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size),
sycl::range<1>(__work_group_size)),
[=](sycl::nd_item<1> __item_id) {
Original file line number Diff line number Diff line change
@@ -267,7 +267,7 @@ class __kernel_compiler
static_assert(__kernel_count > 0, "At least one kernel name should be provided");

public:
#if _ONEDPL_KERNEL_BUNDLE_PRESENT

template <typename _Exec>
static auto
__compile(_Exec&& __exec)
@@ -290,18 +290,6 @@ class __kernel_compiler
{
return __kernel_array_type{__kernel_bundle.get_kernel(__kernel_ids[_Ip])...};
}
#else
template <typename _Exec>
static auto
__compile(_Exec&& __exec)
{
sycl::program __program(__exec.queue().get_context());

using __return_type = std::conditional_t<(__kernel_count > 1), __kernel_array_type, sycl::kernel>;
return __return_type{
(__program.build_with_kernel_type<_KernelNames>(), __program.get_kernel<_KernelNames>())...};
}
#endif
};

#if _ONEDPL_DEBUG_SYCL
1 change: 0 additions & 1 deletion include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h
Original file line number Diff line number Diff line change
@@ -45,7 +45,6 @@
#endif

// Macros to check the new SYCL features
#define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300)
#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300)
#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300)
#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300)