Skip to content

Commit 96adadf

Browse files
committed
Passing all LIT tests
1 parent 9447481 commit 96adadf

File tree

5 files changed

+23
-18
lines changed

5 files changed

+23
-18
lines changed

libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3030
template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
3131
_LIBCPP_HIDE_FROM_ABI void
3232
__pstl_fill(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
33-
// It is only safe to execute for_each on the GPU, it the execution policy is
33+
// It is only safe to execute fill on the GPU if the execution policy is
3434
// parallel unsequenced, as it is the only execution policy prohibiting throwing
3535
// exceptions and allowing SIMD instructions
3636
if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
3737
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
3838
__libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
39-
return std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
39+
std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
40+
}
41+
// Otherwise, we execute fill on the CPU instead
42+
else {
43+
std::__pstl_fill<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __value);
4044
}
41-
// Otherwise, we execute for_each on the CPU instead
42-
return std::__pstl_fill<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __value);
4345
}
4446

4547
_LIBCPP_END_NAMESPACE_STD

libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ __pstl_for_each(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __
3535
if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
3636
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
3737
__libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
38-
return std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
38+
std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
39+
}
40+
// Else we fall back to the CPU backend
41+
else {
42+
std::__pstl_for_each<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __func);
3943
}
40-
// Else if the excution policy is parallel, we execute for_each on the CPU instead
41-
return std::__pstl_for_each<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __func);
4244
}
4345

4446
_LIBCPP_END_NAMESPACE_STD

libcxx/include/__algorithm/pstl_backends/gpu_backends/stable_sort.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ template <class _ExecutionPolicy, class _RandomAccessIterator, class _Comp>
2828
_LIBCPP_HIDE_FROM_ABI void
2929
__pstl_stable_sort(__gpu_backend_tag, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
3030
// TODO: Implement GPU backend.
31-
return __pstl_stable_sort<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __comp);
31+
__pstl_stable_sort<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __comp);
3232
}
3333

3434
_LIBCPP_END_NAMESPACE_STD

libcxx/include/__algorithm/pstl_backends/gpu_backends/transform.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform(
3838
if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
3939
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
4040
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value &&
41-
__libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
42-
// While the CPU backend captures by reference, [&], that is not valid when
43-
// offloading to the GPU. Therefore we must capture by value, [=].
44-
return std::__par_backend::__parallel_for_simd_2(__first, __last - __first, __result, __op);
41+
__libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
42+
__libcpp_is_contiguous_iterator<_ForwardOutIterator>::value) {
43+
std::__par_backend::__parallel_for_simd_2(__first, __last - __first, __result, __op);
44+
return __result + (__last - __first);
4545
}
46+
// If it is not safe to offload to the GPU, we rely on the CPU backend.
4647
return std::__pstl_transform<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __result, __op);
4748
}
4849

@@ -66,10 +67,10 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform(
6667
__libcpp_is_contiguous_iterator<_ForwardIterator1>::value &&
6768
__libcpp_is_contiguous_iterator<_ForwardIterator2>::value &&
6869
__libcpp_is_contiguous_iterator<_ForwardOutIterator>::value) {
69-
// While the CPU backend captures by reference, [&], that is not valid when
70-
// offloading to the GPU. Therefore we must capture by value, [=].
71-
return std::__par_backend::__parallel_for_simd_3(__first1, __last1 - __first1, __first2, __result, __op);
70+
std::__par_backend::__parallel_for_simd_3(__first1, __last1 - __first1, __first2, __result, __op);
71+
return __result + (__last1 - __first1);
7272
}
73+
// If it is not safe to offload to the GPU, we rely on the CPU backend.
7374
return std::__pstl_transform<_ExecutionPolicy>(__cpu_backend_tag{}, __first1, __last1, __first2, __result, __op);
7475
}
7576

libcxx/include/__algorithm/pstl_backends/gpu_backends/transform_reduce.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
3232

3333
template <class _T1, class _T2, class _T3>
34-
struct __is_supported_reduction : std::false_type {};
34+
_LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction : std::false_type {};
3535

3636
# define __PSTL_IS_SUPPORTED_REDUCTION(funname) \
3737
template <class _Tp> \
38-
struct __is_supported_reduction<std::funname<_Tp>, _Tp, _Tp> : std::true_type {}; \
38+
_LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction<std::funname<_Tp>, _Tp, _Tp> : std::true_type {}; \
3939
template <class _Tp, class _Up> \
40-
struct __is_supported_reduction<std::funname<>, _Tp, _Up> : std::true_type {};
40+
_LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction<std::funname<>, _Tp, _Up> : std::true_type {};
4141

4242
// __is_trivial_plus_operation already exists
4343
__PSTL_IS_SUPPORTED_REDUCTION(plus)

0 commit comments

Comments
 (0)