Skip to content

Commit 5a2ca42

Browse files
committed
Add RUNTIME_FUNCTION annotations to cub's device MR.
1 parent 03df551 commit 5a2ca42

File tree

1 file changed

+31
-9
lines changed

1 file changed

+31
-9
lines changed

cub/cub/detail/device_memory_resource.cuh

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,15 @@
1313
# pragma system_header
1414
#endif // no system header
1515

16+
#include <cub/util_device.cuh>
17+
1618
#include <cuda/__runtime/api_wrapper.h>
1719
#include <cuda/__stream/stream_ref.h>
20+
#include <cuda/std/__exception/terminate.h>
1821
#include <cuda/std/cstdint>
1922

23+
#include <nv/target>
24+
2025
CUB_NAMESPACE_BEGIN
2126

2227
namespace detail
@@ -25,34 +30,51 @@ namespace detail
2530
// this implementation
2631
struct device_memory_resource
2732
{
28-
void* allocate(size_t bytes, size_t /* alignment */)
33+
CUB_RUNTIME_FUNCTION void* allocate(size_t bytes, size_t /* alignment */)
2934
{
3035
void* ptr{nullptr};
31-
_CCCL_TRY_CUDA_API(::cudaMallocAsync, "allocate failed to allocate with cudaMalloc", &ptr, bytes, NULL);
36+
NV_IF_TARGET(
37+
NV_IS_HOST,
38+
(_CCCL_TRY_CUDA_API(::cudaMallocAsync, "allocate failed to allocate with cudaMallocAsync", &ptr, bytes, NULL);),
39+
(_CubLog("%s\n", "cub::detail::device_memory_resource::allocate not supported from device code.");
40+
::cuda::std::terminate();));
3241
_CCCL_ASSERT(ptr != nullptr, "allocate failed to allocate with cudaMallocAsync");
3342
return ptr;
3443
}
3544

36-
void deallocate(void* ptr, size_t /* bytes */)
45+
CUB_RUNTIME_FUNCTION void deallocate(void* ptr, size_t /* bytes */)
3746
{
38-
_CCCL_TRY_CUDA_API(::cudaFree, "deallocate failed", ptr);
47+
NV_IF_TARGET( //
48+
NV_IS_HOST,
49+
(_CCCL_TRY_CUDA_API(::cudaFree, "deallocate failed", ptr);),
50+
(_CubLog("%s\n", "cub::detail::device_memory_resource::deallocate not supported from device code.");
51+
::cuda::std::terminate();));
3952
}
4053

41-
void* allocate(::cuda::stream_ref stream, size_t bytes, size_t /* alignment */)
54+
CUB_RUNTIME_FUNCTION void* allocate(::cuda::stream_ref stream, size_t bytes, size_t /* alignment */)
4255
{
4356
return allocate(stream, bytes);
4457
}
4558

46-
void* allocate(::cuda::stream_ref stream, size_t bytes)
59+
CUB_RUNTIME_FUNCTION void* allocate(::cuda::stream_ref stream, size_t bytes)
4760
{
4861
void* ptr{nullptr};
49-
_CCCL_TRY_CUDA_API(::cudaMallocAsync, "allocate failed to allocate with cudaMallocAsync", &ptr, bytes, stream.get());
62+
NV_IF_TARGET( //
63+
NV_IS_HOST,
64+
(_CCCL_TRY_CUDA_API(
65+
::cudaMallocAsync, "allocate failed to allocate with cudaMallocAsync", &ptr, bytes, stream.get());),
66+
(_CubLog("%s\n", "cub::detail::device_memory_resource::allocate not supported from device code.");
67+
::cuda::std::terminate();));
5068
return ptr;
5169
}
5270

53-
void deallocate(::cuda::stream_ref stream, void* ptr, size_t /* bytes */)
71+
CUB_RUNTIME_FUNCTION void deallocate(::cuda::stream_ref stream, void* ptr, size_t /* bytes */)
5472
{
55-
_CCCL_TRY_CUDA_API(::cudaFreeAsync, "deallocate failed", ptr, stream.get());
73+
NV_IF_TARGET( //
74+
NV_IS_HOST,
75+
(_CCCL_TRY_CUDA_API(::cudaFreeAsync, "deallocate failed", ptr, stream.get());),
76+
(_CubLog("%s\n", "cub::detail::device_memory_resource::deallocate not supported from device code.");
77+
::cuda::std::terminate();));
5678
}
5779
};
5880
} // namespace detail

0 commit comments

Comments
 (0)