diff --git a/paddle/phi/backends/gpu/cuda/cuda_graph.cc b/paddle/phi/backends/gpu/cuda/cuda_graph.cc index 9fd1b1d1d9a44f..37e1c05c9c966a 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_graph.cc +++ b/paddle/phi/backends/gpu/cuda/cuda_graph.cc @@ -18,13 +18,6 @@ #ifdef PADDLE_WITH_CUDA -#if CUDA_VERSION < 11000 -cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr, - const void *symbolPtr) { - return cudaSuccess; -} -#endif - COMMON_DECLARE_bool(use_cuda_malloc_async_allocator); COMMON_DECLARE_bool(auto_free_cudagraph_allocations_on_launch); @@ -108,7 +101,6 @@ int64_t CUDAGraph::UniqueMemoryPoolID() { void CUDAGraph::Reset() { if (is_reset_) return; -#if CUDA_VERSION >= 10010 for (auto graph : graphs_) { PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphDestroy(graph)); } @@ -117,7 +109,6 @@ void CUDAGraph::Reset() { PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphExecDestroy(exec_graph)); } exec_graphs_.clear(); -#endif // callback should be called in reverse order because the latter added // callback may rely on the former added callback. for (auto iter = cudagraph_post_reset_callbacks_.rbegin(); @@ -131,7 +122,6 @@ void CUDAGraph::Reset() { void CUDAGraph::Replay() { is_replayed_ = true; -#if CUDA_VERSION >= 10010 PADDLE_ENFORCE_EQ(is_reset_, false, common::errors::PermissionDenied( @@ -146,12 +136,10 @@ void CUDAGraph::Replay() { PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphLaunch(exec_graphs_[i], stream_)); } is_first_run_ = false; -#endif } void CUDAGraph::BeginSegmentCapture() { ThrowErrorIfNotSupportCUDAGraph(); -#if CUDA_VERSION >= 10010 PADDLE_ENFORCE_EQ(IsCapturing(), true, common::errors::PermissionDenied( @@ -179,14 +167,12 @@ void CUDAGraph::BeginSegmentCapture() { VLOG(10) << "Begin to capture CUDA Graph with ID " << capturing_graph_->id_ << ", segment id " << capturing_graph_->graphs_.size() << ", memory pool id " << capturing_graph_->pool_id_; -#endif } void CUDAGraph::BeginCapture(phi::GPUPlace place, cudaStream_t stream, cudaStreamCaptureMode mode) { ThrowErrorIfNotSupportCUDAGraph(); -#if CUDA_VERSION >= 10010 PADDLE_ENFORCE_EQ(IsCapturing(), false, common::errors::PermissionDenied( @@ -205,7 +191,6 @@ void CUDAGraph::BeginCapture(phi::GPUPlace place, << capturing_thread_id_; } BeginSegmentCapture(); -#endif } inline void sync_streams(gpuStream_t to_record, gpuStream_t to_wait) { @@ -220,7 +205,6 @@ inline void sync_streams(gpuStream_t to_record, gpuStream_t to_wait) { void CUDAGraph::EndSegmentCapture() { ThrowErrorIfNotSupportCUDAGraph(); -#if CUDA_VERSION >= 10010 PADDLE_ENFORCE_EQ( IsCapturing(), true, @@ -276,7 +260,6 @@ void CUDAGraph::EndSegmentCapture() { << ", memory pool id " << capturing_graph_->pool_id_; capturing_graph_->graphs_.emplace_back(graph); capturing_graph_->exec_graphs_.emplace_back(exec_graph); -#endif } std::unique_ptr CUDAGraph::EndCapture() { @@ -286,16 +269,12 @@ std::unique_ptr CUDAGraph::EndCapture() { } bool CUDAGraph::IsValidCapturing() { -#if CUDA_VERSION >= 10010 if (!IsCapturing()) return false; cudaStreamCaptureStatus status; CUDAGraphID id; PADDLE_ENFORCE_GPU_SUCCESS( cudaStreamGetCaptureInfo(capturing_graph_->stream_, &status, &id)); return status == cudaStreamCaptureStatusActive; -#else - return false; -#endif } static std::string ConcatPath(const std::string &dirname, @@ -331,7 +310,6 @@ void CUDAGraph::PrintToDotFiles(const std::string &dirname, #endif } -#if CUDA_VERSION >= 11000 void CUDAGraphNodeLauncher::KernelNodeLaunch( parameterSetter_t parameterSetter, gpuKernelCallback_t cudakernelCallback) { if (UNLIKELY(phi::backends::gpu::CUDAGraph::IsThisThreadCapturing())) { @@ -396,20 +374,6 @@ CUDAGraphNodeLauncher::GetParameterSettersForExecGraph(cudaGraph_t graph) { return hooks; } -#else -void CUDAGraphNodeLauncher::KernelNodeLaunch( - cudaFunction_t cudaFunc, - parameterSetter_t parameterSetter, - gpuKernelCallback_t cudakernelCallback) { - cudakernelCallback(0); -} - -std::vector -CUDAGraphNodeLauncher::GetParameterSettersForExecGraph(cudaGraph_t graph) { - PADDLE_THROW(common::errors::Unimplemented( - "CUDAGraphNodeLauncher is only supported when CUDA version >= 11.0")); -} -#endif } // namespace phi::backends::gpu diff --git a/paddle/phi/backends/gpu/cuda/cuda_graph.h b/paddle/phi/backends/gpu/cuda/cuda_graph.h index f0408b8b034ba7..00146a0b404b4a 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_graph.h +++ b/paddle/phi/backends/gpu/cuda/cuda_graph.h @@ -38,14 +38,6 @@ #include "paddle/utils/optional.h" #ifdef PADDLE_WITH_CUDA - -#if CUDA_VERSION < 11000 -// For CUDA versions less than 11.0, use a dummy type for cudaFunction_t. -using cudaFunction_t = void *; -cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr, - const void *symbolPtr); -#endif - namespace phi { namespace backends { namespace gpu { @@ -181,19 +173,7 @@ class CUDAGraphNodeLauncher { parameterSetters; }; -#if CUDA_VERSION >= 10010 static void ThrowErrorIfNotSupportCUDAGraph() {} -#else -enum gpuStreamCaptureMode { - cudaStreamCaptureModeGlobal = 0, - cudaStreamCaptureModeThreadLocal = 1, - cudaStreamCaptureModeRelaxed = 2 -}; -static void ThrowErrorIfNotSupportCUDAGraph() { - PADDLE_THROW(common::errors::Unimplemented( - "CUDA Graph is only supported when CUDA version >= 10.1")); -} -#endif using CUDAGraphID = unsigned long long; // NOLINT @@ -305,12 +285,8 @@ class CUDAGraph { static bool IsValidCapturing(); static bool IsThreadLocalCapturing() { -#if CUDA_VERSION >= 10010 return IsCapturing() && capturing_graph_->capture_mode_ == cudaStreamCaptureModeThreadLocal; -#else - return false; -#endif } static bool IsThisThreadCapturing() { @@ -335,11 +311,9 @@ class CUDAGraph { static CUDAGraphID UniqueID(); private: -#if CUDA_VERSION >= 10010 std::vector graphs_; std::vector exec_graphs_; gpuStreamCaptureMode capture_mode_; -#endif cudaStream_t stream_{nullptr}; phi::GPUPlace place_; CUDAGraphID id_; @@ -382,7 +356,6 @@ class CUDAGraph { static std::unique_ptr capturing_graph_; }; -#if CUDA_VERSION >= 10010 class CUDAGraphCaptureModeGuard { DISABLE_COPY_AND_ASSIGN(CUDAGraphCaptureModeGuard); @@ -407,15 +380,6 @@ class CUDAGraphCaptureModeGuard { private: gpuStreamCaptureMode old_mode_; }; -#else -class CUDAGraphCaptureModeGuard { - DISABLE_COPY_AND_ASSIGN(CUDAGraphCaptureModeGuard); - - public: - explicit CUDAGraphCaptureModeGuard( - gpuStreamCaptureMode mode = cudaStreamCaptureModeRelaxed) {} -}; -#endif } // namespace gpu } // namespace backends