Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 0 additions & 36 deletions paddle/phi/backends/gpu/cuda/cuda_graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@

#ifdef PADDLE_WITH_CUDA

#if CUDA_VERSION < 11000
cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr,
const void *symbolPtr) {
return cudaSuccess;
}
#endif

COMMON_DECLARE_bool(use_cuda_malloc_async_allocator);
COMMON_DECLARE_bool(auto_free_cudagraph_allocations_on_launch);

Expand Down Expand Up @@ -108,7 +101,6 @@ int64_t CUDAGraph::UniqueMemoryPoolID() {

void CUDAGraph::Reset() {
if (is_reset_) return;
#if CUDA_VERSION >= 10010
for (auto graph : graphs_) {
PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphDestroy(graph));
}
Expand All @@ -117,7 +109,6 @@ void CUDAGraph::Reset() {
PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphExecDestroy(exec_graph));
}
exec_graphs_.clear();
#endif
// callback should be called in reverse order because the latter added
// callback may rely on the former added callback.
for (auto iter = cudagraph_post_reset_callbacks_.rbegin();
Expand All @@ -131,7 +122,6 @@ void CUDAGraph::Reset() {

void CUDAGraph::Replay() {
is_replayed_ = true;
#if CUDA_VERSION >= 10010
PADDLE_ENFORCE_EQ(is_reset_,
false,
common::errors::PermissionDenied(
Expand All @@ -146,12 +136,10 @@ void CUDAGraph::Replay() {
PADDLE_ENFORCE_GPU_SUCCESS(cudaGraphLaunch(exec_graphs_[i], stream_));
}
is_first_run_ = false;
#endif
}

void CUDAGraph::BeginSegmentCapture() {
ThrowErrorIfNotSupportCUDAGraph();
#if CUDA_VERSION >= 10010
PADDLE_ENFORCE_EQ(IsCapturing(),
true,
common::errors::PermissionDenied(
Expand Down Expand Up @@ -179,14 +167,12 @@ void CUDAGraph::BeginSegmentCapture() {
VLOG(10) << "Begin to capture CUDA Graph with ID " << capturing_graph_->id_
<< ", segment id " << capturing_graph_->graphs_.size()
<< ", memory pool id " << capturing_graph_->pool_id_;
#endif
}

void CUDAGraph::BeginCapture(phi::GPUPlace place,
cudaStream_t stream,
cudaStreamCaptureMode mode) {
ThrowErrorIfNotSupportCUDAGraph();
#if CUDA_VERSION >= 10010
PADDLE_ENFORCE_EQ(IsCapturing(),
false,
common::errors::PermissionDenied(
Expand All @@ -205,7 +191,6 @@ void CUDAGraph::BeginCapture(phi::GPUPlace place,
<< capturing_thread_id_;
}
BeginSegmentCapture();
#endif
}

inline void sync_streams(gpuStream_t to_record, gpuStream_t to_wait) {
Expand All @@ -220,7 +205,6 @@ inline void sync_streams(gpuStream_t to_record, gpuStream_t to_wait) {

void CUDAGraph::EndSegmentCapture() {
ThrowErrorIfNotSupportCUDAGraph();
#if CUDA_VERSION >= 10010
PADDLE_ENFORCE_EQ(
IsCapturing(),
true,
Expand Down Expand Up @@ -276,7 +260,6 @@ void CUDAGraph::EndSegmentCapture() {
<< ", memory pool id " << capturing_graph_->pool_id_;
capturing_graph_->graphs_.emplace_back(graph);
capturing_graph_->exec_graphs_.emplace_back(exec_graph);
#endif
}

std::unique_ptr<CUDAGraph> CUDAGraph::EndCapture() {
Expand All @@ -286,16 +269,12 @@ std::unique_ptr<CUDAGraph> CUDAGraph::EndCapture() {
}

bool CUDAGraph::IsValidCapturing() {
#if CUDA_VERSION >= 10010
if (!IsCapturing()) return false;
cudaStreamCaptureStatus status;
CUDAGraphID id;
PADDLE_ENFORCE_GPU_SUCCESS(
cudaStreamGetCaptureInfo(capturing_graph_->stream_, &status, &id));
return status == cudaStreamCaptureStatusActive;
#else
return false;
#endif
}

static std::string ConcatPath(const std::string &dirname,
Expand Down Expand Up @@ -331,7 +310,6 @@ void CUDAGraph::PrintToDotFiles(const std::string &dirname,
#endif
}

#if CUDA_VERSION >= 11000
void CUDAGraphNodeLauncher::KernelNodeLaunch(
parameterSetter_t parameterSetter, gpuKernelCallback_t cudakernelCallback) {
if (UNLIKELY(phi::backends::gpu::CUDAGraph::IsThisThreadCapturing())) {
Expand Down Expand Up @@ -396,20 +374,6 @@ CUDAGraphNodeLauncher::GetParameterSettersForExecGraph(cudaGraph_t graph) {

return hooks;
}
#else
void CUDAGraphNodeLauncher::KernelNodeLaunch(
cudaFunction_t cudaFunc,
parameterSetter_t parameterSetter,
gpuKernelCallback_t cudakernelCallback) {
cudakernelCallback(0);
}

std::vector<cudaGraphExecuterSetter_t>
CUDAGraphNodeLauncher::GetParameterSettersForExecGraph(cudaGraph_t graph) {
PADDLE_THROW(common::errors::Unimplemented(
"CUDAGraphNodeLauncher is only supported when CUDA version >= 11.0"));
}
#endif

} // namespace phi::backends::gpu

Expand Down
36 changes: 0 additions & 36 deletions paddle/phi/backends/gpu/cuda/cuda_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,6 @@
#include "paddle/utils/optional.h"

#ifdef PADDLE_WITH_CUDA

#if CUDA_VERSION < 11000
// For CUDA versions less than 11.0, use a dummy type for cudaFunction_t.
using cudaFunction_t = void *;
cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr,
const void *symbolPtr);
#endif

namespace phi {
namespace backends {
namespace gpu {
Expand Down Expand Up @@ -181,19 +173,7 @@ class CUDAGraphNodeLauncher {
parameterSetters;
};

#if CUDA_VERSION >= 10010
static void ThrowErrorIfNotSupportCUDAGraph() {}
#else
enum gpuStreamCaptureMode {
cudaStreamCaptureModeGlobal = 0,
cudaStreamCaptureModeThreadLocal = 1,
cudaStreamCaptureModeRelaxed = 2
};
static void ThrowErrorIfNotSupportCUDAGraph() {
PADDLE_THROW(common::errors::Unimplemented(
"CUDA Graph is only supported when CUDA version >= 10.1"));
}
#endif

using CUDAGraphID = unsigned long long; // NOLINT

Expand Down Expand Up @@ -305,12 +285,8 @@ class CUDAGraph {
static bool IsValidCapturing();

static bool IsThreadLocalCapturing() {
#if CUDA_VERSION >= 10010
return IsCapturing() &&
capturing_graph_->capture_mode_ == cudaStreamCaptureModeThreadLocal;
#else
return false;
#endif
}

static bool IsThisThreadCapturing() {
Expand All @@ -335,11 +311,9 @@ class CUDAGraph {
static CUDAGraphID UniqueID();

private:
#if CUDA_VERSION >= 10010
std::vector<cudaGraph_t> graphs_;
std::vector<cudaGraphExec_t> exec_graphs_;
gpuStreamCaptureMode capture_mode_;
#endif
cudaStream_t stream_{nullptr};
phi::GPUPlace place_;
CUDAGraphID id_;
Expand Down Expand Up @@ -382,7 +356,6 @@ class CUDAGraph {
static std::unique_ptr<CUDAGraph> capturing_graph_;
};

#if CUDA_VERSION >= 10010
class CUDAGraphCaptureModeGuard {
DISABLE_COPY_AND_ASSIGN(CUDAGraphCaptureModeGuard);

Expand All @@ -407,15 +380,6 @@ class CUDAGraphCaptureModeGuard {
private:
gpuStreamCaptureMode old_mode_;
};
#else
class CUDAGraphCaptureModeGuard {
DISABLE_COPY_AND_ASSIGN(CUDAGraphCaptureModeGuard);

public:
explicit CUDAGraphCaptureModeGuard(
gpuStreamCaptureMode mode = cudaStreamCaptureModeRelaxed) {}
};
#endif

} // namespace gpu
} // namespace backends
Expand Down
Loading