Skip to content

Commit 62e051e

Browse files
[CVCUDA] CMake integration, vision processor CV-CUDA integration, PaddleClas support CV-CUDA (#1074)
* cvcuda resize * cvcuda center crop * cvcuda resize * add a fdtensor in fdmat * get cv mat and get tensor support gpu * paddleclas cvcuda preprocessor * fix compile err * fix windows compile error * rename reused to cached * address comment * remove debug code * add comment * add manager run * use cuda and cuda used * use cv cuda doc * address comment --------- Co-authored-by: Jason <jiangjiajun@baidu.com>
1 parent 0c735e9 commit 62e051e

26 files changed

Lines changed: 814 additions & 216 deletions

File tree

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
6666
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
6767
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
6868
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
69+
option(ENABLE_CVCUDA "Whether to enable NVIDIA CV-CUDA to boost image preprocess." OFF)
6970
option(ENABLE_ENCRYPTION "Whether to enable ENCRYPTION." OFF)
7071
option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
7172
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
@@ -373,6 +374,12 @@ if(ENABLE_VISION)
373374
include(${PROJECT_SOURCE_DIR}/cmake/flycv.cmake)
374375
list(APPEND DEPEND_LIBS external_flycv)
375376
endif()
377+
378+
if(ENABLE_CVCUDA)
379+
include(${PROJECT_SOURCE_DIR}/cmake/cvcuda.cmake)
380+
add_definitions(-DENABLE_CVCUDA)
381+
list(APPEND DEPEND_LIBS nvcv_types cvcuda)
382+
endif()
376383
endif()
377384

378385
if(ENABLE_TEXT)

FastDeploy.cmake.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
1313
set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
1414
set(ENABLE_VISION @ENABLE_VISION@)
1515
set(ENABLE_FLYCV @ENABLE_FLYCV@)
16+
set(ENABLE_CVCUDA @ENABLE_CVCUDA@)
1617
set(ENABLE_TEXT @ENABLE_TEXT@)
1718
set(ENABLE_ENCRYPTION @ENABLE_ENCRYPTION@)
1819
set(BUILD_ON_JETSON @BUILD_ON_JETSON@)
@@ -140,6 +141,7 @@ if(WITH_GPU)
140141
message(FATAL_ERROR "[FastDeploy] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda")
141142
endif()
142143
list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})
144+
list(APPEND FASTDEPLOY_INCS ${CUDA_DIRECTORY}/include)
143145

144146
if (ENABLE_TRT_BACKEND)
145147
if(BUILD_ON_JETSON)
@@ -218,6 +220,12 @@ if(ENABLE_VISION)
218220
endif()
219221
endif()
220222

223+
if(ENABLE_CVCUDA)
  # Locate the CV-CUDA libraries under third_libs/install/cvcuda/lib, relative
  # to this config file. NO_DEFAULT_PATH restricts the search to that directory
  # so a system-wide installation is never picked up instead.
  find_library(CVCUDA_LIB cvcuda ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH)
  find_library(NVCV_TYPES_LIB nvcv_types ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH)
  # find_library() leaves "<VAR>-NOTFOUND" in the variable on failure; stop here
  # instead of appending that placeholder to the link line, mirroring the
  # cudart check performed earlier in this file.
  if(NOT CVCUDA_LIB OR NOT NVCV_TYPES_LIB)
    message(FATAL_ERROR "[FastDeploy] Cannot find library cvcuda/nvcv_types in ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib")
  endif()
  list(APPEND FASTDEPLOY_LIBS ${CVCUDA_LIB} ${NVCV_TYPES_LIB})
endif()
228+
221229
endif()
222230

223231
if (ENABLE_TEXT)
@@ -288,6 +296,7 @@ if(ENABLE_OPENVINO_BACKEND)
288296
endif()
289297
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
290298
message(STATUS " ENABLE_VISION : ${ENABLE_VISION}")
299+
message(STATUS " ENABLE_CVCUDA : ${ENABLE_CVCUDA}")
291300
message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}")
292301
message(STATUS " ENABLE_ENCRYPTION : ${ENABLE_ENCRYPTION}")
293302
if(WITH_GPU)

cmake/cvcuda.cmake

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
if(NOT WITH_GPU)
16+
message(FATAL_ERROR "ENABLE_CVCUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.")
17+
endif()
18+
19+
if(APPLE OR ANDROID OR IOS OR WIN32)
20+
message(FATAL_ERROR "Cannot enable CV-CUDA in mac/ios/android/windows os, please set -DENABLE_CVCUDA=OFF.")
21+
endif()
22+
23+
if(NOT (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64"))
24+
message(FATAL_ERROR "CV-CUDA only support x86_64.")
25+
endif()
26+
27+
set(CVCUDA_LIB_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.0-alpha/nvcv-lib-0.2.0_alpha-cuda11-x86_64-linux.tar.xz)
28+
set(CVCUDA_LIB_FILENAME nvcv-lib-0.2.0_alpha-cuda11-x86_64-linux.tar.xz)
29+
set(CVCUDA_DEV_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.0-alpha/nvcv-dev-0.2.0_alpha-cuda11-x86_64-linux.tar.xz)
30+
set(CVCUDA_DEV_FILENAME nvcv-dev-0.2.0_alpha-cuda11-x86_64-linux.tar.xz)
31+
32+
download_and_decompress(${CVCUDA_LIB_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_LIB_FILENAME} ${THIRD_PARTY_PATH}/cvcuda)
33+
download_and_decompress(${CVCUDA_DEV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_DEV_FILENAME} ${THIRD_PARTY_PATH}/cvcuda)
34+
35+
# Stage the extracted CV-CUDA package into ${THIRD_PARTY_PATH}/install/cvcuda
# using CMake's built-in file() commands instead of shelling out to rm/mkdir/cp.
# execute_process() without a result check ignores failures, whereas file(COPY)
# aborts the configure step when a source directory is missing (e.g. the
# download or extraction failed), so the problem surfaces here rather than at
# link time.
# The trailing slash on each source directory copies its contents (not the
# directory itself) into DESTINATION, matching the original `cp -r src/ dst`.
file(REMOVE_RECURSE "${THIRD_PARTY_PATH}/install/cvcuda")
file(MAKE_DIRECTORY "${THIRD_PARTY_PATH}/install/cvcuda")
file(COPY "${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/lib/x86_64-linux-gnu/"
     DESTINATION "${THIRD_PARTY_PATH}/install/cvcuda/lib")
file(COPY "${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/include/"
     DESTINATION "${THIRD_PARTY_PATH}/install/cvcuda/include")
39+
40+
link_directories(${THIRD_PARTY_PATH}/install/cvcuda/lib)
41+
include_directories(${THIRD_PARTY_PATH}/install/cvcuda/include)
42+
43+
# Raise the C++ standard to C++17 whenever this module is included
# (presumably required by the CV-CUDA headers -- TODO confirm).
# CMAKE_CXX_STANDARD_REQUIRED makes CMake fail at configure time instead of
# silently falling back to an older standard when the compiler lacks C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

docs/cn/faq/use_cv_cuda.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# 使用CV-CUDA/CUDA加速GPU端到端推理性能
2+
3+
FastDeploy集成了CV-CUDA来加速预/后处理,个别CV-CUDA不支持的算子使用了CUDA kernel的方式实现。
4+
5+
FastDeploy的Vision Processor模块对CV-CUDA的算子做了进一步的封装,用户不需要自己去调用CV-CUDA,
6+
使用FastDeploy的模型推理接口即可利用CV-CUDA的加速能力。
7+
8+
FastDeploy的Vision Processor模块在集成CV-CUDA时,做了以下工作来方便用户的使用:
9+
- GPU内存管理,缓存算子的输入、输出tensor,避免重复分配GPU内存
10+
- CV-CUDA不支持的个别算子利用CUDA kernel实现
11+
- CV-CUDA/CUDA不支持的算子可以fallback到OpenCV/FlyCV
12+
13+
## 使用方式
14+
编译FastDeploy时,开启CV-CUDA编译选项
15+
```bash
16+
# 编译C++预测库时, 开启CV-CUDA编译选项.
17+
-DENABLE_CVCUDA=ON \
18+
19+
# 在编译Python预测库时, 开启CV-CUDA编译选项
20+
export ENABLE_CVCUDA=ON
21+
```
22+
23+
只有继承了ProcessorManager类的模型预处理,才可以使用CV-CUDA,这里以PaddleClasPreprocessor为例
24+
```bash
25+
# C++
26+
# 创建model之后,调用model preprocessor的UseCuda接口即可打开CV-CUDA/CUDA预处理
27+
# 第一个参数enable_cv_cuda,true代表使用CV-CUDA,false代表只使用CUDA(支持的算子较少)
28+
# 第二个参数是GPU id,-1代表不指定,使用当前GPU
29+
model.GetPreprocessor().UseCuda(true, 0);
30+
31+
# Python
32+
model.preprocessor.use_cuda(True, 0)
33+
```
34+
35+
## 最佳实践
36+
37+
- 如果预处理第一个算子是resize,则要根据实际情况决定resize是否跑在GPU。因为当resize跑在GPU,
38+
且图片解码在CPU时,需要把原图copy到GPU内存,开销较大,而resize之后再copy到GPU内存,则往往只需要
39+
copy较少的数据。

examples/vision/classification/paddleclas/serving/models/preprocess/1/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def initialize(self, args):
7070
yaml_path)
7171
if args['model_instance_kind'] == 'GPU':
7272
device_id = int(args['model_instance_device_id'])
73-
self.preprocess_.use_gpu(device_id)
73+
self.preprocess_.use_cuda(False, device_id)
7474

7575
def execute(self, requests):
7676
"""`execute` must be implemented in every Python model. `execute`

fastdeploy/vision/classification/ppcls/ppcls_pybind.cc

Lines changed: 88 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -18,76 +18,102 @@ void BindPaddleClas(pybind11::module& m) {
1818
pybind11::class_<vision::classification::PaddleClasPreprocessor>(
1919
m, "PaddleClasPreprocessor")
2020
.def(pybind11::init<std::string>())
21-
.def("run", [](vision::classification::PaddleClasPreprocessor& self, std::vector<pybind11::array>& im_list) {
22-
std::vector<vision::FDMat> images;
23-
for (size_t i = 0; i < im_list.size(); ++i) {
24-
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
25-
}
26-
std::vector<FDTensor> outputs;
27-
if (!self.Run(&images, &outputs)) {
28-
throw std::runtime_error("Failed to preprocess the input data in PaddleClasPreprocessor.");
29-
}
30-
if (!self.WithGpu()) {
31-
for (size_t i = 0; i < outputs.size(); ++i) {
32-
outputs[i].StopSharing();
33-
}
34-
}
35-
return outputs;
36-
})
37-
.def("use_gpu", [](vision::classification::PaddleClasPreprocessor& self, int gpu_id = -1) {
38-
self.UseGpu(gpu_id);
39-
})
40-
.def("disable_normalize", [](vision::classification::PaddleClasPreprocessor& self) {
41-
self.DisableNormalize();
42-
})
43-
.def("disable_permute", [](vision::classification::PaddleClasPreprocessor& self) {
44-
self.DisablePermute();
45-
});
21+
.def("run",
22+
[](vision::classification::PaddleClasPreprocessor& self,
23+
std::vector<pybind11::array>& im_list) {
24+
std::vector<vision::FDMat> images;
25+
for (size_t i = 0; i < im_list.size(); ++i) {
26+
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
27+
}
28+
std::vector<FDTensor> outputs;
29+
if (!self.Run(&images, &outputs)) {
30+
throw std::runtime_error(
31+
"Failed to preprocess the input data in "
32+
"PaddleClasPreprocessor.");
33+
}
34+
if (!self.CudaUsed()) {
35+
for (size_t i = 0; i < outputs.size(); ++i) {
36+
outputs[i].StopSharing();
37+
}
38+
}
39+
return outputs;
40+
})
41+
.def("use_cuda",
42+
[](vision::classification::PaddleClasPreprocessor& self,
43+
bool enable_cv_cuda = false,
44+
int gpu_id = -1) { self.UseCuda(enable_cv_cuda, gpu_id); })
45+
.def("disable_normalize",
46+
[](vision::classification::PaddleClasPreprocessor& self) {
47+
self.DisableNormalize();
48+
})
49+
.def("disable_permute",
50+
[](vision::classification::PaddleClasPreprocessor& self) {
51+
self.DisablePermute();
52+
});
4653

4754
pybind11::class_<vision::classification::PaddleClasPostprocessor>(
4855
m, "PaddleClasPostprocessor")
4956
.def(pybind11::init<int>())
50-
.def("run", [](vision::classification::PaddleClasPostprocessor& self, std::vector<FDTensor>& inputs) {
51-
std::vector<vision::ClassifyResult> results;
52-
if (!self.Run(inputs, &results)) {
53-
throw std::runtime_error("Failed to postprocess the runtime result in PaddleClasPostprocessor.");
54-
}
55-
return results;
56-
})
57-
.def("run", [](vision::classification::PaddleClasPostprocessor& self, std::vector<pybind11::array>& input_array) {
58-
std::vector<vision::ClassifyResult> results;
59-
std::vector<FDTensor> inputs;
60-
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
61-
if (!self.Run(inputs, &results)) {
62-
throw std::runtime_error("Failed to postprocess the runtime result in PaddleClasPostprocessor.");
63-
}
64-
return results;
65-
})
66-
.def_property("topk", &vision::classification::PaddleClasPostprocessor::GetTopk, &vision::classification::PaddleClasPostprocessor::SetTopk);
57+
.def("run",
58+
[](vision::classification::PaddleClasPostprocessor& self,
59+
std::vector<FDTensor>& inputs) {
60+
std::vector<vision::ClassifyResult> results;
61+
if (!self.Run(inputs, &results)) {
62+
throw std::runtime_error(
63+
"Failed to postprocess the runtime result in "
64+
"PaddleClasPostprocessor.");
65+
}
66+
return results;
67+
})
68+
.def("run",
69+
[](vision::classification::PaddleClasPostprocessor& self,
70+
std::vector<pybind11::array>& input_array) {
71+
std::vector<vision::ClassifyResult> results;
72+
std::vector<FDTensor> inputs;
73+
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
74+
if (!self.Run(inputs, &results)) {
75+
throw std::runtime_error(
76+
"Failed to postprocess the runtime result in "
77+
"PaddleClasPostprocessor.");
78+
}
79+
return results;
80+
})
81+
.def_property("topk",
82+
&vision::classification::PaddleClasPostprocessor::GetTopk,
83+
&vision::classification::PaddleClasPostprocessor::SetTopk);
6784

6885
pybind11::class_<vision::classification::PaddleClasModel, FastDeployModel>(
6986
m, "PaddleClasModel")
7087
.def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
7188
ModelFormat>())
72-
.def("clone", [](vision::classification::PaddleClasModel& self) {
73-
return self.Clone();
74-
})
75-
.def("predict", [](vision::classification::PaddleClasModel& self, pybind11::array& data) {
76-
cv::Mat im = PyArrayToCvMat(data);
77-
vision::ClassifyResult result;
78-
self.Predict(im, &result);
79-
return result;
80-
})
81-
.def("batch_predict", [](vision::classification::PaddleClasModel& self, std::vector<pybind11::array>& data) {
82-
std::vector<cv::Mat> images;
83-
for (size_t i = 0; i < data.size(); ++i) {
84-
images.push_back(PyArrayToCvMat(data[i]));
85-
}
86-
std::vector<vision::ClassifyResult> results;
87-
self.BatchPredict(images, &results);
88-
return results;
89-
})
90-
.def_property_readonly("preprocessor", &vision::classification::PaddleClasModel::GetPreprocessor)
91-
.def_property_readonly("postprocessor", &vision::classification::PaddleClasModel::GetPostprocessor);
89+
.def("clone",
90+
[](vision::classification::PaddleClasModel& self) {
91+
return self.Clone();
92+
})
93+
.def("predict",
94+
[](vision::classification::PaddleClasModel& self,
95+
pybind11::array& data) {
96+
cv::Mat im = PyArrayToCvMat(data);
97+
vision::ClassifyResult result;
98+
self.Predict(im, &result);
99+
return result;
100+
})
101+
.def("batch_predict",
102+
[](vision::classification::PaddleClasModel& self,
103+
std::vector<pybind11::array>& data) {
104+
std::vector<cv::Mat> images;
105+
for (size_t i = 0; i < data.size(); ++i) {
106+
images.push_back(PyArrayToCvMat(data[i]));
107+
}
108+
std::vector<vision::ClassifyResult> results;
109+
self.BatchPredict(images, &results);
110+
return results;
111+
})
112+
.def_property_readonly(
113+
"preprocessor",
114+
&vision::classification::PaddleClasModel::GetPreprocessor)
115+
.def_property_readonly(
116+
"postprocessor",
117+
&vision::classification::PaddleClasModel::GetPostprocessor);
92118
}
93119
} // namespace fastdeploy

0 commit comments

Comments
 (0)