From 82fa6cada0a2738d9aecb4186d56bc3e9a626a41 Mon Sep 17 00:00:00 2001
From: co63oc <4617245+co63oc@users.noreply.github.com>
Date: Tue, 6 Jan 2026 11:12:50 +0800
Subject: [PATCH] optimize phi::CPUPlace in paddle/fluid/inference/

---
 .../passes/convert_to_mixed_precision.cc      |  4 +-
 .../analysis/passes/ir_graph_build_pass.cc    |  2 +-
 .../ir_params_sync_among_devices_pass.cc      |  6 +-
 .../passes/save_optimized_model_pass.cc       |  2 +-
 paddle/fluid/inference/io.cc                  |  2 +-
 .../tensorrt/convert/batch_norm_op.cc         | 14 ++--
 .../inference/tensorrt/convert/dropout_op.cc  |  4 +-
 .../tensorrt/convert/fill_constant_op.cc      |  4 +-
 .../inference/tensorrt/convert/gelu_op.cc     |  4 +-
 .../tensorrt/convert/leaky_relu_op.cc         |  2 +-
 .../inference/tensorrt/convert/op_converter.h |  4 +-
 .../tensorrt/convert/test_io_converter.cc     |  4 +-
 .../tensorrt/convert/test_op_converter.cc     |  2 +-
 .../inference/tensorrt/convert/ut_helper.h    |  4 +-
 paddle/fluid/inference/tensorrt/engine.cc     | 64 ++++++++-----------
 paddle/fluid/inference/tensorrt/engine.h      |  4 +-
 .../inference/tensorrt/pir/generic_plugin.cu  |  4 +-
 .../tensorrt/plugin/custom_generic_plugin.cu  |  2 +-
 .../plugin/deformable_conv_op_plugin.cu       |  2 +-
 .../elementwiseadd_transpose_op_plugin.cu     |  4 +-
 .../plugin/fused_token_prune_op_plugin.cu     |  2 +-
 .../tensorrt/plugin/generic_plugin.cu         |  4 +-
 .../plugin/instance_norm_op_plugin.cu         |  8 +--
 .../tensorrt/plugin/layer_norm_op_plugin.cu   |  8 +--
 .../multihead_matmul_roformer_plugin.cu       | 16 ++---
 .../tensorrt/plugin/qkv_to_context_plugin.cu  | 22 +++----
 .../plugin/trans_layernorm_op_plugin.cu       |  2 +-
 ...transformer_input_output_convert_plugin.cu |  2 +-
 28 files changed, 97 insertions(+), 105 deletions(-)

diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
index d7bba0ada52926..548dff58bba9a8 100644
--- a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
+++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
@@ -74,7 +74,7 @@ ConvertToMixedPrecisionPass::ConvertToMixedPrecisionPass(
 }
 
 void ConvertToMixedPrecisionPass::LoadModel() {
-  framework::Executor exe{phi::CPUPlace{}};
+  framework::Executor exe{CPUPlace{}};
   // If we did not find the provided weight path,
   // we assume that the model to be converted only has a model file and no
   // params file, we believe this situation is reasonable. In this case, weight
@@ -177,7 +177,7 @@ void ConvertToMixedPrecisionPass::SaveMixedModel() {
     op->SetAttr("file_path", save_params_path);
     op->CheckAttrs();
 
-    framework::Executor exe(phi::CPUPlace{});
+    framework::Executor exe(CPUPlace{});
     exe.Run(save_program, &scope_, 0, true, true);
   };
 
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 9397794cd818b6..54d0fd92a03aa7 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -42,7 +42,7 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
   // so that the parameters will on the same device, or they will keep copying
   // between difference devices.
   phi::Place place;
-  place = phi::CPUPlace();
+  place = CPUPlace();
 
   if (argument->model_dir_valid()) {
     auto program =
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
index 2ce0290d47835a..93f097da216749 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -53,7 +53,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
       true,
       common::errors::PreconditionNotMet(
           "The gpu_device_id field should be valid"));
-  phi::Place place = phi::GPUPlace(argument->gpu_device_id());
+  phi::Place place = GPUPlace(argument->gpu_device_id());
   auto *scope = argument->scope_ptr();
   std::vector<std::string> all_vars = scope->LocalVarNames();
 
@@ -130,7 +130,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
         dst_ptr = dev_ctx->Alloc(t, t->dtype());
         phi::memory_utils::Copy(place,
                                 dst_ptr,
-                                phi::CPUPlace(),
+                                CPUPlace(),
                                 src_ptr,
                                 t->numel() * phi::SizeOf(t->dtype()),
                                 stream);
@@ -269,7 +269,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
     CopyParamsToXpu(argument);
   }
 #endif
-  paddle::memory::Release(phi::CPUPlace());
+  paddle::memory::Release(CPUPlace());
 }
 
 std::string IrParamsSyncAmongDevicesPass::repr() const {
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index c8d8153cfa0f3d..f5a012a9aac06a 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -91,7 +91,7 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
     op->SetAttr("file_path", save_params_path);
     op->CheckAttrs();
 
-    framework::Executor exe(phi::CPUPlace{});
+    framework::Executor exe(CPUPlace{});
     exe.Run(save_program, &scope, 0, true, true);
   };
   // TODO(shentanyue01): Setting hardware and version identification for
diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc
index 0cb825058670b2..7d12d94bf019ee 100644
--- a/paddle/fluid/inference/io.cc
+++ b/paddle/fluid/inference/io.cc
@@ -273,7 +273,7 @@ void SaveVars(const framework::Scope& scope,
   op->SetAttr("file_path", dirname + "/param");
   op->CheckAttrs();
 
-  phi::CPUPlace place;
+  CPUPlace place;
   framework::Executor exe(place);
   exe.Run(prog, const_cast<framework::Scope*>(&scope), 0, true, true);
 }
diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
index e72f0075f0ef8b..c9f8139de8ede5 100644
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
@@ -70,7 +70,7 @@ class BatchNormOpConverter : public OpConverter {
     scale_tensor.Resize(Scale_t->dims());
     variance_tensor.Resize(Variance_t->dims());
 
-    phi::CPUPlace cpu_place;
+    CPUPlace cpu_place;
     // copy data from gpu to cpu
     paddle::framework::TensorCopySync((*Bias_t), cpu_place, &bias_tensor);
     paddle::framework::TensorCopySync((*Mean_t), cpu_place, &mean_tensor);
@@ -78,10 +78,10 @@ class BatchNormOpConverter : public OpConverter {
     paddle::framework::TensorCopySync(
        (*Variance_t), cpu_place, &variance_tensor);
 
-    auto* bias_data = bias_tensor.mutable_data<float>(phi::CPUPlace());
-    auto* mean_data = mean_tensor.mutable_data<float>(phi::CPUPlace());
-    auto* scale_data = scale_tensor.mutable_data<float>(phi::CPUPlace());
-    auto* variance_data = variance_tensor.mutable_data<float>(phi::CPUPlace());
+    auto* bias_data = bias_tensor.mutable_data<float>(CPUPlace());
+    auto* mean_data = mean_tensor.mutable_data<float>(CPUPlace());
+    auto* scale_data = scale_tensor.mutable_data<float>(CPUPlace());
+    auto* variance_data = variance_tensor.mutable_data<float>(CPUPlace());
 
     std::unique_ptr<phi::DenseTensor> combine_scale_tensor(
         new phi::DenseTensor());
@@ -92,9 +92,9 @@ class BatchNormOpConverter : public OpConverter {
     combine_bias_tensor->Resize(bias_tensor.dims());
 
     auto* combine_scale_data =
-        combine_scale_tensor->mutable_data<float>(phi::CPUPlace());
+        combine_scale_tensor->mutable_data<float>(CPUPlace());
     auto* combine_bias_data =
-        combine_bias_tensor->mutable_data<float>(phi::CPUPlace());
+        combine_bias_tensor->mutable_data<float>(CPUPlace());
 
     size_t ele_num = combine_scale_tensor->memory_size() / sizeof(float);
 
diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
index 5bd89a353e4aab..dddd34b67fa898 100644
--- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
@@ -45,10 +45,10 @@ class DropoutOpConverter : public OpConverter {
       return;
     }
 
-    phi::CPUPlace cpu_place;
+    CPUPlace cpu_place;
     std::unique_ptr<phi::DenseTensor> weight_tensor(new phi::DenseTensor());
     weight_tensor->Resize(common::make_ddim({1}));
-    auto* weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
+    auto* weight_data = weight_tensor->mutable_data<float>(CPUPlace());
     weight_data[0] = 1 - dropout_prob;
 
     TensorRTEngine::Weight scale_weights{
diff --git a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc
index 02c11b5dce6d15..19a03681e8afc3 100644
--- a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc
@@ -95,13 +95,13 @@ class FillConstantOpConverter : public OpConverter {
     void* trt_data = nullptr;
     size_t trt_num;
     if (dtype == 2 || dtype == 3) {  // int,int64
-      auto* tmp_ptr = out_tensor->mutable_data<int>(phi::CPUPlace());
+      auto* tmp_ptr = out_tensor->mutable_data<int>(CPUPlace());
       for (int64_t i = 0; i < out_tensor->numel(); i++)
         tmp_ptr[i] = std::stoi(str_value);
       trt_dtype = nvinfer1::DataType::kINT32;
       trt_data = static_cast<void*>(tmp_ptr);
     } else if (dtype == 5) {  // float
-      auto* tmp_ptr = out_tensor->mutable_data<float>(phi::CPUPlace());
+      auto* tmp_ptr = out_tensor->mutable_data<float>(CPUPlace());
       for (int64_t i = 0; i < out_tensor->numel(); i++)
         tmp_ptr[i] = std::stof(str_value);
       trt_data = static_cast<void*>(tmp_ptr);
diff --git a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
index ea4933e6a7ce4e..27d8b90da7d679 100644
--- a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
@@ -45,7 +45,7 @@ class GeluOpConverter : public OpConverter {
     auto create_weights = [&](float data, std::string type) -> float* {
       std::unique_ptr<phi::DenseTensor> tmp_tensor(new phi::DenseTensor());
       tmp_tensor->Resize({1});
-      auto* tmp_data = tmp_tensor->mutable_data<float>(phi::CPUPlace());
+      auto* tmp_data = tmp_tensor->mutable_data<float>(CPUPlace());
       tmp_data[0] = data;
       engine_->SetWeights(out_name + "_gelu_op_" + type,
                           std::move(tmp_tensor));
@@ -146,7 +146,7 @@ class GeluOpConverter : public OpConverter {
      auto create_weights = [&](float data, std::string type) -> float* {
        std::unique_ptr<phi::DenseTensor> tmp_tensor(new phi::DenseTensor());
        tmp_tensor->Resize({1});
-       auto* tmp_data = tmp_tensor->mutable_data<float>(phi::CPUPlace());
+       auto* tmp_data = tmp_tensor->mutable_data<float>(CPUPlace());
        tmp_data[0] = data;
        engine_->SetWeights(out_name + "_gelu_op_" + type,
                            std::move(tmp_tensor));
diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
index 5ce403aa7cc228..b76ab435254b5f 100644
--- a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
@@ -47,7 +47,7 @@ class LeakyReluOpConverter : public OpConverter {
       engine_->SetTensorDynamicRange(input, in_scale);
     }
 #else
-    phi::CPUPlace place;
+    CPUPlace place;
     std::unique_ptr<phi::DenseTensor> alpha_tensor(new phi::DenseTensor());
     alpha_tensor->Resize(common::make_ddim({2}));
     float* alpha_data = alpha_tensor->mutable_data<float>(place);
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 260b89be715c61..6384fe514cfc63 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -704,7 +704,7 @@ class OpConverter {
        shape.d, shape.d + shape.nbDims, 1, std::multiplies<int>());
    std::unique_ptr<phi::DenseTensor> tmp_tensor(new phi::DenseTensor());
    tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<T>(phi::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
@@ -740,7 +740,7 @@ class OpConverter {
    std::unique_ptr<phi::DenseTensor> tmp_tensor(new phi::DenseTensor());
    int data_size = data.size();
    tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<T>(phi::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
diff --git a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
index 8f7d89a7dbef35..ef4cb1f1ed5a3b 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
@@ -61,13 +61,13 @@ void IOConverterTester(const phi::DeviceContext& ctx) {
 }
 
 TEST(EngineIOConverterTester, DefaultCPU) {
-  phi::CPUPlace place;
+  CPUPlace place;
   phi::CPUContext ctx(place);
   IOConverterTester(ctx);
 }
 
 TEST(EngineIOConverterTester, DefaultGPU) {
-  phi::GPUPlace place;
+  GPUPlace place;
   phi::GPUContext ctx(place);
   IOConverterTester(ctx);
 }
diff --git a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
index 8f928dd6d9cf81..308bc6e78ed126 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
@@ -58,7 +58,7 @@ TEST(OpConverter, ConvertBlock) {
   auto* x = scope.Var("conv2d-Y");
   auto* x_tensor = x->GetMutable<phi::DenseTensor>();
   x_tensor->Resize(common::make_ddim(dim_vec));
-  x_tensor->mutable_data<float>(phi::GPUPlace(0));
+  x_tensor->mutable_data<float>(GPUPlace(0));
 
   OpTeller::Global().SetOpConverterType(conv2d_op, OpConverterType::Default);
   OpConverter converter;
diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
index d12e45e46e6f14..38d5e72241f446 100644
--- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h
+++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -56,7 +56,7 @@ void RandomizeTensor(phi::DenseTensor* tensor,
      common::errors::PermissionDenied("RandomizeTensor only can be used for "
                                       "tensor which dims is not zero."));
 
-  phi::CPUPlace cpu_place;
+  CPUPlace cpu_place;
   phi::DenseTensor temp_tensor;
   temp_tensor.Resize(dims);
   auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
@@ -250,7 +250,7 @@ class TRTConvertValidation {
   framework::Scope& scope() { return scope_; }
 
 private:
-  phi::GPUPlace place_;
+  GPUPlace place_;
   std::unique_ptr<TensorRTEngine> engine_;
   cudaStream_t stream_;
   std::unique_ptr<framework::OpDesc> op_;
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 7ce32bc55e0cfe..61730a6b95c689 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -131,7 +131,7 @@ void TensorRTEngine::Execute(int batch_size,
        inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
            .GetContextMemory(
                predictor_id_per_thread,
-                phi::GPUPlace(device_id()),
+                GPUPlace(device_id()),
                phi::Stream(reinterpret_cast<phi::StreamId>(stream)));
    infer_context->setDeviceMemory(context_memory);
  }
@@ -669,7 +669,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight(
   std::string name_suffix = std::to_string(name_suffix_counter);
   std::string splitter = "__";
   std::string name_with_suffix = name + splitter + name_suffix;
-  phi::CPUPlace cpu_place;
+  CPUPlace cpu_place;
   PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
                     0,
                     common::errors::AlreadyExists(
@@ -686,12 +686,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight(
   if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
     phi::DenseTensor bf16_tensor;
     bf16_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &bf16_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &bf16_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::FLOAT16);
     auto *fp16_data =
-        weight_map[name_with_suffix]->mutable_data<float16>(phi::CPUPlace());
-    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<float16>(CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       fp16_data[i] = static_cast<float16>(bf16_data[i]);
     }
@@ -700,12 +699,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight(
   } else if (weight_tensor.dtype() == phi::DataType::FLOAT32) {
     phi::DenseTensor fp32_tensor;
     fp32_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &fp32_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &fp32_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::FLOAT16);
     auto *fp16_data =
-        weight_map[name_with_suffix]->mutable_data<float16>(phi::CPUPlace());
-    auto *fp32_data = fp32_tensor.mutable_data<float>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<float16>(CPUPlace());
+    auto *fp32_data = fp32_tensor.mutable_data<float>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       fp16_data[i] = static_cast<float16>(fp32_data[i]);
     }
@@ -714,12 +712,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight(
   } else if (weight_tensor.dtype() == phi::DataType::INT64) {
     phi::DenseTensor int64_tensor;
     int64_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &int64_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &int64_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::INT32);
     auto *int32_data =
-        weight_map[name_with_suffix]->mutable_data<int32_t>(phi::CPUPlace());
-    auto *int64_data = int64_tensor.mutable_data<int64_t>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<int32_t>(CPUPlace());
+    auto *int64_data = int64_tensor.mutable_data<int64_t>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       int32_data[i] = int64_data[i];
     }
@@ -742,7 +739,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
   std::string name_suffix = std::to_string(name_suffix_counter);
   std::string splitter = "__";
   std::string name_with_suffix = name + splitter + name_suffix;
-  phi::CPUPlace cpu_place;
+  CPUPlace cpu_place;
   PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
                     0,
                     common::errors::AlreadyExists(
@@ -759,12 +756,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
   if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
     phi::DenseTensor bf16_tensor;
     bf16_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &bf16_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &bf16_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::FLOAT32);
     auto *fp32_data =
-        weight_map[name_with_suffix]->mutable_data<float>(phi::CPUPlace());
-    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<float>(CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       fp32_data[i] = static_cast<float>(bf16_data[i]);
     }
@@ -773,12 +769,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
   } else if (weight_tensor.dtype() == phi::DataType::FLOAT16) {
     phi::DenseTensor fp16_tensor;
     fp16_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &fp16_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &fp16_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::FLOAT32);
     auto *fp32_data =
-        weight_map[name_with_suffix]->mutable_data<float>(phi::CPUPlace());
-    auto *fp16_data = fp16_tensor.mutable_data<float16>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<float>(CPUPlace());
+    auto *fp16_data = fp16_tensor.mutable_data<float16>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       fp32_data[i] = static_cast<float>(fp16_data[i]);
     }
@@ -787,12 +782,11 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
   } else if (weight_tensor.dtype() == phi::DataType::INT64) {
     phi::DenseTensor int64_tensor;
     int64_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &int64_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &int64_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::INT32);
     auto *int32_data =
-        weight_map[name_with_suffix]->mutable_data<int32_t>(phi::CPUPlace());
-    auto *int64_data = int64_tensor.mutable_data<int64_t>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<int32_t>(CPUPlace());
+    auto *int64_data = int64_tensor.mutable_data<int64_t>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       int32_data[i] = int64_data[i];
     }
@@ -814,7 +808,7 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
   std::string name_suffix = std::to_string(name_suffix_counter);
   std::string splitter = "__";
   std::string name_with_suffix = name + splitter + name_suffix;
-  phi::CPUPlace cpu_place;
+  CPUPlace cpu_place;
   PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
                     0,
                     common::errors::AlreadyExists(
@@ -835,12 +829,11 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
   if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
     phi::DenseTensor bf16_tensor;
     bf16_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &bf16_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &bf16_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::FLOAT32);
     auto *fp32_data =
-        weight_map[name_with_suffix]->mutable_data<float>(phi::CPUPlace());
-    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<float>(CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       fp32_data[i] = static_cast<float>(bf16_data[i]);
     }
@@ -849,12 +842,11 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
   } else if (weight_tensor.dtype() == phi::DataType::INT64) {
     phi::DenseTensor int64_tensor;
     int64_tensor.clear();
-    paddle::framework::TensorCopySync(
-        weight_tensor, phi::CPUPlace(), &int64_tensor);
+    paddle::framework::TensorCopySync(weight_tensor, CPUPlace(), &int64_tensor);
     weight_map[name_with_suffix]->set_type(phi::DataType::INT32);
     auto *int32_data =
-        weight_map[name_with_suffix]->mutable_data<int32_t>(phi::CPUPlace());
-    auto *int64_data = int64_tensor.mutable_data<int64_t>(phi::CPUPlace());
+        weight_map[name_with_suffix]->mutable_data<int32_t>(CPUPlace());
+    auto *int64_data = int64_tensor.mutable_data<int64_t>(CPUPlace());
     for (int i = 0; i < weight_tensor.numel(); i++) {
       int32_data[i] = int64_data[i];
     }
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 6d2c0af53b3004..a94f3db0e34e58 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -698,7 +698,7 @@ class PADDLE_API TRTEngineManager {
   }
 
   void* GetContextMemory(PredictorID predictor_id,
-                         const phi::GPUPlace& place,
+                         const GPUPlace& place,
                          const phi::Stream& stream) {
     std::lock_guard<std::mutex> lock(mutex_);
     static auto alignment = GetAlignmentSize(place);
@@ -719,7 +719,7 @@ class PADDLE_API TRTEngineManager {
   }
 
  private:
-  size_t GetAlignmentSize(const phi::GPUPlace& place) {
+  size_t GetAlignmentSize(const GPUPlace& place) {
    const auto& prop = platform::GetDeviceProperties(place.GetDeviceId());
    return prop.textureAlignment;
  }
diff --git a/paddle/fluid/inference/tensorrt/pir/generic_plugin.cu b/paddle/fluid/inference/tensorrt/pir/generic_plugin.cu
index ca4049adac3432..f648beb3fec996 100644
--- a/paddle/fluid/inference/tensorrt/pir/generic_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/pir/generic_plugin.cu
@@ -493,7 +493,7 @@ int GenericPlugin::initialize() TRT_NOEXCEPT {
          op_name_.c_str()));
 
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
-  phi::GPUPlace place(phi::backends::gpu::GetCurrentDeviceId());
+  GPUPlace place(phi::backends::gpu::GetCurrentDeviceId());
  auto* dev_ctx = static_cast<phi::GPUContext*>(pool.Get(place));
 
  std::vector<phi::DataType> precision_types{phi::DataType::FLOAT32,
@@ -573,7 +573,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
                           void* const* outputs,
                           void* workspace,
                           cudaStream_t stream) TRT_NOEXCEPT {
-  phi::GPUPlace place(phi::backends::gpu::GetCurrentDeviceId());
+  GPUPlace place(phi::backends::gpu::GetCurrentDeviceId());
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
  // TODO(inference): generic plugin do not support INT8 precision now.
  auto nvType2PhiType =
diff --git a/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
index 7522d847c93124..0fb37e1c458b1a 100644
--- a/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
@@ -477,7 +477,7 @@ int CustomGenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
                                 void* const* outputs,
                                 void* workspace,
                                 cudaStream_t stream) TRT_NOEXCEPT {
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
  // TODO(inference): custom generic plugin do not support INT8 precision now.
  auto protoType2PhiType =
      [&](GenerateCustomGenericPluginDataType proto_type,
diff --git a/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
index 9bbc96bf9379e6..91d43f729d7068 100644
--- a/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
@@ -559,7 +559,7 @@ void gemm_impl_new(int m,
                    const T* beta,
                    T* C) {
   auto* device_ctx = static_cast<phi::GPUContext*>(
-      phi::DeviceContextPool::Instance().Get(phi::GPUPlace(0)));
+      phi::DeviceContextPool::Instance().Get(GPUPlace(0)));
   const phi::GPUContext& dev_ctx = *device_ctx;
 
   typedef typename CUDATypeTraits<T>::TYPE run_type;
diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu
index 117b492fa232bf..dad0bc195f3526 100644
--- a/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu
@@ -127,7 +127,7 @@ void ElementwiseAddTransposePluginDynamic::configurePlugin(
   }
   ele_out_tensor_.Resize(common::make_ddim(x_shape));
   phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance();
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
   auto *device_context = static_cast<phi::GPUContext *>(pool.Get(place));
   const phi::GPUContext &dev_ctx = *device_context;
 
@@ -171,7 +171,7 @@ int ElementwiseAddTransposePluginDynamic::enqueue(
     void *workspace,
     cudaStream_t stream) TRT_NOEXCEPT {
   phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance();
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
   auto *device_context = static_cast<phi::GPUContext *>(pool.Get(place));
   const phi::GPUContext &dev_ctx = *device_context;
diff --git a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
index 6c4695afd66a63..628a1062fb52e4 100644
--- a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
@@ -389,7 +389,7 @@ int FusedTokenPrunePluginDynamic::enqueue(
        NULL, temp_storage_bytes, pruned_token_lengths_, output3, B + 1);
 
    // Allocate temporary storage
-    phi::GPUPlace place(platform::GetCurrentDeviceId());
+    GPUPlace place(platform::GetCurrentDeviceId());
    auto d_temp_storage = phi::memory_utils::Alloc(place, temp_storage_bytes);
 
    // Run exclusive prefix sum
diff --git a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
index 67f149d5431a91..d35be103ef7131 100644
--- a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
@@ -504,7 +504,7 @@ int GenericPlugin::initialize() TRT_NOEXCEPT {
          op_type.c_str()));
 
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
  auto* dev_ctx = static_cast<phi::GPUContext*>(pool.Get(place));
 
  std::vector<phi::DataType> precision_types{phi::DataType::FLOAT32,
@@ -595,7 +595,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
                           void* const* outputs,
                           void* workspace,
                           cudaStream_t stream) TRT_NOEXCEPT {
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
  // TODO(inference): generic plugin do not support INT8 precision now.
  auto nvType2PhiType =
diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
index 197a828d12af28..2f6594cb68dac7 100644
--- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
@@ -74,8 +74,8 @@ int InstanceNormPlugin::enqueue(int batch_size,
   bias_t.Resize(common::make_ddim({batch_size, c}));
   int device_id;
   cudaGetDevice(&device_id);
-  float *scale_d = scale_t.mutable_data<float>(phi::GPUPlace(device_id));
-  float *bias_d = bias_t.mutable_data<float>(phi::GPUPlace(device_id));
+  float *scale_d = scale_t.mutable_data<float>(GPUPlace(device_id));
+  float *bias_d = bias_t.mutable_data<float>(GPUPlace(device_id));
 
   for (int i = 0; i < batch_size; i++) {
     cudaMemcpyAsync(scale_d + i * c,
@@ -150,8 +150,8 @@ int InstanceNormPluginEnqueue(const nvinfer1::PluginTensorDesc *inputDesc,
   bias_t.Resize(common::make_ddim({n, c}));
   int device_id;
   cudaGetDevice(&device_id);
-  float *scale_d = scale_t.mutable_data<float>(phi::GPUPlace(device_id));
-  float *bias_d = bias_t.mutable_data<float>(phi::GPUPlace(device_id));
+  float *scale_d = scale_t.mutable_data<float>(GPUPlace(device_id));
+  float *bias_d = bias_t.mutable_data<float>(GPUPlace(device_id));
 
   for (int i = 0; i < n; i++) {
     cudaMemcpyAsync(scale_d + i * c,
diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
index cf57ee90260e5e..d750857df27095 100644
--- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
@@ -124,8 +124,8 @@ int LayerNormPlugin::enqueue(int batch_size,
   cudaGetDevice(&device_id);
   mean_t.Resize(common::make_ddim({batched_mean_shape}));
   variance_t.Resize(common::make_ddim({batched_variance_shape}));
-  float *mean_d = mean_t.mutable_data<float>(phi::GPUPlace(device_id));
-  float *variance_d = variance_t.mutable_data<float>(phi::GPUPlace(device_id));
+  float *mean_d = mean_t.mutable_data<float>(GPUPlace(device_id));
+  float *variance_d = variance_t.mutable_data<float>(GPUPlace(device_id));
   auto input_type = getDataType();
   if (input_type == nvinfer1::DataType::kFLOAT) {
     VLOG(1) << "TRT Plugin DataType selected. LayerNorm-->fp32";
@@ -325,8 +325,8 @@ int LayerNormPluginDynamic::enqueue(
   cudaGetDevice(&device_id);
   mean_t.Resize(common::make_ddim(mean_shape_));
   variance_t.Resize(common::make_ddim(variance_shape_));
-  float *mean_d = mean_t.mutable_data<float>(phi::GPUPlace(device_id));
-  float *variance_d = variance_t.mutable_data<float>(phi::GPUPlace(device_id));
+  float *mean_d = mean_t.mutable_data<float>(GPUPlace(device_id));
+  float *variance_d = variance_t.mutable_data<float>(GPUPlace(device_id));
   auto input_type = input_desc[0].type;
   if (input_type == nvinfer1::DataType::kFLOAT) {
     VLOG(1) << "TRT Plugin DataType selected. LayerNorm-->fp32";
diff --git a/paddle/fluid/inference/tensorrt/plugin/multihead_matmul_roformer_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/multihead_matmul_roformer_plugin.cu
index e115e525130252..155b1522f64ff7 100644
--- a/paddle/fluid/inference/tensorrt/plugin/multihead_matmul_roformer_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/multihead_matmul_roformer_plugin.cu
@@ -190,10 +190,10 @@ int MultiheadMatmulRoformerPlugin::enqueue(
   if (input_type == nvinfer1::DataType::kFLOAT) {
     VLOG(1) << "TRT Plugin DataType selected. RoformerQkvToContext-->fp32";
     auto *multihead_temp_data =
-        multihead_temp_tensor.mutable_data<float>(phi::GPUPlace(device_id));
+        multihead_temp_tensor.mutable_data<float>(GPUPlace(device_id));
     auto *temp_roformer_data =
         temp_roformer_tensor.mutable_data<float>(  // NOLINT
-            phi::GPUPlace(device_id));
+            GPUPlace(device_id));
     auto *tmp_roformer_ptr = reinterpret_cast<float *>(temp_roformer_data);
     auto *qkptr = multihead_temp_data;
     auto *tptr = multihead_temp_data + scratch_size;
@@ -205,7 +205,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
     if (ProductDim(input_desc[3].dims) == (batch * seq_len)) {
       temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
       auto *temp_qk_bias =
-          temp_qk_bias_tensor.mutable_data<float>(phi::GPUPlace(device_id));
+          temp_qk_bias_tensor.mutable_data<float>(GPUPlace(device_id));
       int grid = batch * head_number_ * seq_len;
       int block = round_up(seq_len);
       broadcast<<<grid, block, 0, stream>>>(
@@ -242,7 +242,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
                                  head_size_);  // k
 
     auto *device_ctx = static_cast<phi::GPUContext *>(
-        phi::DeviceContextPool::Instance().Get(phi::GPUPlace(device_id)));
+        phi::DeviceContextPool::Instance().Get(GPUPlace(device_id)));
     const phi::GPUContext &dev_ctx = *device_ctx;
 
     phi::funcs::MultiheadGPUComputeFunctor<float> multihead_compute_func;
@@ -268,11 +268,11 @@ int MultiheadMatmulRoformerPlugin::enqueue(
     VLOG(1) << "TRT Plugin DataType selected. QkvToContext-->fp16";
     auto *multihead_temp_data =
         multihead_temp_tensor.mutable_data<int16_t>(  // NOLINT
-            phi::GPUPlace(device_id));
+            GPUPlace(device_id));
     auto *temp_roformer_data =
         temp_roformer_tensor.mutable_data<int16_t>(  // NOLINT
-            phi::GPUPlace(device_id));
+            GPUPlace(device_id));
     half *tmp_roformer_ptr = reinterpret_cast<half *>(temp_roformer_data);
     half *qkptr = reinterpret_cast<half *>(multihead_temp_data);
     half *tptr = qkptr + scratch_size;
@@ -284,7 +284,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
     if (ProductDim(input_desc[3].dims) == (batch * seq_len)) {
       temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
       auto *temp_qk_bias = reinterpret_cast<half *>(
-          temp_qk_bias_tensor.mutable_data<int16_t>(phi::GPUPlace(device_id)));
+          temp_qk_bias_tensor.mutable_data<int16_t>(GPUPlace(device_id)));
       int grid = batch * head_number_ * seq_len;
       int block = round_up(seq_len);
       broadcast<<<grid, block, 0, stream>>>(
@@ -304,7 +304,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
                     cudaMemcpyDeviceToDevice);
 
     auto *device_ctx = static_cast<phi::GPUContext *>(
-        phi::DeviceContextPool::Instance().Get(phi::GPUPlace(device_id)));
+        phi::DeviceContextPool::Instance().Get(GPUPlace(device_id)));
 
     int n_q = seq_len * head_number_ * head_size_ * batch;
     constexpr int threads = 128;
diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
index 3e135f7c3db943..28cab8d310d344 100644
--- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
@@ -196,7 +196,7 @@ void QkvToContextPluginDynamic::configurePlugin(
   int device_id = 0;
   cudaGetDevice(&device_id);
   auto *device_ctx = static_cast<phi::GPUContext *>(
-      phi::DeviceContextPool::Instance().Get(phi::GPUPlace(device_id)));
+      phi::DeviceContextPool::Instance().Get(GPUPlace(device_id)));
   const phi::GPUContext &dev_ctx = *device_ctx;
   auto stream = dev_ctx.stream();
   tensor_.Resize({batch, seq_len, seq_len, head_number_});
@@ -204,12 +204,12 @@ void QkvToContextPluginDynamic::configurePlugin(
     tensor_.Resize({batch, seq_len, seq_len, 1});
     int blocks = batch * 1 * seq_len;
     mask_half_ = reinterpret_cast<half *>(
-        tensor_.mutable_data<int16_t>(phi::GPUPlace(device_id)));
+        tensor_.mutable_data<int16_t>(GPUPlace(device_id)));
     reset_qk_bias<<<blocks, 1024, 0, stream>>>(
         mask_half_, real_seq_len, seq_len);
   } else if (in[0].desc.type == nvinfer1::DataType::kFLOAT) {
     fake_qk_bias_ = reinterpret_cast<float *>(
-        tensor_.mutable_data<int32_t>(phi::GPUPlace(device_id)));
+        tensor_.mutable_data<int32_t>(GPUPlace(device_id)));
     int64_t size = sizeof(int32_t) * batch * seq_len * seq_len * head_number_;
 #ifdef PADDLE_WITH_HIP
     PADDLE_ENFORCE_GPU_SUCCESS(
@@ -336,7 +336,7 @@ int QkvToContextPluginDynamic::enqueue(
   if (input_type == nvinfer1::DataType::kFLOAT) {
     VLOG(1) << "TRT Plugin DataType selected. QkvToContext-->fp32";
     auto *multihead_temp_data =
-        multihead_temp_tensor.mutable_data<float>(phi::GPUPlace(device_id));
+        multihead_temp_tensor.mutable_data<float>(GPUPlace(device_id));
     auto *qkptr = multihead_temp_data;
     auto *tptr = multihead_temp_data + scratch_size;
 
@@ -347,7 +347,7 @@ int QkvToContextPluginDynamic::enqueue(
     if (ProductDim(input_desc[1].dims) == (batch * seq_len)) {
       temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
       auto *temp_qk_bias =
-          temp_qk_bias_tensor.mutable_data<float>(phi::GPUPlace(device_id));
+          temp_qk_bias_tensor.mutable_data<float>(GPUPlace(device_id));
       int grid = batch * head_number_ * seq_len;
       int block = round_up(seq_len);
       broadcast<<<grid, block, 0, stream>>>(
@@ -361,7 +361,7 @@ int QkvToContextPluginDynamic::enqueue(
     if (ProductDim(input_desc[1].dims) == (seq_len * seq_len)) {
       temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
       auto *temp_qk_bias = reinterpret_cast<float *>(
-          temp_qk_bias_tensor.mutable_data<float>(phi::GPUPlace(device_id)));
+          temp_qk_bias_tensor.mutable_data<float>(GPUPlace(device_id)));
       int grid = batch * head_number_ * seq_len;
       int block = round_up(seq_len);
       broadcast_batch_head_number<<<grid, block, 0, stream>>>(
@@ -382,7 +382,7 @@ int QkvToContextPluginDynamic::enqueue(
        batch, seq_len, head_size_, head_number_, input0_data, tptr, stream);
 
    auto *device_ctx = static_cast<phi::GPUContext *>(
-        phi::DeviceContextPool::Instance().Get(phi::GPUPlace(device_id)));
+        phi::DeviceContextPool::Instance().Get(GPUPlace(device_id)));
    const phi::GPUContext &dev_ctx = *device_ctx;
 
    phi::funcs::MultiheadGPUComputeFunctor<float> multihead_compute_func;
@@ -418,7 +418,7 @@ int QkvToContextPluginDynamic::enqueue(
    }
    auto *multihead_temp_data =
        multihead_temp_tensor.mutable_data<int16_t>(  // NOLINT
-            phi::GPUPlace(device_id));
+            GPUPlace(device_id));
    half *qkptr = reinterpret_cast<half *>(multihead_temp_data);
    half *tptr = qkptr + scratch_size;
 
@@ -430,7 +430,7 @@ int QkvToContextPluginDynamic::enqueue(
    if (ProductDim(input_desc[1].dims) == (batch * seq_len)) {
      temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
      auto *temp_qk_bias = reinterpret_cast<half *>(
-          temp_qk_bias_tensor.mutable_data<int16_t>(phi::GPUPlace(device_id)));
+          temp_qk_bias_tensor.mutable_data<int16_t>(GPUPlace(device_id)));
      int grid = batch * head_number_ * seq_len;
      int block = round_up(seq_len);
      broadcast<<<grid, block, 0, stream>>>(
@@ -444,7 +444,7 @@ int QkvToContextPluginDynamic::enqueue(
    if (ProductDim(input_desc[1].dims) == (seq_len * seq_len)) {
      temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len});
      auto *temp_qk_bias = reinterpret_cast<half *>(
-          temp_qk_bias_tensor.mutable_data<int16_t>(phi::GPUPlace(device_id)));
+          temp_qk_bias_tensor.mutable_data<int16_t>(GPUPlace(device_id)));
      int grid = batch * head_number_ * seq_len;
      int block = round_up(seq_len);
      broadcast_batch_head_number<<<grid, block, 0, stream>>>(
@@ -481,7 +481,7 @@ int QkvToContextPluginDynamic::enqueue(
    }
 
    auto *device_ctx = static_cast<phi::GPUContext *>(
-        phi::DeviceContextPool::Instance().Get(phi::GPUPlace(device_id)));
+        phi::DeviceContextPool::Instance().Get(GPUPlace(device_id)));
 
    int n_q = seq_len * head_number_ * head_size_ * batch;
    constexpr int threads = 128;
diff --git a/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu
index 151d00f9e45494..63c37e0f092749 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu
@@ -363,7 +363,7 @@ int TransLayerNormPluginDynamic::enqueue(
   auto input_type = input_desc[0].type;
 
   phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance();
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
   auto *device_context = static_cast<phi::GPUContext *>(pool.Get(place));
   const phi::GPUContext &dev_ctx = *device_context;
 
diff --git a/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu
index 9ee2cf83f248f4..a5a9afb98bd05b 100644
--- a/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu
@@ -186,7 +186,7 @@ int TransformerInputConvertPlugin::enqueue(
   cub::DeviceScan::ExclusiveSum(
       NULL, temp_storage_bytes, input1, output2, B + 1);
   // Allocate temporary storage
-  phi::GPUPlace place(platform::GetCurrentDeviceId());
+  GPUPlace place(platform::GetCurrentDeviceId());
   auto d_temp_storage = phi::memory_utils::Alloc(place, temp_storage_bytes);
   // Run exclusive prefix sum
   cub::DeviceScan::ExclusiveSum(