Remove redundant phi:: qualifier from CPUPlace in kernels/impl/ (no functional change)

co63oc · co63oc · commit 15c46875ad67 · 2026-01-05T13:27:01.000+08:00
diff --git a/paddle/phi/kernels/impl/accuracy_check_kernel_impl.h b/paddle/phi/kernels/impl/accuracy_check_kernel_impl.h
@@ -251,7 +251,7 @@ struct AccuracyCheckFunctor<phi::GPUContext, T> {
         in_data, other_data, rtol, atol, equal_nan, num, out_data);
 
     DenseTensor out_cpu;
-    Copy(dev_ctx, *output, phi::CPUPlace(), true, &out_cpu);
+    Copy(dev_ctx, *output, CPUPlace(), true, &out_cpu);
     auto data_ptr = out_cpu.data<bool>();
 
     PADDLE_ENFORCE_EQ(*data_ptr,
diff --git a/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h b/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
@@ -48,18 +48,17 @@ void AverageAccumulatesKernel(const Context& dev_ctx,
   // int64_t num_accumulates = 0;
   // int64_t old_num_accumulates = 0;
 
-  auto num_updates_cpu =
-      phi::memory_utils::Alloc(phi::CPUPlace(), sizeof(int64_t));
+  auto num_updates_cpu = phi::memory_utils::Alloc(CPUPlace(), sizeof(int64_t));
   int64_t* num_updates_cpu_ptr =
       reinterpret_cast<int64_t*>(num_updates_cpu->ptr());
 
   auto num_accumulates_cpu =
-      phi::memory_utils::Alloc(phi::CPUPlace(), sizeof(int64_t));
+      phi::memory_utils::Alloc(CPUPlace(), sizeof(int64_t));
   int64_t* num_accumulates_cpu_ptr =
       reinterpret_cast<int64_t*>(num_accumulates_cpu->ptr());
 
   auto old_num_accumulates_cpu =
-      phi::memory_utils::Alloc(phi::CPUPlace(), sizeof(int64_t));
+      phi::memory_utils::Alloc(CPUPlace(), sizeof(int64_t));
   int64_t* old_num_accumulates_cpu_ptr =
       reinterpret_cast<int64_t*>(old_num_accumulates_cpu->ptr());
 
diff --git a/paddle/phi/kernels/impl/beam_search_decode_kernel_impl.h b/paddle/phi/kernels/impl/beam_search_decode_kernel_impl.h
@@ -49,7 +49,7 @@ struct BeamSearchDecodeFunctor {
           if (tensor_on_gpu_) {
             dev_ctx->Wait();
           }
-          Copy(*dev_ctx, step_id, phi::CPUPlace(), false, &out);
+          Copy(*dev_ctx, step_id, CPUPlace(), false, &out);
           dev_ctx->Wait();
         }
 
@@ -72,7 +72,7 @@ struct BeamSearchDecodeFunctor {
           if (tensor_on_gpu_) {
             dev_ctx->Wait();
           }
-          Copy(*dev_ctx, step_score, phi::CPUPlace(), false, &out);
+          Copy(*dev_ctx, step_score, CPUPlace(), false, &out);
           dev_ctx->Wait();
         }
 
diff --git a/paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h b/paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h
@@ -45,7 +45,7 @@ void ClipByNormFunctor(const Context& dev_ctx,
       temp + (static_cast<T>(1) - temp) * max_norm / (x_norm + epsilon);
   Eigen::array<int, 1> one_dim{{1}};
   Eigen::DSizes<int, 1> m_dsize(input->numel());
-  if (dev_ctx.GetPlace() == phi::CPUPlace()) {
+  if (dev_ctx.GetPlace() == CPUPlace()) {
     out.device(*place) = x * scaling.reshape(one_dim).eval().broadcast(m_dsize);
   } else {
     out.device(*place) = x * scaling.reshape(one_dim).broadcast(m_dsize);
diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h
@@ -92,7 +92,7 @@ inline bool CheckMatrixInvertible(const Context& dev_ctx,
 
   // copy to host
   DenseTensor cpu_tensor;
-  Copy<Context>(dev_ctx, dev_tensor, phi::CPUPlace(), false, &cpu_tensor);
+  Copy<Context>(dev_ctx, dev_tensor, CPUPlace(), false, &cpu_tensor);
 
   // if founded zero, the matrix is not invertible
   // else the matrix is invertible
diff --git a/paddle/phi/kernels/impl/fetch_impl.h b/paddle/phi/kernels/impl/fetch_impl.h
@@ -26,7 +26,7 @@ void FetchKernel(const Context& dev_ctx,
   if (!x.IsInitialized()) {
     return;
   }
-  Copy(dev_ctx, x, phi::CPUPlace(), true, out);
+  Copy(dev_ctx, x, CPUPlace(), true, out);
 }
 
 template <typename T, typename Context>
@@ -35,7 +35,7 @@ void FetchArrayKernel(const Context& dev_ctx,
                       TensorArray* out) {
   out->resize(x.size());
   for (size_t i = 0; i < x.size(); ++i) {
-    Copy(dev_ctx, x[i], phi::CPUPlace(), true, &(out->at(i)));
+    Copy(dev_ctx, x[i], CPUPlace(), true, &(out->at(i)));
   }
 }
 
diff --git a/paddle/phi/kernels/impl/gru_unit_kernel_impl.h b/paddle/phi/kernels/impl/gru_unit_kernel_impl.h
@@ -35,7 +35,7 @@ void ActCompute(
   } else if (act_type == tanh) {
     funcs::TanhFunctor<T>()(d, x, y);
   } else if (act_type == relu) {
-    if (place == phi::CPUPlace())
+    if (place == CPUPlace())
       funcs::ReluCPUFunctor<T>()(d, x, y);
     else
       funcs::ReluCUDAFunctor<T>()(d, x, y);
diff --git a/paddle/phi/kernels/impl/im2sequence_kernel_impl.h b/paddle/phi/kernels/impl/im2sequence_kernel_impl.h
@@ -50,7 +50,7 @@ void Im2SequenceKernel(const Context& dev_ctx,
     const DenseTensor* img_real_size = y.get_ptr();
 
     DenseTensor cpu_shape_tensor;
-    Copy(dev_ctx, *img_real_size, phi::CPUPlace(), true, &cpu_shape_tensor);
+    Copy(dev_ctx, *img_real_size, CPUPlace(), true, &cpu_shape_tensor);
     std::vector<int> img_real_h;
     std::vector<int> img_real_w;
     std::vector<int> output_height;
diff --git a/paddle/phi/kernels/impl/isclose_kernel_impl.h b/paddle/phi/kernels/impl/isclose_kernel_impl.h
@@ -267,7 +267,7 @@ struct GetTensorValue<phi::GPUContext, T> {
     T value;
     const auto gpu_place = dev_ctx.GetPlace();
     memory_utils::Copy(
-        phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());
+        CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());
     return value;
   }
 };
diff --git a/paddle/phi/kernels/impl/lamb_kernel_impl.h b/paddle/phi/kernels/impl/lamb_kernel_impl.h
@@ -186,8 +186,7 @@ void ComputeImpl(const Context& dev_ctx,
   // Diff from here
 
   if (dev_ctx.GetPlace().GetType() == AllocationType::GPU &&
-      beta1_pow.place() == phi::CPUPlace() &&
-      beta2_pow.place() == phi::CPUPlace()) {
+      beta1_pow.place() == CPUPlace() && beta2_pow.place() == CPUPlace()) {
     LambMomentREGUpdateFunctor<T, IsMultiPrecision> moment_update_functor(
         weight_decay,
         beta1,
diff --git a/paddle/phi/kernels/impl/lod_reset_kernel_impl.h b/paddle/phi/kernels/impl/lod_reset_kernel_impl.h
@@ -56,7 +56,7 @@ void LodResetKernel(const Context& dev_ctx,
       auto* lod = lod_t->data<int>();
       DenseTensor lod_cpu;
       if (lod_t->place().GetType() == phi::AllocationType::GPU) {
-        Copy(dev_ctx, *lod_t, phi::CPUPlace(), true, &lod_cpu);
+        Copy(dev_ctx, *lod_t, CPUPlace(), true, &lod_cpu);
         lod = lod_cpu.data<int>();
       }
       level0 = std::vector<int>(lod, lod + lod_t->numel());
diff --git a/paddle/phi/kernels/impl/lstsq_kernel_impl.h b/paddle/phi/kernels/impl/lstsq_kernel_impl.h
@@ -233,7 +233,7 @@ inline void BatchedOrmqr<GPUContext, float>(const GPUContext& dev_ctx,
 
     // check the error info
     int info_h;
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        &info_h,
                        dev_ctx.GetPlace(),
                        info_d,
@@ -302,7 +302,7 @@ inline void BatchedOrmqr<GPUContext, double>(const GPUContext& dev_ctx,
 
     // check the error info
     int info_h;
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        &info_h,
                        dev_ctx.GetPlace(),
                        info_d,
diff --git a/paddle/phi/kernels/impl/lu_kernel_impl.h b/paddle/phi/kernels/impl/lu_kernel_impl.h
@@ -478,7 +478,7 @@ void Unpack_Pivot(const Context& dev_ctx,
   auto prank = Pdimvec.size();
   auto Pnum = dims[prank - 1];
   DenseTensor Pivot_cpu;
-  phi::CPUPlace cpu;
+  CPUPlace cpu;
   Copy(dev_ctx, Pivot, cpu, false, &Pivot_cpu);
   auto pdataptr = Pivot_cpu.data<int32_t>();
   Pdimvec[prank - 1] = h;
diff --git a/paddle/phi/kernels/impl/numel_kernel_impl.h b/paddle/phi/kernels/impl/numel_kernel_impl.h
@@ -25,7 +25,7 @@ void NumelKernel(const Context& dev_ctx,
   auto place = dev_ctx.GetPlace();
   auto out_data = dev_ctx.template Alloc<int64_t>(out);
 
-  if (place == phi::CPUPlace()) {
+  if (place == CPUPlace()) {
     out_data[0] = input.numel();
   } else {
     DenseTensor cpu_tensor;
diff --git a/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h b/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h
@@ -78,7 +78,7 @@ void SequenceMaskKernel(const Context& dev_ctx,
     bool is_gpu_place = dev_ctx.GetPlace().GetType() == AllocationType::GPU;
     if (is_gpu_place) {
       DenseTensor temp;
-      Copy(dev_ctx, *max_len_tensor.get_ptr(), phi::CPUPlace(), false, &temp);
+      Copy(dev_ctx, *max_len_tensor.get_ptr(), CPUPlace(), false, &temp);
       maxlen = *temp.data<int32_t>();
     } else {
       maxlen = *max_len_tensor.get_ptr()->data<int32_t>();
diff --git a/paddle/phi/kernels/impl/sequence_pool_kernel_impl.h b/paddle/phi/kernels/impl/sequence_pool_kernel_impl.h
@@ -65,7 +65,7 @@ void SequencePoolKernel(const Context& dev_ctx,
 
   // Do not create index buffer for inference mode
   if (pooltype == "MAX" &&
-      (is_test == false || (dev_ctx.GetPlace() == phi::CPUPlace()) == false)) {
+      (is_test == false || (dev_ctx.GetPlace() == CPUPlace()) == false)) {
     index = max_index;
     index->Resize({dims});
     dev_ctx.template Alloc<int32_t>(index);
diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
@@ -276,8 +276,8 @@ void WarpctcKernel(const Context& dev_ctx,
 
     DenseTensor logits_length_cpu;
     DenseTensor labels_length_cpu;
-    Copy(dev_ctx, *logits_length, phi::CPUPlace(), false, &logits_length_cpu);
-    Copy(dev_ctx, *labels_length, phi::CPUPlace(), false, &labels_length_cpu);
+    Copy(dev_ctx, *logits_length, CPUPlace(), false, &logits_length_cpu);
+    Copy(dev_ctx, *labels_length, CPUPlace(), false, &labels_length_cpu);
 
     logits_lod.push_back(0);
     label_lod.push_back(0);
@@ -358,7 +358,7 @@ void WarpctcKernel(const Context& dev_ctx,
     T* pad_value_data = dev_ctx.template HostAlloc<T>(&cpu_pad_value);
     *pad_value_data = static_cast<T>(0);
     DenseTensor pad_value;
-    if (dev_ctx.GetPlace() == phi::CPUPlace()) {
+    if (dev_ctx.GetPlace() == CPUPlace()) {
       pad_value = cpu_pad_value;
     } else {
       Copy(dev_ctx, cpu_pad_value, dev_ctx.GetPlace(), true, &pad_value);
@@ -401,7 +401,7 @@ void WarpctcKernel(const Context& dev_ctx,
     lod.push_back(label_lod);
     warpctc_label.set_lod(lod);
 
-    if (dev_ctx.GetPlace() == phi::CPUPlace()) {
+    if (dev_ctx.GetPlace() == CPUPlace()) {
       funcs::UnpaddingDenseTensorFunctor<Context, int>()(
           dev_ctx,
           label,
@@ -424,10 +424,10 @@ void WarpctcKernel(const Context& dev_ctx,
           0 /*lod_level*/,
           false /*norm_by_times*/,
           funcs::kBatchLengthWidth);
-      Copy(dev_ctx, gpu_label, phi::CPUPlace(), true, &warpctc_label);
+      Copy(dev_ctx, gpu_label, CPUPlace(), true, &warpctc_label);
     }
   } else {
-    Copy(dev_ctx, label, phi::CPUPlace(), true, &warpctc_label);
+    Copy(dev_ctx, label, CPUPlace(), true, &warpctc_label);
   }
 
   const int* warpctc_label_data = warpctc_label.data<int>();

Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ struct BeamSearchDecodeFunctor {`
`49`	`49`	`if (tensor_on_gpu_) {`
`50`	`50`	`dev_ctx->Wait();`
`51`	`51`	`}`
`52`		`- Copy(*dev_ctx, step_id, phi::CPUPlace(), false, &out);`
	`52`	`+ Copy(*dev_ctx, step_id, CPUPlace(), false, &out);`
`53`	`53`	`dev_ctx->Wait();`
`54`	`54`	`}`
`55`	`55`
`@@ -72,7 +72,7 @@ struct BeamSearchDecodeFunctor {`
`72`	`72`	`if (tensor_on_gpu_) {`
`73`	`73`	`dev_ctx->Wait();`
`74`	`74`	`}`
`75`		`- Copy(*dev_ctx, step_score, phi::CPUPlace(), false, &out);`
	`75`	`+ Copy(*dev_ctx, step_score, CPUPlace(), false, &out);`
`76`	`76`	`dev_ctx->Wait();`
`77`	`77`	`}`
`78`	`78`
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ void FetchKernel(const Context& dev_ctx,`
`26`	`26`	`if (!x.IsInitialized()) {`
`27`	`27`	`return;`
`28`	`28`	`}`
`29`		`- Copy(dev_ctx, x, phi::CPUPlace(), true, out);`
	`29`	`+ Copy(dev_ctx, x, CPUPlace(), true, out);`
`30`	`30`	`}`
`31`	`31`
`32`	`32`	`template <typename T, typename Context>`
`@@ -35,7 +35,7 @@ void FetchArrayKernel(const Context& dev_ctx,`
`35`	`35`	`TensorArray* out) {`
`36`	`36`	`out->resize(x.size());`
`37`	`37`	`for (size_t i = 0; i < x.size(); ++i) {`
`38`		`- Copy(dev_ctx, x[i], phi::CPUPlace(), true, &(out->at(i)));`
	`38`	`+ Copy(dev_ctx, x[i], CPUPlace(), true, &(out->at(i)));`
`39`	`39`	`}`
`40`	`40`	`}`
`41`	`41`
Original file line number	Diff line number	Diff line change
`@@ -267,7 +267,7 @@ struct GetTensorValue<phi::GPUContext, T> {`
`267`	`267`	`T value;`
`268`	`268`	`const auto gpu_place = dev_ctx.GetPlace();`
`269`	`269`	`memory_utils::Copy(`
`270`		`- phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());`
	`270`	`+ CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());`
`271`	`271`	`return value;`
`272`	`272`	`}`
`273`	`273`	`};`