10 changes: 4 additions & 6 deletions paddle/fluid/pybind/args_mapper.cc
@@ -133,15 +133,13 @@ void ArgMaxMinMapper(PyObject* args,
   if (axis_obj == Py_None || axis_obj == nullptr) {
     *flatten = true;
     *axis = paddle::dialect::full(
-        std::vector<int64_t>{1}, 0, phi::DataType::INT64, phi::CPUPlace());
+        std::vector<int64_t>{1}, 0, phi::DataType::INT64, CPUPlace());
   } else if (PyObject_CheckIRValue(axis_obj)) {
     *axis = CastPyArg2Value(axis_obj, "argmax", 1);
   } else {
     int64_t axis_tmp = CastPyArg2Long(axis_obj, "argmax", 1);
-    *axis = paddle::dialect::full(std::vector<int64_t>{1},
-                                  axis_tmp,
-                                  phi::DataType::INT64,
-                                  phi::CPUPlace());
+    *axis = paddle::dialect::full(
+        std::vector<int64_t>{1}, axis_tmp, phi::DataType::INT64, CPUPlace());
   }
   *keepdims = CastPyArg2Boolean(keepdims_obj, "argmax", 2, false);

@@ -251,7 +249,7 @@ void ArgSumMapper(PyObject* args,
   } else {
     std::vector<int64_t> axis_tmp = CastPyArg2Longs(axis_obj, "sum", 1, {});
     *axis = paddle::dialect::full_int_array(
-        axis_tmp, phi::DataType::INT64, phi::CPUPlace());
+        axis_tmp, phi::DataType::INT64, CPUPlace());
   }

   PyObject* py_obj_1 = GetItemFromArgsOrKWArgs(
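Throughout this PR the call sites drop the phi:: qualifier and write CPUPlace()/GPUPlace() directly. That only compiles if the shorter names are visible in the affected pybind translation units; below is a minimal sketch of one way that could look. The header name and namespace are assumptions for illustration, not taken from this diff -- the real change may instead rely on aliases Paddle already exports.

// place_alias_sketch.h -- hypothetical helper header, for illustration only.
// Check the PR's header changes for how the aliases are actually introduced.
#pragma once

#include "paddle/phi/common/place.h"

namespace paddle {
namespace pybind {

// Make the unqualified names used by the call sites above resolve to the
// phi place types.
using CPUPlace = phi::CPUPlace;
using GPUPlace = phi::GPUPlace;

}  // namespace pybind
}  // namespace paddle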
20 changes: 9 additions & 11 deletions paddle/fluid/pybind/cuda_streams_py.cc
@@ -67,7 +67,7 @@ PY_STREAM_TYPE get_legacy_default_stream(int device_id) {
   if (device_id == -1) {
     device_id = phi::backends::gpu::GetCurrentDeviceId();
   }
-  phi::GPUPlace place(device_id);
+  GPUPlace place(device_id);

   legacy_default_streams.try_emplace(
       device_id, place, static_cast<gpuStream_t>(0));
@@ -128,8 +128,7 @@ void BindCudaStream(py::module *m_ptr) {
 #endif
     gpuStream_t raw = reinterpret_cast<gpuStream_t>(data_ptr);

-    return std::make_unique<phi::CUDAStream>(phi::GPUPlace(device_id),
-                                             raw);
+    return std::make_unique<phi::CUDAStream>(GPUPlace(device_id), raw);
 #else
     PADDLE_THROW(common::errors::Unavailable(
         "Paddle is not compiled with CUDA/HIP, "
@@ -357,12 +356,11 @@ void BindCudaStream(py::module *m_ptr) {

       )DOC")
       .def_property_readonly(
-          "place",
-          [](phi::CUDAStream &self) { return phi::GPUPlace(self.place()); })
+          "place", [](phi::CUDAStream &self) { return GPUPlace(self.place()); })
 #endif
       .def(
           "__init__",
-          [](phi::CUDAStream &self, phi::GPUPlace *place, int priority) {
+          [](phi::CUDAStream &self, GPUPlace *place, int priority) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
             if (priority != 1 && priority != 2) {
               PADDLE_THROW(common::errors::InvalidArgument(
@@ -372,7 +370,7 @@
             auto stream_flag = phi::CUDAStream::StreamFlag::kStreamNonBlocking;
             if (place == nullptr) {
               int curr_device_id = platform::GetCurrentDeviceId();
-              auto place_tmp = phi::GPUPlace(curr_device_id);
+              auto place_tmp = GPUPlace(curr_device_id);
               new (&self) phi::CUDAStream(place_tmp, priority - 2, stream_flag);
             } else {
               // setting priority 1(high) and 2(normal) correspond to the actual
@@ -409,8 +407,8 @@
             auto stream_flag = phi::CUDAStream::StreamFlag::kStreamNonBlocking;
             // setting priority 1(high) and 2(normal) correspond to the actual
             // cuda stream priority -1 and 0.
-            new (&self) phi::CUDAStream(
-                phi::GPUPlace(device), priority - 2, stream_flag);
+            new (&self)
+                phi::CUDAStream(GPUPlace(device), priority - 2, stream_flag);
 #else
             PADDLE_THROW(common::errors::Unavailable(
                 "Class CUDAStream can only be initialized on the GPU platform."));
@@ -422,8 +420,8 @@
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
             int device_id = platform::GetCurrentDeviceId();
             auto stream_flag = phi::CUDAStream::StreamFlag::kStreamNonBlocking;
-            new (&self) phi::CUDAStream(
-                phi::GPUPlace(device_id), /*priority=*/0, stream_flag);
+            new (&self)
+                phi::CUDAStream(GPUPlace(device_id), /*priority=*/0, stream_flag);
 #else
             PADDLE_THROW(common::errors::Unavailable(
                 "Class CUDAStream can only be initialized on the GPU platform."));
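The __init__ overloads above document that the user-facing priorities 1 (high) and 2 (normal) map onto CUDA stream priorities -1 and 0 via `priority - 2`. The standalone sketch below simply restates that convention; the helper name is hypothetical and not part of the PR.

#include <stdexcept>

// Hypothetical helper mirroring the priority handling in the binding above.
int ToCudaStreamPriority(int user_priority) {
  // The binding rejects any value other than 1 or 2 with InvalidArgument.
  if (user_priority != 1 && user_priority != 2) {
    throw std::invalid_argument("priority must be 1 (high) or 2 (normal)");
  }
  return user_priority - 2;  // 1 -> -1 (high), 2 -> 0 (default/normal)
}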
4 changes: 2 additions & 2 deletions paddle/fluid/pybind/cudart_py.cc
@@ -169,7 +169,7 @@ void BindCudaRt(py::module* m) {
 #endif

   cudart.def("cudaMemGetInfo", [](int device) -> std::pair<size_t, size_t> {
-    const auto& place = phi::GPUPlace(device);
+    const auto& place = GPUPlace(device);
     platform::CUDADeviceGuard cuda_guard(place);
     size_t device_free = 0;
     size_t device_total = 0;
@@ -399,7 +399,7 @@
       "cuda"
       "MemGetInfo",
       [](int device) -> std::pair<size_t, size_t> {
-        const auto& place = phi::GPUPlace(device);
+        const auto& place = GPUPlace(device);
         platform::CUDADeviceGuard cuda_guard(place);
         size_t device_free = 0;
         size_t device_total = 0;
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/data_set_py.cc
@@ -102,7 +102,7 @@ class IterableDatasetWrapper {
             "Device number does not match reader number"));
     for (size_t i = 0; i < places_.size(); ++i) {
       data_feeds_[i]->AssignFeedVar(*scopes_[i]);
-      data_feeds_[i]->SetPlace(phi::CPUPlace());
+      data_feeds_[i]->SetPlace(CPUPlace());
       PADDLE_ENFORCE_EQ(data_feeds_[i]->Start(),
                         true,
                         common::errors::Unavailable(
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/deep_ep_api.cc
@@ -77,7 +77,7 @@ void BindDeepEPApi(pybind11::module *m) {
         cudaStream_t comm_stream = self.get_comm_stream();
         auto s = phi::Stream(reinterpret_cast<phi::StreamId>(comm_stream));
 #if defined(PADDLE_WITH_CUDA)
-        return phi::CUDAStream(phi::GPUPlace(device_id), s);
+        return phi::CUDAStream(GPUPlace(device_id), s);
 #elif defined(PADDLE_WITH_XPU)
         return phi::XPUCUDAStream(phi::XPUPlace(device_id), s);
 #endif
6 changes: 2 additions & 4 deletions paddle/fluid/pybind/distributed_py.cc
@@ -90,8 +90,7 @@ using GlooOptions = paddle::distributed::ProcessGroupGloo::GlooOptions;
 using ProcessGroupFlagcx = paddle::distributed::ProcessGroupFlagcx;
 #endif

-static UNUSED void *use_ccl_comm_func =
-    phi::detail::GetCCLComm(phi::CPUPlace());
+static UNUSED void *use_ccl_comm_func = phi::detail::GetCCLComm(CPUPlace());

 void BindDistributed(py::module *m) {
   py::enum_<distributed::ReduceOp>(*m, "ReduceOp")
@@ -576,8 +575,7 @@ void BindDistributed(py::module *m) {
                 auto *dev_ctx =
                     self.GetDeviceContext(in_tensor.place(), use_calc_stream);
                 SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
-                if (!use_calc_stream &&
-                    dev_ctx->GetPlace() != phi::CPUPlace()) {
+                if (!use_calc_stream && dev_ctx->GetPlace() != CPUPlace()) {
                   // calculate stream will wait comm stream
                   task->UpdateWaitChain(*dev_ctx);
                 }
8 changes: 4 additions & 4 deletions paddle/fluid/pybind/eager.cc
@@ -137,7 +137,7 @@ void EmptyStringTensorInitializer(TensorObject* self,
   auto ddims = common::make_ddim(dims);
   self->tensor.set_name(name);
   // Note(zhoushunjie): Only support CPUPlace when create StringTensor
-  auto actual_place = phi::CPUPlace();
+  auto actual_place = CPUPlace();
   // Allocate memory
   paddle::experimental::DefaultAllocator string_allocator(actual_place);
   std::shared_ptr<phi::StringTensor> string_tensor =
@@ -165,7 +165,7 @@ void InitTensorWithNumpyValue(TensorObject* self,
   phi::DenseTensor* impl_ptr =
       static_cast<phi::DenseTensor*>(self->tensor.impl().get());
   if (phi::is_cpu_place(place)) {
-    SetTensorFromPyArray<phi::CPUPlace>(impl_ptr, array, place, zero_copy);
+    SetTensorFromPyArray<CPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (phi::is_xpu_place(place)) {
 #if defined(PADDLE_WITH_XPU)
     phi::backends::xpu::SetXPUDeviceId(place.device);
@@ -186,7 +186,7 @@
     PADDLE_THROW(common::errors::PreconditionNotMet(
         "PaddlePaddle should compile with GPU if use CUDAPlace."));
 #endif
-    SetTensorFromPyArray<phi::GPUPlace>(impl_ptr, array, place, zero_copy);
+    SetTensorFromPyArray<GPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (phi::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<phi::GPUPinnedPlace>(
         impl_ptr, array, place, zero_copy);
@@ -227,7 +227,7 @@ void InitStringTensorWithNumpyValue(TensorObject* self, const py::object& obj) {
   phi::Place place = impl_ptr->place();
   auto array = obj.cast<py::array>();
   if (phi::is_cpu_place(place)) {
-    SetStringTensorFromPyArray<phi::CPUPlace>(impl_ptr, array, place);
+    SetStringTensorFromPyArray<CPUPlace>(impl_ptr, array, place);
   } else {
     PADDLE_THROW(common::errors::InvalidArgument(
         "StringTensor only support CPUPlace now, but receive %s",
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/eager_functions.cc
@@ -102,7 +102,7 @@ class EagerNumpyAllocation : public phi::Allocation {
       : Allocation(
             static_cast<void*>(pybind11::detail::array_proxy(numpy_data)->data),
             phi::SizeOf(dtype) * PyArray_Size_(numpy_data),
-            phi::CPUPlace()),
+            CPUPlace()),
         arr_(numpy_data) {
     PADDLE_ENFORCE_NOT_NULL(
         arr_,
4 changes: 2 additions & 2 deletions paddle/fluid/pybind/eager_math_op_patch.cc
@@ -88,14 +88,14 @@ void InitTensorWithNumpyValue(const py::object& array,
   phi::DenseTensor* impl_ptr =
       static_cast<phi::DenseTensor*>(self->impl().get());
   if (phi::is_cpu_place(place)) {
-    SetTensorFromPyArray<phi::CPUPlace>(impl_ptr, array, place, zero_copy);
+    SetTensorFromPyArray<CPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (phi::is_xpu_place(place)) {
     SetTensorFromPyArray<phi::XPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (phi::is_xpu_pinned_place(place)) {
     SetTensorFromPyArray<phi::XPUPinnedPlace>(
         impl_ptr, array, place, zero_copy);
   } else if (phi::is_gpu_place(place)) {
-    SetTensorFromPyArray<phi::GPUPlace>(impl_ptr, array, place, zero_copy);
+    SetTensorFromPyArray<GPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (phi::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<phi::GPUPinnedPlace>(
         impl_ptr, array, place, zero_copy);
13 changes: 5 additions & 8 deletions paddle/fluid/pybind/eager_method.cc
@@ -250,12 +250,12 @@ static PyObject* tensor_method_numpy(TensorObject* self,
   }

   phi::DenseTensor cpu_tensor;
-  phi::CPUPlace cpu_place;
+  CPUPlace cpu_place;

   if (self->tensor.is_cpu() || self->tensor.is_gpu_pinned() ||
       self->tensor.is_xpu_pinned()) {
     eager_gil_scoped_release guard;
-    phi::CPUPlace place;
+    CPUPlace place;
     if (self->tensor.is_selected_rows()) {
       VLOG(6) << "Getting SelectedRows's numpy value";
       auto* selected_rows =
@@ -382,7 +382,7 @@
 #endif
 #if defined(PADDLE_WITH_XPU)
   } else if (self->tensor.is_xpu()) {
-    phi::CPUPlace place;
+    CPUPlace place;
     if (self->tensor.is_selected_rows()) {
       VLOG(6) << "Getting SelectedRows's numpy value";
       auto* selected_rows =
@@ -3165,11 +3165,8 @@ static PyObject* tensor_method__share_memory(TensorObject* self,
   const std::string& ipc_name = shared_writer_holder->ipc_name();
   memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name);
   // 4. copy data & reset holder
-  memory::Copy(phi::CPUPlace(),
-               shared_writer_holder->ptr(),
-               phi::CPUPlace(),
-               data_ptr,
-               data_size);
+  memory::Copy(
+      CPUPlace(), shared_writer_holder->ptr(), CPUPlace(), data_ptr, data_size);
   t->ResetHolder(shared_writer_holder);
   return ToPyObject(t);
 #else
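The reflowed memory::Copy call above packs destination and source onto one line. As a reading aid, the annotated form below spells out the argument order -- destination place, destination pointer, source place, source pointer, byte count -- based on my reading of the call sites in this diff, so treat the labels as an assumption rather than a documented signature.

// Annotated form of the call above; behavior is unchanged.
memory::Copy(CPUPlace(),                   // destination place
             shared_writer_holder->ptr(),  // destination pointer
             CPUPlace(),                   // source place
             data_ptr,                     // source pointer
             data_size);                   // number of bytes to copy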
6 changes: 3 additions & 3 deletions paddle/fluid/pybind/eager_utils.cc
@@ -735,9 +735,9 @@ phi::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {
   if (PyObject_TypeCheck(obj, g_place_pytype)) {  // NOLINT
     place = ::pybind11::handle(obj).cast<phi::Place>();
   } else if (PyObject_TypeCheck(obj, g_cudaplace_pytype)) {
-    place = ::pybind11::handle(obj).cast<phi::GPUPlace>();
+    place = ::pybind11::handle(obj).cast<GPUPlace>();
   } else if (PyObject_TypeCheck(obj, g_cpuplace_pytype)) {
-    place = ::pybind11::handle(obj).cast<phi::CPUPlace>();
+    place = ::pybind11::handle(obj).cast<CPUPlace>();
   } else if (PyObject_TypeCheck(obj, g_xpuplace_pytype)) {
     place = ::pybind11::handle(obj).cast<phi::XPUPlace>();
   } else if (PyObject_TypeCheck(obj, g_cudapinnedplace_pytype)) {
@@ -2464,7 +2464,7 @@ std::vector<pir::Value> CastPyArg2VectorOfValueOrLong(
     } else if (PyObject_CheckLong(item)) {
       int64_t k_tmp = CastPyArg2Long(item, op_type, arg_pos);
       value_list.emplace_back(
-          paddle::dialect::full(shape, k_tmp, dtype, phi::CPUPlace()));
+          paddle::dialect::full(shape, k_tmp, dtype, CPUPlace()));
     } else if (item == Py_None) {
       continue;  // skip
     } else {
42 changes: 20 additions & 22 deletions paddle/fluid/pybind/imperative.cc
@@ -120,10 +120,10 @@ class PyVariableWrapperHook : public imperative::VariableWrapperHook {
 };

 static const phi::Place PyObjectToPlace(const py::object &place_obj) {
-  if (py::isinstance<phi::CPUPlace>(place_obj)) {
-    return place_obj.cast<phi::CPUPlace>();
-  } else if (py::isinstance<phi::GPUPlace>(place_obj)) {
-    return place_obj.cast<phi::GPUPlace>();
+  if (py::isinstance<CPUPlace>(place_obj)) {
+    return place_obj.cast<CPUPlace>();
+  } else if (py::isinstance<GPUPlace>(place_obj)) {
+    return place_obj.cast<GPUPlace>();
   } else if (py::isinstance<phi::XPUPlace>(place_obj)) {
     return place_obj.cast<phi::XPUPlace>();
   } else if (py::isinstance<phi::GPUPinnedPlace>(place_obj)) {
@@ -177,11 +177,11 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   auto *tensor = self->MutableVar()->GetMutable<phi::DenseTensor>();
   VLOG(4) << "zero_copy: " << zero_copy;
   if (phi::is_cpu_place(place)) {
-    SetTensorFromPyArray<phi::CPUPlace>(tensor, array, place, zero_copy);
+    SetTensorFromPyArray<CPUPlace>(tensor, array, place, zero_copy);
   } else if (phi::is_xpu_place(place)) {
     SetTensorFromPyArray<phi::XPUPlace>(tensor, array, place, zero_copy);
   } else if (phi::is_gpu_place(place)) {
-    SetTensorFromPyArray<phi::GPUPlace>(tensor, array, place, zero_copy);
+    SetTensorFromPyArray<GPUPlace>(tensor, array, place, zero_copy);
   } else if (phi::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<phi::GPUPinnedPlace>(tensor, array, place, zero_copy);
   } else if (phi::is_xpu_pinned_place(place)) {
@@ -528,7 +528,7 @@ void BindImperative(py::module *m_ptr) {
                   "_generator' to locate the data causes this issue."));
           // 2. construct DenseTensor
           phi::DenseTensor t;
-          SetTensorFromPyArray<phi::CPUPlace>(&t, array, phi::CPUPlace(), true);
+          SetTensorFromPyArray<CPUPlace>(&t, array, CPUPlace(), true);
           // 3. allocate shared memory
           void *data_ptr = t.data();
           size_t data_size = t.numel() * phi::SizeOf(t.dtype());
@@ -538,9 +538,9 @@
           const std::string &ipc_name = shared_writer_holder->ipc_name();
           memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name);
           // 5. copy data & reset holder
-          memory::Copy(phi::CPUPlace(),
+          memory::Copy(CPUPlace(),
                        shared_writer_holder->ptr(),
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        data_ptr,
                        data_size);
           t.ResetHolder(shared_writer_holder);
@@ -567,7 +567,7 @@
                   "_generator' to locate the data causes this issue."));
           // 2. construct DenseTensor
           phi::DenseTensor t;
-          SetTensorFromPyArray<phi::CPUPlace>(&t, array, phi::CPUPlace(), true);
+          SetTensorFromPyArray<CPUPlace>(&t, array, CPUPlace(), true);
           // 3. allocate shared memory
           void *data_ptr = t.data();
           size_t data_size = t.numel() * phi::SizeOf(t.dtype());
@@ -577,9 +577,9 @@
           const std::string &ipc_name = shared_writer_holder->ipc_name();
           memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name);
           // 5. copy data & reset holder
-          memory::Copy(phi::CPUPlace(),
+          memory::Copy(CPUPlace(),
                        shared_writer_holder->ptr(),
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        data_ptr,
                        data_size);
           t.ResetHolder(shared_writer_holder);
@@ -692,8 +692,8 @@ void BindImperative(py::module *m_ptr) {
             return py::cast(self.ExpectedPlace());
           },
           [](imperative::Tracer &self, const py::object &obj) {
-            if (py::isinstance<phi::GPUPlace>(obj)) {
-              auto p = obj.cast<phi::GPUPlace *>();
+            if (py::isinstance<GPUPlace>(obj)) {
+              auto p = obj.cast<GPUPlace *>();
               self.SetExpectedPlace(*p);
               // TODO(jiabin): Support eager here when we need to make all
               // dygraph in eager mode
@@ -704,8 +704,8 @@
               self.SetExpectedPlace(*p);
               VLOG(4) << "Tracer(" << &self << ")"
                       << " set expected place " << *p;
-            } else if (py::isinstance<phi::CPUPlace>(obj)) {
-              auto p = obj.cast<phi::CPUPlace *>();
+            } else if (py::isinstance<CPUPlace>(obj)) {
+              auto p = obj.cast<CPUPlace *>();
               self.SetExpectedPlace(*p);
               VLOG(4) << "Tracer(" << &self << ")"
                       << " set expected place " << *p;
@@ -844,8 +844,8 @@ void BindImperative(py::module *m_ptr) {
       });

   m.def("varbase_copy", &VarBaseCopy<phi::Place>);
-  m.def("varbase_copy", &VarBaseCopy<phi::CPUPlace>);
-  m.def("varbase_copy", &VarBaseCopy<phi::GPUPlace>);
+  m.def("varbase_copy", &VarBaseCopy<CPUPlace>);
+  m.def("varbase_copy", &VarBaseCopy<GPUPlace>);
   m.def("varbase_copy", &VarBaseCopy<phi::XPUPlace>);
   m.def("varbase_copy", &VarBaseCopy<phi::GPUPinnedPlace>);
   m.def("varbase_copy", &VarBaseCopy<phi::XPUPinnedPlace>);
@@ -925,8 +925,7 @@ void BindImperative(py::module *m_ptr) {
       imperative::ParallelContext,
       std::shared_ptr<imperative::NCCLParallelContext>>(
       m, "NCCLParallelContext")
-      .def(py::init<const imperative::ParallelStrategy &,
-                    const phi::GPUPlace &>())
+      .def(py::init<const imperative::ParallelStrategy &, const GPUPlace &>())
       .def("init", [](imperative::NCCLParallelContext &self) { self.Init(); })
       .def("init_with_ring_id",
            &imperative::NCCLParallelContext::InitWithRingID,
@@ -965,8 +964,7 @@ void BindImperative(py::module *m_ptr) {
       imperative::ParallelContext,
       std::shared_ptr<imperative::GLOOParallelContext>>(
       m, "GLOOParallelContext")
-      .def(py::init<const imperative::ParallelStrategy &,
-                    const phi::CPUPlace &>())
+      .def(py::init<const imperative::ParallelStrategy &, const CPUPlace &>())
       .def("init", [](imperative::GLOOParallelContext &self) { self.Init(); })
       .def("init_with_ring_id",
            &imperative::GLOOParallelContext::InitWithRingID,