diff --git a/paddle/fluid/pybind/compiled_program.cc b/paddle/fluid/pybind/compiled_program.cc
index 03b4314fca74a3..477dcca788d6fd 100644
--- a/paddle/fluid/pybind/compiled_program.cc
+++ b/paddle/fluid/pybind/compiled_program.cc
@@ -874,7 +874,7 @@ void BindCompiledProgram(pybind11::module &m) {  // NOLINT
       },
       py::return_value_policy::reference);
   using VarQuantScale =
-      std::unordered_map<std::string, std::pair<bool, phi::DenseTensor>>;
+      std::unordered_map<std::string, std::pair<bool, DenseTensor>>;
   py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass");
   pass.def(py::init())
       .def("has", &ir::Pass::Has)
diff --git a/paddle/fluid/pybind/data_set_py.cc b/paddle/fluid/pybind/data_set_py.cc
index 8d55c89819b260..3ff8e88979d03e 100644
--- a/paddle/fluid/pybind/data_set_py.cc
+++ b/paddle/fluid/pybind/data_set_py.cc
@@ -80,7 +80,7 @@ class IterableDatasetWrapper {
       tensors_.emplace_back();
       for (auto &var_name : slots_) {
         auto *var = scopes_.back()->Var(var_name);
-        auto *t = var->GetMutable<phi::DenseTensor>();
+        auto *t = var->GetMutable<DenseTensor>();
         tensors_.back().emplace_back(t);
       }
     }
@@ -114,7 +114,7 @@ class IterableDatasetWrapper {
     exhaustive_num_ = 0;
   }
 
-  std::vector<std::unordered_map<std::string, phi::DenseTensor *>> Next() {
+  std::vector<std::unordered_map<std::string, DenseTensor *>> Next() {
     PADDLE_ENFORCE_EQ(
         is_started_,
         true,
@@ -122,7 +122,7 @@ class IterableDatasetWrapper {
             "Reader must be started when getting next batch data."));
 
     size_t device_num = places_.size();
-    std::vector<std::unordered_map<std::string, phi::DenseTensor *>> result(
+    std::vector<std::unordered_map<std::string, DenseTensor *>> result(
         device_num);
 
     size_t read_num = 0;
@@ -176,7 +176,7 @@ class IterableDatasetWrapper {
   }
 
  private:
-  bool IsValidDenseTensor(const phi::DenseTensor &tensor) const {
+  bool IsValidDenseTensor(const DenseTensor &tensor) const {
     if (!drop_last_) return true;
     return static_cast<size_t>(tensor.dims()[0]) == batch_size_;
   }
@@ -193,7 +193,7 @@ class IterableDatasetWrapper {
   size_t exhaustive_num_;
 
   std::vector<std::unique_ptr<framework::Scope>> scopes_;
-  std::vector<std::vector<phi::DenseTensor *>> tensors_;
+  std::vector<std::vector<DenseTensor *>> tensors_;
   bool is_started_{false};
 };
 
diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc
index 3c2b7f2c2bb6e6..4b18f555684303 100644
--- a/paddle/fluid/pybind/distributed_py.cc
+++ b/paddle/fluid/pybind/distributed_py.cc
@@ -155,7 +155,7 @@ void BindDistributed(py::module *m) {
            auto *pg_with_stream =
                dynamic_cast<distributed::ProcessGroupWithStream *>(&self);
            auto *dense_tensor =
-               dynamic_cast<phi::DenseTensor *>(tensor.impl().get());
+               dynamic_cast<DenseTensor *>(tensor.impl().get());
            if (pg_with_stream && dense_tensor) {
              pg_with_stream->EraseStream(*dense_tensor);
            }
@@ -170,7 +170,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto *out_dense = p_dense.get();
            auto in_dense = *p_dense;
            distributed::AllreduceOptions opts{op};
@@ -189,7 +189,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto *out_dense = p_dense.get();
            auto in_dense = *p_dense;
            distributed::BroadcastOptions opts{src};
@@ -208,7 +208,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto out_dense = *p_dense;
            return self.Send(out_dense, dst, sync_op);
          },
@@ -227,7 +227,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto
out_dense = *p_dense;
 
            int64_t numel = p_dense->numel();
@@ -251,7 +251,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto *in_dense = p_dense.get();
            return self.Recv(in_dense, src, sync_op);
          },
@@ -269,7 +269,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto *out_dense = p_dense.get();
 
            int64_t numel = p_dense->numel();
@@ -297,12 +297,12 @@ void BindDistributed(py::module *m) {
            py::gil_scoped_release release;
 
            Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
-           auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
+           auto p_out_tensor = std::dynamic_pointer_cast<DenseTensor>(
                stack_out_tensor.impl());
            auto *out_dense = p_out_tensor.get();
 
-           auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               in_tensor.impl());
+           auto p_in_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(in_tensor.impl());
            auto in_dense = *p_in_tensor;
 
            auto task = self.AllGather(out_dense, in_dense, sync_op);
@@ -325,12 +325,12 @@ void BindDistributed(py::module *m) {
            auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
            py::gil_scoped_release release;
 
-           auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               out_tensor.impl());
+           auto p_out_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(out_tensor.impl());
            auto *out_dense = p_out_tensor.get();
 
-           auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               in_tensor.impl());
+           auto p_in_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(in_tensor.impl());
            auto in_dense = *p_in_tensor;
 
            return self.AllGather(out_dense, in_dense, sync_op);
@@ -374,12 +374,12 @@ void BindDistributed(py::module *m) {
            auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
            py::gil_scoped_release release;
 
-           auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               out_tensor.impl());
+           auto p_out_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(out_tensor.impl());
            auto *out_dense = p_out_tensor.get();
 
-           auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               in_tensor.impl());
+           auto p_in_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(in_tensor.impl());
            auto in_dense = *p_in_tensor;
 
            return self.AllToAll(out_dense, in_dense, {}, {}, sync_op);
@@ -400,12 +400,12 @@ void BindDistributed(py::module *m) {
            auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
            py::gil_scoped_release release;
 
-           auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               out_tensor.impl());
+           auto p_out_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(out_tensor.impl());
            auto *out_dense = p_out_tensor.get();
 
-           auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               in_tensor.impl());
+           auto p_in_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(in_tensor.impl());
            auto in_dense = *p_in_tensor;
 
            return self.AllToAll(
@@ -427,7 +427,7 @@ void BindDistributed(py::module *m) {
            auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
            py::gil_scoped_release release;
            auto p_dense =
-               std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+               std::dynamic_pointer_cast<DenseTensor>(tensor.impl());
            auto *out_dense = p_dense.get();
            auto in_dense = *p_dense;
            distributed::ReduceOptions opts{op, dst};
@@ -450,12 +450,12 @@ void BindDistributed(py::module *m) {
                CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
            py::gil_scoped_release release;
 
-           auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-               out_tensor.impl());
+           auto p_out_tensor =
+               std::dynamic_pointer_cast<DenseTensor>(out_tensor.impl());
            auto out_dense = p_out_tensor.get();
 
            Tensor stack_in_tensor =
paddle::stack(in_tensor_list, 0); - auto p_in_tensor = std::dynamic_pointer_cast( + auto p_in_tensor = std::dynamic_pointer_cast( stack_in_tensor.impl()); auto in_dense = *p_in_tensor; @@ -478,12 +478,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; distributed::ReduceScatterOptions opts{op}; @@ -506,11 +506,11 @@ void BindDistributed(py::module *m) { CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0); - auto p_in_tensor = std::dynamic_pointer_cast( + auto p_in_tensor = std::dynamic_pointer_cast( stack_in_tensor.impl()); auto in_dense = *p_in_tensor; @@ -533,12 +533,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; distributed::ScatterOptions opts{src}; @@ -562,11 +562,11 @@ void BindDistributed(py::module *m) { py::gil_scoped_release release; Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0); - auto p_out_tensor = std::dynamic_pointer_cast( + auto p_out_tensor = std::dynamic_pointer_cast( stack_out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; distributed::GatherOptions gather_opts{dst}; @@ -607,7 +607,7 @@ void BindDistributed(py::module *m) { distributed::AllreduceOptions opts{}; opts.reduce_op = op; auto dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); return self.AllReduce(dense.get(), *dense, opts, false); }, py::arg("tensor"), @@ -623,7 +623,7 @@ void BindDistributed(py::module *m) { distributed::BroadcastOptions opts{}; opts.source_rank = source_rank; auto dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); return self.Broadcast(dense.get(), *dense, opts, false); }, py::arg("tensor"), @@ -637,7 +637,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); return self.Send(*dense, dst, false); }, py::arg("tensor"), @@ -651,7 +651,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); return self.Recv(dense.get(), src, false); }, py::arg("tensor"), @@ -665,10 
+665,10 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); py::gil_scoped_release release; - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); + auto in_dense = + std::dynamic_pointer_cast(in_tensor.impl()); + auto out_dense = + std::dynamic_pointer_cast(out_tensor.impl()); return self.AllGather(out_dense.get(), *in_dense, false); }, py::arg("in"), @@ -685,12 +685,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; int64_t numel = in_dense.numel(); @@ -712,10 +712,10 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); py::gil_scoped_release release; - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); + auto in_dense = + std::dynamic_pointer_cast(in_tensor.impl()); + auto out_dense = + std::dynamic_pointer_cast(out_tensor.impl()); int world_size = self.GetSize(); return self.AllToAll( @@ -739,12 +739,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; return self.AllToAll( @@ -766,8 +766,8 @@ void BindDistributed(py::module *m) { distributed::ReduceOptions opts{}; opts.reduce_op = op; opts.root_rank = dst; - auto dense = std::dynamic_pointer_cast( - in_tensor.impl()); + auto dense = + std::dynamic_pointer_cast(in_tensor.impl()); return self.Reduce(dense.get(), *dense, opts, false); }, py::arg("tensor"), @@ -785,10 +785,10 @@ void BindDistributed(py::module *m) { py::gil_scoped_release release; distributed::ScatterOptions opts{}; opts.root_rank = src; - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); + auto in_dense = + std::dynamic_pointer_cast(in_tensor.impl()); + auto out_dense = + std::dynamic_pointer_cast(out_tensor.impl()); return self.Scatter(out_dense.get(), *in_dense, opts, false); }, py::arg("in"), @@ -806,12 +806,12 @@ void BindDistributed(py::module *m) { py::gil_scoped_release release; Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0); - auto p_out_tensor = std::dynamic_pointer_cast( + auto p_out_tensor = std::dynamic_pointer_cast( stack_out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; auto task = self.AllGather(out_dense, in_dense, @@ -833,12 +833,12 @@ void BindDistributed(py::module *m) { auto 
in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; return self.AllGather(out_dense, @@ -860,12 +860,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; int64_t numel = in_dense.numel(); @@ -892,7 +892,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto in_dense = *p_dense; auto *out_dense = p_dense.get(); distributed::AllreduceOptions opts{op}; @@ -937,12 +937,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; return self.AllToAll(out_dense, @@ -966,12 +966,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; return self.AllToAll(out_dense, @@ -994,7 +994,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto *out_dense = p_dense.get(); auto in_dense = *p_dense; distributed::BroadcastOptions opts{src}; @@ -1016,7 +1016,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto *out_dense = p_dense.get(); auto in_dense = *p_dense; distributed::ReduceOptions opts{op, dst}; @@ -1041,12 +1041,12 @@ void BindDistributed(py::module *m) { CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto out_dense = p_out_tensor.get(); Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0); - auto p_in_tensor = std::dynamic_pointer_cast( + auto p_in_tensor = 
std::dynamic_pointer_cast( stack_in_tensor.impl()); auto in_dense = *p_in_tensor; @@ -1071,12 +1071,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; distributed::ReduceScatterOptions opts{op}; @@ -1101,12 +1101,12 @@ void BindDistributed(py::module *m) { CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0); - auto p_in_tensor = std::dynamic_pointer_cast( + auto p_in_tensor = std::dynamic_pointer_cast( stack_in_tensor.impl()); auto in_dense = *p_in_tensor; @@ -1131,12 +1131,12 @@ void BindDistributed(py::module *m) { auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_out_tensor = std::dynamic_pointer_cast( - out_tensor.impl()); + auto p_out_tensor = + std::dynamic_pointer_cast(out_tensor.impl()); auto *out_dense = p_out_tensor.get(); - auto p_in_tensor = std::dynamic_pointer_cast( - in_tensor.impl()); + auto p_in_tensor = + std::dynamic_pointer_cast(in_tensor.impl()); auto in_dense = *p_in_tensor; distributed::ScatterOptions opts{src}; @@ -1158,7 +1158,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto out_dense = *p_dense; return self.Send(out_dense, dst, @@ -1178,7 +1178,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto out_dense = *p_dense; int64_t numel = p_dense->numel(); @@ -1205,7 +1205,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto *in_dense = p_dense.get(); return self.Recv(in_dense, src, @@ -1225,7 +1225,7 @@ void BindDistributed(py::module *m) { auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); py::gil_scoped_release release; auto p_dense = - std::dynamic_pointer_cast(tensor.impl()); + std::dynamic_pointer_cast(tensor.impl()); auto *out_dense = p_dense.get(); int64_t numel = p_dense->numel(); @@ -1297,12 +1297,12 @@ void BindDistributed(py::module *m) { auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.Offload(dst_dense, src_dense); @@ -1318,13 +1318,13 @@ void BindDistributed(py::module *m) { size_t 
src_offset, size_t offload_size) { auto dst_tensor = CastPyArg2Tensor(py_dst_tensor.ptr(), 0); - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.OffloadWithOffset( @@ -1345,12 +1345,12 @@ void BindDistributed(py::module *m) { auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.Reload(dst_dense, src_dense); @@ -1444,12 +1444,12 @@ void BindDistributed(py::module *m) { auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.Offload(dst_dense, src_dense); @@ -1465,13 +1465,13 @@ void BindDistributed(py::module *m) { size_t src_offset, size_t offload_size) { auto dst_tensor = CastPyArg2Tensor(py_dst_tensor.ptr(), 0); - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.OffloadWithOffset( @@ -1492,12 +1492,12 @@ void BindDistributed(py::module *m) { auto src_tensor = CastPyArg2Tensor(py_src_tensor.ptr(), 0); py::gil_scoped_release release; - auto p_dst_tensor = std::dynamic_pointer_cast( - dst_tensor.impl()); + auto p_dst_tensor = + std::dynamic_pointer_cast(dst_tensor.impl()); auto *dst_dense = p_dst_tensor.get(); - auto p_src_tensor = std::dynamic_pointer_cast( - src_tensor.impl()); + auto p_src_tensor = + std::dynamic_pointer_cast(src_tensor.impl()); auto src_dense = *p_src_tensor; return self.Reload(dst_dense, src_dense); diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 1e5a6ae847a6bd..75ea1ddecc7c18 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -103,16 +103,16 @@ void EmptyTensorInitializer(TensorObject* self, VLOG(6) << "in EmptyTensorInitializer, create DenseTensor"; if (var_type == paddle::framework::proto::VarType::DENSE_TENSOR) { // TODO(jiabin): Maybe support LegacyLoD later - std::shared_ptr dense_tensor = nullptr; + std::shared_ptr dense_tensor = nullptr; if (dims.size() == 1 && dims[0] == 0) { std::shared_ptr allocation_ptr = nullptr; - dense_tensor = std::make_shared( + dense_tensor = std::make_shared( allocation_ptr, phi::DenseTensorMeta(dtype, ddims)); } else { // TODO(dev): we need enhance check 
for ddims. - dense_tensor = std::make_shared( - std::make_shared(), - phi::DenseTensorMeta(dtype, ddims)); + dense_tensor = + std::make_shared(std::make_shared(), + phi::DenseTensorMeta(dtype, ddims)); } self->tensor.set_impl(dense_tensor); } else if (var_type == paddle::framework::proto::VarType::SELECTED_ROWS) { @@ -249,15 +249,15 @@ void InitDistTensorWithTensor(TensorObject* self, self->tensor.set_name(name); VLOG(4) << "Do TensorCopy from DenseTensor to DistTensor."; if (place == src.place()) { - std::shared_ptr tensor = - std::static_pointer_cast(src.impl()); + std::shared_ptr tensor = + std::static_pointer_cast(src.impl()); self->tensor.set_impl( std::make_shared(tensor, process_mesh, placements)); VLOG(4) << "Same place, do ShareDataWith for DistTensor."; } else { - std::shared_ptr tensor; + std::shared_ptr tensor; if (src.initialized()) { - tensor = std::static_pointer_cast( + tensor = std::static_pointer_cast( src.copy_to(place, true).impl()); } else { // lazy init branch. The src tensor is on undefined place. @@ -265,7 +265,7 @@ void InitDistTensorWithTensor(TensorObject* self, src.place().GetType() == phi::AllocationType::UNDEFINED, common::errors::InvalidArgument("Only undefined place is support for " "uninitialized input tensor.")); - tensor = std::static_pointer_cast(src.impl()); + tensor = std::static_pointer_cast(src.impl()); } self->tensor.set_impl( std::make_shared(tensor, process_mesh, placements)); @@ -302,15 +302,14 @@ void InitDistTensorWithTensor(TensorObject* self, auto global_ddims = common::make_ddim(global_dims); VLOG(4) << "Do TensorCopy from DenseTensor to DistTensor."; if (place == local_tensor.place()) { - std::shared_ptr tensor = - std::static_pointer_cast(local_tensor.impl()); + std::shared_ptr tensor = + std::static_pointer_cast(local_tensor.impl()); self->tensor.set_impl(std::make_shared( tensor, global_ddims, process_mesh, placements)); VLOG(4) << "Same place, do ShareDataWith for DistTensor."; } else { - std::shared_ptr tensor = - std::static_pointer_cast( - local_tensor.copy_to(place, true).impl()); + std::shared_ptr tensor = std::static_pointer_cast( + local_tensor.copy_to(place, true).impl()); self->tensor.set_impl(std::make_shared( tensor, global_ddims, process_mesh, placements)); VLOG(4) << "Different place, do TensorCopy for DistTensor."; @@ -357,10 +356,10 @@ void InitTensorWithFrameworkTensor(TensorObject* self, const std::string& name) { self->tensor.set_name(name); if (place == src.place()) { - self->tensor.set_impl(std::make_shared(src)); + self->tensor.set_impl(std::make_shared(src)); VLOG(4) << "Same place, do ShareDataWith"; } else { - auto temp = paddle::Tensor(std::make_shared(src)); + auto temp = paddle::Tensor(std::make_shared(src)); self->tensor.set_impl(temp.copy_to(place, true).impl()); VLOG(4) << "Different place, do TensorCopy"; } @@ -582,7 +581,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy); } -// initialize Tensor by Tensor or phi::DenseTensor (mix args and +// initialize Tensor by Tensor or DenseTensor (mix args and // kwargs) automatically. 
void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, std::unordered_map kws_map, @@ -660,7 +659,7 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, } } else { // init by framework tensor - phi::DenseTensor src_tensor; + DenseTensor src_tensor; if (kw_order_map["value"] <= args_num) { src_tensor = CastPyArg2FrameworkTensor( PyTuple_GET_ITEM(args, kw_order_map["value"] - 1), diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 7b281ab1614b64..0ad5665db1216d 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -343,7 +343,7 @@ static PyObject* eager_api_read_next_tensor_list(PyObject* self, auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); autograd_meta->SetPersistable(false); autograd_meta->SetStopGradient(true); - tensor.set_impl(std::make_shared(tensor_base)); + tensor.set_impl(std::make_shared(tensor_base)); return tensor; }; for (auto& tensor_base : tensor_base_list) { @@ -530,9 +530,9 @@ static Tensor InitializedEmptyTensor() { egr::Controller::Instance().GenerateUniqueName("generated_tensor")); auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); autograd_meta->SetPersistable(false); - std::shared_ptr dense_tensor = nullptr; + std::shared_ptr dense_tensor = nullptr; std::shared_ptr allocation_ptr = nullptr; - dense_tensor = std::make_shared( + dense_tensor = std::make_shared( allocation_ptr, phi::DenseTensorMeta(phi::DataType::FLOAT32, ddims)); tensor.set_impl(dense_tensor); autograd_meta->SetGradNode( @@ -956,9 +956,9 @@ static PyObject* eager_api_sparse_coo_tensor(PyObject* self, non_zero_elements.is_dense_tensor(), common::errors::Fatal("the non-zero elements must be a DenseTensor.")); auto dense_indices = - std::dynamic_pointer_cast(non_zero_indices.impl()); + std::dynamic_pointer_cast(non_zero_indices.impl()); auto dense_elements = - std::dynamic_pointer_cast(non_zero_elements.impl()); + std::dynamic_pointer_cast(non_zero_elements.impl()); // TODO(zhangkaihuo): After creating SparseCooTensor, call coalesced() to // sort and merge duplicate indices std::shared_ptr coo_tensor = @@ -1005,11 +1005,11 @@ static PyObject* eager_api_sparse_csr_tensor(PyObject* self, common::errors::Fatal("the non-zero elements must be a DenseTensor.")); auto dense_crows = - std::dynamic_pointer_cast(non_zero_crows.impl()); + std::dynamic_pointer_cast(non_zero_crows.impl()); auto dense_cols = - std::dynamic_pointer_cast(non_zero_cols.impl()); + std::dynamic_pointer_cast(non_zero_cols.impl()); auto dense_elements = - std::dynamic_pointer_cast(non_zero_elements.impl()); + std::dynamic_pointer_cast(non_zero_elements.impl()); std::shared_ptr csr_tensor = std::make_shared(*dense_crows, *dense_cols, diff --git a/paddle/fluid/pybind/eager_generator.cc b/paddle/fluid/pybind/eager_generator.cc index e6b8e0ccb86bba..01b5c3f083fc9b 100644 --- a/paddle/fluid/pybind/eager_generator.cc +++ b/paddle/fluid/pybind/eager_generator.cc @@ -830,7 +830,7 @@ static bool CollectGradInformationFromOpInfo( ins[in_name].emplace_back(std::make_shared( "auto_" + in_name + "_" + std::to_string(i))); ins[in_name][i]->SetOverriddenStopGradient(false); - ins[in_name][i]->MutableVar()->GetMutable(); + ins[in_name][i]->MutableVar()->GetMutable(); } } else { for (const proto::OpProto::Var& input : op_proto.inputs()) { @@ -854,7 +854,7 @@ static bool CollectGradInformationFromOpInfo( ins[in_name] = { std::make_shared("auto_" + in_name)}; ins[in_name][0]->SetOverriddenStopGradient(false); - 
ins[in_name][0]->MutableVar()->GetMutable(); + ins[in_name][0]->MutableVar()->GetMutable(); } } VLOG(6) << "Prepared Forward Ins Map, size = " << ins.size(); @@ -872,7 +872,7 @@ static bool CollectGradInformationFromOpInfo( outs[out_name] = { std::make_shared("auto_" + out_name)}; outs[out_name][0]->SetOverriddenStopGradient(false); - outs[out_name][0]->MutableVar()->GetMutable(); + outs[out_name][0]->MutableVar()->GetMutable(); } VLOG(6) << "Prepared Forward Outs Map, size = " << outs.size(); diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 981fb09fc0c198..ae40fac0152a3d 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -106,8 +106,7 @@ Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj) { namespace { #ifdef PADDLE_WITH_DISTRIBUTE -phi::DenseTensor ReshardXToReplicated( - phi::distributed::DistTensor* dist_tensor) { +DenseTensor ReshardXToReplicated(phi::distributed::DistTensor* dist_tensor) { if (!dist_tensor->dist_attr().is_replicated()) { phi::distributed::TensorDistAttr dist_attr(dist_tensor->dist_attr()); std::vector dims_mapping(dist_tensor->dims().size(), -1); @@ -249,7 +248,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, return array; } - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; if (self->tensor.is_cpu() || self->tensor.is_gpu_pinned() || @@ -303,7 +302,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, } else { VLOG(6) << "Getting DenseTensor's numpy value"; auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); cpu_tensor.set_meta(dense_tensor->meta()); auto tmp_allocation_ptr = memory::Alloc(cpu_place, dense_tensor->Holder()->size()); @@ -368,7 +367,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, } else { VLOG(6) << "Getting DenseTensor's numpy value"; auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); cpu_tensor.set_meta(dense_tensor->meta()); auto tmp_allocation_ptr = memory::Alloc(cpu_place, dense_tensor->Holder()->size()); @@ -427,7 +426,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, } else { VLOG(6) << "Getting DenseTensor's numpy value"; auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); cpu_tensor.set_meta(dense_tensor->meta()); auto tmp_allocation_ptr = memory::Alloc(cpu_place, dense_tensor->Holder()->size()); @@ -462,14 +461,14 @@ static PyObject* tensor_method_numpy(TensorObject* self, } else { VLOG(6) << "Getting DenseTensor's numpy value"; auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); // TODO(qili93): temporary for ascend npu performance to be removed along // with npu_identity op - paddle::Tensor temp_tensor(std::make_shared()); + paddle::Tensor temp_tensor(std::make_shared()); if (dense_tensor->storage_properties_initialized()) { temp_tensor = npu_identity_ad_func(self->tensor, -1); dense_tensor = - std::dynamic_pointer_cast(temp_tensor.impl()); + std::dynamic_pointer_cast(temp_tensor.impl()); } cpu_tensor.set_meta(dense_tensor->meta()); auto tmp_allocation_ptr = @@ -491,8 +490,8 @@ static PyObject* tensor_method_numpy(TensorObject* self, void* array_buffer = cpu_tensor.Holder()->ptr(); size_t array_offset = cpu_tensor.offset(); - PyObject* base = ToPyObject(paddle::Tensor( - std::make_shared(std::move(cpu_tensor)))); + 
PyObject* base = ToPyObject( + paddle::Tensor(std::make_shared(std::move(cpu_tensor)))); uintptr_t ptr = reinterpret_cast(array_buffer) + array_offset; PyObject* array = api.PyArray_NewFromDescr_( api.PyArray_Type_, @@ -598,7 +597,7 @@ static PyObject* tensor_method__is_dense_tensor_hold_allocation( } if (self->tensor.is_dense_tensor()) { auto dense_tensor_ptr = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); return ToPyObject( dense_tensor_ptr->IsInitialized() && ((dense_tensor_ptr->numel() > 0 && dense_tensor_ptr->Holder()->ptr()) || @@ -1072,7 +1071,7 @@ static PyObject* tensor__to_dist(TensorObject* self, if (self->tensor.is_dense_tensor()) { const auto& dense_tensor_ptr = - std::static_pointer_cast(self->tensor.impl()); + std::static_pointer_cast(self->tensor.impl()); auto dist_tensor_ptr = std::make_shared( dense_tensor_ptr, mesh, placements); self->tensor.set_impl(dist_tensor_ptr); @@ -1126,7 +1125,7 @@ static PyObject* tensor__share_buffer_to(TensorObject* self, self->tensor.name())); } if (!dst_ptr->defined()) { - dst_ptr->set_impl(std::make_shared()); + dst_ptr->set_impl(std::make_shared()); } auto dst_tensor = static_cast(dst_ptr->impl().get()); dst_tensor->ShareBufferWith(*src_tensor); @@ -1170,7 +1169,7 @@ static PyObject* tensor__unsafe_share_buffer_to(TensorObject* self, auto* src_tensor = static_cast(self->tensor.impl().get()); if (!dst_ptr->defined()) { - dst_ptr->set_impl(std::make_shared()); + dst_ptr->set_impl(std::make_shared()); } auto dst_tensor = static_cast(dst_ptr->impl().get()); dst_tensor->ShareBufferWith(*src_tensor); @@ -1395,7 +1394,7 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self, EAGER_TRY if (!self->tensor.defined()) { // The original `get_tensor` method of Variable will create a empty tensor - phi::DenseTensor empty_tensor; + DenseTensor empty_tensor; return ToPyObject(&empty_tensor); } if (self->tensor.is_dense_tensor()) { @@ -1559,7 +1558,7 @@ static PyObject* tensor_method__get_tensor_from_selected_rows( VLOG(4) << "dense_tensor: " << dense_tensor->has_allocation(); auto t = paddle::Tensor(egr::Controller::Instance().GenerateUniqueName()); - t.set_impl(std::make_shared(*dense_tensor)); + t.set_impl(std::make_shared(*dense_tensor)); return ToPyObject(t); @@ -1673,7 +1672,7 @@ static PyObject* tensor__getitem_from_offset(TensorObject* self, PyObject* kwargs) { EAGER_TRY phi::DenseTensor* ptr = nullptr; - phi::DenseTensor tensor_after_reshard; + DenseTensor tensor_after_reshard; if (self->tensor.is_selected_rows()) { auto* selected_rows = static_cast(self->tensor.impl().get()); @@ -2026,17 +2025,16 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self, const auto& values_tmp = (require_any_grad && transback_sub_tensor.is_dense_tensor() && - !std::dynamic_pointer_cast( + !std::dynamic_pointer_cast( transback_sub_tensor.impl()) ->meta() .is_contiguous()) - ? paddle::Tensor( - std::make_shared( - paddle::experimental::Trans2Contiguous( - *(std::dynamic_pointer_cast( - transback_sub_tensor.impl())))), - transback_sub_tensor.mutable_autograd_meta(), - transback_sub_tensor.name()) + ? 
paddle::Tensor(std::make_shared( + paddle::experimental::Trans2Contiguous(*( + std::dynamic_pointer_cast( + transback_sub_tensor.impl())))), + transback_sub_tensor.mutable_autograd_meta(), + transback_sub_tensor.name()) : transback_sub_tensor; if (!x_autograd_meta) { VLOG(3) << "x_autograd_meta is null and requires_any_grad is true"; @@ -2282,7 +2280,7 @@ static PyObject* tensor__set_grad_type(TensorObject* self, auto grad_tensor = egr::EagerUtils::autograd_meta(&self->tensor)->MutableGrad(); if (var_type == framework::proto::VarType::DENSE_TENSOR) { - grad_tensor->set_impl(std::make_shared()); + grad_tensor->set_impl(std::make_shared()); } else if (var_type == framework::proto::VarType::SELECTED_ROWS) { grad_tensor->set_impl(std::make_shared()); } @@ -2317,9 +2315,9 @@ static PyObject* tensor__clear_to_zero_allocation(TensorObject* self, auto* dense_tensor = dynamic_cast(self->tensor.impl().get()); if (dense_tensor != nullptr && dense_tensor->Holder() != nullptr) { - phi::DenseTensor tmp(std::make_shared( - nullptr, 0, dense_tensor->Holder()->place()), - dense_tensor->meta()); + DenseTensor tmp(std::make_shared( + nullptr, 0, dense_tensor->Holder()->place()), + dense_tensor->meta()); dense_tensor->ShareBufferWith(std::move(tmp), /*only_buffer=*/true); } RETURN_PY_NONE @@ -2404,7 +2402,7 @@ static PyObject* tensor__use_gpudnn(TensorObject* self, // Share all other members of Tensor except use_gpudnn phi::DenseTensorMeta target_dense_meta = *dense_tensor_meta; target_dense_meta.use_gpudnn = use_gpudnn; - phi::DenseTensor target_dense_tensor; + DenseTensor target_dense_tensor; target_dense_tensor.ShareDataWith(*dense_tensor); target_dense_tensor.set_meta(target_dense_meta); // Construct returned tensor @@ -2418,8 +2416,7 @@ static PyObject* tensor__use_gpudnn(TensorObject* self, *(target_dist_tensor->unsafe_mutable_value()) = target_dense_tensor; target_tensor.set_impl(target_dist_tensor); } else { - target_tensor.set_impl( - std::make_shared(target_dense_tensor)); + target_tensor.set_impl(std::make_shared(target_dense_tensor)); } VLOG(4) << "Tensor: " << target_tensor.name() @@ -2556,8 +2553,8 @@ static PyObject* tensor_method_get_non_zero_indices(TensorObject* self, "this method is only effective for SparseCooTensor")); auto sparse_coo_tensor = std::dynamic_pointer_cast(self->tensor.impl()); - paddle::Tensor tensor(std::make_shared( - sparse_coo_tensor->non_zero_indices())); + paddle::Tensor tensor( + std::make_shared(sparse_coo_tensor->non_zero_indices())); return ToPyObject(tensor); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -2601,14 +2598,14 @@ static PyObject* tensor_method_get_non_zero_elements(TensorObject* self, if (self->tensor.is_sparse_coo_tensor()) { auto sparse_coo_tensor = std::dynamic_pointer_cast(self->tensor.impl()); - paddle::Tensor tensor(std::make_shared( - sparse_coo_tensor->non_zero_elements())); + paddle::Tensor tensor( + std::make_shared(sparse_coo_tensor->non_zero_elements())); return ToPyObject(tensor); } else { auto sparse_csr_tensor = std::dynamic_pointer_cast(self->tensor.impl()); - paddle::Tensor tensor(std::make_shared( - sparse_csr_tensor->non_zero_elements())); + paddle::Tensor tensor( + std::make_shared(sparse_csr_tensor->non_zero_elements())); return ToPyObject(tensor); } EAGER_CATCH_AND_THROW_RETURN_NULL @@ -2653,7 +2650,7 @@ static PyObject* tensor_method_get_non_zero_crows(TensorObject* self, auto sparse_csr_tensor = std::dynamic_pointer_cast(self->tensor.impl()); paddle::Tensor tensor( - std::make_shared(sparse_csr_tensor->non_zero_crows())); + 
std::make_shared(sparse_csr_tensor->non_zero_crows())); return ToPyObject(tensor); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -2697,7 +2694,7 @@ static PyObject* tensor_method_get_non_zero_cols(TensorObject* self, auto sparse_csr_tensor = std::dynamic_pointer_cast(self->tensor.impl()); paddle::Tensor tensor( - std::make_shared(sparse_csr_tensor->non_zero_cols())); + std::make_shared(sparse_csr_tensor->non_zero_cols())); return ToPyObject(tensor); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -3152,8 +3149,7 @@ static PyObject* tensor_method__share_memory(TensorObject* self, common::errors::InvalidArgument( "Sharing memory only support CPU Tensor currently")); // 1. get DenseTensor - auto* t = - std::dynamic_pointer_cast(self->tensor.impl()).get(); + auto* t = std::dynamic_pointer_cast(self->tensor.impl()).get(); // 2. allocate shared memory void* data_ptr = t->data(); size_t data_size = @@ -3254,8 +3250,7 @@ static PyObject* tensor__local_value(TensorObject* self, #ifdef PADDLE_WITH_DISTRIBUTE phi::distributed::DistTensor* dist_tensor = static_cast(self->tensor.impl().get()); - paddle::Tensor result( - std::make_shared(dist_tensor->value())); + paddle::Tensor result(std::make_shared(dist_tensor->value())); return ToPyObject(result); #else PADDLE_THROW(common::errors::Unavailable( @@ -3321,7 +3316,7 @@ static PyObject* tensor_data_ptr(TensorObject* self, if (self->tensor.defined() && self->tensor.has_allocation() && self->tensor.is_dense_tensor()) { return ToPyObject( - (int64_t)std::dynamic_pointer_cast( // NOLINT + (int64_t)std::dynamic_pointer_cast( // NOLINT self->tensor.impl()) ->data()); } else if (self->tensor.defined() && self->tensor.has_allocation() && @@ -3558,7 +3553,7 @@ static PyObject* tensor_is_contiguous(TensorObject* self, EAGER_TRY if (self->tensor.is_dense_tensor()) { auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); return ToPyObject(dense_tensor->meta().is_contiguous()); } else if (self->tensor.is_dist_tensor()) { auto dense_tensor = std::dynamic_pointer_cast( diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index b9b080eebbf7ab..9e3d6a1be72550 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -731,7 +731,7 @@ PyObject* tensor_properties_get_offset(TensorObject* self, void* closure) { size_t offset = 0; if (self->tensor.is_dense_tensor()) { auto dense_tensor = - std::dynamic_pointer_cast(self->tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); if (dense_tensor == nullptr) { RETURN_PY_NONE; } diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 1e7f03739c7df9..9f831eb8e3b6be 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -840,9 +840,9 @@ std::vector CastPyArg2VectorOfProcessMesh( #endif } -phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { +DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { if (PyObject_TypeCheck(obj, g_framework_tensor_pytype)) { - return ::pybind11::handle(obj).cast(); + return ::pybind11::handle(obj).cast(); } else { PADDLE_THROW(common::errors::InvalidType( "argument (position %d) must be " @@ -852,16 +852,16 @@ phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { } } -std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, - ssize_t arg_pos) { - std::vector result; +std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, + 
ssize_t arg_pos) { + std::vector result; if (PyList_Check(obj)) { Py_ssize_t len = PyList_Size(obj); PyObject* item = nullptr; for (Py_ssize_t i = 0; i < len; i++) { item = PyList_GetItem(obj, i); if (PyObject_TypeCheck(item, g_framework_tensor_pytype)) { - result.emplace_back(::pybind11::handle(item).cast()); + result.emplace_back(::pybind11::handle(item).cast()); } else { PADDLE_THROW(common::errors::InvalidType( "argument (position %d) must be " @@ -877,7 +877,7 @@ std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, for (Py_ssize_t i = 0; i < len; i++) { item = PyTuple_GetItem(obj, i); if (PyObject_TypeCheck(item, g_framework_tensor_pytype)) { - result.emplace_back(::pybind11::handle(item).cast()); + result.emplace_back(::pybind11::handle(item).cast()); } else { PADDLE_THROW(common::errors::InvalidType( "argument (position %d) must be " @@ -896,7 +896,7 @@ std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, } else if (obj == Py_None) { return {}; } else if (PyObject_TypeCheck(obj, g_framework_tensor_pytype)) { - return {::pybind11::handle(obj).cast()}; + return {::pybind11::handle(obj).cast()}; } else { PADDLE_THROW(common::errors::InvalidType( "argument (position %d) must be " @@ -2148,15 +2148,15 @@ paddle::Tensor CreateTensorFromVarDesc( if (var_type == paddle::framework::proto::VarType::DENSE_TENSOR) { // TODO(jiabin): Maybe support LegacyLoD later - std::shared_ptr dense_tensor = nullptr; + std::shared_ptr dense_tensor = nullptr; if (dims.size() == 1 && dims[0] == 0) { std::shared_ptr allocation_ptr = nullptr; - dense_tensor = std::make_shared( + dense_tensor = std::make_shared( allocation_ptr, phi::DenseTensorMeta(phi::TransToPhiDataType(dtype), ddims)); } else { // TODO(dev): we need enhance check for ddims. - dense_tensor = std::make_shared( + dense_tensor = std::make_shared( std::make_shared(), phi::DenseTensorMeta(phi::TransToPhiDataType(dtype), ddims)); } diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index f24d57844fbab3..2729595b36e72e 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -82,9 +82,9 @@ std::vector CastPyArg2VectorOfTensor( ssize_t arg_pos, const phi::distributed::ProcessMesh* mesh = nullptr); phi::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos); -phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos); -std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, - ssize_t arg_pos); +DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos); +std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, + ssize_t arg_pos); std::vector CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos); std::vector CastPyArg2VectorOfInt64(PyObject* obj, size_t arg_pos); std::vector CastPyArg2VectorOfSize_t(PyObject* obj, size_t arg_pos); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index f5355457a1d7d2..8ac66972d620bd 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -174,7 +174,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self, bool zero_copy = false, int stop_gradient = -1) { InitVarBaseOnly(self, name, persistable, stop_gradient); - auto *tensor = self->MutableVar()->GetMutable(); + auto *tensor = self->MutableVar()->GetMutable(); VLOG(4) << "zero_copy: " << zero_copy; if (phi::is_cpu_place(place)) { SetTensorFromPyArray(tensor, array, place, zero_copy); @@ -246,7 +246,7 @@ static void InitVarBaseFromNumpyWithArg(imperative::VarBase *self, << " / stop_gradient: " << 
stop_gradient << " / at " << place; new (self) imperative::VarBase(name); self->SetPersistable(persistable); - auto *tensor = self->MutableVar()->GetMutable(); + auto *tensor = self->MutableVar()->GetMutable(); if (stop_gradient != -1) { self->SetOverriddenStopGradient(stop_gradient); } @@ -263,7 +263,7 @@ static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, } static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const std::string &name) { VLOG(4) << "Init VarBase"; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); @@ -275,7 +275,7 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, self->SetPersistable(false); self->SetType(framework::proto::VarType::DENSE_TENSOR); self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); - auto *new_tensor = self->MutableVar()->GetMutable(); + auto *new_tensor = self->MutableVar()->GetMutable(); // Same place, share data directly if (place == tensor.place()) { new_tensor->ShareDataWith(tensor); @@ -288,7 +288,7 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, template static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const P &place, const std::string &name) { VLOG(4) << "Init VarBase"; @@ -300,7 +300,7 @@ static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, self->SetPersistable(false); self->SetType(framework::proto::VarType::DENSE_TENSOR); self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); - auto *new_tensor = self->MutableVar()->GetMutable(); + auto *new_tensor = self->MutableVar()->GetMutable(); // Same place, share data directly if (phi::is_same_place(place, tensor.place())) { new_tensor->ShareDataWith(tensor); @@ -327,7 +327,7 @@ Py_ssize_t GetSliceIndexFromPyObject(PyObject *obj) { return GetSliceIndexFromTensor( py::cast>(obj) ->Var() - .Get()); + .Get()); } else { PADDLE_THROW(common::errors::InvalidArgument( "We should only get paddle::Tensor or VarBase in this " @@ -430,9 +430,9 @@ static void VarBaseCopy(std::shared_ptr &src, // NOLINT dst.SetType(src->Type()); dst.SetOverriddenStopGradient(src->OverriddenStopGradient()); if (!src->SharedVar()->IsEmpty()) { - if (src->Var().IsType()) { - auto &src_tensor = src->Var().Get(); - auto *dst_tensor = dst.MutableVar()->GetMutable(); + if (src->Var().IsType()) { + auto &src_tensor = src->Var().Get(); + auto *dst_tensor = dst.MutableVar()->GetMutable(); framework::TensorCopy(src_tensor, dst_device, dst_tensor); if (blocking) { phi::DeviceContextPool::Instance().Get(dst_device)->Wait(); @@ -527,7 +527,7 @@ void BindImperative(py::module *m_ptr) { "function passed to 'set_(sample/sample_list/batch)" "_generator' to locate the data causes this issue.")); // 2. construct DenseTensor - phi::DenseTensor t; + DenseTensor t; SetTensorFromPyArray(&t, array, CPUPlace(), true); // 3. allocate shared memory void *data_ptr = t.data(); @@ -566,7 +566,7 @@ void BindImperative(py::module *m_ptr) { "function passed to 'set_(sample/sample_list/batch)" "_generator' to locate the data causes this issue.")); // 2. construct DenseTensor - phi::DenseTensor t; + DenseTensor t; SetTensorFromPyArray(&t, array, CPUPlace(), true); // 3. 
allocate shared memory void *data_ptr = t.data(); @@ -590,7 +590,7 @@ void BindImperative(py::module *m_ptr) { m.def("_remove_tensor_list_mmap_fds", [](py::list &tensor_list) { for (auto &&tensor : tensor_list) { - auto t = tensor.cast(); + auto t = tensor.cast(); auto *mmap_writer_allocation = dynamic_cast( t.Holder().get()); @@ -1084,10 +1084,10 @@ void BindImperative(py::module *m_ptr) { // TODO(daisiming): In future, add index as arguments following // async_read. - auto &src_tensor = src.Var().Get(); - auto *dst_tensor = dst.MutableVar()->GetMutable(); - auto &offset_tensor = offset.Var().Get(); - auto &count_tensor = count.Var().Get(); + auto &src_tensor = src.Var().Get(); + auto *dst_tensor = dst.MutableVar()->GetMutable(); + auto &offset_tensor = offset.Var().Get(); + auto &count_tensor = count.Var().Get(); const auto &deviceId = paddle::platform::GetCurrentDeviceId(); PADDLE_ENFORCE_EQ(offset_tensor.dims().size(), @@ -1241,13 +1241,12 @@ void BindImperative(py::module *m_ptr) { "Required `count` device should be CPUPlace, but received %d.", count.Place())); - auto &src_tensor = src.Var().Get(); - auto *dst_tensor = dst.MutableVar()->GetMutable(); - auto &index_tensor = index.Var().Get(); - auto *buffer_tensor = - buffer.MutableVar()->GetMutable(); - auto &offset_tensor = offset.Var().Get(); - auto &count_tensor = count.Var().Get(); + auto &src_tensor = src.Var().Get(); + auto *dst_tensor = dst.MutableVar()->GetMutable(); + auto &index_tensor = index.Var().Get(); + auto *buffer_tensor = buffer.MutableVar()->GetMutable(); + auto &offset_tensor = offset.Var().Get(); + auto &count_tensor = count.Var().Get(); auto *dst_data = dst_tensor->mutable_data(dst.Place()); const auto &deviceId = paddle::platform::GetCurrentDeviceId(); @@ -1342,9 +1341,9 @@ void BindImperative(py::module *m_ptr) { } // Select the index data to the buffer - auto index_select = [](const phi::DenseTensor &src_tensor, - const phi::DenseTensor &index_tensor, - phi::DenseTensor *buffer_tensor) { + auto index_select = [](const DenseTensor &src_tensor, + const DenseTensor &index_tensor, + DenseTensor *buffer_tensor) { auto *src_data = src_tensor.data(); auto *index_data = index_tensor.data(); auto *buffer_data = diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index bddaca2f1d406a..784a2a97f91520 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -306,7 +306,7 @@ void PaddleInferShareExternalDataByPtrName( } void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT - phi::DenseTensor input_tensor) { + DenseTensor input_tensor) { std::vector shape; for (int i = 0; i < input_tensor.dims().size(); ++i) { shape.push_back(input_tensor.dims()[i]); // NOLINT diff --git a/paddle/fluid/pybind/io.cc b/paddle/fluid/pybind/io.cc index e6440993790d6f..baa9600bbd030c 100644 --- a/paddle/fluid/pybind/io.cc +++ b/paddle/fluid/pybind/io.cc @@ -28,7 +28,7 @@ namespace paddle::pybind { template void LoadCombine(const std::string &file_path, const std::vector &names, - std::vector *out, + std::vector *out, bool load_as_fp16, const PlaceType place) { pir::LoadCombineFunction(file_path, names, out, load_as_fp16, place); @@ -39,13 +39,13 @@ void Load(const std::string &file_path, int64_t seek, const std::vector &shape, bool load_as_fp16, - phi::DenseTensor *out, + DenseTensor *out, const PlaceType place) { pir::LoadFunction(file_path, seek, shape, load_as_fp16, out, place); } void BindIO(pybind11::module *m) { m->def("save_dense_tensor", - 
[](const phi::DenseTensor &tensor, const std::string &str_file_name) { + [](const DenseTensor &tensor, const std::string &str_file_name) { std::ofstream fout(str_file_name, std::ios::binary); PADDLE_ENFORCE_EQ( static_cast(fout), @@ -60,7 +60,7 @@ void BindIO(pybind11::module *m) { }); m->def("load_dense_tensor", - [](phi::DenseTensor &tensor, const std::string &str_file_name) { + [](DenseTensor &tensor, const std::string &str_file_name) { std::ifstream fin(str_file_name, std::ios::binary); PADDLE_ENFORCE_EQ( static_cast(fin), @@ -107,14 +107,14 @@ void BindIO(pybind11::module *m) { }); m->def("save_dense_tensor_to_memory", - [](const phi::DenseTensor &tensor) -> py::bytes { + [](const DenseTensor &tensor) -> py::bytes { std::ostringstream ss; phi::SerializeToStream(ss, tensor); return ss.str(); }); m->def("load_dense_tensor_from_memory", - [](phi::DenseTensor &tensor, const std::string &tensor_bytes) { + [](DenseTensor &tensor, const std::string &tensor_bytes) { std::istringstream fin(tensor_bytes, std::ios::in | std::ios::binary); phi::DeserializeFromStream(fin, &tensor); @@ -136,7 +136,7 @@ void BindIO(pybind11::module *m) { }); m->def("load_dense_tensor", [](const std::string path) { - phi::DenseTensor tensor_load; + DenseTensor tensor_load; paddle::framework::LoadTensor(path, &tensor_load); return tensor_load; }); diff --git a/paddle/fluid/pybind/pir.cc b/paddle/fluid/pybind/pir.cc index ac96eb1f3c8054..60c30c6b0425c4 100644 --- a/paddle/fluid/pybind/pir.cc +++ b/paddle/fluid/pybind/pir.cc @@ -603,54 +603,52 @@ void BindProgram(py::module *m) { return name_analysis::GetAllParameterValues(self); }) .def("num_ops", [](Program &self) { return self.num_ops(); }) - .def( - "_state_dict", - [](std::shared_ptr self, - const std::string &mode = "all", - const framework::Scope &scope = framework::Scope()) { - std::unordered_map state_dict_all; - std::unordered_map state_dict_param; - std::unordered_map state_dict_opt; - for (auto op : self->block()->ops()) { - for (auto var : op->results()) { - auto is_persistable = - var.attribute(kAttrIsPersistable); - if (is_persistable && is_persistable.data()) { - if (var.defining_op()->isa<::pir::ParameterOp>()) { - std::string var_name = - name_analysis::GetValueFirstName(var); - auto tensor = - scope.FindVar(var_name)->GetMutable(); - state_dict_param[var_name] = *tensor; - state_dict_all[var_name] = *tensor; - } else if (var.defining_op() - ->isa()) { - std::string var_name = - name_analysis::GetValueFirstName(var); - auto tensor = - scope.FindVar(var_name)->GetMutable(); - state_dict_opt[var_name] = *tensor; - state_dict_all[var_name] = *tensor; - } - } - } - } - if (mode == "all") { - return state_dict_all; - } else if (mode == "param") { - return state_dict_param; - } else if (mode == "opt") { - return state_dict_opt; - } else { - PADDLE_THROW(common::errors::InvalidArgument( - "The mode is not supported.")); - } - }) + .def("_state_dict", + [](std::shared_ptr self, + const std::string &mode = "all", + const framework::Scope &scope = framework::Scope()) { + std::unordered_map state_dict_all; + std::unordered_map state_dict_param; + std::unordered_map state_dict_opt; + for (auto op : self->block()->ops()) { + for (auto var : op->results()) { + auto is_persistable = + var.attribute(kAttrIsPersistable); + if (is_persistable && is_persistable.data()) { + if (var.defining_op()->isa<::pir::ParameterOp>()) { + std::string var_name = + name_analysis::GetValueFirstName(var); + auto tensor = + scope.FindVar(var_name)->GetMutable(); + 
state_dict_param[var_name] = *tensor; + state_dict_all[var_name] = *tensor; + } else if (var.defining_op() + ->isa()) { + std::string var_name = + name_analysis::GetValueFirstName(var); + auto tensor = + scope.FindVar(var_name)->GetMutable(); + state_dict_opt[var_name] = *tensor; + state_dict_all[var_name] = *tensor; + } + } + } + } + if (mode == "all") { + return state_dict_all; + } else if (mode == "param") { + return state_dict_param; + } else if (mode == "opt") { + return state_dict_opt; + } else { + PADDLE_THROW(common::errors::InvalidArgument( + "The mode is not supported.")); + } + }) .def( "set_state_dict", [](std::shared_ptr self, - const std::unordered_map - &state_dict, + const std::unordered_map &state_dict, const framework::Scope &scope = framework::Scope(), bool copy_tensor = false) { for (auto item : state_dict) { @@ -660,11 +658,11 @@ void BindProgram(py::module *m) { "The variable %s is not found.", item.first)); } else { if (copy_tensor) { - auto *mutable_tensor = var->GetMutable(); + auto *mutable_tensor = var->GetMutable(); paddle::framework::TensorCopy( item.second, item.second.place(), mutable_tensor); } else { - *var->GetMutable() = item.second; + *var->GetMutable() = item.second; } } } @@ -2566,7 +2564,7 @@ static void inline CreateVariableIfNotExist( "Please set argument [executor] not None " "or run startup program first")); var = scope->Var(para_name); - auto *tensor_temp = var->GetMutable(); + auto *tensor_temp = var->GetMutable(); tensor_temp->Resize( common::make_ddim(phi::vectorize(GetValueDims(value)))); phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance(); diff --git a/paddle/fluid/pybind/process_group_utils.h b/paddle/fluid/pybind/process_group_utils.h index 30fda1bce3fe6f..0c216656d81002 100644 --- a/paddle/fluid/pybind/process_group_utils.h +++ b/paddle/fluid/pybind/process_group_utils.h @@ -26,8 +26,8 @@ namespace pybind { template struct ConcatDenseTensor { void operator()(const DeviceContext &context, - const std::vector &in, - phi::DenseTensor *out, + const std::vector &in, + DenseTensor *out, int axis = 0) { phi::funcs::ConcatFunctor concat_functor; concat_functor(context, in, axis, out); @@ -37,10 +37,10 @@ struct ConcatDenseTensor { template struct SplitDenseTensor { void operator()(const DeviceContext &context, - const phi::DenseTensor &in, - std::vector *out, + const DenseTensor &in, + std::vector *out, int axis = 0) { - std::vector shape_refer; + std::vector shape_refer; shape_refer.reserve(out->size()); for (auto *p_tensor : *out) { shape_refer.emplace_back(p_tensor); @@ -54,8 +54,8 @@ struct SplitDenseTensor { template struct ConcatDenseTensor { void operator()(const phi::CustomContext &context, - const std::vector &in, - phi::DenseTensor *out, + const std::vector &in, + DenseTensor *out, int axis UNUSED = 0) { VLOG(10) << "ConcatDenseTensor: " << in.size(); auto kernel_result = @@ -65,13 +65,12 @@ struct ConcatDenseTensor { phi::DataLayout::ALL_LAYOUT, phi::CppTypeToDataType::Type())); const auto &kernel = kernel_result.kernel; - using kernel_signature = - void (*)(const phi::DeviceContext &, - const std::vector &, - const phi::Scalar &, - phi::DenseTensor *); + using kernel_signature = void (*)(const phi::DeviceContext &, + const std::vector &, + const phi::Scalar &, + DenseTensor *); auto *kernel_fn = kernel.GetVariadicKernelFn(); - std::vector inputs; + std::vector inputs; (*kernel_fn)(context, inputs, phi::Scalar(0), out); } }; @@ -79,8 +78,8 @@ struct ConcatDenseTensor { template struct SplitDenseTensor { void 
operator()(const phi::CustomContext &context, - const phi::DenseTensor &in, - std::vector *out, + const DenseTensor &in, + std::vector *out, int axis UNUSED = 0) { VLOG(10) << "SplitDenseTensor: " << out->size(); auto kernel_result = @@ -91,10 +90,10 @@ struct SplitDenseTensor { phi::CppTypeToDataType::Type())); const auto &kernel = kernel_result.kernel; using kernel_signature = void (*)(const phi::DeviceContext &, - const phi::DenseTensor &, + const DenseTensor &, int, const phi::Scalar &, - std::vector); + std::vector); auto *kernel_fn = kernel.GetVariadicKernelFn(); auto in_dims = common::vectorize(in.dims()); @@ -120,8 +119,8 @@ struct SplitDenseTensor { template void ConcatDenseTensorWithType(const DeviceContext &dev_ctx, - const std::vector &t_list, - phi::DenseTensor *p_out, + const std::vector &t_list, + DenseTensor *p_out, phi::DataType type) { switch (type) { case phi::DataType::BOOL: @@ -162,8 +161,8 @@ void ConcatDenseTensorWithType(const DeviceContext &dev_ctx, #ifdef PADDLE_WITH_XPU template <> void ConcatDenseTensorWithType(const phi::XPUContext &dev_ctx, - const std::vector &t_list, - phi::DenseTensor *p_out, + const std::vector &t_list, + DenseTensor *p_out, phi::DataType type) { switch (type) { case phi::DataType::FLOAT16: @@ -195,8 +194,8 @@ void ConcatDenseTensorWithType(const phi::XPUContext &dev_ctx, template void SplitDenseTensorWithType(const DeviceContext &dev_ctx, - const phi::DenseTensor &t_in, - std::vector *p_list, + const DenseTensor &t_in, + std::vector *p_list, phi::DataType type) { switch (type) { case phi::DataType::BOOL: @@ -245,8 +244,8 @@ void SplitDenseTensorWithType(const DeviceContext &dev_ctx, #ifdef PADDLE_WITH_XPU template <> void SplitDenseTensorWithType(const phi::XPUContext &dev_ctx, - const phi::DenseTensor &t_in, - std::vector *p_list, + const DenseTensor &t_in, + std::vector *p_list, phi::DataType type) { switch (type) { case phi::DataType::FLOAT16: @@ -277,10 +276,10 @@ void SplitDenseTensorWithType(const phi::XPUContext &dev_ctx, #endif void ConcatTensor(const phi::DeviceContext &dev_ctx, - const std::vector &tensor_list, + const std::vector &tensor_list, const Tensor *tensor) { auto *dense_tensor = - std::dynamic_pointer_cast(tensor->impl()).get(); + std::dynamic_pointer_cast(tensor->impl()).get(); const auto &place = dev_ctx.GetPlace(); if (phi::is_gpu_place(place)) { @@ -329,12 +328,12 @@ void ConcatTensor(const phi::DeviceContext &dev_ctx, } void SplitTensor(const phi::DeviceContext &dev_ctx, - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const std::vector *tensor_list) { - std::vector dense_list; + std::vector dense_list; for (auto &tensor : *tensor_list) { auto *p_tensor = - std::dynamic_pointer_cast(tensor.impl()).get(); + std::dynamic_pointer_cast(tensor.impl()).get(); dense_list.emplace_back(p_tensor); } @@ -383,16 +382,16 @@ void SplitTensor(const phi::DeviceContext &dev_ctx, } } -inline std::vector GetDefaultSplitSizes(const phi::DenseTensor &tensor, +inline std::vector GetDefaultSplitSizes(const DenseTensor &tensor, int world_size) { return std::vector(world_size, tensor.dims()[0] / world_size); } -inline std::vector ToDenseTensors( +inline std::vector ToDenseTensors( const std::vector &tensors) { - std::vector ret; + std::vector ret; for (auto &t : tensors) { - ret.emplace_back(*std::dynamic_pointer_cast(t.impl())); + ret.emplace_back(*std::dynamic_pointer_cast(t.impl())); } return ret; } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 6a7c7e9d77f87d..51b9fd19f1e938 100644 --- 
a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -692,7 +692,7 @@ static void inline CreateVariableIfNotExist( auto var_desc = PyObjectCast(py_var_desc); Py_DECREF(py_var_desc); var = const_cast(&scope)->Var(para_name); - auto *tensor_temp = var->GetMutable(); + auto *tensor_temp = var->GetMutable(); tensor_temp->Resize(common::make_ddim(var_desc.GetShape())); tensor_temp->mutable_data( exe->GetPlace(), phi::TransToPhiDataType(var_desc.GetDataType())); @@ -770,8 +770,8 @@ int DLPackDLTensorFromPyObjectNoSync(void *py_obj, DLTensor *out) { // Use handle (non-owning) to avoid unnecessary refcount operations py::handle handle(static_cast(py_obj)); paddle::Tensor tensor = handle.cast(); - std::shared_ptr dense_tensor = - std::static_pointer_cast(tensor.impl()); + std::shared_ptr dense_tensor = + std::static_pointer_cast(tensor.impl()); paddle::framework::ToDLPackNonOwningImpl(*dense_tensor, out); return 0; } catch (const std::exception &e) { @@ -785,8 +785,8 @@ int DLPackManagedTensorFromPyObjectNoSync(void *py_obj, try { py::handle handle(static_cast(py_obj)); paddle::Tensor tensor = handle.cast(); - std::shared_ptr dense_tensor = - std::static_pointer_cast(tensor.impl()); + std::shared_ptr dense_tensor = + std::static_pointer_cast(tensor.impl()); *out = paddle::framework::ToDLPackVersioned(*dense_tensor); return 0; } catch (const std::exception &e) { @@ -798,8 +798,8 @@ int DLPackManagedTensorFromPyObjectNoSync(void *py_obj, int DLPackManagedTensorToPyObjectNoSync(DLManagedTensorVersioned *src, void **py_obj_out) { try { - phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src); - paddle::Tensor tensor(std::make_shared(dense_tensor)); + DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src); + paddle::Tensor tensor(std::make_shared(dense_tensor)); egr::EagerUtils::autograd_meta(&tensor)->SetPersistable(false); *py_obj_out = ToPyObject(tensor); return 0; @@ -821,8 +821,8 @@ int DLPackManagedTensorAllocator(::DLTensor *prototype, phi::DataType dtype = paddle::framework::DLDataTypeToPhiDataType(prototype->dtype); paddle::Tensor tensor = paddle::empty(shape, dtype, place); - std::shared_ptr dense_tensor = - std::static_pointer_cast(tensor.impl()); + std::shared_ptr dense_tensor = + std::static_pointer_cast(tensor.impl()); *out = paddle::framework::ToDLPackVersioned(*dense_tensor); return 0; } catch (const std::exception &e) { @@ -2306,8 +2306,8 @@ All parameter, weight, gradient are variables in Paddle. [](const Variable &var) -> float { return var.Get(); }) .def( "get_tensor", - [](Variable &self) -> phi::DenseTensor * { - return self.GetMutable(); + [](Variable &self) -> DenseTensor * { + return self.GetMutable(); }, py::return_value_policy::reference) .def("get_bytes", @@ -3072,7 +3072,7 @@ All parameter, weight, gradient are variables in Paddle. [](Executor &self, ExecutorPrepareContext *ctx, Scope *scope, - std::map *feed_targets, + std::map *feed_targets, std::map *fetch_targets, bool create_local_scope = true, bool create_vars = true, @@ -3377,7 +3377,7 @@ All parameter, weight, gradient are variables in Paddle. m.def("set_variable", static_cast(&framework::SetVariable)); m.def( "set_vlog_level", @@ -3436,7 +3436,7 @@ All parameter, weight, gradient are variables in Paddle. m.def("set_feed_variable", static_cast(&framework::SetFeedVariable)); m.def("get_fetch_variable", @@ -3512,7 +3512,7 @@ All parameter, weight, gradient are variables in Paddle. 
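// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the diff): the DLPack interop hooks
// above (`DLPackDLTensorFromPyObjectNoSync` and friends) follow a C-style
// "status code plus out-parameter" convention: do the work, write the result
// through a pointer, return 0, and translate any C++ exception into a nonzero
// code so nothing throws across the C boundary. A minimal self-contained
// sketch of that wrapper shape; the names here are hypothetical:
#include <exception>
#include <stdexcept>
#include <string>

// Converts `input` and writes the result through `out`; returns 0 on success,
// -1 if anything threw, mirroring the shape of the pybind DLPack callbacks.
inline int ConvertNoThrow(const std::string &input, std::string *out) {
  try {
    if (out == nullptr) {
      throw std::invalid_argument("out must not be null");
    }
    *out = "converted:" + input;  // stand-in for the real conversion work
    return 0;
  } catch (const std::exception &e) {
    // A real implementation would record e.what() somewhere retrievable.
    (void)e;
    return -1;
  }
}
// ---------------------------------------------------------------------------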
py::return_value_policy::reference) .def("__len__", [](phi::TensorArray &self) { return self.size(); }) .def("__setitem__", - [](phi::TensorArray &self, size_t i, const phi::DenseTensor &t) { + [](phi::TensorArray &self, size_t i, const DenseTensor &t) { PADDLE_ENFORCE_LT(i, self.size(), common::errors::InvalidArgument( @@ -3522,7 +3522,7 @@ All parameter, weight, gradient are variables in Paddle. }) .def( "append", - [](phi::TensorArray &self, const phi::DenseTensor &t) { + [](phi::TensorArray &self, const DenseTensor &t) { self.emplace_back(); self.back().ShareDataWith(t); self.back().set_lod(t.lod()); @@ -3590,7 +3590,7 @@ All parameter, weight, gradient are variables in Paddle. .def( "append", - [](FetchList &self, const phi::DenseTensor &t) { + [](FetchList &self, const DenseTensor &t) { self.emplace_back(); auto &dense_tensor = PADDLE_GET(phi::DenseTensor, self.back()); dense_tensor.ShareDataWith(t); diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index e2e152a0a19261..7de3076b21ef07 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -45,7 +45,7 @@ namespace py = pybind11; namespace reader = operators::reader; static paddle::optional> DiffTensorShape( - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const std::vector &target_shape, size_t num_places) { auto tensor_shape = tensor.dims(); @@ -103,7 +103,7 @@ static paddle::optional> DiffTensorShape( // Check whether the tensor shape matches the VarDesc shape // Return the different shape if exists static paddle::optional> DiffTensorShapeWithVarDesc( - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const framework::VarDesc &var_desc, size_t num_places) { auto desc_shape = var_desc.GetShape(); @@ -127,7 +127,7 @@ template class MultiDeviceFeedReader { public: using ResultDictList = - std::vector>; + std::vector>; using ResultList = std::vector; static constexpr bool kKeepOrder = @@ -377,7 +377,7 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { auto &tensor_list = result_list[0]; std::vector> var_list; var_list.reserve(tensor_list.size()); - auto func = [](phi::DenseTensor &dense_tensor) { + auto func = [](DenseTensor &dense_tensor) { std::string act_name = imperative::GetCurrentTracer()->GenerateUniqueName( "generated_var"); @@ -386,8 +386,7 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { new_var->SetType(framework::proto::VarType::DENSE_TENSOR); new_var->SetDataType( framework::TransToProtoVarType(dense_tensor.dtype())); - auto *tensor = - new_var->MutableVar()->GetMutable(); + auto *tensor = new_var->MutableVar()->GetMutable(); *tensor = std::move(dense_tensor); return new_var; }; @@ -408,7 +407,7 @@ void BindReader(py::module *module) { auto &m = *module; m.def("diff_tensor_shape", - [](const phi::DenseTensor &tensor, + [](const DenseTensor &tensor, const framework::VarDesc &var_desc, size_t num_places) -> py::object { auto diff = DiffTensorShapeWithVarDesc(tensor, var_desc, num_places); @@ -420,7 +419,7 @@ void BindReader(py::module *module) { }); m.def("diff_tensor_shape", - [](const phi::DenseTensor &tensor, + [](const DenseTensor &tensor, const std::vector &target_shape, size_t num_places) -> py::object { auto diff = DiffTensorShape(tensor, target_shape, num_places); diff --git a/paddle/fluid/pybind/slice_utils.h b/paddle/fluid/pybind/slice_utils.h index 439b75193d728b..bf5bc3a7d98749 100644 --- a/paddle/fluid/pybind/slice_utils.h +++ b/paddle/fluid/pybind/slice_utils.h @@ 
-229,7 +229,7 @@ template inline T GetDenseTensorValue(const phi::DenseTensor* x) { T value = static_cast(0); if (!(x->place().GetType() == phi::AllocationType::CPU)) { - phi::DenseTensor cpu_x; + DenseTensor cpu_x; framework::TensorCopy(*x, CPUPlace(), &cpu_x); #if defined(PADDLE_WITH_CUSTOM_DEVICE) phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); @@ -461,7 +461,7 @@ static void ParseIndex(const paddle::Tensor& tensor, if (IsNumpyArray(slice_item)) { paddle::Tensor index_tensor_tmp( - std::make_shared(), + std::make_shared(), egr::Controller::Instance().GenerateUniqueName()); py::object index_obj_tmp = @@ -921,7 +921,7 @@ static paddle::Tensor dealWithValues(const paddle::Tensor& tensor, value_tensor = reinterpret_cast(value_obj)->tensor; } else if (py::isinstance(value_obj)) { paddle::Tensor value_tensor_tmp( - std::make_shared(), + std::make_shared(), egr::Controller::Instance().GenerateUniqueName()); py::object value_obj_tmp = py::reinterpret_borrow(value_obj); py::object value = value_obj_tmp; diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index a51609d041738f..7adc5e9f593d1b 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -199,7 +199,7 @@ namespace { #endif #if defined(__linux__) -void ShareTensorViaVmm(const phi::DenseTensor &self, py::tuple *out) { +void ShareTensorViaVmm(const DenseTensor &self, py::tuple *out) { auto *holder = dynamic_cast(self.Holder().get()); paddle::memory::VmmTensorPartsVisitor parts_visitor(holder->ptr()); @@ -279,7 +279,7 @@ void ShareTensorViaVmm(const phi::DenseTensor &self, py::tuple *out) { device_id); } -phi::DenseTensor RebuildTensorFromVmmMeta(const py::tuple &meta) { +DenseTensor RebuildTensorFromVmmMeta(const py::tuple &meta) { PADDLE_ENFORCE_EQ( meta.size(), 5, @@ -401,7 +401,7 @@ phi::DenseTensor RebuildTensorFromVmmMeta(const py::tuple &meta) { header->alloc_size, GPUPlace(device_id), keep); - phi::DenseTensor tensor; + DenseTensor tensor; tensor.Resize(phi::make_ddim(dims_vec)); tensor.ResetHolder(std::move(alloc)); tensor.set_type(static_cast(dtype_idx)); @@ -412,8 +412,8 @@ phi::DenseTensor RebuildTensorFromVmmMeta(const py::tuple &meta) { } // namespace template -static void TensorCopyFrom(phi::DenseTensor *dst, - const phi::DenseTensor &src, +static void TensorCopyFrom(DenseTensor *dst, + const DenseTensor &src, const PlaceType &place, int64_t batch_size) { if (batch_size < 0) { @@ -425,7 +425,7 @@ static void TensorCopyFrom(phi::DenseTensor *dst, } std::tuple HandleTensorCopy( - const phi::DenseTensor &src, + const DenseTensor &src, const std::optional> dl_device, std::optional copy) { bool force_copy = copy.has_value() && copy.value(); @@ -451,8 +451,8 @@ std::tuple HandleTensorCopy( if (force_copy || src.place() != dst_place) { phi::Place ctx_place = src.place() != CPUPlace() ? 
src.place() : dst_place; - phi::DenseTensor dst( - std::make_shared(nullptr, 0, dst_place), src.meta()); + DenseTensor dst(std::make_shared(nullptr, 0, dst_place), + src.meta()); const auto *dev_ctx = phi::DeviceContextPool::Instance().Get(ctx_place); phi::Copy(*dev_ctx, src, dst_place, false, &dst); return std::make_tuple(dst, true); @@ -463,7 +463,7 @@ std::tuple HandleTensorCopy( template pybind11::capsule TensorToDLPack( - const phi::DenseTensor &tensor, + const DenseTensor &tensor, const std::optional> dl_device = std::nullopt, std::optional copy = std::nullopt) { const auto [maybe_copied_tensor, is_copied] = @@ -488,7 +488,7 @@ pybind11::capsule TensorToDLPack( void BindTensor(pybind11::module &m) { // NOLINT using namespace paddle::framework; // NOLINT - py::class_ framework_tensor( + py::class_ framework_tensor( m, "DenseTensor", py::buffer_protocol()); g_framework_tensor_pytype = reinterpret_cast(framework_tensor.ptr()); @@ -498,17 +498,17 @@ void BindTensor(pybind11::module &m) { // NOLINT // TensorToPyArray() according to the dtype and copy // parameters. "__array__", - [](phi::DenseTensor &self, py::object dtype, py::object copy) { + [](DenseTensor &self, py::object dtype, py::object copy) { return TensorToPyArray(self, copy); }, py::arg("dtype") = py::none(), py::arg("copy") = py::none()) .def("_ptr", - [](const phi::DenseTensor &self) { + [](const DenseTensor &self) { return reinterpret_cast(self.data()); }) .def("_slice", - [](phi::DenseTensor &self, int64_t begin_idx, int64_t end_idx) { + [](DenseTensor &self, int64_t begin_idx, int64_t end_idx) { if (!self.meta().is_contiguous()) { PADDLE_THROW(common::errors::InvalidArgument( "Tensor is not contiguous, cannot call " @@ -518,93 +518,93 @@ void BindTensor(pybind11::module &m) { // NOLINT }) .def("_numel", &phi::DenseTensor::numel) .def("_is_initialized", - [](const phi::DenseTensor &self) { return self.IsInitialized(); }) + [](const DenseTensor &self) { return self.IsInitialized(); }) .def("_get_dims", - [](const phi::DenseTensor &self) { + [](const DenseTensor &self) { return common::vectorize(self.dims()); }) .def("_set_dims", - [](phi::DenseTensor &self, const std::vector &dim) { + [](DenseTensor &self, const std::vector &dim) { self.Resize(common::make_ddim(dim)); }) .def("_set_layout", - [](phi::DenseTensor &self, const std::string &layout) { + [](DenseTensor &self, const std::string &layout) { self.set_layout(common::StringToDataLayout(layout)); }) .def("_alloc_float", - [](phi::DenseTensor &self, phi::CustomPlace &place) { + [](DenseTensor &self, phi::CustomPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](phi::DenseTensor &self, GPUPlace &place) { + [](DenseTensor &self, GPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](phi::DenseTensor &self, phi::XPUPlace &place) { + [](DenseTensor &self, phi::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](phi::DenseTensor &self, CPUPlace &place) { + [](DenseTensor &self, CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_double", - [](phi::DenseTensor &self, CPUPlace &place) { + [](DenseTensor &self, CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](phi::DenseTensor &self, CPUPlace &place) { + [](DenseTensor &self, CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](phi::DenseTensor &self, phi::CustomPlace &place) { + [](DenseTensor &self, phi::CustomPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](phi::DenseTensor &self, phi::XPUPlace 
&place) { + [](DenseTensor &self, phi::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](phi::DenseTensor &self, GPUPlace &place) { + [](DenseTensor &self, GPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](phi::DenseTensor &self, phi::GPUPinnedPlace &place) { + [](DenseTensor &self, phi::GPUPinnedPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](phi::DenseTensor &self, phi::GPUPinnedPlace &place) { + [](DenseTensor &self, phi::GPUPinnedPlace &place) { self.mutable_data(place); }) .def("_mutable_data", - [](phi::DenseTensor &self, + [](DenseTensor &self, CPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, phi::TransToPhiDataType(type))); }) .def("_mutable_data", - [](phi::DenseTensor &self, + [](DenseTensor &self, phi::CustomPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, phi::TransToPhiDataType(type))); }) .def("_mutable_data", - [](phi::DenseTensor &self, + [](DenseTensor &self, phi::XPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, phi::TransToPhiDataType(type))); }) .def("_mutable_data", - [](phi::DenseTensor &self, + [](DenseTensor &self, GPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, phi::TransToPhiDataType(type))); }) .def("_mutable_data", - [](phi::DenseTensor &self, + [](DenseTensor &self, phi::GPUPinnedPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( @@ -706,7 +706,7 @@ void BindTensor(pybind11::module &m) { // NOLINT .def( "shape", - [](phi::DenseTensor &self) { return common::vectorize(self.dims()); }, + [](DenseTensor &self) { return common::vectorize(self.dims()); }, R"DOC( Return the shape of Tensor. 
@@ -741,10 +741,10 @@ void BindTensor(pybind11::module &m) {  // NOLINT
       .def("_get_complex64_element", TensorGetElement)
       .def("_set_complex128_element", TensorSetElement)
       .def("_get_complex128_element", TensorGetElement)
-      .def("_place", [](phi::DenseTensor &self) { return self.place(); })
+      .def("_place", [](DenseTensor &self) { return self.place(); })
 #ifdef PADDLE_WITH_XPU
       .def("get_xpu_scale_value",
-           [](phi::DenseTensor &self) {
+           [](DenseTensor &self) {
             if (self.storage_properties_initialized()) {
               const phi::XPUStorageProperties &sp =
                   self.storage_properties();
@@ -754,25 +754,25 @@ void BindTensor(pybind11::module &m) {  // NOLINT
             }
           })
       .def("set_xpu_scale_value",
-           [](phi::DenseTensor &self, float new_value) {
+           [](DenseTensor &self, float new_value) {
             std::unique_ptr sp =
                 std::make_unique(new_value);
             self.set_storage_properties(std::move(sp));
           })
 #endif
       .def("_dtype",
-           [](phi::DenseTensor &self) {
+           [](DenseTensor &self) {
             return framework::TransToProtoVarType(self.type());
           })
       .def("_layout",
-           [](phi::DenseTensor &self) {
+           [](DenseTensor &self) {
             return common::DataLayoutToString(self.layout());
           })
       .def("_share_data_with", &phi::DenseTensor::ShareDataWith)
       .def("_share_data_nocheck_with", &phi::DenseTensor::ShareDataNoCheckWith)
       .def("__getitem__", PySliceTensor, py::return_value_policy::reference)
       .def("__str__",
-           [](const phi::DenseTensor &self) {
+           [](const DenseTensor &self) {
             std::stringstream ostr;
             ostr << self;
             return ostr.str();
@@ -793,16 +793,15 @@ void BindTensor(pybind11::module &m) {  // NOLINT
                      "invalid, "
                      "the LegacyLoD converted by recursive_sequence_lengths is %s",
                      new_lod));
-        return std::make_unique<phi::DenseTensor>(new_offset_lod);
+        return std::make_unique<DenseTensor>(new_offset_lod);
       }))
-      .def(py::init([]() { return std::make_unique<phi::DenseTensor>(); }))
+      .def(py::init([]() { return std::make_unique<DenseTensor>(); }))
       // We implement offset based LegacyLoD in C++ while we use length based
       // with Python API.
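// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the diff): the bindings above use
// two common pybind11 idioms, a py::init factory lambda returning
// std::unique_ptr and a __str__ built by streaming the object into a
// std::stringstream. A minimal self-contained sketch with a toy class; the
// module and class names are hypothetical:
#include <pybind11/pybind11.h>

#include <cstddef>
#include <memory>
#include <sstream>

namespace py = pybind11;

struct ToyLoD {
  std::size_t levels{0};
};

inline std::ostream &operator<<(std::ostream &os, const ToyLoD &l) {
  return os << "ToyLoD(levels=" << l.levels << ")";
}

PYBIND11_MODULE(toy_lod, m) {
  py::class_<ToyLoD>(m, "ToyLoD")
      // Factory-style constructors, as in the DenseTensor binding above.
      .def(py::init([]() { return std::make_unique<ToyLoD>(); }))
      .def(py::init([](std::size_t levels) {
        auto obj = std::make_unique<ToyLoD>();
        obj->levels = levels;
        return obj;
      }))
      // __str__ implemented by streaming into a stringstream, mirroring the
      // DenseTensor __str__ binding.
      .def("__str__", [](const ToyLoD &self) {
        std::stringstream ostr;
        ostr << self;
        return ostr.str();
      });
}
// ---------------------------------------------------------------------------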
The discussion is here: // https://github.com/PaddlePaddle/Paddle/issues/10855 .def( "set_lod", - [](phi::DenseTensor &self, - const std::vector> &lod) { + [](DenseTensor &self, const std::vector> &lod) { // the input lod is offset-based level-of-detail info LegacyLoD new_lod; new_lod.reserve(lod.size()); @@ -839,7 +838,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def( "set_recursive_sequence_lengths", - [](phi::DenseTensor &self, + [](DenseTensor &self, const std::vector> &recursive_sequence_lengths) { // the input recursive_sequence_lengths is length-based @@ -892,7 +891,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def( "lod", - [](phi::DenseTensor &self) -> std::vector> { + [](DenseTensor &self) -> std::vector> { // output the offset-based lod info LegacyLoD lod = self.lod(); std::vector> new_lod; @@ -919,17 +918,17 @@ void BindTensor(pybind11::module &m) { // NOLINT [[0, 2, 5]] )DOC") .def("_as_type", - [](const phi::DenseTensor &self, + [](const DenseTensor &self, paddle::framework::proto::VarType::Type type) { - phi::DenseTensor dst; + DenseTensor dst; if (self.IsInitialized() && self.numel() > 0) { TransDataType(self, type, &dst); } return dst; }) - .def("_copy", [](const phi::DenseTensor &self, const phi::Place &place) { + .def("_copy", [](const DenseTensor &self, const phi::Place &place) { // follow fetch_op's implementation - phi::DenseTensor dst; + DenseTensor dst; if (self.IsInitialized() && self.numel() > 0) { TensorCopySync(self, place, &dst); } else { @@ -944,7 +943,7 @@ void BindTensor(pybind11::module &m) { // NOLINT }) #ifdef PADDLE_WITH_CUDA .def("_share_buffer_with", - [](phi::DenseTensor &self, const phi::DenseTensor src, + [](DenseTensor &self, const DenseTensor src, py::tuple t) { if (!src.meta().is_contiguous()) { PADDLE_THROW(common::errors::InvalidArgument( @@ -989,7 +988,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def("_share_cuda", - [](phi::DenseTensor self) { + [](DenseTensor self) { if (!self.IsInitialized() || self.numel() == 0) throw std::runtime_error( "Tensor not initialized or numel is 0. could not pass " @@ -1064,7 +1063,7 @@ void BindTensor(pybind11::module &m) { // NOLINT "Invalid Tensor meta info for shared cuda tensor!"); // 1. Create a new C++ instance - phi::DenseTensor tensor; + DenseTensor tensor; // 2. Rebuild Allocation from handle const std::string &handle = t[0].cast(); @@ -1107,7 +1106,7 @@ void BindTensor(pybind11::module &m) { // NOLINT #endif #ifdef PADDLE_WITH_XPU .def("_share_buffer_with", - [](phi::DenseTensor &self, const phi::DenseTensor src, + [](DenseTensor &self, const DenseTensor src, py::tuple t) { if (!src.meta().is_contiguous()) { PADDLE_THROW(common::errors::InvalidArgument( @@ -1157,7 +1156,7 @@ void BindTensor(pybind11::module &m) { // NOLINT information, device index. )DOC") .def("_share_xpu", - [](phi::DenseTensor &self) { + [](DenseTensor &self) { if (!self.IsInitialized() || self.numel() == 0) throw std::runtime_error( "Tensor not initialized or numel is 0. 
could not pass to " @@ -1228,7 +1227,7 @@ void BindTensor(pybind11::module &m) { // NOLINT VLOG(6) << "[DEBUG XPU] _new_shared_xpu: current XPU device = " << dev_id; - phi::DenseTensor tensor; + DenseTensor tensor; const std::string &handle = t[0].cast(); ptrdiff_t offset_bytes = (ptrdiff_t)t[1].cast(); auto device_id = t[6].cast(); @@ -1262,7 +1261,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") #endif // PADDLE_WITH_XPU .def("_share_filename", - [](phi::DenseTensor &self, bool use_file_descriptor) { + [](DenseTensor &self, bool use_file_descriptor) { if (!self.IsInitialized() || self.numel() == 0) throw std::runtime_error( "Tensor not initialized or numel is 0. could not pass to " @@ -1348,7 +1347,7 @@ void BindTensor(pybind11::module &m) { // NOLINT if (t.size() != 7) throw std::runtime_error("Invalid Tensor meta info state!"); - phi::DenseTensor tensor; + DenseTensor tensor; // 2. Rebuild Allocation const std::string &ipc_name = t[0].cast(); @@ -1395,7 +1394,7 @@ void BindTensor(pybind11::module &m) { // NOLINT >>> tensor_from_shared = paddle.to_tensor(paddle.base.core.DenseTensor._new_shared_filename(metainfo)) )DOC") .def("_shared_incref", - [](phi::DenseTensor &self) { + [](DenseTensor &self) { auto *mmap_allocation = dynamic_cast< memory::allocation::RefcountedMemoryMapAllocation *>( self.Holder().get()); @@ -1407,7 +1406,7 @@ void BindTensor(pybind11::module &m) { // NOLINT Increase reference count of share_filename tensor. )DOC") .def("_shared_decref", - [](phi::DenseTensor &self) { + [](DenseTensor &self) { auto *mmap_allocation = dynamic_cast< memory::allocation::RefcountedMemoryMapAllocation *>( self.Holder().get()); @@ -1419,7 +1418,7 @@ void BindTensor(pybind11::module &m) { // NOLINT Decrease reference count of share_filename tensor. )DOC") .def(py::pickle( - [](const phi::DenseTensor &t) { // __getstate__ + [](const DenseTensor &t) { // __getstate__ auto holder = t.Holder(); PADDLE_ENFORCE_EQ(phi::is_cpu_place(holder->place()), true, common::errors::PreconditionNotMet( @@ -1444,7 +1443,7 @@ void BindTensor(pybind11::module &m) { // NOLINT throw std::runtime_error("Invalid Tensor state!"); // 1. Create a new C++ instance - phi::DenseTensor tensor; + DenseTensor tensor; // 2. 
Rebuild Allocation const std::string &ipc_name = t[0].cast(); @@ -1547,7 +1546,7 @@ void BindTensor(pybind11::module &m) { // NOLINT [](const phi::SparseCooTensor &self) -> int64_t { return self.numel(); }) - .def("indices", [](const phi::SparseCooTensor &self) -> phi::DenseTensor { + .def("indices", [](const phi::SparseCooTensor &self) -> DenseTensor { return self.indices(); }); } diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 9de2ca019d9896..1e65c11ed4c085 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -337,7 +337,7 @@ inline std::string TensorDTypeToPyDTypeStr( } // namespace details template -T TensorGetElement(const phi::DenseTensor &self, size_t offset) { +T TensorGetElement(const DenseTensor &self, size_t offset) { PADDLE_ENFORCE_LT(offset, self.numel(), common::errors::InvalidArgument( @@ -374,7 +374,7 @@ T TensorGetElement(const phi::DenseTensor &self, size_t offset) { } template -void TensorSetElement(phi::DenseTensor *self, size_t offset, T elem) { +void TensorSetElement(DenseTensor *self, size_t offset, T elem) { PADDLE_ENFORCE_LT(offset, self->numel(), common::errors::InvalidArgument( @@ -408,7 +408,7 @@ void TensorSetElement(phi::DenseTensor *self, size_t offset, T elem) { template void SetTensorFromPyArrayT( - phi::DenseTensor *self, + DenseTensor *self, const py::array_t &array, const P &place, bool zero_copy) { @@ -521,7 +521,7 @@ void SetTensorFromPyArrayT( } template -void SetTensorFromPyArray(phi::DenseTensor *self, +void SetTensorFromPyArray(DenseTensor *self, const py::object &obj, const P &place, bool zero_copy) { @@ -626,7 +626,7 @@ void SetStringTensorFromPyArray(phi::StringTensor *self, template void SetUVATensorFromPyArrayImpl( - phi::DenseTensor *self_tensor, + DenseTensor *self_tensor, const py::array_t &array, int device_id) { #if defined(PADDLE_WITH_CUDA) @@ -666,7 +666,7 @@ void SetUVATensorFromPyArray( int device_id) { #if defined(PADDLE_WITH_CUDA) VLOG(4) << "Running in SetUVATensorFromPyArray for VarBase."; - auto *self_tensor = self->MutableVar()->GetMutable(); + auto *self_tensor = self->MutableVar()->GetMutable(); SetUVATensorFromPyArrayImpl(self_tensor, array, device_id); #endif } @@ -679,20 +679,20 @@ void SetUVATensorFromPyArray(const std::shared_ptr &self, VLOG(4) << "Running in SetUVATensorFromPyArray for Phi::Tensor."; phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); - std::shared_ptr tmp_t = std::make_shared( + std::shared_ptr tmp_t = std::make_shared( std::make_unique(CPUPlace()) .get(), meta); self.get()->set_impl(tmp_t); - auto *self_tensor = static_cast(self.get()->impl().get()); + auto *self_tensor = static_cast(self.get()->impl().get()); SetUVATensorFromPyArrayImpl(self_tensor, array, device_id); #endif } template -void _sliceCompute(const phi::DenseTensor *in, - phi::DenseTensor *out, +void _sliceCompute(const DenseTensor *in, + DenseTensor *out, const phi::CPUContext &ctx, const std::vector &axes, const std::vector &starts) { @@ -726,8 +726,8 @@ void _sliceCompute(const phi::DenseTensor *in, } template -void _concatCompute(const std::vector &ins, - phi::DenseTensor *out, +void _concatCompute(const std::vector &ins, + DenseTensor *out, const phi::CPUContext &ctx, int64_t axis) { if (axis == 0 && ins.size() < 10) { @@ -751,7 +751,7 @@ void _concatCompute(const std::vector &ins, } } -inline void _getSliceinfo(const phi::DenseTensor &self, +inline void _getSliceinfo(const DenseTensor &self, py::object obj, const 
int64_t dim, int64_t *pstart, @@ -803,9 +803,8 @@ inline void _getSliceinfo(const phi::DenseTensor &self, } } -inline phi::DenseTensor *_getTensor(const phi::DenseTensor &self, - const phi::DDim &ddim) { - phi::DenseTensor *output = new phi::DenseTensor(); +inline DenseTensor *_getTensor(const DenseTensor &self, const phi::DDim &ddim) { + DenseTensor *output = new phi::DenseTensor(); output->Resize(ddim); auto place = self.place(); if (phi::is_cpu_place(place)) { @@ -829,8 +828,8 @@ inline phi::DenseTensor *_getTensor(const phi::DenseTensor &self, } template -void _sliceDapper(const phi::DenseTensor *in, - phi::DenseTensor *out, +void _sliceDapper(const DenseTensor *in, + DenseTensor *out, const phi::CPUContext &ctx, const std::vector &axes, const std::vector &starts, @@ -871,32 +870,32 @@ void _sliceDapper(const phi::DenseTensor *in, } template -inline phi::DenseTensor *_sliceWrapper(const phi::DenseTensor &self, - const phi::CPUContext &ctx, - py::object obj UNUSED, - int dim, - int64_t start, - int64_t slicelength) { +inline DenseTensor *_sliceWrapper(const DenseTensor &self, + const phi::CPUContext &ctx, + py::object obj UNUSED, + int dim, + int64_t start, + int64_t slicelength) { phi::DDim dstDDim = self.dims(); dstDDim[dim] = static_cast(slicelength); std::vector axes({dim}); std::vector starts({static_cast(start)}); - phi::DenseTensor *output = _getTensor(self, dstDDim); + DenseTensor *output = _getTensor(self, dstDDim); _sliceDapper(&self, output, ctx, axes, starts, dstDDim.size()); return output; } template -inline phi::DenseTensor *_sliceAndConcat(const phi::DenseTensor &self, - py::object obj, - int dim) { +inline DenseTensor *_sliceAndConcat(const DenseTensor &self, + py::object obj, + int dim) { phi::CPUContext ctx; int64_t start, stop, step, slicelength; _getSliceinfo(self, obj, dim, &start, &stop, &step, &slicelength); if (step == 1 || slicelength == 1) { return _sliceWrapper(self, ctx, obj, dim, start, slicelength); } else { - std::vector ins; + std::vector ins; for (auto i = 0; i < slicelength; ++i, start += step) { ins.emplace_back(*_sliceWrapper(self, ctx, obj, dim, start, 1)); } @@ -904,15 +903,15 @@ inline phi::DenseTensor *_sliceAndConcat(const phi::DenseTensor &self, // do the concat operation phi::DDim dstDDim = self.dims(); dstDDim[dim] = static_cast(slicelength); - phi::DenseTensor *output1 = _getTensor(self, dstDDim); + DenseTensor *output1 = _getTensor(self, dstDDim); _concatCompute(ins, output1, ctx, dim); return output1; } } -inline phi::DenseTensor *_sliceTensor(const phi::DenseTensor &self, - py::object obj, - int dim) { +inline DenseTensor *_sliceTensor(const DenseTensor &self, + py::object obj, + int dim) { auto src_type = framework::TransToProtoVarType(self.dtype()); switch (src_type) { case framework::proto::VarType::FP16: @@ -946,12 +945,11 @@ inline phi::DenseTensor *_sliceTensor(const phi::DenseTensor &self, } } -inline phi::DenseTensor *_pySliceTensor(const phi::DenseTensor &self, - py::object obj) { +inline DenseTensor *_pySliceTensor(const DenseTensor &self, py::object obj) { if (py::isinstance(obj)) { py::list l = static_cast(obj); - std::unique_ptr target; - phi::DenseTensor *src = const_cast(&self); + std::unique_ptr target; + DenseTensor *src = const_cast(&self); for (auto i = 0; i < static_cast(l.size()); ++i) { src = _sliceTensor(*src, l[i], i); if (i + 1 == static_cast(l.size())) { @@ -966,15 +964,14 @@ inline phi::DenseTensor *_pySliceTensor(const phi::DenseTensor &self, } } -inline phi::DenseTensor *PySliceTensor(const phi::DenseTensor 
&self, - py::object obj) { +inline DenseTensor *PySliceTensor(const DenseTensor &self, py::object obj) { if (phi::is_gpu_place(self.place())) { - std::unique_ptr holder; - phi::DenseTensor src; + std::unique_ptr holder; + DenseTensor src; framework::TensorCopySync(self, CPUPlace(), &src); - phi::DenseTensor *output = _pySliceTensor(src, obj); + DenseTensor *output = _pySliceTensor(src, obj); holder.reset(output); - phi::DenseTensor *dst = _getTensor(*output, output->dims()); + DenseTensor *dst = _getTensor(*output, output->dims()); framework::TensorCopySync(*output, self.place(), dst); return dst; } else { @@ -982,7 +979,7 @@ inline phi::DenseTensor *PySliceTensor(const phi::DenseTensor &self, } } -inline py::array TensorToPyArray(const phi::DenseTensor &tensor, +inline py::array TensorToPyArray(const DenseTensor &tensor, py::object copy = py::none()) { if (!tensor.has_allocation()) { return py::array(); @@ -1019,7 +1016,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor, const_cast(tensor_buf_ptr), base); } else { - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; cpu_tensor.set_meta(tensor.meta()); @@ -1045,7 +1042,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor, } else if (is_xpu_tensor) { #ifdef PADDLE_WITH_XPU auto p = tensor.place(); - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; cpu_tensor.set_meta(tensor.meta()); @@ -1078,7 +1075,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor, #elif defined(PADDLE_WITH_HIP) gpuMemcpyKind kind = hipMemcpyDeviceToHost; #endif - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; cpu_tensor.set_meta(tensor.meta()); @@ -1107,16 +1104,16 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor, #ifdef PADDLE_WITH_CUSTOM_DEVICE // TODO(qili93): temporary for ascend npu performance to be removed along // with npu_identity op - paddle::Tensor tensor_out(std::make_shared()); + paddle::Tensor tensor_out(std::make_shared()); if (tensor.storage_properties_initialized()) { - paddle::Tensor tensor_in(std::make_shared(tensor)); + paddle::Tensor tensor_in(std::make_shared(tensor)); tensor_out = npu_identity_ad_func(tensor_in, -1); auto dense_tensor = - std::dynamic_pointer_cast(tensor_out.impl()); + std::dynamic_pointer_cast(tensor_out.impl()); phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance(); auto &ctx = *pool.Get(tensor.place()); auto p = dense_tensor->place(); - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; cpu_tensor.set_meta(dense_tensor->meta()); @@ -1146,7 +1143,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor, phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance(); auto &ctx = *pool.Get(tensor.place()); auto p = tensor.place(); - phi::DenseTensor cpu_tensor; + DenseTensor cpu_tensor; CPUPlace cpu_place; cpu_tensor.set_meta(tensor.meta()); diff --git a/paddle/fluid/pybind/uva_utils.h b/paddle/fluid/pybind/uva_utils.h index cb68e0f7a6fff8..6855fa0fb1ced1 100644 --- a/paddle/fluid/pybind/uva_utils.h +++ b/paddle/fluid/pybind/uva_utils.h @@ -24,7 +24,7 @@ namespace paddle { namespace pybind { -static void tensor_uva(phi::DenseTensor *self_tensor, int device_id) { +static void tensor_uva(DenseTensor *self_tensor, int device_id) { VLOG(4) << "Running in _uva interface."; #if defined(PADDLE_WITH_CUDA) phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance();
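// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the diff): `TensorToPyArray` above
// exposes tensor memory to Python either as a zero-copy view (CPU tensors) or
// after an explicit device-to-host copy. The zero-copy path relies on handing
// py::array a `base` object that keeps the underlying buffer alive. A minimal
// self-contained sketch of that ownership idiom with a plain heap buffer; the
// function name is hypothetical:
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>

#include <cstddef>

namespace py = pybind11;

// Returns a 1-D float32 array viewing `n` freshly allocated elements; the
// capsule passed as `base` frees the buffer once the last NumPy view of it is
// garbage collected.
inline py::array_t<float> MakeOwnedView(std::size_t n) {
  float *buf = new float[n]();
  py::capsule owner(buf, [](void *p) { delete[] static_cast<float *>(p); });
  return py::array_t<float>(
      {static_cast<py::ssize_t>(n)},               // shape
      {static_cast<py::ssize_t>(sizeof(float))},   // strides in bytes
      buf,
      owner);  // base object ties the buffer's lifetime to the array
}
// ---------------------------------------------------------------------------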