From 256a5eee9d19a6df1387937b2e8796f83bb55df1 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Thu, 2 Oct 2025 06:22:49 +0000
Subject: [PATCH 01/17] [DLPack] Implement C functions exchange API

---
 paddle/fluid/framework/dlpack_tensor.cc  |  4 +-
 paddle/fluid/framework/dlpack_tensor.h   | 14 ++--
 paddle/fluid/pybind/pybind.cc            | 70 +++++++++++++++++++
 .../base/dygraph/tensor_patch_methods.py |  3 +
 4 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index 793d0bbdf6e695..e01964966d2727 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -265,7 +265,7 @@ ::DLDataType PhiDataTypeToDLDataType(phi::DataType dtype) {
       framework::TransToProtoVarType(dtype));
 }
 
-phi::Place DLDeviceToPlace(const DLDevice &dl_device) {
+phi::Place DLDeviceToPlace(const ::DLDevice &dl_device) {
   phi::Place place;
   if (dl_device.device_type == kDLCPU) {
     place = phi::CPUPlace();
@@ -279,7 +279,7 @@ phi::Place DLDeviceToPlace(const DLDevice &dl_device) {
   return place;
 }
 
-DLDevice PlaceToDLDevice(const phi::Place &place) {
+::DLDevice PlaceToDLDevice(const phi::Place &place) {
   return phi::VisitPlace(place, internal::DLDeviceVisitor());
 }
 
diff --git a/paddle/fluid/framework/dlpack_tensor.h b/paddle/fluid/framework/dlpack_tensor.h
index e287ce342fa78c..ed799a192f83f9 100644
--- a/paddle/fluid/framework/dlpack_tensor.h
+++ b/paddle/fluid/framework/dlpack_tensor.h
@@ -29,15 +29,17 @@ and paddle/phi/api/lib/tensor_utils.cc
 */
 using Deleter = std::function<void(void *)>;
 
-phi::Place DLDeviceToPlace(const DLDevice& device);
-DLDevice PlaceToDLDevice(const phi::Place& place);
+::DLDataType PhiDataTypeToDLDataType(phi::DataType dtype);
+phi::DataType DLDataTypeToPhiDataType(::DLDataType type);
+phi::Place DLDeviceToPlace(const ::DLDevice& device);
+::DLDevice PlaceToDLDevice(const phi::Place& place);
 
 TEST_API DLManagedTensor* ToDLPack(const phi::DenseTensor& src,
                                    uint64_t flags = 0);
-DLManagedTensorVersioned* ToDLPackVersioned(const phi::DenseTensor& src,
-                                            uint64_t flags = 0);
-TEST_API phi::DenseTensor FromDLPack(DLManagedTensor* src);
-phi::DenseTensor FromDLPackVersioned(DLManagedTensorVersioned* src);
+::DLManagedTensorVersioned* ToDLPackVersioned(const phi::DenseTensor& src,
+                                              uint64_t flags = 0);
+TEST_API phi::DenseTensor FromDLPack(::DLManagedTensor* src);
+phi::DenseTensor FromDLPackVersioned(::DLManagedTensorVersioned* src);
 
 // A traits to support both DLManagedTensor and DLManagedTensorVersioned
 template <typename T>
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index d3b17ad377b7cf..d2c7b52f272af4 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -763,6 +763,64 @@ class PyLayerBlockContextManager {
   PyLayerBlockContextManager() = default;
 };
 
+int DLPackFromPyObject(void *py_obj,
+                       DLManagedTensorVersioned **out,
+                       void **env_stream) {
+  try {
+    py::handle handle(static_cast<PyObject *>(py_obj));
+    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
+    defined(PADDLE_WITH_CUSTOM_DEVICE)
+    if (env_stream != nullptr && tensor.is_gpu()) {
+      int device_index = tensor.place().GetDeviceId();
+      *env_stream = platform::get_current_stream(device_index)->raw_stream();
+    }
+#endif
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
+  try {
+    phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
+    paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
+    egr::EagerUtils::autograd_meta(&tensor)->SetPersistable(false);
+    *py_obj_out = ToPyObject(tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackTensorAllocator(::DLTensor *prototype,
+                          ::DLManagedTensorVersioned **out,
+                          void *error_ctx,
+                          void (*SetError)(void *error_ctx,
+                                           const char *kind,
+                                           const char *message)) {
+  try {
+    phi::IntArray shape(prototype->shape, prototype->ndim);
+    phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
+    phi::DataType dtype =
+        paddle::framework::DLDataTypeToPhiDataType(prototype->dtype);
+    paddle::Tensor tensor = paddle::empty(shape, dtype, place);
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    SetError(error_ctx, "DLPackTensorAllocator", e.what());
+    return -1;
+  }
+}
+
 // NOTE: use to load file by Mmap
 enum MMapLoadModes {
   ALLOCATOR_MAPPED_SHARED = 1,
@@ -1773,6 +1831,18 @@ PYBIND11_MODULE(libpaddle, m) {
                            dl_device.device_id);
   });
 
+  m.def("dlpack_from_pyobject_ptr", []() -> int64_t {
+    return reinterpret_cast<int64_t>(DLPackFromPyObject);
+  });
+
+  m.def("dlpack_to_pyobject_ptr", []() -> int64_t {
+    return reinterpret_cast<int64_t>(DLPackToPyObject);
+  });
+
+  m.def("dlpack_tensor_allocator_ptr", []() -> int64_t {
+    return reinterpret_cast<int64_t>(DLPackTensorAllocator);
+  });
+
   m.def("from_dlpack", [](py::object data) {
     if (PyCapsule_IsValid(data.ptr(), DLPackTraits<DLManagedTensor>::capsule)) {
 
diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index e19d5e7f8405d1..2650ebd77f5a29 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -1586,6 +1586,9 @@ def __tvm_ffi_env_stream__(self) -> int:
         ("__dlpack_device__", __dlpack_device__),
         ("get_device", get_device),
         ("__tvm_ffi_env_stream__", __tvm_ffi_env_stream__),
+        ("__c_dlpack_from_pyobject__", core.dlpack_from_pyobject_ptr()),
+        ("__c_dlpack_to_pyobject__", core.dlpack_to_pyobject_ptr()),
+        ("__c_dlpack_tensor_allocator__", core.dlpack_tensor_allocator_ptr()),
     ):
         setattr(core.eager.Tensor, method_name, method)
 
From 1fb670edb3f6a248dd6bb7af1e9bedeb7264f2bd Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Sat, 11 Oct 2025 04:51:31 +0000
Subject: [PATCH 02/17] Implement the latest C dlpack exchange API, refer to apache/tvm-ffi#96

---
 paddle/fluid/framework/dlpack_tensor.cc  |  16 +++
 paddle/fluid/framework/dlpack_tensor.h   |   6 +-
 paddle/fluid/pybind/pybind.cc            | 132 ++++++++++++++++--
 .../base/dygraph/tensor_patch_methods.py |   1 +
 python/paddle/utils/dlpack.py            |   3 +-
 .../test_tensor_attr_consistency.py      |   4 +
 third_party/dlpack                       |   2 +-
 7 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index e01964966d2727..02b27cbe0ef9ad 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -358,6 +358,22 @@ DLManagedTensorVersioned *ToDLPackVersioned(const phi::DenseTensor &src,
   return ToDLPackImpl<DLManagedTensorVersioned>(src, flags);
 }
 
+void ToDLPackNonOwningImpl(const phi::DenseTensor &tensor,
+                           ::DLTensor &out) {  // NOLINT
+  // Fill in the pre-allocated DLTensor struct with direct pointers
+  // This is a non-owning conversion - the caller owns the tensor
+  // and must keep it alive for the duration of DLTensor usage
+  out.data = const_cast<void *>(tensor.data());
+  out.device = PlaceToDLDevice(tensor.place());
+  out.ndim = static_cast<int32_t>(tensor.dims().size());
+  out.dtype = PhiDataTypeToDLDataType(tensor.dtype());
+  // sizes() and strides() return pointers to TensorImpl's stable storage
+  // which remains valid as long as the tensor is alive
+  out.shape = const_cast<int64_t *>(tensor.dims().Get());
+  out.strides = const_cast<int64_t *>(tensor.strides().Get());
+  out.byte_offset = 0;
+}
+
 template <typename T>
 phi::DenseTensor FromDLPackImpl(T *src, Deleter deleter) {
   std::vector<int64_t> shape_vec;
diff --git a/paddle/fluid/framework/dlpack_tensor.h b/paddle/fluid/framework/dlpack_tensor.h
index ed799a192f83f9..1aa8e79f93e7de 100644
--- a/paddle/fluid/framework/dlpack_tensor.h
+++ b/paddle/fluid/framework/dlpack_tensor.h
@@ -34,10 +34,12 @@ phi::DataType DLDataTypeToPhiDataType(::DLDataType type);
 phi::Place DLDeviceToPlace(const ::DLDevice& device);
 ::DLDevice PlaceToDLDevice(const phi::Place& place);
 
-TEST_API DLManagedTensor* ToDLPack(const phi::DenseTensor& src,
-                                   uint64_t flags = 0);
+TEST_API ::DLManagedTensor* ToDLPack(const phi::DenseTensor& src,
+                                     uint64_t flags = 0);
 ::DLManagedTensorVersioned* ToDLPackVersioned(const phi::DenseTensor& src,
                                               uint64_t flags = 0);
+void ToDLPackNonOwningImpl(const phi::DenseTensor& tensor,
+                           ::DLTensor& out);  // NOLINT
 TEST_API phi::DenseTensor FromDLPack(::DLManagedTensor* src);
 phi::DenseTensor FromDLPackVersioned(::DLManagedTensorVersioned* src);
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index d2c7b52f272af4..57edb7db13bfc6 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -763,9 +763,9 @@ class PyLayerBlockContextManager {
   PyLayerBlockContextManager() = default;
 };
 
-int DLPackFromPyObject(void *py_obj,
-                       DLManagedTensorVersioned **out,
-                       void **env_stream) {
+int DLPackFromPyObjectLegacy(void *py_obj,
+                             DLManagedTensorVersioned **out,
+                             void **env_stream) {
   try {
     py::handle handle(static_cast<PyObject *>(py_obj));
     paddle::Tensor tensor = handle.cast<paddle::Tensor>();
@@ -786,7 +786,7 @@ int DLPackFromPyObject(void *py_obj,
   }
 }
 
-int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
+int DLPackToPyObjectLegacy(DLManagedTensorVersioned *src, void **py_obj_out) {
   try {
     phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
     paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
@@ -799,12 +799,12 @@ int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
   }
 }
 
-int DLPackTensorAllocator(::DLTensor *prototype,
-                          ::DLManagedTensorVersioned **out,
-                          void *error_ctx,
-                          void (*SetError)(void *error_ctx,
-                                           const char *kind,
-                                           const char *message)) {
+int DLPackTensorAllocatorLegacy(::DLTensor *prototype,
+                                ::DLManagedTensorVersioned **out,
+                                void *error_ctx,
+                                void (*SetError)(void *error_ctx,
+                                                 const char *kind,
+                                                 const char *message)) {
   try {
     phi::IntArray shape(prototype->shape, prototype->ndim);
     phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
@@ -821,6 +821,108 @@
+int DLPackDLTensorFromPyObjectNoSync(void *py_obj, DLTensor *out) {
+  try {
+    // Use handle (non-owning) to avoid unnecessary refcount operations
+    py::handle handle(static_cast<PyObject *>(py_obj));
+    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    paddle::framework::ToDLPackNonOwningImpl(*dense_tensor, *out);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorFromPyObjectNoSync(void *py_obj,
+                                          DLManagedTensorVersioned **out) {
+  try {
+    py::handle handle(static_cast<PyObject *>(py_obj));
+    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorToPyObjectNoSync(DLManagedTensorVersioned *src,
+                                        void **py_obj_out) {
+  try {
+    phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
+    paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
+    egr::EagerUtils::autograd_meta(&tensor)->SetPersistable(false);
+    *py_obj_out = ToPyObject(tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorAllocator(::DLTensor *prototype,
+                                 ::DLManagedTensorVersioned **out,
+                                 void *error_ctx,
+                                 void (*SetError)(void *error_ctx,
+                                                  const char *kind,
+                                                  const char *message)) {
+  try {
+    phi::IntArray shape(prototype->shape, prototype->ndim);
+    phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
+    phi::DataType dtype =
+        paddle::framework::DLDataTypeToPhiDataType(prototype->dtype);
+    paddle::Tensor tensor = paddle::empty(shape, dtype, place);
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    SetError(error_ctx, "DLPackManagedTensorAllocator", e.what());
+    return -1;
+  }
+}
+
+int DLPackCurrentWorkStream(DLDeviceType device_type,
+                            int32_t device_id,
+                            void **out_stream) {
+  try {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
+    defined(PADDLE_WITH_CUSTOM_DEVICE)
+    if (device_type == kDLCUDA || device_type == kDLROCM) {
+      *out_stream = platform::get_current_stream(device_id)->raw_stream();
+    }
+#endif
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+struct PaddleDLPackExchangeAPI : public ::DLPackExchangeAPI {
+  PaddleDLPackExchangeAPI() {
+    header.version.major = DLPACK_MAJOR_VERSION;
+    header.version.minor = DLPACK_MINOR_VERSION;
+    header.prev_api = nullptr;
+    managed_tensor_allocator = DLPackManagedTensorAllocator;
+    managed_tensor_from_py_object_no_sync =
+        DLPackManagedTensorFromPyObjectNoSync;
+    managed_tensor_to_py_object_no_sync = DLPackManagedTensorToPyObjectNoSync;
+    dltensor_from_py_object_no_sync = DLPackDLTensorFromPyObjectNoSync;
+    current_work_stream = DLPackCurrentWorkStream;
+  }
+
+  static const DLPackExchangeAPI *Instance() {
+    static PaddleDLPackExchangeAPI inst;
+    return &inst;
+  }
+};
+
 // NOTE: use to load file by Mmap
 enum MMapLoadModes {
   ALLOCATOR_MAPPED_SHARED = 1,
@@ -1832,15 +1934,19 @@ PYBIND11_MODULE(libpaddle, m) {
                            dl_device.device_id);
   });
 
   m.def("dlpack_from_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackFromPyObject);
+    return reinterpret_cast<int64_t>(DLPackFromPyObjectLegacy);
   });
 
   m.def("dlpack_to_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackToPyObject);
+    return reinterpret_cast<int64_t>(DLPackToPyObjectLegacy);
   });
 
   m.def("dlpack_tensor_allocator_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackTensorAllocator);
+    return reinterpret_cast<int64_t>(DLPackTensorAllocatorLegacy);
+  });
+
+  m.def("dlpack_exchange_api_ptr", []() -> int64_t {
+    return reinterpret_cast<int64_t>(PaddleDLPackExchangeAPI::Instance());
   });
 
   m.def("from_dlpack", [](py::object data) {
     if (PyCapsule_IsValid(data.ptr(), DLPackTraits<DLManagedTensor>::capsule)) {
 
diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 2650ebd77f5a29..12720cdc7ceda9 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -1589,6 +1589,7 @@ def __tvm_ffi_env_stream__(self) -> int:
         ("__c_dlpack_from_pyobject__", core.dlpack_from_pyobject_ptr()),
         ("__c_dlpack_to_pyobject__", core.dlpack_to_pyobject_ptr()),
         ("__c_dlpack_tensor_allocator__", core.dlpack_tensor_allocator_ptr()),
+        ("__c_dlpack_exchange_api__", core.dlpack_exchange_api_ptr()),
     ):
         setattr(core.eager.Tensor, method_name, method)
 
diff --git a/python/paddle/utils/dlpack.py b/python/paddle/utils/dlpack.py
index c1b3c21afaea86..68b44cc27f89ce 100644
--- a/python/paddle/utils/dlpack.py
+++ b/python/paddle/utils/dlpack.py
@@ -75,6 +75,7 @@ class DLDeviceType(enum.IntEnum):
     kDLWebGPU = (15,)
     kDLHexagon = (16,)
     kDLMAIA = (17,)
+    kDLTrn = (18,)
 
 
 def to_dlpack(x: Tensor) -> CapsuleType:
@@ -215,7 +216,7 @@ def from_dlpack(
 
     if hasattr(dlpack, "__dlpack__"):
         kwargs = {}
-        kwargs["max_version"] = (1, 1)
+        kwargs["max_version"] = (1, 2)
         if copy is not None:
             kwargs["copy"] = copy
 
diff --git a/test/dygraph_to_static/test_tensor_attr_consistency.py b/test/dygraph_to_static/test_tensor_attr_consistency.py
index 86a4437a7c69ce..7176daa31928c2 100644
--- a/test/dygraph_to_static/test_tensor_attr_consistency.py
+++ b/test/dygraph_to_static/test_tensor_attr_consistency.py
@@ -81,6 +81,10 @@
         '__dlpack__',
         "__dlpack_device__",
         "__tvm_ffi_env_stream__",
+        "__c_dlpack_from_pyobject__",
+        "__c_dlpack_to_pyobject__",
+        "__c_dlpack_tensor_allocator__",
+        "__c_dlpack_exchange_api__",
     ]
 )
 STATIC_ONLY_TENSOR_ATTRS_ALLOW_LIST = OrderedSet(
diff --git a/third_party/dlpack b/third_party/dlpack
index 3ea601bb413074..111736618e8d10 160000
--- a/third_party/dlpack
+++ b/third_party/dlpack
@@ -1 +1 @@
-Subproject commit 3ea601bb413074c49a77c4ce3218bc08f8c4703c
+Subproject commit 111736618e8d1028b23605f76dcaa6a38cfea809

From 32cb37502aa81161545b2226737cde0f06392054 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Sun, 12 Oct 2025 12:59:26 +0800
Subject: [PATCH 03/17] bump dlpack to v1.2

---
 third_party/dlpack | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/dlpack b/third_party/dlpack
index 111736618e8d10..93c8f2a3c774b8 160000
--- a/third_party/dlpack
+++ b/third_party/dlpack
@@ -1 +1 @@
-Subproject commit 111736618e8d1028b23605f76dcaa6a38cfea809
+Subproject commit 93c8f2a3c774b84af6f652b1992c48164fae60fc

From d49fb45c6c3b4082fe729ba4ef3ab6bd7dfdf5b5 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Sun, 12 Oct 2025 05:35:09 +0000
Subject: [PATCH 04/17] cleanup legacy impls

---
 paddle/fluid/pybind/pybind.cc            | 70 -------------------
 .../base/dygraph/tensor_patch_methods.py |  3 -
 .../test_tensor_attr_consistency.py      |  3 -
 3 files changed, 76 deletions(-)

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 57edb7db13bfc6..3119464f9cb974 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -763,64 +763,6 @@ class PyLayerBlockContextManager {
   PyLayerBlockContextManager() = default;
 };
 
-int DLPackFromPyObjectLegacy(void *py_obj,
-                             DLManagedTensorVersioned **out,
-                             void **env_stream) {
-  try {
-    py::handle handle(static_cast<PyObject *>(py_obj));
-    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
-    defined(PADDLE_WITH_CUSTOM_DEVICE)
-    if (env_stream != nullptr && tensor.is_gpu()) {
-      int device_index = tensor.place().GetDeviceId();
-      *env_stream = platform::get_current_stream(device_index)->raw_stream();
-    }
-#endif
-    std::shared_ptr<phi::DenseTensor> dense_tensor =
-        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
-    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
-    return 0;
-  } catch (const std::exception &e) {
-    PyErr_SetString(PyExc_RuntimeError, e.what());
-    return -1;
-  }
-}
-
-int DLPackToPyObjectLegacy(DLManagedTensorVersioned *src, void **py_obj_out) {
-  try {
-    phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
-    paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
-    egr::EagerUtils::autograd_meta(&tensor)->SetPersistable(false);
-    *py_obj_out = ToPyObject(tensor);
-    return 0;
-  } catch (const std::exception &e) {
-    PyErr_SetString(PyExc_RuntimeError, e.what());
-    return -1;
-  }
-}
-
-int DLPackTensorAllocatorLegacy(::DLTensor *prototype,
-                                ::DLManagedTensorVersioned **out,
-                                void *error_ctx,
-                                void (*SetError)(void *error_ctx,
-                                                 const char *kind,
-                                                 const char *message)) {
-  try {
-    phi::IntArray shape(prototype->shape, prototype->ndim);
-    phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
-    phi::DataType dtype =
-        paddle::framework::DLDataTypeToPhiDataType(prototype->dtype);
-    paddle::Tensor tensor = paddle::empty(shape, dtype, place);
-    std::shared_ptr<phi::DenseTensor> dense_tensor =
-        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
-    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
-    return 0;
-  } catch (const std::exception &e) {
-    SetError(error_ctx, "DLPackTensorAllocator", e.what());
-    return -1;
-  }
-}
-
 int DLPackDLTensorFromPyObjectNoSync(void *py_obj, DLTensor *out) {
   try {
     // Use handle (non-owning) to avoid unnecessary refcount operations
@@ -1933,18 +1875,6 @@ PYBIND11_MODULE(libpaddle, m) {
                            dl_device.device_id);
   });
 
-  m.def("dlpack_from_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackFromPyObjectLegacy);
-  });
-
-  m.def("dlpack_to_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackToPyObjectLegacy);
-  });
-
-  m.def("dlpack_tensor_allocator_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackTensorAllocatorLegacy);
-  });
-
   m.def("dlpack_exchange_api_ptr", []() -> int64_t {
     return reinterpret_cast<int64_t>(PaddleDLPackExchangeAPI::Instance());
   });
 
   m.def("from_dlpack", [](py::object data) {
diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 12720cdc7ceda9..f9545777153f21 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -1586,9 +1586,6 @@ def __tvm_ffi_env_stream__(self) -> int:
         ("__dlpack_device__", __dlpack_device__),
         ("get_device", get_device),
         ("__tvm_ffi_env_stream__", __tvm_ffi_env_stream__),
-        ("__c_dlpack_from_pyobject__", core.dlpack_from_pyobject_ptr()),
-        ("__c_dlpack_to_pyobject__", core.dlpack_to_pyobject_ptr()),
-        ("__c_dlpack_tensor_allocator__", core.dlpack_tensor_allocator_ptr()),
         ("__c_dlpack_exchange_api__", core.dlpack_exchange_api_ptr()),
     ):
         setattr(core.eager.Tensor, method_name, method)
diff --git a/test/dygraph_to_static/test_tensor_attr_consistency.py b/test/dygraph_to_static/test_tensor_attr_consistency.py
index 7176daa31928c2..b68c2db87fe609 100644
--- a/test/dygraph_to_static/test_tensor_attr_consistency.py
+++ b/test/dygraph_to_static/test_tensor_attr_consistency.py
@@ -81,9 +81,6 @@
         '__dlpack__',
         "__dlpack_device__",
         "__tvm_ffi_env_stream__",
-        "__c_dlpack_from_pyobject__",
-        "__c_dlpack_to_pyobject__",
-        "__c_dlpack_tensor_allocator__",
         "__c_dlpack_exchange_api__",
     ]
 )

From bb3c59d3e70ec78c8d953293cfbf9d477703b1d0 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Sun, 12 Oct 2025 06:12:45 +0000
Subject: [PATCH 05/17] add unittests for tvm_ffi

---
 python/unittest_py/requirements.txt |   1 +
 test/legacy_test/test_tvm_ffi.py    | 104 +++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt
index ddfccc8090f240..9d547fd9357d1d 100644
--- a/python/unittest_py/requirements.txt
+++ b/python/unittest_py/requirements.txt
@@ -20,3 +20,4 @@ xdoctest==1.3.0
 ubelt==1.3.3  # just for xdoctest
 mypy==1.17.1
 soundfile
+apache-tvm-ffi @ git+https://github.com/apache/tvm-ffi.git@22a78943b78306a73011757fa635afa9dce35114
diff --git a/test/legacy_test/test_tvm_ffi.py b/test/legacy_test/test_tvm_ffi.py
index aa6a91b4aa24de..1e95a5db412902 100644
--- a/test/legacy_test/test_tvm_ffi.py
+++ b/test/legacy_test/test_tvm_ffi.py
@@ -14,10 +14,14 @@
 
 import unittest
 
+import numpy as np
+import tvm_ffi.cpp
+from tvm_ffi import Module
+
 import paddle
 
 
-class TestTVMFFI(unittest.TestCase):
+class TestTVMFFIEnvStream(unittest.TestCase):
     def test_tvm_ffi_env_stream_for_gpu_tensor(self):
         if not paddle.is_compiled_with_cuda():
             return
@@ -34,5 +38,103 @@ def test_tvm_ffi_env_stream_for_cpu_tensor(self):
             tensor.__tvm_ffi_env_stream__()
 
 
+class TestCDLPackExchangeAPI(unittest.TestCase):
+    def test_c_dlpack_exchange_api_cpu(self):
+        cpp_source = r"""
+        void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
+          // implementation of a library function
+          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
+          DLDataType f32_dtype{kDLFloat, 32, 1};
+          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
+          TVM_FFI_ICHECK(y->ndim == 1) << "y must be a 1D tensor";
+          TVM_FFI_ICHECK(y->dtype == f32_dtype) << "y must be a float tensor";
+          TVM_FFI_ICHECK(x->shape[0] == y->shape[0]) << "x and y must have the same shape";
+          for (int i = 0; i < x->shape[0]; ++i) {
+            static_cast<float*>(y->data)[i] = static_cast<float*>(x->data)[i] + 1;
+          }
+        }
+        """
+
+        mod: Module = tvm_ffi.cpp.load_inline(
+            name='mod', cpp_sources=cpp_source, functions='add_one_cpu'
+        )
+
+        x = paddle.full((3,), 1.0, dtype='float32').cpu()
+        y = paddle.zeros((3,), dtype='float32').cpu()
+        mod.add_one_cpu(x, y)
+        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])
+
+    def test_c_dlpack_exchange_api_gpu(self):
+        if not paddle.is_compiled_with_cuda():
+            return
+        cpp_sources = r"""
+        void add_one_cuda(tvm::ffi::TensorView x, tvm::ffi::TensorView y);
+        """
+        cuda_sources = r"""
+        __global__ void AddOneKernel(float* x, float* y, int n) {
+          int idx = blockIdx.x * blockDim.x + threadIdx.x;
+          if (idx < n) {
+            y[idx] = x[idx] + 1;
+          }
+        }
+
+        void add_one_cuda(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
+          // implementation of a library function
+          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
+          DLDataType f32_dtype{kDLFloat, 32, 1};
+          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
+          TVM_FFI_ICHECK(y->ndim == 1) << "y must be a 1D tensor";
+          TVM_FFI_ICHECK(y->dtype == f32_dtype) << "y must be a float tensor";
+          TVM_FFI_ICHECK(x->shape[0] == y->shape[0]) << "x and y must have the same shape";
+
+          int64_t n = x->shape[0];
+          int64_t nthread_per_block = 256;
+          int64_t nblock = (n + nthread_per_block - 1) / nthread_per_block;
+          // Obtain the current stream from the environment by calling TVMFFIEnvGetStream
+          cudaStream_t stream = static_cast<cudaStream_t>(
+              TVMFFIEnvGetStream(x->device.device_type, x->device.device_id));
+          // launch the kernel
+          AddOneKernel<<<nblock, nthread_per_block, 0, stream>>>(
+              static_cast<float*>(x->data), static_cast<float*>(y->data), n);
+        }
+        """
+        mod: Module = tvm_ffi.cpp.load_inline(
+            name='mod',
+            cpp_sources=cpp_sources,
+            cuda_sources=cuda_sources,
+            functions=['add_one_cuda'],
+        )
+
+        x = paddle.full((3,), 1.0, dtype='float32').cuda()
+        y = paddle.zeros((3,), dtype='float32').cuda()
+        mod.add_one_cuda(x, y)
+        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])
+
+    def test_c_dlpack_exchange_api_alloc_tensor(self):
+        cpp_source = r"""
+        inline tvm::ffi::Tensor alloc_tensor(tvm::ffi::Shape shape, DLDataType dtype, DLDevice device) {
+          return tvm::ffi::Tensor::FromDLPackAlloc(TVMFFIEnvGetTensorAllocator(), shape, dtype, device);
+        }
+
+        tvm::ffi::Tensor add_one_cpu(tvm::ffi::TensorView x) {
+          TVM_FFI_ICHECK(x->ndim == 1) << "x must be a 1D tensor";
+          DLDataType f32_dtype{kDLFloat, 32, 1};
+          TVM_FFI_ICHECK(x->dtype == f32_dtype) << "x must be a float tensor";
+          tvm::ffi::Shape x_shape(x->shape, x->shape + x->ndim);
+          tvm::ffi::Tensor y = alloc_tensor(x_shape, f32_dtype, x->device);
+          for (int i = 0; i < x->shape[0]; ++i) {
+            static_cast<float*>(y->data)[i] = static_cast<float*>(x->data)[i] + 1;
+          }
+          return y;
+        }
+        """
+        mod: Module = tvm_ffi.cpp.load_inline(
+            name='mod', cpp_sources=cpp_source, functions=['add_one_cpu']
+        )
+        x = paddle.full((3,), 1.0, dtype='float32').cpu()
+        y = mod.add_one_cpu(x)
+        np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])
+
+
 if __name__ == '__main__':
     unittest.main()

From c702c5ef691071a3de962dd2c6f47c689b10e8f5 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Sun, 12 Oct 2025 06:15:20 +0000
Subject: [PATCH 06/17] refine ut style

---
 test/legacy_test/test_tvm_ffi.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/legacy_test/test_tvm_ffi.py b/test/legacy_test/test_tvm_ffi.py
index 1e95a5db412902..c1cfef4a44d95e 100644
--- a/test/legacy_test/test_tvm_ffi.py
+++ b/test/legacy_test/test_tvm_ffi.py
@@ -12,14 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
+from typing import TYPE_CHECKING
 
 import numpy as np
 import tvm_ffi.cpp
-from tvm_ffi import Module
 
 import paddle
 
+if TYPE_CHECKING:
+    from tvm_ffi import Module
+
 
 class TestTVMFFIEnvStream(unittest.TestCase):
     def test_tvm_ffi_env_stream_for_gpu_tensor(self):

From bc16a585e58111d10c566998998954678fbb62a7 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Mon, 13 Oct 2025 09:27:23 +0800
Subject: [PATCH 07/17] pin tvm-ffi to new release 0.1.0b16

---
 python/unittest_py/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt
index 9d547fd9357d1d..0ccf6d98680f22 100644
--- a/python/unittest_py/requirements.txt
+++ b/python/unittest_py/requirements.txt
@@ -20,4 +20,4 @@ xdoctest==1.3.0
 ubelt==1.3.3  # just for xdoctest
 mypy==1.17.1
 soundfile
-apache-tvm-ffi @ git+https://github.com/apache/tvm-ffi.git@22a78943b78306a73011757fa635afa9dce35114
+apache-tvm-ffi==0.1.0b16

From d0c21455bf2e5f5bef8b359144de59f6d3b11dcc Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Mon, 13 Oct 2025 06:49:16 +0000
Subject: [PATCH 08/17] skip 2 cases on Windows

---
 test/legacy_test/test_tvm_ffi.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/legacy_test/test_tvm_ffi.py b/test/legacy_test/test_tvm_ffi.py
index c1cfef4a44d95e..917e8e924b6956 100644
--- a/test/legacy_test/test_tvm_ffi.py
+++ b/test/legacy_test/test_tvm_ffi.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import platform
 import unittest
 from typing import TYPE_CHECKING
 
@@ -72,6 +73,9 @@ def test_c_dlpack_exchange_api_cpu(self):
     def test_c_dlpack_exchange_api_gpu(self):
         if not paddle.is_compiled_with_cuda():
             return
+        if platform.system() == "Windows":
+            # Temporarily skip this test case on Windows because of a compile bug in TVM FFI
+            return
         cpp_sources = r"""
         void add_one_cuda(tvm::ffi::TensorView x, tvm::ffi::TensorView y);
         """
@@ -116,6 +120,10 @@ def test_c_dlpack_exchange_api_gpu(self):
         np.testing.assert_allclose(y.numpy(), [2.0, 2.0, 2.0])
 
     def test_c_dlpack_exchange_api_alloc_tensor(self):
+        if platform.system() == "Windows":
+            # Temporarily skip this test case on Windows because returning an owned tensor
+            # created by TVMFFIEnvGetTensorAllocator will cause a double-free error
+            return
         cpp_source = r"""
         inline tvm::ffi::Tensor alloc_tensor(tvm::ffi::Shape shape, DLDataType dtype, DLDevice device) {
           return tvm::ffi::Tensor::FromDLPackAlloc(TVMFFIEnvGetTensorAllocator(), shape, dtype, device);
         }

From 27e18e758380220d984e9f3c4041806bc1582dd8 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Mon, 13 Oct 2025 06:53:23 +0000
Subject: [PATCH 09/17] install python/unittest_py/requirements.txt in dcu
 workflow

---
 .github/workflows/_Linux-DCU.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/_Linux-DCU.yml b/.github/workflows/_Linux-DCU.yml
index 63008000cf5af6..ba82f790b35f61 100644
--- a/.github/workflows/_Linux-DCU.yml
+++ b/.github/workflows/_Linux-DCU.yml
@@ -290,6 +290,7 @@ jobs:
           ln -sf $(which python3.10) /usr/local/bin/python
           ln -sf $(which pip3.10) /usr/local/bin/pip
           pip3.10 install ./dist/paddlepaddle_dcu-0.0.0-cp310-cp310-linux_x86_64.whl
+          pip3.10 install -r python/unittest_py/requirements.txt
          wget -q --no-proxy https://paddle-device.bj.bcebos.com/dcu/hyhal-Z100.tar.gz
           tar -zxf hyhal-Z100.tar.gz -C /opt
           source /opt/dtk-24.04.1/env.sh

From ffe9d24194b1ba7b63f825a155051b0feeaa054a Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Mon, 13 Oct 2025 12:21:36 +0000
Subject: [PATCH 10/17] add a blank line to trigger docker rebuild

---
 tools/dockerfile/Dockerfile.develop.dtk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/dockerfile/Dockerfile.develop.dtk b/tools/dockerfile/Dockerfile.develop.dtk
index 8426d8282a7f25..90e0b3069c70cd 100644
--- a/tools/dockerfile/Dockerfile.develop.dtk
+++ b/tools/dockerfile/Dockerfile.develop.dtk
@@ -105,6 +105,7 @@ RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6
     make -j16 > /dev/null && make install > /dev/null && \
     cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \
     ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache
+
 ENV CCACHE_MAXSIZE=50G \
     CCACHE_LIMIT_MULTIPLE=0.8 \
     CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime

From 489f02dadf7cbc50b5b558f0ef4c41ff1fbb59e5 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Mon, 13 Oct 2025 23:32:47 +0800
Subject: [PATCH 11/17] update other dockerfiles to trigger requirements.txt
 install

---
 tools/dockerfile/Dockerfile.develop.npu | 1 +
 tools/dockerfile/Dockerfile.develop.xre | 1 +
 tools/dockerfile/Dockerfile.ubuntu20    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/tools/dockerfile/Dockerfile.develop.npu b/tools/dockerfile/Dockerfile.develop.npu
index f0ad07ec9b90be..b03b23c6c88f3f 100644
--- a/tools/dockerfile/Dockerfile.develop.npu
+++ b/tools/dockerfile/Dockerfile.develop.npu
@@ -12,6 +12,7 @@ RUN groupadd -g 1000 HwHiAiUser && \
     useradd -u 1000 -g 1000 -m -d /home/HwHiAiUser HwHiAiUser
 
 RUN mkdir -p /usr/local/Ascend/driver
+
 WORKDIR /usr/local/Ascend
 
 # install CANN requirement
diff --git a/tools/dockerfile/Dockerfile.develop.xre b/tools/dockerfile/Dockerfile.develop.xre
index 2bdbe56d7cde1e..422a6e8b667545 100644
--- a/tools/dockerfile/Dockerfile.develop.xre
+++ b/tools/dockerfile/Dockerfile.develop.xre
@@ -76,6 +76,7 @@ RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6
     make -j16 > /dev/null && make install > /dev/null && \
     cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \
     ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache
+
 ENV CCACHE_MAXSIZE=80G \
     CCACHE_LIMIT_MULTIPLE=0.8 \
     CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime
diff --git a/tools/dockerfile/Dockerfile.ubuntu20 b/tools/dockerfile/Dockerfile.ubuntu20
index fc5b56f3c6ec5a..6c6f73c2a6d9d9 100644
--- a/tools/dockerfile/Dockerfile.ubuntu20
+++ b/tools/dockerfile/Dockerfile.ubuntu20
@@ -99,6 +99,7 @@ RUN wget --no-check-certificate -qO- https://paddle-ci.gz.bcebos.com/go1.17.2.li
     mkdir /root/gopath && \
     mkdir /root/gopath/bin && \
     mkdir /root/gopath/src
+
 ENV GOROOT=/usr/local/go GOPATH=/root/gopath
 # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
 ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin

From 597c51938786acd2b524ed6143d131e53e4b1ba1 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 00:45:02 +0800
Subject: [PATCH 12/17] Revert "update other dockerfiles to trigger
 requirements.txt install"

This reverts commit 2a9fc214cce8414c809bc60bda2139e0c9562538.

---
 tools/dockerfile/Dockerfile.develop.npu | 1 -
 tools/dockerfile/Dockerfile.develop.xre | 1 -
 tools/dockerfile/Dockerfile.ubuntu20    | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tools/dockerfile/Dockerfile.develop.npu b/tools/dockerfile/Dockerfile.develop.npu
index b03b23c6c88f3f..f0ad07ec9b90be 100644
--- a/tools/dockerfile/Dockerfile.develop.npu
+++ b/tools/dockerfile/Dockerfile.develop.npu
@@ -12,7 +12,6 @@ RUN groupadd -g 1000 HwHiAiUser && \
     useradd -u 1000 -g 1000 -m -d /home/HwHiAiUser HwHiAiUser
 
 RUN mkdir -p /usr/local/Ascend/driver
-
 WORKDIR /usr/local/Ascend
 
 # install CANN requirement
diff --git a/tools/dockerfile/Dockerfile.develop.xre b/tools/dockerfile/Dockerfile.develop.xre
index 422a6e8b667545..2bdbe56d7cde1e 100644
--- a/tools/dockerfile/Dockerfile.develop.xre
+++ b/tools/dockerfile/Dockerfile.develop.xre
@@ -76,7 +76,6 @@ RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6
     make -j16 > /dev/null && make install > /dev/null && \
     cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \
     ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache
-
 ENV CCACHE_MAXSIZE=80G \
     CCACHE_LIMIT_MULTIPLE=0.8 \
     CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime
diff --git a/tools/dockerfile/Dockerfile.ubuntu20 b/tools/dockerfile/Dockerfile.ubuntu20
index 6c6f73c2a6d9d9..fc5b56f3c6ec5a 100644
--- a/tools/dockerfile/Dockerfile.ubuntu20
+++ b/tools/dockerfile/Dockerfile.ubuntu20
@@ -99,7 +99,6 @@ RUN wget --no-check-certificate -qO- https://paddle-ci.gz.bcebos.com/go1.17.2.li
     mkdir /root/gopath && \
     mkdir /root/gopath/bin && \
     mkdir /root/gopath/src
-
 ENV GOROOT=/usr/local/go GOPATH=/root/gopath
 # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
 ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin

From 65d1e1423fa19a272eaccd7dd0bbbb187a05af70 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 00:45:06 +0800
Subject: [PATCH 13/17] Revert "add a blank line to trigger docker rebuild"

This reverts commit 89ce02d3685dd0c9171ec0db9d62795c1304d5ee.

---
 tools/dockerfile/Dockerfile.develop.dtk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/dockerfile/Dockerfile.develop.dtk b/tools/dockerfile/Dockerfile.develop.dtk
index 90e0b3069c70cd..8426d8282a7f25 100644
--- a/tools/dockerfile/Dockerfile.develop.dtk
+++ b/tools/dockerfile/Dockerfile.develop.dtk
@@ -105,7 +105,6 @@ RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6
     make -j16 > /dev/null && make install > /dev/null && \
     cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \
     ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache
-
 ENV CCACHE_MAXSIZE=50G \
     CCACHE_LIMIT_MULTIPLE=0.8 \
     CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime

From b4a4a348a978e9e426b2c090ba998b3142811c18 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 00:45:23 +0800
Subject: [PATCH 14/17] Revert "install python/unittest_py/requirements.txt in
 dcu workflow"

This reverts commit aefca0e95b441e3d1abbd71e5f657ea287381bc5.

---
 .github/workflows/_Linux-DCU.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/_Linux-DCU.yml b/.github/workflows/_Linux-DCU.yml
index ba82f790b35f61..63008000cf5af6 100644
--- a/.github/workflows/_Linux-DCU.yml
+++ b/.github/workflows/_Linux-DCU.yml
@@ -290,7 +290,6 @@ jobs:
           ln -sf $(which python3.10) /usr/local/bin/python
           ln -sf $(which pip3.10) /usr/local/bin/pip
           pip3.10 install ./dist/paddlepaddle_dcu-0.0.0-cp310-cp310-linux_x86_64.whl
-          pip3.10 install -r python/unittest_py/requirements.txt
          wget -q --no-proxy https://paddle-device.bj.bcebos.com/dcu/hyhal-Z100.tar.gz
           tar -zxf hyhal-Z100.tar.gz -C /opt
           source /opt/dtk-24.04.1/env.sh

From c1735e41d5c4e947767b6813ae4f5b07092e9d2e Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 00:46:20 +0800
Subject: [PATCH 15/17] install python/unittest_py/requirements.txt in
 ci/dcu_test.sh

---
 ci/dcu_test.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/dcu_test.sh b/ci/dcu_test.sh
index be2d0e96369c75..7b4bbca440a4f7 100644
--- a/ci/dcu_test.sh
+++ b/ci/dcu_test.sh
@@ -75,6 +75,7 @@ function hybrid_paddlex() {
 function main(){
     cd ${PADDLE_ROOT}/build
     pip install hypothesis
+    pip install -r ${PADDLE_ROOT}/python/unittest_py/requirements.txt
     /opt/py310/bin/pip install safetensors
     if ls ${PADDLE_ROOT}/build/python/dist/*whl >/dev/null 2>&1; then
         pip install ${PADDLE_ROOT}/build/python/dist/*whl

From be4654b8e0af5bcaab3dbebf7776ce6f2ec7c808 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 08:34:53 +0000
Subject: [PATCH 16/17] install requirements with `/opt/py310/bin/pip` in ci

---
 ci/dcu_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/dcu_test.sh b/ci/dcu_test.sh
index 7b4bbca440a4f7..cc303f5466ea50 100644
--- a/ci/dcu_test.sh
+++ b/ci/dcu_test.sh
@@ -75,7 +75,7 @@ function hybrid_paddlex() {
 function main(){
     cd ${PADDLE_ROOT}/build
     pip install hypothesis
-    pip install -r ${PADDLE_ROOT}/python/unittest_py/requirements.txt
+    /opt/py310/bin/pip install -r ${PADDLE_ROOT}/python/unittest_py/requirements.txt
     /opt/py310/bin/pip install safetensors
     if ls ${PADDLE_ROOT}/build/python/dist/*whl >/dev/null 2>&1; then
         pip install ${PADDLE_ROOT}/build/python/dist/*whl

From 567c660753f0fc4c35d674761b20735b0d9957f2 Mon Sep 17 00:00:00 2001
From: SigureMo
Date: Tue, 14 Oct 2025 22:00:08 +0800
Subject: [PATCH 17/17] skip gpu case on DCU

---
 test/legacy_test/test_tvm_ffi.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/legacy_test/test_tvm_ffi.py b/test/legacy_test/test_tvm_ffi.py
index 917e8e924b6956..ce1a955932ebe4 100644
--- a/test/legacy_test/test_tvm_ffi.py
+++ b/test/legacy_test/test_tvm_ffi.py
@@ -73,6 +73,9 @@ def test_c_dlpack_exchange_api_cpu(self):
     def test_c_dlpack_exchange_api_gpu(self):
         if not paddle.is_compiled_with_cuda():
             return
+        if paddle.is_compiled_with_rocm():
+            # Skip on DCU because CUDA_HOME is not available
+            return
         if platform.system() == "Windows":
             # Temporarily skip this test case on Windows because of a compile bug in TVM FFI
             return
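
Usage sketch (not part of the patch series): a C/C++ extension that speaks DLPack can reach the exchange API published above without linking against Paddle. The attribute name and the managed_tensor_from_py_object_no_sync member below are exactly what the patches wire up in pybind.cc and tensor_patch_methods.py; the surrounding glue (includes, error handling, the FromPaddleTensor helper name) is illustrative only and assumes the DLPackExchangeAPI definition from DLPack v1.2's dlpack.h.

// Hypothetical consumer-side helper; only the attribute name and the
// struct member names are taken from this series.
#include <Python.h>
#include <dlpack/dlpack.h>  // DLPack v1.2: defines DLPackExchangeAPI

// Convert a paddle.Tensor (passed as PyObject*) into a
// DLManagedTensorVersioned via the published exchange API.
static DLManagedTensorVersioned *FromPaddleTensor(PyObject *tensor) {
  // __c_dlpack_exchange_api__ is an int attribute set on core.eager.Tensor
  // holding the address of the process-wide PaddleDLPackExchangeAPI.
  PyObject *addr = PyObject_GetAttrString(tensor, "__c_dlpack_exchange_api__");
  if (addr == NULL) return NULL;
  const DLPackExchangeAPI *api =
      (const DLPackExchangeAPI *)PyLong_AsVoidPtr(addr);
  Py_DECREF(addr);
  if (api == NULL) return NULL;
  DLManagedTensorVersioned *out = NULL;
  // On failure the producer sets a Python exception and returns -1
  // (see DLPackManagedTensorFromPyObjectNoSync above).
  if (api->managed_tensor_from_py_object_no_sync(tensor, &out) != 0) {
    return NULL;
  }
  return out;  // the consumer must eventually call out->deleter(out)
}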