diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx
index abe27b8ab..31482229c 100644
--- a/cuda_core/cuda/core/experimental/_memoryview.pyx
+++ b/cuda_core/cuda/core/experimental/_memoryview.pyx
@@ -12,6 +12,7 @@ from typing import Any, Optional
 import numpy
 
 from cuda.core.experimental._utils.cuda_utils import handle_return, driver
+from cuda.core.experimental._utils cimport cuda_utils
 
 
 # TODO(leofang): support NumPy structured dtypes
@@ -184,48 +185,47 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
         stream=int(stream_ptr) if stream_ptr else None)
 
     cdef void* data = NULL
+    cdef DLTensor* dl_tensor
+    cdef DLManagedTensorVersioned* dlm_tensor_ver
+    cdef DLManagedTensor* dlm_tensor
+    cdef const char *used_name
     if cpython.PyCapsule_IsValid(
             capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
         data = cpython.PyCapsule_GetPointer(
             capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
         versioned = True
+        dlm_tensor_ver = data
+        dl_tensor = &dlm_tensor_ver.dl_tensor
+        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
+        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
     elif cpython.PyCapsule_IsValid(
             capsule, DLPACK_TENSOR_UNUSED_NAME):
         data = cpython.PyCapsule_GetPointer(
             capsule, DLPACK_TENSOR_UNUSED_NAME)
         versioned = False
-    else:
-        assert False
-
-    cdef DLManagedTensor* dlm_tensor
-    cdef DLManagedTensorVersioned* dlm_tensor_ver
-    cdef DLTensor* dl_tensor
-    if versioned:
-        dlm_tensor_ver = data
-        dl_tensor = &dlm_tensor_ver.dl_tensor
-        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
-    else:
         dlm_tensor = data
         dl_tensor = &dlm_tensor.dl_tensor
         is_readonly = False
+        used_name = DLPACK_TENSOR_USED_NAME
+    else:
+        assert False
 
     cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
     buf.ptr = (dl_tensor.data)
-    buf.shape = tuple(int(dl_tensor.shape[i]) for i in range(dl_tensor.ndim))
+
+    buf.shape = cuda_utils.carray_int64_t_to_tuple(dl_tensor.shape, dl_tensor.ndim)
     if dl_tensor.strides:
-        buf.strides = tuple(
-            int(dl_tensor.strides[i]) for i in range(dl_tensor.ndim))
+        buf.strides = cuda_utils.carray_int64_t_to_tuple(dl_tensor.strides, dl_tensor.ndim)
     else:
         # C-order
         buf.strides = None
+
     buf.dtype = dtype_dlpack_to_numpy(&dl_tensor.dtype)
     buf.device_id = device_id
     buf.is_device_accessible = is_device_accessible
     buf.readonly = is_readonly
     buf.exporting_obj = obj
 
-    cdef const char* used_name = (
-        DLPACK_VERSIONED_TENSOR_USED_NAME if versioned else DLPACK_TENSOR_USED_NAME)
     cpython.PyCapsule_SetName(capsule, used_name)
 
     return buf
diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd
index b082fb8bb..601736c47 100644
--- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd
+++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd
@@ -2,7 +2,28 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+
+cimport cpython
+cimport libc.stdint
+
+
 cpdef int _check_driver_error(error) except?-1
 cpdef int _check_runtime_error(error) except?-1
 cpdef int _check_nvrtc_error(error) except?-1
 cpdef check_or_create_options(type cls, options, str options_description=*, bint keep_none=*)
+
+
+cdef inline tuple carray_int64_t_to_tuple(libc.stdint.int64_t *ptr, int length):
+    # Construct shape and strides tuples using the Python/C API for speed.
+    # Returns a new tuple holding ``length`` Python ints read from ``ptr``.
+    cdef Py_ssize_t i
+    cdef object item
+    cdef tuple result = cpython.PyTuple_New(length)
+    for i in range(length):
+        item = cpython.PyLong_FromLongLong(ptr[i])
+        # PyTuple_SET_ITEM steals a reference to ``item``, but Cython still
+        # releases its own reference afterwards; take an extra one so the
+        # tuple ends up owning a valid reference.
+        cpython.Py_INCREF(item)
+        cpython.PyTuple_SET_ITEM(result, i, item)
+    return result