Skip to content

Improve #449: Improve StridedMemoryView creation time #838

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions cuda_core/cuda/core/experimental/_memoryview.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ from typing import Any, Optional
import numpy

from cuda.core.experimental._utils.cuda_utils import handle_return, driver
from cuda.core.experimental._utils cimport cuda_utils


# TODO(leofang): support NumPy structured dtypes
Expand Down Expand Up @@ -184,48 +185,47 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
stream=int(stream_ptr) if stream_ptr else None)

cdef void* data = NULL
cdef DLTensor* dl_tensor
cdef DLManagedTensorVersioned* dlm_tensor_ver
cdef DLManagedTensor* dlm_tensor
cdef const char *used_name
if cpython.PyCapsule_IsValid(
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
data = cpython.PyCapsule_GetPointer(
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
versioned = True
dlm_tensor_ver = <DLManagedTensorVersioned*>data
dl_tensor = &dlm_tensor_ver.dl_tensor
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
elif cpython.PyCapsule_IsValid(
capsule, DLPACK_TENSOR_UNUSED_NAME):
data = cpython.PyCapsule_GetPointer(
capsule, DLPACK_TENSOR_UNUSED_NAME)
versioned = False
else:
assert False

cdef DLManagedTensor* dlm_tensor
cdef DLManagedTensorVersioned* dlm_tensor_ver
cdef DLTensor* dl_tensor
if versioned:
dlm_tensor_ver = <DLManagedTensorVersioned*>data
dl_tensor = &dlm_tensor_ver.dl_tensor
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
else:
dlm_tensor = <DLManagedTensor*>data
dl_tensor = &dlm_tensor.dl_tensor
is_readonly = False
used_name = DLPACK_TENSOR_USED_NAME
else:
assert False

cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
buf.ptr = <intptr_t>(dl_tensor.data)
buf.shape = tuple(int(dl_tensor.shape[i]) for i in range(dl_tensor.ndim))

buf.shape = cuda_utils.carray_int64_t_to_tuple(dl_tensor.shape, dl_tensor.ndim)
if dl_tensor.strides:
buf.strides = tuple(
int(dl_tensor.strides[i]) for i in range(dl_tensor.ndim))
buf.strides = cuda_utils.carray_int64_t_to_tuple(dl_tensor.strides, dl_tensor.ndim)
else:
# C-order
buf.strides = None

buf.dtype = dtype_dlpack_to_numpy(&dl_tensor.dtype)
buf.device_id = device_id
buf.is_device_accessible = is_device_accessible
buf.readonly = is_readonly
buf.exporting_obj = obj

cdef const char* used_name = (
DLPACK_VERSIONED_TENSOR_USED_NAME if versioned else DLPACK_TENSOR_USED_NAME)
cpython.PyCapsule_SetName(capsule, used_name)

return buf
Expand Down
13 changes: 13 additions & 0 deletions cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,20 @@
#
# SPDX-License-Identifier: Apache-2.0


cimport cpython
cimport libc.stdint


# Error-checking helpers shared with other Cython modules via this .pxd.
# `except?-1` declares that a return value of -1 *may* indicate a raised
# Python exception, so callers check for a pending exception only when they
# see -1.
cpdef int _check_driver_error(error) except?-1
cpdef int _check_runtime_error(error) except?-1
cpdef int _check_nvrtc_error(error) except?-1
# `=*` marks an optional argument; its default value is defined alongside the
# implementation rather than in this declaration.
cpdef check_or_create_options(type cls, options, str options_description=*, bint keep_none=*)


cdef inline tuple carray_int64_t_to_tuple(libc.stdint.int64_t *ptr, int length):
    # Build a Python tuple of ints from the first `length` elements of the C
    # array `ptr`, using the Python/C API directly for speed (used to construct
    # shape and strides tuples).
    #
    # `ptr` must point to at least `length` readable int64_t values; it is not
    # dereferenced when `length` is 0.
    cdef int i
    cdef object item
    cdef tuple result = cpython.PyTuple_New(length)
    for i in range(length):
        item = cpython.PyLong_FromLongLong(ptr[i])
        # PyTuple_SET_ITEM steals a reference to `item`, while Cython still
        # releases its own reference when `item` is rebound or goes out of
        # scope. Take an extra reference so the tuple really owns one;
        # without it the tuple would hold a dangling pointer.
        cpython.Py_INCREF(item)
        cpython.PyTuple_SET_ITEM(result, i, item)
    return result
Loading