Skip to content

Commit 8f1dd40

Browse files
authored
Merge pull request #838 from mdboom/issue449
Improve #449: Improve StridedMemoryView creation time
2 parents b87c787 + df71f24 commit 8f1dd40

File tree

2 files changed

+29
-16
lines changed

2 files changed

+29
-16
lines changed

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from typing import Any, Optional
1212
import numpy
1313

1414
from cuda.core.experimental._utils.cuda_utils import handle_return, driver
15+
from cuda.core.experimental._utils cimport cuda_utils
1516

1617

1718
# TODO(leofang): support NumPy structured dtypes
@@ -184,48 +185,47 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
184185
stream=int(stream_ptr) if stream_ptr else None)
185186

186187
cdef void* data = NULL
188+
cdef DLTensor* dl_tensor
189+
cdef DLManagedTensorVersioned* dlm_tensor_ver
190+
cdef DLManagedTensor* dlm_tensor
191+
cdef const char *used_name
187192
if cpython.PyCapsule_IsValid(
188193
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
189194
data = cpython.PyCapsule_GetPointer(
190195
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
191196
versioned = True
197+
dlm_tensor_ver = <DLManagedTensorVersioned*>data
198+
dl_tensor = &dlm_tensor_ver.dl_tensor
199+
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
200+
used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
192201
elif cpython.PyCapsule_IsValid(
193202
capsule, DLPACK_TENSOR_UNUSED_NAME):
194203
data = cpython.PyCapsule_GetPointer(
195204
capsule, DLPACK_TENSOR_UNUSED_NAME)
196205
versioned = False
197-
else:
198-
assert False
199-
200-
cdef DLManagedTensor* dlm_tensor
201-
cdef DLManagedTensorVersioned* dlm_tensor_ver
202-
cdef DLTensor* dl_tensor
203-
if versioned:
204-
dlm_tensor_ver = <DLManagedTensorVersioned*>data
205-
dl_tensor = &dlm_tensor_ver.dl_tensor
206-
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
207-
else:
208206
dlm_tensor = <DLManagedTensor*>data
209207
dl_tensor = &dlm_tensor.dl_tensor
210208
is_readonly = False
209+
used_name = DLPACK_TENSOR_USED_NAME
210+
else:
211+
assert False
211212

212213
cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
213214
buf.ptr = <intptr_t>(dl_tensor.data)
214-
buf.shape = tuple(int(dl_tensor.shape[i]) for i in range(dl_tensor.ndim))
215+
216+
buf.shape = cuda_utils.carray_int64_t_to_tuple(dl_tensor.shape, dl_tensor.ndim)
215217
if dl_tensor.strides:
216-
buf.strides = tuple(
217-
int(dl_tensor.strides[i]) for i in range(dl_tensor.ndim))
218+
buf.strides = cuda_utils.carray_int64_t_to_tuple(dl_tensor.strides, dl_tensor.ndim)
218219
else:
219220
# C-order
220221
buf.strides = None
222+
221223
buf.dtype = dtype_dlpack_to_numpy(&dl_tensor.dtype)
222224
buf.device_id = device_id
223225
buf.is_device_accessible = is_device_accessible
224226
buf.readonly = is_readonly
225227
buf.exporting_obj = obj
226228

227-
cdef const char* used_name = (
228-
DLPACK_VERSIONED_TENSOR_USED_NAME if versioned else DLPACK_TENSOR_USED_NAME)
229229
cpython.PyCapsule_SetName(capsule, used_name)
230230

231231
return buf

cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,20 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
6+
cimport cpython
7+
cimport libc.stdint
8+
9+
510
cpdef int _check_driver_error(error) except?-1
611
cpdef int _check_runtime_error(error) except?-1
712
cpdef int _check_nvrtc_error(error) except?-1
813
cpdef check_or_create_options(type cls, options, str options_description=*, bint keep_none=*)
14+
15+
16+
cdef inline tuple carray_int64_t_to_tuple(libc.stdint.int64_t *ptr, int length):
    # Build a Python tuple from a C array of int64_t values.
    #
    # Uses the CPython C API directly (PyTuple_New + PyTuple_SET_ITEM)
    # instead of a generator expression, to speed up the hot path of
    # StridedMemoryView creation (shape/strides tuple construction).
    #
    # ptr:    pointer to `length` int64_t values (caller retains ownership;
    #         values are copied into new Python ints).
    # length: number of elements to copy; a length of 0 yields ().
    #
    # NOTE(review): PyTuple_SET_ITEM "steals" a reference to its item while
    # PyLong_FromLongLong returns a new reference — confirm that Cython's
    # cpython cimport declarations balance the refcounts here (i.e. that no
    # extra Py_INCREF is required before SET_ITEM).
    result = cpython.PyTuple_New(length)
    for i in range(length):
        cpython.PyTuple_SET_ITEM(result, i, cpython.PyLong_FromLongLong(ptr[i]))
    return result

0 commit comments

Comments
 (0)