Skip to content

Commit a66e9e5

Browse files
committed
Improve #449: Improve StridedMemoryView creation time
Two changes: (1) refactor the versioned/non-versioned paths to reduce the number of branches; (2) create the shape and strides tuples using the Python/C API.
1 parent b87c787 commit a66e9e5

File tree

1 file changed

+24
-18
lines changed

1 file changed

+24
-18
lines changed

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -184,48 +184,52 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
184184
stream=int(stream_ptr) if stream_ptr else None)
185185

186186
cdef void* data = NULL
187+
cdef DLTensor* dl_tensor
188+
cdef DLManagedTensorVersioned* dlm_tensor_ver
189+
cdef DLManagedTensor* dlm_tensor
190+
cdef const char *used_name
187191
if cpython.PyCapsule_IsValid(
188192
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
189193
data = cpython.PyCapsule_GetPointer(
190194
capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
191195
versioned = True
196+
dlm_tensor_ver = <DLManagedTensorVersioned*>data
197+
dl_tensor = &dlm_tensor_ver.dl_tensor
198+
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
199+
used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
192200
elif cpython.PyCapsule_IsValid(
193201
capsule, DLPACK_TENSOR_UNUSED_NAME):
194202
data = cpython.PyCapsule_GetPointer(
195203
capsule, DLPACK_TENSOR_UNUSED_NAME)
196204
versioned = False
197-
else:
198-
assert False
199-
200-
cdef DLManagedTensor* dlm_tensor
201-
cdef DLManagedTensorVersioned* dlm_tensor_ver
202-
cdef DLTensor* dl_tensor
203-
if versioned:
204-
dlm_tensor_ver = <DLManagedTensorVersioned*>data
205-
dl_tensor = &dlm_tensor_ver.dl_tensor
206-
is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
207-
else:
208205
dlm_tensor = <DLManagedTensor*>data
209206
dl_tensor = &dlm_tensor.dl_tensor
210207
is_readonly = False
208+
used_name = DLPACK_TENSOR_USED_NAME
209+
else:
210+
assert False
211211

212212
cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
213213
buf.ptr = <intptr_t>(dl_tensor.data)
214-
buf.shape = tuple(int(dl_tensor.shape[i]) for i in range(dl_tensor.ndim))
214+
215+
# Construct shape and strides tuples using the Python/C API for speed
216+
buf.shape = cpython.PyTuple_New(dl_tensor.ndim)
217+
for i in range(dl_tensor.ndim):
218+
cpython.PyTuple_SET_ITEM(buf.shape, i, cpython.PyLong_FromLong(dl_tensor.shape[i]))
215219
if dl_tensor.strides:
216-
buf.strides = tuple(
217-
int(dl_tensor.strides[i]) for i in range(dl_tensor.ndim))
220+
buf.strides = cpython.PyTuple_New(dl_tensor.ndim)
221+
for i in range(dl_tensor.ndim):
222+
cpython.PyTuple_SET_ITEM(buf.strides, i, cpython.PyLong_FromLong(dl_tensor.strides[i]))
218223
else:
219224
# C-order
220225
buf.strides = None
226+
221227
buf.dtype = dtype_dlpack_to_numpy(&dl_tensor.dtype)
222228
buf.device_id = device_id
223229
buf.is_device_accessible = is_device_accessible
224230
buf.readonly = is_readonly
225231
buf.exporting_obj = obj
226232

227-
cdef const char* used_name = (
228-
DLPACK_VERSIONED_TENSOR_USED_NAME if versioned else DLPACK_TENSOR_USED_NAME)
229233
cpython.PyCapsule_SetName(capsule, used_name)
230234

231235
return buf
@@ -308,8 +312,10 @@ cdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
308312
buf.dtype = numpy.dtype(cai_data["typestr"])
309313
buf.strides = cai_data.get("strides")
310314
if buf.strides is not None:
311-
# convert to counts
312-
buf.strides = tuple(s // buf.dtype.itemsize for s in buf.strides)
315+
# convert to counts, using the Python/C API for speed
316+
for i in range(len(buf.strides)):
317+
cpython.PyTuple_SET_ITEM(buf.strides, i,
318+
cpython.PyLong_FromLong(buf.strides[i] // buf.dtype.itemsize))
313319
buf.is_device_accessible = True
314320
buf.device_id = handle_return(
315321
driver.cuPointerGetAttribute(

0 commit comments

Comments
 (0)