Skip to content

Commit 27aeda2

Browse files
authored
Switch Buffers to memoryviews & remove extra copies/allocations (#656)
When `Buffer` was originally written, Python's Buffer Protocol support was inconsistent across Python versions (specifically on Python 2.7). Since Python 2.7 reached EOL and was dropped from Numcodecs, Buffer Protocol support has become more consistent. At this stage the `memoryview` object, which Cython also supports, does everything that `Buffer` would do for us. Plus, it is built into the Python standard library and behaves similarly in a lot of ways. Given this, switch the code over to `memoryview`s internally and drop `Buffer`. <hr> Additionally, changes have been pushed to this PR to improve overall memory usage. This eliminates some unneeded copies that occurred at the end of some codecs. It also eliminates some temporary allocations used in some codec pipelines by allocating output buffers earlier and changing operations to act in-place. This should eliminate some spiky memory behavior seen recently with codecs.
1 parent 3c933cf commit 27aeda2

File tree

8 files changed

+312
-237
lines changed

8 files changed

+312
-237
lines changed

docs/release.rst

+7
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ Fixes
3737
~~~~~
3838
* Remove redundant ``id`` from codec metadata serialization in Zarr3 codecs.
3939
By :user:`Norman Rzepka <normanrz>`, :issue:`685`
40+
* Preallocate output buffers and resize directly as needed.
41+
By :user:`John Kirkham <jakirkham>`, :issue:`656`
42+
43+
Maintenance
44+
~~~~~~~~~~~
45+
* Replace internal ``Buffer`` usage with ``memoryview``\ s.
46+
By :user:`John Kirkham <jakirkham>`, :issue:`656`
4047

4148
.. _release_0.15.0:
4249

numcodecs/blosc.pyx

+81-74
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,11 @@ import os
99
from deprecated import deprecated
1010

1111

12-
from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE
13-
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
12+
from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize
13+
from cpython.memoryview cimport PyMemoryView_GET_BUFFER
1414

15+
from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview
1516

16-
from .compat_ext cimport Buffer
17-
from .compat_ext import Buffer
1817
from .compat import ensure_contiguous_ndarray
1918
from .abc import Codec
2019

@@ -154,17 +153,16 @@ def _cbuffer_sizes(source):
154153
155154
"""
156155
cdef:
157-
Buffer buffer
156+
memoryview source_mv
157+
const Py_buffer* source_pb
158158
size_t nbytes, cbytes, blocksize
159159

160-
# obtain buffer
161-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
160+
# obtain source memoryview
161+
source_mv = ensure_continguous_memoryview(source)
162+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
162163

163164
# determine buffer size
164-
blosc_cbuffer_sizes(buffer.ptr, &nbytes, &cbytes, &blocksize)
165-
166-
# release buffers
167-
buffer.release()
165+
blosc_cbuffer_sizes(source_pb.buf, &nbytes, &cbytes, &blocksize)
168166

169167
return nbytes, cbytes, blocksize
170168

@@ -173,16 +171,15 @@ cbuffer_sizes = deprecated(_cbuffer_sizes)
173171
def cbuffer_complib(source):
174172
"""Return the name of the compression library used to compress `source`."""
175173
cdef:
176-
Buffer buffer
174+
memoryview source_mv
175+
const Py_buffer* source_pb
177176

178-
# obtain buffer
179-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
177+
# obtain source memoryview
178+
source_mv = ensure_continguous_memoryview(source)
179+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
180180

181181
# determine buffer size
182-
complib = blosc_cbuffer_complib(buffer.ptr)
183-
184-
# release buffers
185-
buffer.release()
182+
complib = blosc_cbuffer_complib(source_pb.buf)
186183

187184
complib = complib.decode('ascii')
188185

@@ -202,18 +199,17 @@ def _cbuffer_metainfo(source):
202199
203200
"""
204201
cdef:
205-
Buffer buffer
202+
memoryview source_mv
203+
const Py_buffer* source_pb
206204
size_t typesize
207205
int flags
208206

209-
# obtain buffer
210-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
207+
# obtain source memoryview
208+
source_mv = ensure_continguous_memoryview(source)
209+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
211210

212211
# determine buffer size
213-
blosc_cbuffer_metainfo(buffer.ptr, &typesize, &flags)
214-
215-
# release buffers
216-
buffer.release()
212+
blosc_cbuffer_metainfo(source_pb.buf, &typesize, &flags)
217213

218214
# decompose flags
219215
if flags & BLOSC_DOSHUFFLE:
@@ -263,28 +259,34 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
263259
"""
264260

265261
cdef:
266-
char *source_ptr
267-
char *dest_ptr
268-
Buffer source_buffer
262+
memoryview source_mv
263+
const Py_buffer* source_pb
264+
const char* source_ptr
269265
size_t nbytes, itemsize
270266
int cbytes
271267
bytes dest
268+
char* dest_ptr
272269

273270
# check valid cname early
274271
cname_str = cname.decode('ascii')
275272
if cname_str not in list_compressors():
276273
_err_bad_cname(cname_str)
277274

278-
# setup source buffer
279-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
280-
source_ptr = source_buffer.ptr
281-
nbytes = source_buffer.nbytes
275+
# obtain source memoryview
276+
source_mv = ensure_continguous_memoryview(source)
277+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
278+
279+
# extract metadata
280+
source_ptr = <const char*>source_pb.buf
281+
nbytes = source_pb.len
282+
283+
# validate typesize
282284
if isinstance(typesize, int):
283285
if typesize < 1:
284286
raise ValueError(f"Cannot use typesize {typesize} less than 1.")
285287
itemsize = typesize
286288
else:
287-
itemsize = source_buffer.itemsize
289+
itemsize = source_pb.itemsize
288290

289291
# determine shuffle
290292
if shuffle == AUTOSHUFFLE:
@@ -333,16 +335,14 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
333335
cname, blocksize, 1)
334336

335337
finally:
336-
337-
# release buffers
338-
source_buffer.release()
338+
pass
339339

340340
# check compression was successful
341341
if cbytes <= 0:
342342
raise RuntimeError('error during blosc compression: %d' % cbytes)
343343

344344
# resize after compression
345-
dest = dest[:cbytes]
345+
PyBytes_RESIZE(dest, cbytes)
346346

347347
return dest
348348

@@ -366,30 +366,36 @@ def decompress(source, dest=None):
366366
"""
367367
cdef:
368368
int ret
369-
char *source_ptr
370-
char *dest_ptr
371-
Buffer source_buffer
372-
Buffer dest_buffer = None
369+
memoryview source_mv
370+
const Py_buffer* source_pb
371+
const char* source_ptr
372+
memoryview dest_mv
373+
Py_buffer* dest_pb
374+
char* dest_ptr
373375
size_t nbytes, cbytes, blocksize
374376

375-
# setup source buffer
376-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
377-
source_ptr = source_buffer.ptr
377+
# obtain source memoryview
378+
source_mv = ensure_continguous_memoryview(source)
379+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
380+
381+
# get source pointer
382+
source_ptr = <const char*>source_pb.buf
378383

379384
# determine buffer size
380385
blosc_cbuffer_sizes(source_ptr, &nbytes, &cbytes, &blocksize)
381386

382387
# setup destination buffer
383388
if dest is None:
384389
# allocate memory
385-
dest = PyBytes_FromStringAndSize(NULL, nbytes)
386-
dest_ptr = PyBytes_AS_STRING(dest)
387-
dest_nbytes = nbytes
390+
dest_1d = dest = PyBytes_FromStringAndSize(NULL, nbytes)
388391
else:
389-
arr = ensure_contiguous_ndarray(dest)
390-
dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
391-
dest_ptr = dest_buffer.ptr
392-
dest_nbytes = dest_buffer.nbytes
392+
dest_1d = ensure_contiguous_ndarray(dest)
393+
394+
# obtain dest memoryview
395+
dest_mv = memoryview(dest_1d)
396+
dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
397+
dest_ptr = <char*>dest_pb.buf
398+
dest_nbytes = dest_pb.len
393399

394400
try:
395401

@@ -408,11 +414,7 @@ def decompress(source, dest=None):
408414
ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1)
409415

410416
finally:
411-
412-
# release buffers
413-
source_buffer.release()
414-
if dest_buffer is not None:
415-
dest_buffer.release()
417+
pass
416418

417419
# handle errors
418420
if ret <= 0:
@@ -449,14 +451,20 @@ def _decompress_partial(source, start, nitems, dest=None):
449451
int encoding_size
450452
int nitems_bytes
451453
int start_bytes
452-
char *source_ptr
453-
char *dest_ptr
454-
Buffer source_buffer
455-
Buffer dest_buffer = None
454+
memoryview source_mv
455+
const Py_buffer* source_pb
456+
const char* source_ptr
457+
memoryview dest_mv
458+
Py_buffer* dest_pb
459+
char* dest_ptr
460+
size_t dest_nbytes
456461

457-
# setup source buffer
458-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
459-
source_ptr = source_buffer.ptr
462+
# obtain source memoryview
463+
source_mv = ensure_continguous_memoryview(source)
464+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
465+
466+
# setup source pointer
467+
source_ptr = <const char*>source_pb.buf
460468

461469
# get encoding size from source buffer header
462470
encoding_size = source[3]
@@ -467,26 +475,25 @@ def _decompress_partial(source, start, nitems, dest=None):
467475

468476
# setup destination buffer
469477
if dest is None:
470-
dest = PyBytes_FromStringAndSize(NULL, nitems_bytes)
471-
dest_ptr = PyBytes_AS_STRING(dest)
472-
dest_nbytes = nitems_bytes
478+
# allocate memory
479+
dest_1d = dest = PyBytes_FromStringAndSize(NULL, nitems_bytes)
473480
else:
474-
arr = ensure_contiguous_ndarray(dest)
475-
dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
476-
dest_ptr = dest_buffer.ptr
477-
dest_nbytes = dest_buffer.nbytes
481+
dest_1d = ensure_contiguous_ndarray(dest)
482+
483+
# obtain dest memoryview
484+
dest_mv = memoryview(dest_1d)
485+
dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
486+
dest_ptr = <char*>dest_pb.buf
487+
dest_nbytes = dest_pb.len
478488

479489
# try decompression
480490
try:
481491
if dest_nbytes < nitems_bytes:
482492
raise ValueError('destination buffer too small; expected at least %s, '
483493
'got %s' % (nitems_bytes, dest_nbytes))
484494
ret = blosc_getitem(source_ptr, start, nitems, dest_ptr)
485-
486495
finally:
487-
source_buffer.release()
488-
if dest_buffer is not None:
489-
dest_buffer.release()
496+
pass
490497

491498
# ret refers to the number of bytes returned from blosc_getitem.
492499
if ret <= 0:

numcodecs/compat_ext.pxd

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
# cython: language_level=3
22

33

4-
cdef class Buffer:
5-
cdef:
6-
char *ptr
7-
Py_buffer buffer
8-
size_t nbytes
9-
size_t itemsize
10-
bint acquired
11-
12-
cpdef release(self)
4+
cdef extern from *:
5+
"""
6+
#define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n)
7+
"""
8+
int PyBytes_RESIZE(object b, Py_ssize_t n) except -1
9+
10+
11+
cpdef memoryview ensure_continguous_memoryview(obj)

numcodecs/compat_ext.pyx

+11-20
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,17 @@
33
# cython: linetrace=False
44
# cython: binding=False
55
# cython: language_level=3
6-
from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release
76

7+
from cpython.buffer cimport PyBuffer_IsContiguous
8+
from cpython.memoryview cimport PyMemoryView_GET_BUFFER
89

9-
from .compat import ensure_contiguous_ndarray
1010

11-
12-
cdef class Buffer:
13-
"""Convenience class for buffer interface."""
14-
15-
def __cinit__(self, obj, int flags):
16-
PyObject_GetBuffer(obj, &(self.buffer), flags)
17-
self.acquired = True
18-
self.ptr = <char *> self.buffer.buf
19-
self.itemsize = self.buffer.itemsize
20-
self.nbytes = self.buffer.len
21-
22-
cpdef release(self):
23-
if self.acquired:
24-
PyBuffer_Release(&(self.buffer))
25-
self.acquired = False
26-
27-
def __dealloc__(self):
28-
self.release()
11+
cpdef memoryview ensure_continguous_memoryview(obj):
12+
cdef memoryview mv
13+
if type(obj) is memoryview:
14+
mv = <memoryview>obj
15+
else:
16+
mv = memoryview(obj)
17+
if not PyBuffer_IsContiguous(PyMemoryView_GET_BUFFER(mv), b'A'):
18+
raise BufferError("Expected contiguous memory")
19+
return mv

0 commit comments

Comments
 (0)