-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfpzip.pyx
246 lines (193 loc) · 7.46 KB
/
fpzip.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# cython: language_level=3
from libc.stdio cimport FILE, fopen, fwrite, fclose
from libc.stdlib cimport calloc, free
from libc.stdint cimport uint8_t
from cpython cimport array
import array
import sys
cimport numpy as numpy
import numpy as np
__VERSION__ = '1.2.5'
__version__ = __VERSION__
FPZ_ERROR_STRINGS = [
"success",
"cannot read stream",
"cannot write stream",
"not an fpz stream",
"fpz format version not supported",
"precision not supported",
"memory buffer overflow"
]
cdef extern from "fpzip.h":
ctypedef struct FPZ:
int type
int prec
int nx
int ny
int nz
int nf
cdef FPZ* fpzip_read_from_file(FILE* file)
cdef FPZ* fpzip_read_from_buffer(void* buffer)
cdef int fpzip_read_header(FPZ* fpz)
cdef size_t fpzip_read(FPZ* fpz, void* data)
cdef void fpzip_read_close(FPZ* fpz)
cdef FPZ* fpzip_write_to_file(FILE* file)
cdef FPZ* fpzip_write_to_buffer(void* buffer, size_t size)
cdef int fpzip_write_header(FPZ* fpz)
cdef int fpzip_write(FPZ* fpz, const void* data)
cdef void fpzip_write_close(FPZ* fpz)
ctypedef enum fpzipError:
fpzipSuccess = 0, # no error
fpzipErrorReadStream = 1, # cannot read stream
fpzipErrorWriteStream = 2, # cannot write stream
fpzipErrorBadFormat = 3, # magic mismatch; not an fpz stream
fpzipErrorBadVersion = 4, # fpz format version not supported
fpzipErrorBadPrecision = 5, # precision not supported
fpzipErrorBufferOverflow = 6 # compressed buffer overflow
cdef fpzipError fpzip_errno = fpzipSuccess
class FpzipError(Exception):
pass
class FpzipWriteError(FpzipError):
pass
class FpzipReadError(FpzipError):
pass
cpdef allocate(typecode, ct):
cdef array.array array_template = array.array(chr(typecode), [])
# create an array with 3 elements with same type as template
return array.clone(array_template, ct, zero=True)
def validate_order(order):
order = order.upper()
if order not in ('C', 'F'):
raise ValueError("Undefined order parameter '{}'. Options are 'C' or 'F'".format(order))
return order
def compress(data, precision=0, order='C'):
"""
fpzip.compress(data, precision=0, order='C')
Takes up to a 4d numpy array of floats or doubles and returns
a compressed bytestring.
precision indicates the number of bits to truncate. Any value above
zero indicates a lossy operation.
order is 'C' or 'F' (row major vs column major memory layout) and
should correspond to the underlying orientation of the input array.
"""
MAX_ATTEMPTS = 5
BUFFER_GROWTH_FACTOR = 1.5
# Occasionally the fpzip compression algorithm produces larger 'compressed'
# representation than the size of the input data array (particularly for very
# short input arrays) so we gradually grow the compression buffer if a buffer
# size of `data.nbytes` is insufficient.
buffer_size = data.nbytes
for i in range(MAX_ATTEMPTS):
try:
return _try_compress(data, buffer_size, precision=precision, order=order)
except:
# Only increase the buffer size if the exception was caused by buffer
# overflow and we aren't on our final iteration.
if fpzip_errno == fpzipErrorBufferOverflow and i < (MAX_ATTEMPTS - 1):
buffer_size = int(BUFFER_GROWTH_FACTOR * buffer_size)
continue
else:
raise
def _try_compress(data, buffer_size, precision, order):
if data.dtype not in (np.float32, np.float64):
raise ValueError("Data type {} must be a floating type.".format(data.dtype))
order = validate_order(order)
while len(data.shape) < 4:
if order == 'C':
data = data[np.newaxis, ...]
else: # F
data = data[..., np.newaxis ]
if not data.flags['C_CONTIGUOUS'] and not data.flags['F_CONTIGUOUS']:
data = np.copy(data, order=order)
header_bytes = 24 # read.cpp:fpzip_read_header
cdef char fptype = b'f' if data.dtype == np.float32 else b'd'
# some compressed data can be bigger than the original data
cdef array.array compression_buf = allocate(fptype, data.size + header_bytes)
cdef FPZ* fpz_ptr
if fptype == b'f':
fpz_ptr = fpzip_write_to_buffer(compression_buf.data.as_floats, buffer_size + header_bytes)
else:
fpz_ptr = fpzip_write_to_buffer(compression_buf.data.as_doubles, buffer_size + header_bytes)
if data.dtype == np.float32:
fpz_ptr[0].type = 0 # float
else:
fpz_ptr[0].type = 1 # double
fpz_ptr[0].prec = precision
shape = list(data.shape)
if order == 'C':
shape.reverse()
# Dr. Lindstrom noted that fpzip expects czyx order with
# channels changing most slowly. We should probably change
# this up in v2 and write in the documentation what should
# go where.
fpz_ptr[0].nx = shape[0]
fpz_ptr[0].ny = shape[1]
fpz_ptr[0].nz = shape[2]
fpz_ptr[0].nf = shape[3]
if fpzip_write_header(fpz_ptr) == 0:
fpzip_write_close(fpz_ptr)
del compression_buf
raise FpzipWriteError("Cannot write header. %s" % FPZ_ERROR_STRINGS[fpzip_errno])
cdef float[:,:,:,:] arr_memviewf
cdef double[:,:,:,:] arr_memviewd
cdef size_t outbytes
cdef float[:] bufviewf
cdef double[:] bufviewd
if data.size == 0:
fpzip_write_close(fpz_ptr)
if data.dtype == np.float32:
bufviewf = compression_buf
bytes_out = bytearray(bufviewf[:header_bytes])
else:
bufviewd = compression_buf
bytes_out = bytearray(bufviewd[:header_bytes])
del compression_buf
return bytes(bytes_out)
if data.dtype == np.float32:
arr_memviewf = data
outbytes = fpzip_write(fpz_ptr, <void*>&arr_memviewf[0,0,0,0])
bufviewf = compression_buf
bytes_out = bytearray(bufviewf[:outbytes])[:outbytes]
else:
arr_memviewd = data
outbytes = fpzip_write(fpz_ptr, <void*>&arr_memviewd[0,0,0,0])
bufviewd = compression_buf
bytes_out = bytearray(bufviewd[:outbytes])[:outbytes]
del compression_buf
fpzip_write_close(fpz_ptr)
if outbytes == 0:
raise FpzipWriteError("Compression failed. %s" % FPZ_ERROR_STRINGS[fpzip_errno])
return bytes(bytes_out)
def decompress(bytes encoded, order='C'):
"""
fpzip.decompress(encoded, order='C')
Accepts an fpzip encoded bytestring (e.g. b'fpy)....') and
returns the original array as a 4d numpy array.
order is 'C' or 'F' (row major vs column major memory layout) and
should correspond to the byte order of the originally compressed
array.
"""
order = validate_order(order)
# line below necessary to convert from PyObject to a naked pointer
cdef unsigned char *encodedptr = <unsigned char*>encoded
cdef FPZ* fpz_ptr = fpzip_read_from_buffer(<void*>encodedptr)
if fpzip_read_header(fpz_ptr) == 0:
raise FpzipReadError("cannot read header: %s" % FPZ_ERROR_STRINGS[fpzip_errno])
cdef char fptype = b'f' if fpz_ptr[0].type == 0 else b'd'
nx, ny, nz, nf = fpz_ptr[0].nx, fpz_ptr[0].ny, fpz_ptr[0].nz, fpz_ptr[0].nf
cdef array.array buf = allocate(fptype, nx * ny * nz * nf)
cdef size_t read_bytes = 0;
if fptype == b'f':
read_bytes = fpzip_read(fpz_ptr, buf.data.as_floats)
else:
read_bytes = fpzip_read(fpz_ptr, buf.data.as_doubles)
if read_bytes == 0:
raise FpzipReadError("decompression failed: %s" % FPZ_ERROR_STRINGS[fpzip_errno])
fpzip_read_close(fpz_ptr)
dtype = np.float32 if fptype == b'f' else np.float64
if order == 'C':
return np.frombuffer(buf, dtype=dtype).reshape( (nf, nz, ny, nx), order='C')
elif order == 'F':
return np.frombuffer(buf, dtype=dtype).reshape( (nx, ny, nz, nf), order='F')
else:
raise ValueError(f"Undefined order parameter '{order}'. Options are 'C' or 'F'")