Skip to content

Commit 94cf4c8

Browse files
rmmancommorotti
authored andcommitted
pythongh-91349: Expose the crc32 function from the lzma library
1 parent 3b93979 commit 94cf4c8

5 files changed

Lines changed: 143 additions & 2 deletions

File tree

Doc/library/lzma.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,23 @@ Compressing and decompressing data in memory
310310
*preset* and *filters* arguments.
311311

312312

313+
.. function:: crc32(data, value=0)
314+
315+
.. index::
316+
single: Cyclic Redundancy Check
317+
single: checksum; Cyclic Redundancy Check
318+
319+
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
320+
result is a non-negative integer, less than :math:`2^{32}`. If *value* is present, it is used
321+
as the starting value of the checksum; otherwise, a default value of 0
322+
is used. Passing in *value* allows computing a running checksum over the
323+
concatenation of several inputs. The algorithm is not cryptographically
324+
strong, and should not be used for authentication or digital signatures. Since
325+
the algorithm is designed for use as a checksum algorithm, it is not suitable
326+
for use as a general hash algorithm.
327+
328+
.. versionadded:: next
329+
313330
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
314331

315332
Decompress *data* (a :class:`bytes` object), returning the uncompressed data

Lib/test/test_lzma.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import array
2+
import binascii
23
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
34
import os
45
import pickle
@@ -8,7 +9,7 @@
89
import unittest
910
from compression._common import _streams
1011

11-
from test.support import _4G, bigmemtest
12+
from test.support import _1G, _4G, bigmemtest
1213
from test.support.import_helper import import_module
1314
from test.support.os_helper import (
1415
TESTFN, unlink, FakePath
@@ -17,6 +18,44 @@
1718
lzma = import_module("lzma")
1819
from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile
1920

21+
class ChecksumTestCase(unittest.TestCase):
    """Checks for lzma.crc32() on small, in-memory inputs."""

    def test_crc32start(self):
        # Omitting *value* is the same as passing 0 explicitly.
        self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0))
        self.assertTrue(lzma.crc32(b"abc", 0xffffffff))
        # Truthiness alone would accept any non-zero result, so also pin
        # the value against binascii's CRC-32 for the same starting value.
        self.assertEqual(lzma.crc32(b"abc", 0xffffffff),
                         binascii.crc32(b"abc", 0xffffffff))

    def test_crc32empty(self):
        # With no data, the starting value comes back unchanged.
        self.assertEqual(lzma.crc32(b"", 0), 0)
        self.assertEqual(lzma.crc32(b"", 1), 1)
        self.assertEqual(lzma.crc32(b"", 432), 432)

    def test_penguins(self):
        self.assertEqual(lzma.crc32(b"penguin", 0), 0xe5c1a120)
        self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94)
        self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0))

    def test_running_checksum(self):
        # Feeding a previous result back in as *value* must yield the
        # checksum of the concatenated input.
        whole = lzma.crc32(b"penguin")
        self.assertEqual(lzma.crc32(b"guin", lzma.crc32(b"pen")), whole)
        self.assertEqual(lzma.crc32(b"", whole), whole)

    def test_crc32_unsigned(self):
        foo = b'abcdefghijklmnop'
        # The result must be unsigned: this value is above 2**31 - 1 and
        # would be negative if returned as a signed 32-bit integer.
        self.assertEqual(lzma.crc32(foo), 2486878355)
        self.assertEqual(lzma.crc32(b'spam'), 1138425661)

    def test_same_as_binascii_crc32(self):
        # lzma.crc32() and binascii.crc32() must agree on the same input.
        foo = b'abcdefghijklmnop'
        crc = 2486878355
        self.assertEqual(binascii.crc32(foo), crc)
        self.assertEqual(lzma.crc32(foo), crc)
        self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam'))
49+
50+
51+
# GH-54485 - check that inputs >=4 GiB are handled correctly.
52+
class ChecksumBigBufferTestCase(unittest.TestCase):
    """Checks lzma.crc32() on a buffer slightly larger than 4 GiB."""

    @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
    def test_big_buffer(self, size):
        # Four bytes repeated (_1G + 1) times gives _4G + 4 bytes, the
        # size requested from bigmemtest above.
        payload = b"nyan" * (_1G + 1)
        checksum = lzma.crc32(payload)
        self.assertEqual(checksum, 1044521549)
58+
2059

2160
class CompressorDecompressorTestCase(unittest.TestCase):
2261

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Expose the crc32 function from the lzma library as :func:`lzma.crc32`.

Modules/_lzmamodule.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,10 +1587,40 @@ lzma_exec(PyObject *module)
15871587
return 0;
15881588
}
15891589

1590+
/*[clinic input]
1591+
_lzma.crc32 -> unsigned_int
1592+
1593+
data: Py_buffer
1594+
value: unsigned_int(bitwise=True) = 0
1595+
Starting value of the checksum.
1596+
/
1597+
1598+
Compute a CRC-32 checksum of data.
1599+
1600+
The returned checksum is an integer.
1601+
[clinic start generated code]*/
1602+
1603+
static unsigned int
1604+
_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
1605+
/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/
1606+
{
1607+
/* Releasing the GIL for very small buffers is inefficient
1608+
and may lower performance */
1609+
if (data->len > 1024*5) {
1610+
Py_BEGIN_ALLOW_THREADS
1611+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1612+
Py_END_ALLOW_THREADS
1613+
} else {
1614+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1615+
}
1616+
return value;
1617+
}
1618+
15901619
static PyMethodDef lzma_methods[] = {
15911620
_LZMA_IS_CHECK_SUPPORTED_METHODDEF
15921621
_LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
15931622
_LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1623+
_LZMA_CRC32_METHODDEF
15941624
{NULL}
15951625
};
15961626

Modules/clinic/_lzmamodule.c.h

Lines changed: 55 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)