diff --git a/src/borg/chunkers/reader.pyx b/src/borg/chunkers/reader.pyx index 26258914b9..9169ed3262 100644 --- a/src/borg/chunkers/reader.pyx +++ b/src/borg/chunkers/reader.pyx @@ -137,8 +137,11 @@ class FileFMAPReader: if self.try_sparse: try: fmap = list(sparsemap(self.fd, self.fh)) - except OSError as err: - # seeking did not work + except (OSError, ValueError) as err: + # Building a sparse map failed: + # - OSError: low-level lseek with SEEK_HOLE/SEEK_DATA not supported by FS/OS. + # - ValueError: high-level file objects (e.g. io.BytesIO or some fd wrappers) + # don't accept SEEK_HOLE/SEEK_DATA as a valid "whence" and raise ValueError. pass if fmap is None: @@ -170,6 +173,9 @@ class FileFMAPReader: # read block from the range data = dread(offset, wanted, self.fd, self.fh) got = len(data) + # Detect zero-filled blocks regardless of sparse mode. + # Zero detection is important to avoid storing allocated zeros (already read above) + # even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA. if zeros.startswith(data): data = None allocation = CH_ALLOC diff --git a/src/borg/testsuite/chunkers/__init__.py b/src/borg/testsuite/chunkers/__init__.py index 2e01e98e33..ce9ff11e21 100644 --- a/src/borg/testsuite/chunkers/__init__.py +++ b/src/borg/testsuite/chunkers/__init__.py @@ -77,14 +77,38 @@ def fs_supports_sparse(): BS = 4096 # filesystem block size # Some sparse files. X = content blocks, _ = sparse blocks. +# Block size must always be BS. 
+ # X__XXX____ -map_sparse1 = [(0 * BS, 1 * BS, True), (1 * BS, 2 * BS, False), (3 * BS, 3 * BS, True), (6 * BS, 4 * BS, False)] +map_sparse1 = [ + (0, BS, True), + (1 * BS, BS, False), + (2 * BS, BS, False), + (3 * BS, BS, True), + (4 * BS, BS, True), + (5 * BS, BS, True), + (6 * BS, BS, False), + (7 * BS, BS, False), + (8 * BS, BS, False), + (9 * BS, BS, False), +] # _XX___XXXX -map_sparse2 = [(0 * BS, 1 * BS, False), (1 * BS, 2 * BS, True), (3 * BS, 3 * BS, False), (6 * BS, 4 * BS, True)] +map_sparse2 = [ + (0, BS, False), + (1 * BS, BS, True), + (2 * BS, BS, True), + (3 * BS, BS, False), + (4 * BS, BS, False), + (5 * BS, BS, False), + (6 * BS, BS, True), + (7 * BS, BS, True), + (8 * BS, BS, True), + (9 * BS, BS, True), +] # XXX -map_notsparse = [(0 * BS, 3 * BS, True)] +map_notsparse = [(0, BS, True), (BS, BS, True), (2 * BS, BS, True)] # ___ -map_onlysparse = [(0 * BS, 3 * BS, False)] +map_onlysparse = [(0, BS, False), (BS, BS, False), (2 * BS, BS, False)] diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py index b8598a9266..a06d6bdb14 100644 --- a/src/borg/testsuite/chunkers/fixed_test.py +++ b/src/borg/testsuite/chunkers/fixed_test.py @@ -34,13 +34,15 @@ ) def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): def get_chunks(fname, sparse, header_size): - chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse) + chunker = ChunkerFixed(BS, header_size=header_size, sparse=sparse) with open(fname, "rb") as fd: return cf(chunker.chunkify(fd)) + # this only works if the sparse map blocks are the same size as the fixed chunker blocks fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) - get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size) + expected_content = make_content(sparse_map, header_size=header_size) + assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected_content 
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")