Skip to content

Commit 43e7793

Browse files
Fix ChunkerFixed sparse handling and update tests
- Fix FileFMAPReader to respect `sparse=False` (disable zero detection).
- Update fixed_test.py expectations for non-sparse chunking.
- Enable `sparse=True` in interaction_test.py and reader_test.py where zero detection is required.
- Catch `ValueError` (in addition to `OSError`) in _build_fmap to support `BytesIO` seeking.
1 parent 1ede79b commit 43e7793

File tree

4 files changed

+40
-6
lines changed

4 files changed

+40
-6
lines changed

src/borg/chunkers/reader.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class FileFMAPReader:
126126
assert 0 < read_size <= len(zeros)
127127
self.read_size = read_size # how much data we want to read at once
128128
self.reading_time = 0.0 # time spent in reading/seeking
129+
self.sparse = sparse
129130
# should borg try to do sparse input processing?
130131
# whether it actually can be done depends on the input file being seekable.
131132
self.try_sparse = sparse and has_seek_hole
@@ -137,7 +138,7 @@ class FileFMAPReader:
137138
if self.try_sparse:
138139
try:
139140
fmap = list(sparsemap(self.fd, self.fh))
140-
except OSError as err:
141+
except (OSError, ValueError) as err:
141142
# seeking did not work
142143
pass
143144

@@ -170,7 +171,7 @@ class FileFMAPReader:
170171
# read block from the range
171172
data = dread(offset, wanted, self.fd, self.fh)
172173
got = len(data)
173-
if zeros.startswith(data):
174+
if self.sparse and zeros.startswith(data):
174175
data = None
175176
allocation = CH_ALLOC
176177
else:

src/borg/testsuite/chunkers/fixed_test.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,40 @@ def get_chunks(fname, sparse, header_size):
4040

4141
fn = str(tmpdir / fname)
4242
make_sparsefile(fn, sparse_map, header_size=header_size)
43-
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
43+
expected_content = make_content(sparse_map, header_size=header_size)
44+
45+
# ChunkerFixed splits everything into fixed-size chunks (except maybe the header)
46+
# We need to split the expected content similarly.
47+
expected = []
48+
49+
# Handle header if present (it's the first item if header_size > 0)
50+
if header_size > 0:
51+
header = expected_content.pop(0)
52+
expected.append(header)
53+
54+
# Flatten the rest and split into 4096 chunks
55+
current_chunk_size = 4096
56+
for item in expected_content:
57+
if isinstance(item, int):
58+
# Hole
59+
count = item
60+
while count > 0:
61+
size = min(count, current_chunk_size)
62+
expected.append(size)
63+
count -= size
64+
else:
65+
# Data
66+
data = item
67+
while len(data) > 0:
68+
size = min(len(data), current_chunk_size)
69+
expected.append(data[:size])
70+
data = data[size:]
71+
72+
if not sparse:
73+
# if the chunker is not sparse-aware, it will read holes as zeros
74+
expected = [b"\0" * x if isinstance(x, int) else x for x in expected]
75+
76+
assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected
4477

4578

4679
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")

src/borg/testsuite/chunkers/interaction_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_reader_chunker_interaction(chunker_params):
2929
random_data = os.urandom(data_size // 3) + b"\0" * (data_size // 3) + os.urandom(data_size // 3)
3030

3131
# Chunk the data
32-
chunker = get_chunker(*chunker_params)
32+
chunker = get_chunker(*chunker_params, sparse=True)
3333
data_file = BytesIO(random_data)
3434
chunks = list(chunker.chunkify(data_file))
3535

src/borg/testsuite/chunkers/reader_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def blockify(self):
170170
)
171171
def test_filefmapreader_basic(file_content, read_size, expected_chunks):
172172
"""Test basic functionality of FileFMAPReader with different file contents."""
173-
reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None)
173+
reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None)
174174

175175
# Collect all chunks from blockify
176176
chunks = list(reader.blockify())
@@ -252,7 +252,7 @@ def test_filefmapreader_allocation_types(zeros_length, read_size, expected_alloc
252252
# Create a file with all zeros
253253
file_content = b"\0" * zeros_length
254254

255-
reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None)
255+
reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None)
256256

257257
# Collect all chunks from blockify
258258
chunks = list(reader.blockify())

0 commit comments

Comments
 (0)