14 changes: 12 additions & 2 deletions src/borg/archiver/transfer_cmd.py
@@ -20,7 +20,16 @@


 def transfer_chunks(
-    upgrader, other_repository, other_manifest, other_chunks, archive, cache, recompress, dry_run, chunker_params=None
+    upgrader,
+    other_repository,
+    other_manifest,
+    other_chunks,
+    archive,
+    cache,
+    manifest,
+    recompress,
+    dry_run,
+    chunker_params=None,
 ):
     """
     Transfer chunks from another repository to the current repository.
@@ -41,7 +50,7 @@ def transfer_chunks(
         file = ChunkIteratorFileWrapper(chunk_iterator)

         # Create a chunker with the specified parameters
-        chunker = get_chunker(*chunker_params, key=archive.key, sparse=False)
+        chunker = get_chunker(*chunker_params, key=manifest.key, sparse=False)
         for chunk in chunker.chunkify(file):
             if not dry_run:
                 chunk_id, data = cached_hash(chunk, archive.key.id_hash)
@@ -226,6 +235,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=None
             other_chunks,
             archive,
             cache,
+            manifest,
             args.recompress,
             dry_run,
             args.chunker_params,
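Why this fixes the dry-run crash: get_chunker needs a key object, and with --dry-run no destination archive object is created, so archive is None and archive.key raised AttributeError (issue #9199). The manifest of an opened repository is always available, so manifest.key works on both paths. A minimal sketch of the failure mode, with simplified stand-in names rather than borg's real classes:

class _Key:
    """Stand-in for a borg repository key object."""

class _Manifest:
    key = _Key()

def chunker_key(archive, manifest):
    # Before the fix: `return archive.key` -- AttributeError when archive is
    # None on the --dry-run path, because no destination archive exists.
    # After the fix: the manifest always exists for an opened repository.
    return manifest.key

# dry-run path: archive is None, yet the key is still available
assert chunker_key(archive=None, manifest=_Manifest()) is _Manifest.key

Note that the archive.key.id_hash use further down the hunk is still safe: it sits behind `if not dry_run:`, where a real archive object exists.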
23 changes: 13 additions & 10 deletions src/borg/legacyremote.py
@@ -664,11 +664,12 @@ def __len__(self):
     def list(self, limit=None, marker=None):
         """actual remoting is done via self.call in the @api decorator"""

-    def get(self, id, read_data=True):
-        for resp in self.get_many([id], read_data=read_data):
+    def get(self, id, read_data=True, raise_missing=True):
+        for resp in self.get_many([id], read_data=read_data, raise_missing=raise_missing):
             return resp

-    def get_many(self, ids, read_data=True, is_preloaded=False):
+    def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True):
+        # note: legacy remote protocol does not support raise_missing parameter, so we ignore it here
         yield from self.call_many("get", [{"id": id, "read_data": read_data} for id in ids], is_preloaded=is_preloaded)

     @api(since=parse_version("1.0.0"))
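The new raise_missing flag is threaded through every get/get_many layer above (and silently ignored on the legacy protocol, which cannot express it). The repository-side semantics are not shown in this diff; the sketch below assumes the common contract that raise_missing=True raises on an absent id while False yields a sentinel (None here; what borg actually returns is not confirmed by this diff). Toy classes, not borg's API:

class MissingObject(KeyError):
    """Raised when a requested id is absent from the store."""

class ToyStore:
    def __init__(self, objects):
        self.objects = objects  # dict: id -> bytes

    def get_many(self, ids, raise_missing=True):
        for id in ids:
            if id in self.objects:
                yield self.objects[id]
            elif raise_missing:
                raise MissingObject(id)
            else:
                yield None  # assumption: a sentinel instead of an exception

store = ToyStore({b"a": b"data-a"})
assert list(store.get_many([b"a", b"b"], raise_missing=False)) == [b"data-a", None]
try:
    list(store.get_many([b"b"]))  # raise_missing defaults to True
except MissingObject:
    pass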
@@ -747,11 +748,11 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()

-    def get(self, key, read_data=True):
-        return next(self.get_many([key], read_data=read_data, cache=False))
+    def get(self, key, read_data=True, raise_missing=True):
+        return next(self.get_many([key], read_data=read_data, raise_missing=raise_missing, cache=False))

-    def get_many(self, keys, read_data=True, cache=True):
-        for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data)):
+    def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
+        for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data, raise_missing=raise_missing)):
             yield self.transform(key, data)

     def log_instrumentation(self):
@@ -856,10 +857,12 @@ def close(self):
         self.cache.clear()
         shutil.rmtree(self.basedir)

-    def get_many(self, keys, read_data=True, cache=True):
+    def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
         # It could use different cache keys depending on read_data and cache full vs. meta-only chunks.
         unknown_keys = [key for key in keys if self.prefixed_key(key, complete=read_data) not in self.cache]
-        repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data))
+        repository_iterator = zip(
+            unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data, raise_missing=raise_missing)
+        )
         for key in keys:
             pkey = self.prefixed_key(key, complete=read_data)
             if pkey in self.cache:
@@ -877,7 +880,7 @@ def get_many(self, keys, read_data=True, cache=True):
             else:
                 # slow path: eviction during this get_many removed this key from the cache
                 t0 = time.perf_counter()
-                data = self.repository.get(key, read_data=read_data)
+                data = self.repository.get(key, read_data=read_data, raise_missing=raise_missing)
                 self.slow_lat += time.perf_counter() - t0
                 transformed = self.add_entry(key, data, cache, complete=read_data)
                 self.slow_misses += 1
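The last two hunks touch the decorated cache's fast/slow paths: get_many batch-fetches every key that was uncached when the call started, and if entries added during the same call evict a cached key before its turn comes, that key is re-fetched individually and counted as a slow miss. A self-contained sketch of that shape, using toy classes rather than borg's cache:

from collections import OrderedDict

class ToyStore:
    def __init__(self, objects):
        self.objects = objects  # dict: key -> value

    def get(self, key):
        return self.objects[key]

    def get_many(self, keys):
        return [self.objects[k] for k in keys]

class ToyCache:
    """Bounded cache over ToyStore; mirrors the batch-then-slow-path shape."""

    def __init__(self, store, maxlen=2):
        self.store, self.cache, self.maxlen = store, OrderedDict(), maxlen
        self.slow_misses = 0

    def _add(self, key, value):
        self.cache[key] = value
        while len(self.cache) > self.maxlen:
            self.cache.popitem(last=False)  # evict the oldest entry
        return value

    def get_many(self, keys):
        # batch-fetch everything that is unknown right now ...
        unknown = [k for k in keys if k not in self.cache]
        fetched = dict(zip(unknown, self.store.get_many(unknown)))
        for key in keys:
            if key in self.cache:
                yield self.cache[key]               # fast path: still cached
            elif key in fetched:
                yield self._add(key, fetched[key])  # from the batch fetch
            else:
                # slow path: this key was cached when `unknown` was computed,
                # but additions above evicted it in the meantime -- refetch.
                self.slow_misses += 1
                yield self._add(key, self.store.get(key))

Usage, forcing the slow path via eviction:

store = ToyStore({k: k.upper() for k in "abcd"})
c = ToyCache(store, maxlen=2)
list(c.get_many(["a"]))                  # caches "a"
out = list(c.get_many(["b", "c", "a"]))  # adding b, c evicts a -> slow path
assert out == ["B", "C", "A"] and c.slow_misses == 1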
27 changes: 27 additions & 0 deletions src/borg/testsuite/archiver/transfer_cmd_test.py
@@ -473,6 +473,33 @@ def test_transfer_rechunk(archivers, request, monkeypatch):
             assert dest_hash == source_file_hashes[item.path], f"Content hash mismatch for {item.path}"


+def test_transfer_rechunk_dry_run(archivers, request, monkeypatch):
+    """Ensure --dry-run works together with --chunker-params (re-chunking path).
+
+    This specifically guards against regressions like AttributeError when archive is None
+    during dry-run (see issue #9199).
+    """
+    archiver = request.getfixturevalue(archivers)
+
+    BLKSIZE = 512
+    source_chunker_params = "buzhash,19,23,21,4095"  # default-ish buzhash parameters
+    dest_chunker_params = f"fixed,{BLKSIZE}"  # simple deterministic chunking
+
+    # Prepare source repo and create one archive
+    with setup_repos(archiver, monkeypatch) as other_repo1:
+        contents = random.randbytes(8 * BLKSIZE)
+        create_regular_file(archiver.input_path, "file.bin", contents=contents)
+        cmd(archiver, "create", f"--chunker-params={source_chunker_params}", "arch", "input")
+
+    # Now we are in the destination repo (setup_repos switched us on context exit).
+    # Run transfer in dry-run mode with re-chunking. This must not crash.
+    cmd(archiver, "transfer", other_repo1, "--dry-run", f"--chunker-params={dest_chunker_params}")
+
+    # Dry-run must not have created archives in the destination repo.
+    listing = cmd(archiver, "repo-list")
+    assert "arch" not in listing


 def test_issue_9022(archivers, request, monkeypatch):
     """
     Regression test for borgbackup/borg#9022: After "borg transfer --from-borg1",
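As a sanity check on the parameters the new test picks: "fixed,512" requests fixed-size chunking, so the 8 * 512-byte input file should re-chunk into exactly eight 512-byte chunks regardless of how the buzhash source chunker split it originally. A plain-Python sketch of that arithmetic (not borg's get_chunker):

import random

def fixed_chunks(data, blksize):
    # mimic a "fixed,<blksize>" chunker: cut the input into blksize-sized pieces
    return [data[i:i + blksize] for i in range(0, len(data), blksize)]

BLKSIZE = 512
blob = random.randbytes(8 * BLKSIZE)  # same shape as the test's file.bin
chunks = fixed_chunks(blob, BLKSIZE)
assert len(chunks) == 8 and all(len(c) == BLKSIZE for c in chunks)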