diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py
index 1ebc496cc4..f27e19babd 100644
--- a/src/borg/archiver/transfer_cmd.py
+++ b/src/borg/archiver/transfer_cmd.py
@@ -20,7 +20,16 @@
 
 
 def transfer_chunks(
-    upgrader, other_repository, other_manifest, other_chunks, archive, cache, recompress, dry_run, chunker_params=None
+    upgrader,
+    other_repository,
+    other_manifest,
+    other_chunks,
+    archive,
+    cache,
+    manifest,
+    recompress,
+    dry_run,
+    chunker_params=None,
 ):
     """
     Transfer chunks from another repository to the current repository.
@@ -41,7 +50,7 @@ def transfer_chunks(
         file = ChunkIteratorFileWrapper(chunk_iterator)
 
         # Create a chunker with the specified parameters
-        chunker = get_chunker(*chunker_params, key=archive.key, sparse=False)
+        chunker = get_chunker(*chunker_params, key=manifest.key, sparse=False)
         for chunk in chunker.chunkify(file):
             if not dry_run:
                 chunk_id, data = cached_hash(chunk, archive.key.id_hash)
@@ -226,6 +235,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non
                        other_chunks,
                        archive,
                        cache,
+                        manifest,
                        args.recompress,
                        dry_run,
                        args.chunker_params,
diff --git a/src/borg/legacyremote.py b/src/borg/legacyremote.py
index 10b5ed7405..cedeb375a2 100644
--- a/src/borg/legacyremote.py
+++ b/src/borg/legacyremote.py
@@ -664,11 +664,12 @@ def __len__(self):
     def list(self, limit=None, marker=None):
         """actual remoting is done via self.call in the @api decorator"""
 
-    def get(self, id, read_data=True):
-        for resp in self.get_many([id], read_data=read_data):
+    def get(self, id, read_data=True, raise_missing=True):
+        for resp in self.get_many([id], read_data=read_data, raise_missing=raise_missing):
             return resp
 
-    def get_many(self, ids, read_data=True, is_preloaded=False):
+    def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True):
+        # note: legacy remote protocol does not support raise_missing parameter, so we ignore it here
         yield from self.call_many("get", [{"id": id, "read_data": read_data} for id in ids], is_preloaded=is_preloaded)
 
     @api(since=parse_version("1.0.0"))
@@ -747,11 +748,11 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
-    def get(self, key, read_data=True):
-        return next(self.get_many([key], read_data=read_data, cache=False))
+    def get(self, key, read_data=True, raise_missing=True):
+        return next(self.get_many([key], read_data=read_data, raise_missing=raise_missing, cache=False))
 
-    def get_many(self, keys, read_data=True, cache=True):
-        for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data)):
+    def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
+        for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data, raise_missing=raise_missing)):
             yield self.transform(key, data)
 
     def log_instrumentation(self):
@@ -856,10 +857,12 @@ def close(self):
         self.cache.clear()
         shutil.rmtree(self.basedir)
 
-    def get_many(self, keys, read_data=True, cache=True):
+    def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
         # It could use different cache keys depending on read_data and cache full vs. meta-only chunks.
         unknown_keys = [key for key in keys if self.prefixed_key(key, complete=read_data) not in self.cache]
-        repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data))
+        repository_iterator = zip(
+            unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data, raise_missing=raise_missing)
+        )
         for key in keys:
             pkey = self.prefixed_key(key, complete=read_data)
             if pkey in self.cache:
@@ -877,7 +880,7 @@ def get_many(self, keys, read_data=True, cache=True):
                 else:
                     # slow path: eviction during this get_many removed this key from the cache
                     t0 = time.perf_counter()
-                    data = self.repository.get(key, read_data=read_data)
+                    data = self.repository.get(key, read_data=read_data, raise_missing=raise_missing)
                     self.slow_lat += time.perf_counter() - t0
                     transformed = self.add_entry(key, data, cache, complete=read_data)
                     self.slow_misses += 1
diff --git a/src/borg/testsuite/archiver/transfer_cmd_test.py b/src/borg/testsuite/archiver/transfer_cmd_test.py
index fe3d4be546..68a29728a8 100644
--- a/src/borg/testsuite/archiver/transfer_cmd_test.py
+++ b/src/borg/testsuite/archiver/transfer_cmd_test.py
@@ -473,6 +473,33 @@ def test_transfer_rechunk(archivers, request, monkeypatch):
         assert dest_hash == source_file_hashes[item.path], f"Content hash mismatch for {item.path}"
 
 
+def test_transfer_rechunk_dry_run(archivers, request, monkeypatch):
+    """Ensure --dry-run works together with --chunker-params (re-chunking path).
+
+    This specifically guards against regressions like AttributeError when archive is None
+    during dry-run (see issue #9199).
+    """
+    archiver = request.getfixturevalue(archivers)
+
+    BLKSIZE = 512
+    source_chunker_params = "buzhash,19,23,21,4095"  # default-ish buzhash parameters
+    dest_chunker_params = f"fixed,{BLKSIZE}"  # simple deterministic chunking
+
+    # Prepare source repo and create one archive
+    with setup_repos(archiver, monkeypatch) as other_repo1:
+        contents = random.randbytes(8 * BLKSIZE)
+        create_regular_file(archiver.input_path, "file.bin", contents=contents)
+        cmd(archiver, "create", f"--chunker-params={source_chunker_params}", "arch", "input")
+
+    # Now we are in the destination repo (setup_repos switched us on context exit).
+    # Run transfer in dry-run mode with re-chunking. This must not crash.
+    cmd(archiver, "transfer", other_repo1, "--dry-run", f"--chunker-params={dest_chunker_params}")
+
+    # Dry-run must not have created archives in the destination repo.
+    listing = cmd(archiver, "repo-list")
+    assert "arch" not in listing
+
+
 def test_issue_9022(archivers, request, monkeypatch):
     """
     Regression test for borgbackup/borg#9022: After "borg transfer --from-borg1",