-
Notifications
You must be signed in to change notification settings - Fork 21
Option to retry writes if on-disk checksums don't match #58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
abe70ae
5063632
14de9ec
6dd5108
996b433
653607f
9977b02
d6f7269
70c9fc5
fe1b5b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,16 +213,21 @@ class BmapCopy(object): | |
| instance. | ||
| """ | ||
|
|
||
| def __init__(self, image, dest, bmap=None, image_size=None): | ||
| def __init__(self, image, dest, bmap=None, image_size=None, checksum_retry=None): | ||
| """ | ||
| The class constructor. The parameters are: | ||
| image - file-like object of the image which should be copied, | ||
| should only support 'read()' and 'seek()' methods, | ||
| and only seeking forward has to be supported. | ||
| dest - file object of the destination file to copy the image | ||
| to. | ||
| bmap - file object of the bmap file to use for copying. | ||
| image_size - size of the image in bytes. | ||
| image - file-like object of the image which should be copied, | ||
| should only support 'read()' and 'seek()' methods, | ||
| and only seeking forward has to be supported. | ||
| dest - file object of the destination file to copy the image | ||
| to. | ||
| bmap - file object of the bmap file to use for copying. | ||
| image_size - size of the image in bytes. | ||
| checksum_retry - number of retries for verifying written blocks. | ||
| If set to a value > 0, each written block will be read | ||
| back and its checksum verified against the expected value | ||
| from the bmap file. If a mismatch is detected, the block | ||
| will be rewritten up to checksum_retry times. | ||
| """ | ||
|
|
||
| self._xml = None | ||
|
|
@@ -270,6 +275,9 @@ def __init__(self, image, dest, bmap=None, image_size=None): | |
| self._cs_attrib_name = None | ||
| self._bmap_cs_attrib_name = None | ||
|
|
||
| # Checksum retry configuration for post-write verification | ||
| self._checksum_retry = checksum_retry | ||
|
|
||
| # Special quirk for /dev/null which does not support fsync() | ||
| if ( | ||
| stat.S_ISCHR(st_data.st_mode) | ||
|
|
@@ -615,10 +623,12 @@ def _get_batches(self, first, last): | |
| def _get_data(self, verify): | ||
| """ | ||
| This is generator which reads the image file in '_batch_blocks' chunks | ||
| and yields ('type', 'start', 'end', 'buf) tuples, where: | ||
| and yields ('type', 'start', 'end', 'buf', 'range_first', 'range_last', 'range_chksum') | ||
| tuples, where: | ||
| * 'start' is the starting block number of the batch; | ||
| * 'end' is the last block of the batch; | ||
| * 'buf' a buffer containing the batch data. | ||
| * 'buf' a buffer containing the batch data; | ||
| * 'range_first', 'range_last', 'range_chksum' are the bmap range info for post-write verification. | ||
| """ | ||
|
|
||
| _log.debug("the reader thread has started") | ||
|
|
@@ -655,7 +665,7 @@ def _get_data(self, verify): | |
| % (blocks, self._batch_queue.qsize()) | ||
| ) | ||
|
|
||
| self._batch_queue.put(("range", start, start + blocks - 1, buf)) | ||
| self._batch_queue.put(("range", start, start + blocks - 1, buf, first, last, chksum)) | ||
|
|
||
| if verify and chksum and hash_obj.hexdigest() != chksum: | ||
| raise Error( | ||
|
|
@@ -673,6 +683,125 @@ def _get_data(self, verify): | |
|
|
||
| self._batch_queue.put(None) | ||
|
|
||
| def _verify_written_blocks(self, start, end, expected_chksum): | ||
| """ | ||
| Verify the checksum of blocks that were written to the destination file. | ||
| Returns True if the checksum matches, False otherwise. | ||
|
|
||
| Args: | ||
| start - starting block number | ||
| end - ending block number | ||
| expected_chksum - expected checksum string | ||
|
|
||
| Returns: | ||
| True if checksum matches, False if mismatch | ||
| """ | ||
| if not self._cs_type or not expected_chksum: | ||
| return True | ||
|
|
||
| try: | ||
| self._f_dest.seek(start * self.block_size) | ||
| buf = self._f_dest.read((end - start + 1) * self.block_size) | ||
|
|
||
| if not buf: | ||
alchark marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return False | ||
|
|
||
alchark marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| hash_obj = hashlib.new(self._cs_type) | ||
| hash_obj.update(buf) | ||
|
|
||
| calculated = hash_obj.hexdigest() | ||
| return calculated == expected_chksum | ||
| except IOError as err: | ||
| _log.error( | ||
| "error while verifying blocks %d-%d of '%s': %s", | ||
| start, end, self._dest_path, err | ||
| ) | ||
| return False | ||
|
|
||
| def _drop_cached_blocks(self, start, end): | ||
| """ | ||
| Best-effort invalidation of destination page cache for block range. | ||
| This helps make checksum verification read data back from storage | ||
| rather than returning cached pages. | ||
| """ | ||
|
|
||
| if not hasattr(os, "posix_fadvise") or not hasattr( | ||
| os, "POSIX_FADV_DONTNEED" | ||
| ): | ||
| return | ||
|
|
||
| offset = start * self.block_size | ||
| length = (end - start + 1) * self.block_size | ||
|
|
||
| try: | ||
| os.posix_fadvise( | ||
| self._f_dest.fileno(), offset, length, os.POSIX_FADV_DONTNEED | ||
| ) | ||
| except OSError as err: | ||
| _log.debug( | ||
| "cannot drop page cache for blocks %d-%d of '%s': %s", | ||
| start, | ||
| end, | ||
| self._dest_path, | ||
| err, | ||
| ) | ||
|
|
||
| def _verify_range_with_retry(self, range_first, range_last, range_chksum, range_buffers): | ||
| """ | ||
| Verify a block range's checksum after writing. If verification fails, | ||
| retry writing the range up to self._checksum_retry times. | ||
|
|
||
| Args: | ||
| range_first - first block of the range | ||
| range_last - last block of the range | ||
| range_chksum - expected checksum for the range | ||
| range_buffers - dict of {(start, end): buf} for blocks in this range | ||
| """ | ||
| if not range_chksum or not self._checksum_retry: | ||
| return | ||
|
|
||
| retry_count = 0 | ||
| retry_limit = int(self._checksum_retry) | ||
|
|
||
| while True: | ||
| # Sync to disk before reading back for verification | ||
| # This ensures we verify data that has actually reached the physical | ||
| # disk, not just what's in the kernel's page cache | ||
| self.sync() | ||
| self._drop_cached_blocks(range_first, range_last) | ||
|
|
||
| # Verify the entire range | ||
| if self._verify_written_blocks(range_first, range_last, range_chksum): | ||
| _log.debug( | ||
| "checksum verification passed for blocks %d-%d" | ||
| % (range_first, range_last) | ||
| ) | ||
| return | ||
|
|
||
| # Checksum mismatch - retry | ||
| retry_count += 1 | ||
| if retry_count > retry_limit: | ||
| raise Error( | ||
| "checksum verification failed for blocks %d-%d after " | ||
| "%d retries" % (range_first, range_last, retry_limit) | ||
| ) | ||
|
|
||
| _log.warning( | ||
| "checksum mismatch for blocks %d-%d, retrying " | ||
| "(attempt %d/%d)" % (range_first, range_last, retry_count, retry_limit) | ||
| ) | ||
|
|
||
| # Re-write all blocks in this range | ||
| for (start, end), buf in range_buffers.items(): | ||
| try: | ||
| self._f_dest.seek(start * self.block_size) | ||
| self._f_dest.write(buf) | ||
| except IOError as err: | ||
| raise Error( | ||
| "error while writing blocks %d-%d of '%s': %s" | ||
| % (start, end, self._dest_path, err) | ||
| ) | ||
|
|
||
| def copy(self, sync=True, verify=True): | ||
| """ | ||
| Copy the image to the destination file using bmap. The 'sync' argument | ||
|
|
@@ -704,22 +833,48 @@ def copy(self, sync=True, verify=True): | |
|
|
||
| # Read the image in '_batch_blocks' chunks and write them to the | ||
| # destination file | ||
| range_buffers = {} # Track buffers for each range to enable retry | ||
| current_range = None | ||
| current_range_blocks = set() # Track which blocks we've written for this range | ||
|
|
||
alchark marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| while True: | ||
| batch = self._batch_queue.get() | ||
| if batch is None: | ||
| # No more data, the image is written | ||
| # Verify any remaining range | ||
| if self._checksum_retry and current_range: | ||
| range_first, range_last, range_chksum = current_range | ||
| if range_chksum and self._cs_type: | ||
| self._verify_range_with_retry( | ||
| range_first, range_last, range_chksum, range_buffers | ||
| ) | ||
| break | ||
| elif batch[0] == "error": | ||
| # The reader thread encountered an error and passed us the | ||
| # exception. | ||
| exc_info = batch[1] | ||
| raise exc_info[1] | ||
|
|
||
| (start, end, buf) = batch[1:4] | ||
| (start, end, buf, range_first, range_last, range_chksum) = batch[1:7] | ||
|
|
||
| assert len(buf) <= (end - start + 1) * self.block_size | ||
| assert len(buf) > (end - start) * self.block_size | ||
|
|
||
| # Check if we've moved to a new range - if so, verify the previous one | ||
| new_range = (range_first, range_last, range_chksum) | ||
| if self._checksum_retry and current_range and new_range != current_range: | ||
| # Verify the completed range before moving to the next one | ||
| prev_first, prev_last, prev_chksum = current_range | ||
| if prev_chksum and self._cs_type: | ||
| self._verify_range_with_retry( | ||
| prev_first, prev_last, prev_chksum, range_buffers | ||
| ) | ||
|
||
| # Reset for new range | ||
| range_buffers = {} | ||
| current_range_blocks = set() | ||
|
|
||
| current_range = new_range | ||
|
|
||
| self._f_dest.seek(start * self.block_size) | ||
|
|
||
| # Synchronize the destination file if we reached the watermark | ||
|
|
@@ -740,6 +895,11 @@ def copy(self, sync=True, verify=True): | |
| blocks_written += end - start + 1 | ||
| bytes_written += len(buf) | ||
|
|
||
| # Track buffers for this range (for retry if needed) | ||
| if self._checksum_retry: | ||
| range_buffers[(start, end)] = buf | ||
| current_range_blocks.add((start, end)) | ||
|
|
||
| self._update_progress(blocks_written) | ||
|
|
||
| if not self.image_size: | ||
|
|
@@ -785,7 +945,12 @@ def sync(self): | |
|
|
||
| if self._dest_supports_fsync: | ||
| try: | ||
| os.fsync(self._f_dest.fileno()), | ||
| self._f_dest.flush() | ||
| except IOError as err: | ||
| raise Error("cannot flush '%s': %s" % (self._dest_path, err)) | ||
|
|
||
| try: | ||
| os.fsync(self._f_dest.fileno()) | ||
| except OSError as err: | ||
| raise Error( | ||
| "cannot synchronize '%s': %s " % (self._dest_path, err.strerror) | ||
|
|
@@ -800,14 +965,14 @@ class BmapBdevCopy(BmapCopy): | |
| scheduler. | ||
| """ | ||
|
|
||
| def __init__(self, image, dest, bmap=None, image_size=None): | ||
| def __init__(self, image, dest, bmap=None, image_size=None, checksum_retry=None): | ||
| """ | ||
| The same as the constructor of the 'BmapCopy' base class, but adds | ||
| useful guard-checks specific to block devices. | ||
| """ | ||
|
|
||
| # Call the base class constructor first | ||
| BmapCopy.__init__(self, image, dest, bmap, image_size) | ||
| BmapCopy.__init__(self, image, dest, bmap, image_size, checksum_retry) | ||
|
|
||
| self._dest_fsync_watermark = (6 * 1024 * 1024) // self.block_size | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -104,7 +104,7 @@ def __getattr__(self, name): | |
| return getattr(self._file_obj, name) | ||
|
|
||
|
|
||
| def open_block_device(path): | ||
| def open_block_device(path, need_read_access=False): | ||
| """ | ||
| This is a helper function for 'open_files()' which is called if the | ||
| destination file of the "copy" command is a block device. We handle block | ||
|
|
@@ -115,17 +115,25 @@ def open_block_device(path): | |
| that we are the only users of the block device. | ||
|
|
||
| This function opens a block device specified by 'path' in exclusive mode. | ||
| The 'need_read_access' parameter controls whether the device is opened | ||
| with read+write (True) or write-only (False) permissions. | ||
| Returns opened file object. | ||
| """ | ||
|
|
||
| try: | ||
| descriptor = os.open(path, os.O_WRONLY | os.O_EXCL) | ||
| if need_read_access: | ||
| descriptor = os.open(path, os.O_RDWR | os.O_EXCL) | ||
| else: | ||
| descriptor = os.open(path, os.O_WRONLY | os.O_EXCL) | ||
| except OSError as err: | ||
| error_out("cannot open block device '%s' in exclusive mode: %s", path, err) | ||
|
|
||
| # Turn the block device file descriptor into a file object | ||
| try: | ||
| file_obj = os.fdopen(descriptor, "wb") | ||
| if need_read_access: | ||
| file_obj = os.fdopen(descriptor, "r+b") | ||
| else: | ||
| file_obj = os.fdopen(descriptor, "wb") | ||
| except OSError as err: | ||
| os.close(descriptor) | ||
| error_out("cannot open block device '%s':\n%s", path, err) | ||
|
|
@@ -586,7 +594,9 @@ def open_files(args): | |
| try: | ||
| if pathlib.Path(args.dest).is_block_device(): | ||
| dest_is_blkdev = True | ||
| dest_obj = open_block_device(args.dest) | ||
| # Request read access only if checksum-retry is enabled | ||
| need_read = bool(args.checksum_retry) | ||
| dest_obj = open_block_device(args.dest, need_read_access=need_read) | ||
| else: | ||
| dest_obj = open(args.dest, "wb+") | ||
| except IOError as err: | ||
|
|
@@ -610,6 +620,22 @@ def copy_command(args): | |
| if args.no_sig_verify and args.fingerprint: | ||
| error_out("--no-sig-verify and --fingerprint cannot be used together") | ||
|
|
||
| # Validate checksum_retry argument | ||
| checksum_retry = None | ||
| if args.checksum_retry is not None: | ||
| try: | ||
| checksum_retry = int(args.checksum_retry) | ||
| if checksum_retry < 1: | ||
| error_out("--checksum-retry argument must be a positive integer") | ||
| except ValueError: | ||
| error_out("--checksum-retry argument must be a valid integer") | ||
|
|
||
|
Comment on lines
+623
to
+632
|
||
| if checksum_retry: | ||
| log.info( | ||
| "checksum verification of written blocks enabled with up to %d retry attempts" | ||
| % checksum_retry | ||
| ) | ||
alchark marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| image_obj, dest_obj, bmap_obj, bmap_path, image_size, dest_is_blkdev = open_files( | ||
| args | ||
| ) | ||
|
|
@@ -631,10 +657,14 @@ def copy_command(args): | |
| if dest_is_blkdev: | ||
| dest_str = "block device '%s'" % args.dest | ||
| # For block devices, use the specialized class | ||
| writer = BmapCopy.BmapBdevCopy(image_obj, dest_obj, bmap_obj, image_size) | ||
| writer = BmapCopy.BmapBdevCopy( | ||
| image_obj, dest_obj, bmap_obj, image_size, checksum_retry | ||
| ) | ||
| else: | ||
| dest_str = "file '%s'" % os.path.basename(args.dest) | ||
| writer = BmapCopy.BmapCopy(image_obj, dest_obj, bmap_obj, image_size) | ||
| writer = BmapCopy.BmapCopy( | ||
| image_obj, dest_obj, bmap_obj, image_size, checksum_retry | ||
| ) | ||
| except BmapCopy.Error as err: | ||
| error_out(err) | ||
|
|
||
|
|
@@ -855,6 +885,19 @@ def parse_arguments(): | |
| text = "do not verify the data checksum while writing" | ||
| parser_copy.add_argument("--no-verify", action="store_true", help=text) | ||
|
|
||
| # The --checksum-retry option | ||
| text = ( | ||
| "verify checksums of written blocks and retry writing on mismatch " | ||
| "(optional: number of retries, default 1)" | ||
| ) | ||
| parser_copy.add_argument( | ||
| "--checksum-retry", | ||
| nargs="?", | ||
| const="1", | ||
| type=str, | ||
| help=text, | ||
| ) | ||
alchark marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # The --psplash-pipe option | ||
| text = "write progress to a psplash pipe" | ||
| parser_copy.add_argument("--psplash-pipe", help=text) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.