From b830abacdc0f9d2db9100b05981efb786c6cfe6e Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 4 Jun 2024 12:16:28 -0700 Subject: [PATCH 1/9] Remove py38-fips jobs because stream8 is dead The centos project has yanked their entire mirror of stream8: https://mirror.dfw.rax.opendev.org/centos/8-stream/readme As such, these jobs can't run at all and thus will block any patches from landing. Change-Id: I7b86f9036914dec59db01347ac8a2163e6e9843f (cherry picked from commit 56da508eaf9dc23deb4a9adcc564162943f1954b) (cherry picked from commit a45667b7140ec49a74bd14ba786624646967bb91) --- .zuul.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.zuul.yaml b/.zuul.yaml index ff34b52a44..e8d900541f 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -299,7 +299,6 @@ - release-notes-jobs-python3 check: jobs: - - openstack-tox-functional-py38-fips - openstack-tox-functional-py39 - glance-tox-functional-py39-rbac-defaults - glance-ceph-thin-provisioning: From a5ec69ba1c0a92cc9b91b52b4101b81a330ea684 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 26 Jun 2024 08:41:02 -0700 Subject: [PATCH 2/9] Add safety check and detection support to FI tool This adds a safety check and detection mechanism to the tools/test_format_inspector.py utility for verifying those features outside of glance. Change-Id: I447e7e51315472f8fa6013d4c4852f54c1e0c43d (cherry picked from commit b27040b87e43ff4acfb6870ef0d13d54e5ca5caa) (cherry picked from commit b394ef00c3426771092d099cf1d96077bfa4b919) (cherry picked from commit aa12d39b453068d9ee8367591eb60d4a15cddece) (cherry picked from commit a1c94b47d02a7bd3ac076ea02a23e456da2d5a62) --- tools/test_format_inspector.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/test_format_inspector.py b/tools/test_format_inspector.py index aa554386ef..63e23210cc 100755 --- a/tools/test_format_inspector.py +++ b/tools/test_format_inspector.py @@ -102,6 +102,13 @@ def main(): else: print('Confirmed size with qemu-img') + print('Image safety check: %s' % ( + fmt.safety_check() and 'passed' or 'FAILED')) + if args.input: + detected_fmt = format_inspector.detect_file_format(args.input) + print('Detected inspector for image as: %s' % ( + detected_fmt.__class__.__name__)) + if __name__ == '__main__': sys.exit(main()) From 8f780f80acc8e0f3d420c0c30c6291ae9d6b927b Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Mon, 1 Apr 2024 08:06:31 -0700 Subject: [PATCH 3/9] Reject qcow files with data-file attributes Change-Id: I6326a3e85c1ba4cb1da944a4323769f2399ed2c1 Closes-Bug: #2059809 (cherry picked from commit 2ca29af4433e9fa99a0a48e230d8d25d6eaa4a87) (cherry picked from commit c3586f3a122f6cb0663217b12b52203e74e2e4fa) (cherry picked from commit a92c438fb5ba55440b38cae7c8b4361b58daa9dd) (cherry picked from commit dba3bdb458aa8a5d0193f12b7f1e374a89ed34a2) --- glance/async_/flows/base_import.py | 10 ++++++ .../async_/flows/plugins/image_conversion.py | 8 +++++ .../flows/plugins/test_image_conversion.py | 16 +++++++++ glance/tests/unit/async_/flows/test_import.py | 33 +++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/glance/async_/flows/base_import.py b/glance/async_/flows/base_import.py index e6bb526b4d..c0e2b72839 100644 --- a/glance/async_/flows/base_import.py +++ b/glance/async_/flows/base_import.py @@ -181,6 +181,16 @@ def execute(self, image_id): 'bfile': backing_file} raise RuntimeError(msg) + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + msg = _("File %(path)s has invalid data-file " + "%(dfile)s, aborting.") % {"path": path, + "dfile": data_file} + raise RuntimeError(msg) + return path def revert(self, image_id, result, **kwargs): diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index e977764fa8..4a9f754dc2 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -121,6 +121,14 @@ def _execute(self, action, file_path, **kwargs): raise RuntimeError( 'QCOW images with backing files are not allowed') + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + raise RuntimeError( + 'QCOW images with data-file set are not allowed') + if metadata.get('format') == 'vmdk': create_type = metadata.get( 'format-specific', {}).get( diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index a60e2e1a59..bf8ca007d4 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -184,6 +184,22 @@ def test_image_convert_invalid_qcow(self): self.assertEqual('QCOW images with backing files are not allowed', str(e)) + def test_image_convert_invalid_qcow_data_file(self): + data = {'format': 'qcow2', + 'format-specific': { + 'data': { + 'data-file': '/etc/hosts', + }, + }} + + convert = self._setup_image_convert_info_fail() + with mock.patch.object(processutils, 'execute') as exc_mock: + exc_mock.return_value = json.dumps(data), '' + e = self.assertRaises(RuntimeError, + convert.execute, 'file:///test/path.qcow') + self.assertEqual('QCOW images with data-file set are not allowed', + str(e)) + def _test_image_convert_invalid_vmdk(self): data = {'format': 'vmdk', 'format-specific': { diff --git a/glance/tests/unit/async_/flows/test_import.py b/glance/tests/unit/async_/flows/test_import.py index 79f6b6de57..55d6f09283 100644 --- a/glance/tests/unit/async_/flows/test_import.py +++ b/glance/tests/unit/async_/flows/test_import.py @@ -178,6 +178,39 @@ def create_image(*args, **kwargs): self.assertFalse(os.path.exists(tmp_image_path)) self.assertTrue(os.path.exists(image_path)) + def test_import_flow_invalid_data_file(self): + self.config(engine_mode='serial', + group='taskflow_executor') + + img_factory = mock.MagicMock() + + executor = taskflow_executor.TaskExecutor( + self.context, + self.task_repo, + self.img_repo, + img_factory) + + self.task_repo.get.return_value = self.task + + def create_image(*args, **kwargs): + kwargs['image_id'] = UUID1 + return self.img_factory.new_image(*args, **kwargs) + + self.img_repo.get.return_value = self.image + img_factory.new_image.side_effect = create_image + + with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: + dmock.return_value = io.BytesIO(b"TEST_IMAGE") + + with mock.patch.object(putils, 'trycmd') as tmock: + out = json.dumps({'format-specific': + {'data': {'data-file': 'somefile'}}}) + tmock.return_value = (out, '') + e = self.assertRaises(RuntimeError, + executor.begin_processing, + self.task.task_id) + self.assertIn('somefile', str(e)) + def test_import_flow_revert_import_to_fs(self): self.config(engine_mode='serial', group='taskflow_executor') From 4e95007cf35cb4bdad9acfd2aa95115ae4dabdd5 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 16 Apr 2024 10:29:10 -0700 Subject: [PATCH 4/9] Extend format_inspector for QCOW safety This adds two properties to the QcowInspector that makes it able to indicate whether the file specifies a backing_file or data_file in the header. Both conditions are considered unsafe for our usage. To ease checking of this condition, a classmethod is added that takes a local filename and digests just enough of the file to assert that both conditions are false. Change-Id: Iaf86b525397d41bd116999cabe0954a0a7efac65 Related-Bug: #2059809 (cherry picked from commit ae536bb394793c9a7a219cb498e03d5c81dbbbb7) (cherry picked from commit 2eba54e0821106097dfeceb424e53943fd090483) (cherry picked from commit 89dbbc838d606f461087e1494d19ddbcf9db0a38) (cherry picked from commit c6d5a68297ba11c38b22f74d748bfaee34b8ab8c) --- glance/common/format_inspector.py | 132 +++++++++++++++++- .../unit/common/test_format_inspector.py | 87 +++++++++++- 2 files changed, 214 insertions(+), 5 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 550cceadbb..f827e9910f 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -28,6 +28,14 @@ LOG = logging.getLogger(__name__) +def chunked_reader(fileobj, chunk_size=512): + while True: + chunk = fileobj.read(chunk_size) + if not chunk: + break + yield chunk + + class CaptureRegion(object): """Represents a region of a file we want to capture. @@ -176,10 +184,16 @@ def virtual_size(self): @property def actual_size(self): """Returns the total size of the file, usually smaller than - virtual_size. + virtual_size. NOTE: this will only be accurate if the entire + file is read and processed. """ return self._total_count + @property + def complete(self): + """Returns True if we have all the information needed.""" + return all(r.complete for r in self._capture_regions.values()) + def __str__(self): """The string name of this file format.""" return 'raw' @@ -194,6 +208,35 @@ def context_info(self): return {name: len(region.data) for name, region in self._capture_regions.items()} + @classmethod + def from_file(cls, filename): + """Read as much of a file as necessary to complete inspection. + + NOTE: Because we only read as much of the file as necessary, the + actual_size property will not reflect the size of the file, but the + amount of data we read before we satisfied the inspector. + + Raises ImageFormatError if we cannot parse the file. + """ + inspector = cls() + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + inspector.eat_chunk(chunk) + if inspector.complete: + # No need to eat any more data + break + if not inspector.complete or not inspector.format_match: + raise ImageFormatError('File is not in requested format') + return inspector + + def safety_check(self): + """Perform some checks to determine if this file is safe. + + Returns True if safe, False otherwise. It may raise ImageFormatError + if safety cannot be guaranteed because of parsing or other errors. + """ + return True + # The qcow2 format consists of a big-endian 72-byte header, of which # only a small portion has information we care about: @@ -202,15 +245,26 @@ def context_info(self): # 0 0x00 Magic 4-bytes 'QFI\xfb' # 4 0x04 Version (uint32_t, should always be 2 for modern files) # . . . +# 8 0x08 Backing file offset (uint64_t) # 24 0x18 Size in bytes (unint64_t) +# . . . +# 72 0x48 Incompatible features bitfield (6 bytes) # -# https://people.gnome.org/~markmc/qcow-image-format.html +# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt class QcowInspector(FileInspector): """QEMU QCOW2 Format This should only require about 32 bytes of the beginning of the file - to determine the virtual size. + to determine the virtual size, and 104 bytes to perform the safety check. """ + + BF_OFFSET = 0x08 + BF_OFFSET_LEN = 8 + I_FEATURES = 0x48 + I_FEATURES_LEN = 8 + I_FEATURES_DATAFILE_BIT = 3 + I_FEATURES_MAX_BIT = 4 + def __init__(self, *a, **k): super(QcowInspector, self).__init__(*a, **k) self.new_region('header', CaptureRegion(0, 512)) @@ -220,6 +274,10 @@ def _qcow_header_data(self): struct.unpack('>4sIQIIQ', self.region('header').data[:32])) return magic, size + @property + def has_header(self): + return self.region('header').complete + @property def virtual_size(self): if not self.region('header').complete: @@ -236,9 +294,77 @@ def format_match(self): magic, size = self._qcow_header_data() return magic == b'QFI\xFB' + @property + def has_backing_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + bf_offset_bytes = self.region('header').data[ + self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN] + # nonzero means "has a backing file" + bf_offset, = struct.unpack('>Q', bf_offset_bytes) + return bf_offset != 0 + + @property + def has_unknown_features(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # This is the maximum byte number we should expect any bits to be set + max_byte = self.I_FEATURES_MAX_BIT // 8 + + # The flag bytes are in big-endian ordering, so if we process + # them in index-order, they're reversed + for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))): + if byte_num == max_byte: + # If we're in the max-allowed byte, allow any bits less than + # the maximum-known feature flag bit to be set + allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1) + elif byte_num > max_byte: + # If we're above the byte with the maximum known feature flag + # bit, then we expect all zeroes + allow_mask = 0x0 + else: + # Any earlier-than-the-maximum byte can have any of the flag + # bits set + allow_mask = 0xFF + + if i_features[i] & ~allow_mask: + LOG.warning('Found unknown feature bit in byte %i: %s/%s', + byte_num, bin(i_features[byte_num] & ~allow_mask), + bin(allow_mask)) + return True + + return False + + @property + def has_data_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # First byte of bitfield, which is i_features[7] + byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8 + # Third bit of bitfield, which is 0x04 + bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8) + return bool(i_features[byte] & bit) + def __str__(self): return 'qcow2' + def safety_check(self): + return (not self.has_backing_file and + not self.has_data_file and + not self.has_unknown_features) + # The VHD (or VPC as QEMU calls it) format consists of a big-endian # 512-byte "footer" at the beginning of the file with various diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index db6a9830bd..c6f1f87a8f 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -51,12 +51,29 @@ def tearDown(self): except Exception: pass - def _create_img(self, fmt, size): + def _create_img(self, fmt, size, subformat=None, options=None, + backing_file=None): if fmt == 'vhd': # QEMU calls the vhd format vpc fmt = 'vpc' - fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-', + if options is None: + options = {} + opt = '' + prefix = 'glance-unittest-formatinspector-' + + if subformat: + options['subformat'] = subformat + prefix += subformat + '-' + + if options: + opt += '-o ' + ','.join('%s=%s' % (k, v) + for k, v in options.items()) + + if backing_file is not None: + opt += ' -b %s -F raw' % backing_file + + fn = tempfile.mktemp(prefix=prefix, suffix='.%s' % fmt) self._created_files.append(fn) subprocess.check_output( @@ -200,6 +217,72 @@ def test_vmdk_bad_descriptor_mem_limit(self): 'Format used more than 1.5MiB of memory: %s' % ( fmt.context_info)) + def test_from_file_reads_minimum(self): + img = self._create_img('qcow2', 10 * units.Mi) + file_size = os.stat(img).st_size + fmt = format_inspector.QcowInspector.from_file(img) + # We know everything we need from the first 512 bytes of a QCOW image, + # so make sure that we did not read the whole thing when we inspect + # a local file. + self.assertLess(fmt.actual_size, file_size) + + def test_qcow2_safety_checks(self): + # Create backing and data-file names (and initialize the backing file) + backing_fn = tempfile.mktemp(prefix='backing') + self._created_files.append(backing_fn) + with open(backing_fn, 'w') as f: + f.write('foobar') + data_fn = tempfile.mktemp(prefix='data') + self._created_files.append(data_fn) + + # A qcow with no backing or data file is safe + fn = self._create_img('qcow2', 5 * units.Mi, None) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertTrue(inspector.safety_check()) + + # A backing file makes it unsafe + fn = self._create_img('qcow2', 5 * units.Mi, None, + backing_file=backing_fn) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertFalse(inspector.safety_check()) + + # A data-file makes it unsafe + fn = self._create_img('qcow2', 5 * units.Mi, + options={'data_file': data_fn, + 'data_file_raw': 'on'}) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertFalse(inspector.safety_check()) + + # Trying to load a non-QCOW file is an error + self.assertRaises(format_inspector.ImageFormatError, + format_inspector.QcowInspector.from_file, + backing_fn) + + def test_qcow2_feature_flag_checks(self): + data = bytearray(512) + data[0:4] = b'QFI\xFB' + inspector = format_inspector.QcowInspector() + inspector.region('header').data = data + + # All zeros, no feature flags - all good + self.assertFalse(inspector.has_unknown_features) + + # A feature flag set in the first byte (highest-order) is not + # something we know about, so fail. + data[0x48] = 0x01 + self.assertTrue(inspector.has_unknown_features) + + # The first bit in the last byte (lowest-order) is known (the dirty + # bit) so that should pass + data[0x48] = 0x00 + data[0x4F] = 0x01 + self.assertFalse(inspector.has_unknown_features) + + # Currently (as of 2024), the high-order feature flag bit in the low- + # order byte is not assigned, so make sure we reject it. + data[0x4F] = 0x80 + self.assertTrue(inspector.has_unknown_features) + def test_vdi(self): self._test_format('vdi') From 06de67f48562395c5ab4fdde940b2b9d290e6177 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 18 Apr 2024 14:51:52 -0700 Subject: [PATCH 5/9] Add VMDK safety check This makes us check the extent filenames to make sure they don't have any banned characters in them (i.e. slashes). It also makes us reject VMDK files with a footer. Since we process these files as a stream, we can't honor a footer that directs us to find the descriptor block in a location we've already processed. Thus, if a file indicates it has a footer, consider it a policy exception and unsupported. Change-Id: I4a1c6dff7854c49940a0ac7988722aa6befc04fa (cherry picked from commit c1bf35dffb7f4c3090b1f04cf0e27cb431330c3e) (cherry picked from commit d3f1d6159c0218ac01e8d881e2ec4da71fc952ee) (cherry picked from commit 2dd4d138d4b8e1d9ca69fc0dda3711553a65d912) (cherry picked from commit 812e56d19f456e99e2277b99046330e1ad6a4ca7) --- glance/common/format_inspector.py | 64 ++++++++++++++++++- .../unit/common/test_format_inspector.py | 56 ++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index f827e9910f..8329cdfcc8 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -650,6 +650,7 @@ class VMDKInspector(FileInspector): # at 0x200 and 1MB - 1 DESC_OFFSET = 0x200 DESC_MAX_SIZE = (1 << 20) - 1 + GD_AT_END = 0xffffffffffffffff def __init__(self, *a, **k): super(VMDKInspector, self).__init__(*a, **k) @@ -662,8 +663,9 @@ def post_process(self): if not self.region('header').complete: return - sig, ver, _flags, _sectors, _grain, desc_sec, desc_num = struct.unpack( - '<4sIIQQQQ', self.region('header').data[:44]) + (sig, ver, _flags, _sectors, _grain, desc_sec, desc_num, + _numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack( + '<4sIIQQQQIQQ', self.region('header').data[:64]) if sig != b'KDMV': raise ImageFormatError('Signature KDMV not found: %r' % sig) @@ -671,6 +673,11 @@ def post_process(self): if ver not in (1, 2, 3): raise ImageFormatError('Unsupported format version %i' % ver) + if gdOffset == self.GD_AT_END: + # This means we have a footer, which takes precedence over the + # header, which we cannot support since we stream. + raise ImageFormatError('Unsupported VMDK footer') + # Since we parse both desc_sec and desc_num (the location of the # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a # check on the bounds to create a reasonable CaptureRegion. This @@ -718,6 +725,59 @@ def virtual_size(self): return sectors * 512 + def safety_check(self): + if (not self.has_region('descriptor') or + not self.region('descriptor').complete): + return False + + try: + # Descriptor is padded to 512 bytes + desc_data = self.region('descriptor').data.rstrip(b'\x00') + # Descriptor is actually case-insensitive ASCII text + desc_text = desc_data.decode('ascii').lower() + except UnicodeDecodeError: + LOG.error('VMDK descriptor failed to decode as ASCII') + raise ImageFormatError('Invalid VMDK descriptor data') + + extent_access = ('rw', 'rdonly', 'noaccess') + header_fields = [] + extents = [] + ddb = [] + + # NOTE(danms): Cautiously parse the VMDK descriptor. Each line must + # be something we understand, otherwise we refuse it. + for line in [x.strip() for x in desc_text.split('\n')]: + if line.startswith('#') or not line: + # Blank or comment lines are ignored + continue + elif line.startswith('ddb'): + # DDB lines are allowed (but not used by us) + ddb.append(line) + elif '=' in line and ' ' not in line.split('=')[0]: + # Header fields are a single word followed by an '=' and some + # value + header_fields.append(line) + elif line.split(' ')[0] in extent_access: + # Extent lines start with one of the three access modes + extents.append(line) + else: + # Anything else results in a rejection + LOG.error('Unsupported line %r in VMDK descriptor', line) + raise ImageFormatError('Invalid VMDK descriptor data') + + # Check all the extent lines for concerning content + for extent_line in extents: + if '/' in extent_line: + LOG.error('Extent line %r contains unsafe characters', + extent_line) + return False + + if not extents: + LOG.error('VMDK file specified no extents') + return False + + return True + def __str__(self): return 'vmdk' diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index c6f1f87a8f..4f0936dd85 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -283,6 +283,62 @@ def test_qcow2_feature_flag_checks(self): data[0x4F] = 0x80 self.assertTrue(inspector.has_unknown_features) + def test_vmdk_safety_checks(self): + region = format_inspector.CaptureRegion(0, 0) + inspector = format_inspector.VMDKInspector() + inspector.new_region('descriptor', region) + + # This should be a legit VMDK descriptor which comments, blank lines, + # an extent, some ddb content, and some header values. + legit_desc = ['# This is a comment', + '', + ' ', + 'createType=monolithicSparse', + 'RW 1234 SPARSE "foo.vmdk"', + 'ddb.adapterType = "MFM', + '# EOF'] + region.data = ('\n'.join(legit_desc)).encode('ascii') + region.length = len(region.data) + self.assertTrue(inspector.safety_check()) + + # Any of these lines should trigger an error indicating that there is + # something in the descriptor we don't understand + bad_lines = [ + '#\U0001F4A9', + 'header Name=foo', + 'foo bar', + 'WR 123 SPARSE "foo.vmdk"', + ] + + for bad_line in bad_lines: + # Encode as UTF-8 purely so we can test that anything non-ASCII + # will trigger the decode check + region.data = bad_line.encode('utf-8') + region.length = len(region.data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'Invalid VMDK descriptor', + inspector.safety_check) + + # Extents with slashes in the name fail the safety check + region.data = b'RW 123 SPARSE "/etc/shadow"' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + # A descriptor that specifies no extents fails the safety check + region.data = b'# Nothing' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + def test_vmdk_reject_footer(self): + data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0, + format_inspector.VMDKInspector.GD_AT_END) + inspector = format_inspector.VMDKInspector() + inspector.region('header').data = data + inspector.region('header').length = len(data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'footer', + inspector.post_process) + def test_vdi(self): self._test_format('vdi') From 9f671a8cac7f7f93a12ef42d8c17623eb5b4a23b Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 16 Apr 2024 11:20:48 -0700 Subject: [PATCH 6/9] Reject unsafe qcow and vmdk files This causes us to use the format inspector to pre-examine qcow and vmdk files for safe configurations before even using qemu-img on them. Change-Id: I0554706368e573e11f649c09569f7c21cbc8634b Closes-Bug: #2059809 (cherry picked from commit a95f335bca1dfdd1c904ae475e9fe8c6806f2c56) (cherry picked from commit 55fc42563818fcf88b474233df242a796c918b3a) (cherry picked from commit f1f53075d69a9a1c006b3e25506e30eb0210de1f) (cherry picked from commit c1c54abeaba91ae0030d4acf01a91339b60a2d7d) --- .../async_/flows/plugins/image_conversion.py | 44 +++++++++++--- .../flows/plugins/test_image_conversion.py | 57 ++++++++++++++++--- 2 files changed, 87 insertions(+), 14 deletions(-) diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index 4a9f754dc2..6f5199c82d 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -25,6 +25,7 @@ from taskflow import task from glance.async_ import utils +from glance.common import format_inspector from glance.i18n import _, _LI LOG = logging.getLogger(__name__) @@ -87,8 +88,40 @@ def _execute(self, action, file_path, **kwargs): 'target': target_format} self.dest_path = dest_path + source_format = action.image_disk_format + inspector_cls = format_inspector.get_inspector(source_format) + if not inspector_cls: + # We cannot convert from disk_format types that qemu-img doesn't + # support (like iso, ploop, etc). The ones it supports overlaps + # with the ones we have inspectors for, so reject conversion for + # any format we don't have an inspector for. + raise RuntimeError( + 'Unable to convert from format %s' % source_format) + + # Use our own cautious inspector module (if we have one for this + # format) to make sure a file is the format the submitter claimed + # it is and that it passes some basic safety checks _before_ we run + # qemu-img on it. + # See https://bugs.launchpad.net/nova/+bug/2059809 for details. + try: + inspector = inspector_cls.from_file(src_path) + if not inspector.safety_check(): + LOG.error('Image failed %s safety check; aborting conversion', + source_format) + raise RuntimeError('Image has disallowed configuration') + except RuntimeError: + raise + except format_inspector.ImageFormatError as e: + LOG.error('Image claimed to be %s format failed format ' + 'inspection: %s', source_format, e) + raise RuntimeError('Image format detection failed') + except Exception as e: + LOG.exception('Unknown error inspecting image format: %s', e) + raise RuntimeError('Unable to inspect image') + try: stdout, stderr = putils.trycmd("qemu-img", "info", + "-f", source_format, "--output=json", src_path, prlimit=utils.QEMU_IMG_PROC_LIMITS, @@ -105,13 +138,10 @@ def _execute(self, action, file_path, **kwargs): raise RuntimeError(stderr) metadata = json.loads(stdout) - try: - source_format = metadata['format'] - except KeyError: - msg = ("Failed to do introspection as part of image " - "conversion for %(iid)s: Source format not reported") - LOG.error(msg, {'iid': self.image_id}) - raise RuntimeError(msg) + if metadata.get('format') != source_format: + LOG.error('Image claiming to be %s reported as %s by qemu-img', + source_format, metadata.get('format', 'unknown')) + raise RuntimeError('Image metadata disagrees about format') virtual_size = metadata.get('virtual-size', 0) action.set_image_attribute(virtual_size=virtual_size) diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index bf8ca007d4..1942dcc43f 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import fixtures import json import os from unittest import mock @@ -24,6 +25,7 @@ import glance.async_.flows.api_image_import as import_flow import glance.async_.flows.plugins.image_conversion as image_conversion from glance.async_ import utils as async_utils +from glance.common import format_inspector from glance.common import utils from glance import domain from glance import gateway @@ -90,6 +92,11 @@ def setUp(self): self.image_id, self.task.task_id) + self.inspector_mock = mock.MagicMock() + self.useFixture(fixtures.MockPatch('glance.common.format_inspector.' + 'get_inspector', + self.inspector_mock)) + @mock.patch.object(os, 'stat') @mock.patch.object(os, 'remove') def test_image_convert_success(self, mock_os_remove, mock_os_stat): @@ -104,7 +111,7 @@ def test_image_convert_success(self, mock_os_remove, mock_os_stat): image = mock.MagicMock(image_id=self.image_id, virtual_size=None, extra_properties={ 'os_glance_import_task': self.task.task_id}, - disk_format='qcow2') + disk_format='raw') self.img_repo.get.return_value = image with mock.patch.object(processutils, 'execute') as exc_mock: @@ -126,7 +133,7 @@ def test_image_convert_success(self, mock_os_remove, mock_os_stat): self.assertEqual(456, image.virtual_size) self.assertEqual(123, image.size) - def _setup_image_convert_info_fail(self): + def _setup_image_convert_info_fail(self, disk_format='qcow2'): image_convert = image_conversion._ConvertImage(self.context, self.task.task_id, self.task_type, @@ -136,7 +143,7 @@ def _setup_image_convert_info_fail(self): image = mock.MagicMock(image_id=self.image_id, virtual_size=None, extra_properties={ 'os_glance_import_task': self.task.task_id}, - disk_format='qcow2') + disk_format=disk_format) self.img_repo.get.return_value = image return image_convert @@ -148,6 +155,7 @@ def test_image_convert_fails_inspection(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -164,6 +172,7 @@ def test_image_convert_inspection_reports_error(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -200,6 +209,36 @@ def test_image_convert_invalid_qcow_data_file(self): self.assertEqual('QCOW images with data-file set are not allowed', str(e)) + def test_image_convert_no_inspector_match(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value = None + self.assertRaisesRegex(RuntimeError, + 'Unable to convert from format', + convert.execute, 'file:///test/path.hpfs') + + def test_image_convert_fails_inspection_safety_check(self): + convert = self._setup_image_convert_info_fail() + inspector = self.inspector_mock.return_value.from_file.return_value + inspector.safety_check.return_value = False + self.assertRaisesRegex(RuntimeError, + 'Image has disallowed configuration', + convert.execute, 'file:///test/path.qcow') + + def test_image_convert_fails_inspection_format_check(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value.from_file.side_effect = ( + format_inspector.ImageFormatError()) + self.assertRaisesRegex(RuntimeError, + 'Image format detection failed', + convert.execute, 'file:///test/path.qcow') + + def test_image_convert_fails_inspection_error(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value.from_file.side_effect = ValueError + self.assertRaisesRegex(RuntimeError, + 'Unable to inspect image', + convert.execute, 'file:///test/path.qcow') + def _test_image_convert_invalid_vmdk(self): data = {'format': 'vmdk', 'format-specific': { @@ -207,7 +246,7 @@ def _test_image_convert_invalid_vmdk(self): 'create-type': 'monolithicFlat', }}} - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='vmdk') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.return_value = json.dumps(data), '' convert.execute('file:///test/path.vmdk') @@ -236,7 +275,7 @@ def test_image_convert_valid_vmdk(self): self._test_image_convert_invalid_vmdk) def test_image_convert_fails(self): - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='raw') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = [('{"format":"raw"}', ''), OSError('convert_fail')] @@ -244,6 +283,7 @@ def test_image_convert_fails(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_has_calls( [mock.call('qemu-img', 'info', + '-f', 'raw', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -256,7 +296,7 @@ def test_image_convert_fails(self): self.img_repo.save.assert_not_called() def test_image_convert_reports_fail(self): - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='raw') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = [('{"format":"raw"}', ''), ('', 'some error')] @@ -264,6 +304,7 @@ def test_image_convert_reports_fail(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_has_calls( [mock.call('qemu-img', 'info', + '-f', 'raw', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -281,9 +322,10 @@ def test_image_convert_fails_source_format(self): exc_mock.return_value = ('{}', '') exc = self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.raw') - self.assertIn('Source format not reported', str(exc)) + self.assertIn('Image metadata disagrees about format', str(exc)) exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -301,6 +343,7 @@ def test_image_convert_same_format_does_nothing(self): # Make sure we only called qemu-img for inspection, not conversion exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.qcow', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, From 64952eb96bafe5e82ab03911123848d460df3bc8 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 27 Jun 2024 09:33:55 -0700 Subject: [PATCH 7/9] Add QED format detection to format_inspector This merely recognizes this format and always marks it as unsafe because no service supports it. This prevents someone from uploading one that we will ask qemu-img to inspect. Change-Id: Ieea7b7eb0f380571bd4937cded920776e05f7ec4 (cherry picked from commit 4096c5aff1d046d5c28d0e4a69b3c9574e9e8cc8) (cherry picked from commit ba98022b98ef5b9c98e1d7d20c88e1ca4b23fa80) (cherry picked from commit a8dadcd7994c88a61f94341286b4bcd693b51b32) (cherry picked from commit 9b3faf376bfc911c7cabde458507a0c65d63d70c) --- glance/common/format_inspector.py | 18 ++++++++++++++++++ .../tests/unit/common/test_format_inspector.py | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 8329cdfcc8..08cc5a891d 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -366,6 +366,23 @@ def safety_check(self): not self.has_unknown_features) +class QEDInspector(FileInspector): + def __init__(self, tracing=False): + super().__init__(tracing) + self.new_region('header', CaptureRegion(0, 512)) + + @property + def format_match(self): + if not self.region('header').complete: + return False + return self.region('header').data.startswith(b'QED\x00') + + def safety_check(self): + # QED format is not supported by anyone, but we want to detect it + # and mark it as just always unsafe. + return False + + # The VHD (or VPC as QEMU calls it) format consists of a big-endian # 512-byte "footer" at the beginning of the file with various # information, most of which does not matter to us: @@ -879,6 +896,7 @@ def get_inspector(format_name): 'vhdx': VHDXInspector, 'vmdk': VMDKInspector, 'vdi': VDIInspector, + 'qed': QEDInspector, } return formats.get(format_name) diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 4f0936dd85..712db4453a 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -217,6 +217,12 @@ def test_vmdk_bad_descriptor_mem_limit(self): 'Format used more than 1.5MiB of memory: %s' % ( fmt.context_info)) + def test_qed_always_unsafe(self): + img = self._create_img('qed', 10 * units.Mi) + fmt = format_inspector.get_inspector('qed').from_file(img) + self.assertTrue(fmt.format_match) + self.assertFalse(fmt.safety_check()) + def test_from_file_reads_minimum(self): img = self._create_img('qcow2', 10 * units.Mi) file_size = os.stat(img).st_size From be64ede3a35ae6765446d35c01e97e1a23f8fa86 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 18 Apr 2024 08:25:24 -0700 Subject: [PATCH 8/9] Add file format detection to format_inspector Change-Id: If0a4251465507be035ffaf9d855299611637cfa9 (cherry picked from commit 79271eaa5c742a1741321198c43807857fb6ed94) (cherry picked from commit e1c36248c7660dea1bedfa8f1c0711a4b97d279c) (cherry picked from commit d54121d6a937fd50aae1018aede228a3c0985dce) (cherry picked from commit 1656fe8832db6cfff96dad09b13584b0037187c5) --- glance/common/format_inspector.py | 54 ++++++++++++++---- .../unit/common/test_format_inspector.py | 56 ------------------- 2 files changed, 43 insertions(+), 67 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 08cc5a891d..d07350e0d9 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -883,20 +883,52 @@ def close(self): self._source.close() +ALL_FORMATS = { + 'raw': FileInspector, + 'qcow2': QcowInspector, + 'vhd': VHDInspector, + 'vhdx': VHDXInspector, + 'vmdk': VMDKInspector, + 'vdi': VDIInspector, + 'qed': QEDInspector, +} + + def get_inspector(format_name): """Returns a FormatInspector class based on the given name. :param format_name: The name of the disk_format (raw, qcow2, etc). :returns: A FormatInspector or None if unsupported. """ - formats = { - 'raw': FileInspector, - 'qcow2': QcowInspector, - 'vhd': VHDInspector, - 'vhdx': VHDXInspector, - 'vmdk': VMDKInspector, - 'vdi': VDIInspector, - 'qed': QEDInspector, - } - - return formats.get(format_name) + + return ALL_FORMATS.get(format_name) + + +def detect_file_format(filename): + """Attempts to detect the format of a file. + + This runs through a file one time, running all the known inspectors in + parallel. It stops reading the file once one of them matches or all of + them are sure they don't match. + + Returns the FileInspector that matched, if any. None if 'raw'. + """ + inspectors = {k: v() for k, v in ALL_FORMATS.items()} + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + for format, inspector in list(inspectors.items()): + try: + inspector.eat_chunk(chunk) + except ImageFormatError: + # No match, so stop considering this format + inspectors.pop(format) + continue + if (inspector.format_match and inspector.complete and + format != 'raw'): + # First complete match (other than raw) wins + return inspector + if all(i.complete for i in inspectors.values()): + # If all the inspectors are sure they are not a match, avoid + # reading to the end of the file to settle on 'raw'. + break + return inspectors['raw'] diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 712db4453a..bd9011afe6 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -289,62 +289,6 @@ def test_qcow2_feature_flag_checks(self): data[0x4F] = 0x80 self.assertTrue(inspector.has_unknown_features) - def test_vmdk_safety_checks(self): - region = format_inspector.CaptureRegion(0, 0) - inspector = format_inspector.VMDKInspector() - inspector.new_region('descriptor', region) - - # This should be a legit VMDK descriptor which comments, blank lines, - # an extent, some ddb content, and some header values. - legit_desc = ['# This is a comment', - '', - ' ', - 'createType=monolithicSparse', - 'RW 1234 SPARSE "foo.vmdk"', - 'ddb.adapterType = "MFM', - '# EOF'] - region.data = ('\n'.join(legit_desc)).encode('ascii') - region.length = len(region.data) - self.assertTrue(inspector.safety_check()) - - # Any of these lines should trigger an error indicating that there is - # something in the descriptor we don't understand - bad_lines = [ - '#\U0001F4A9', - 'header Name=foo', - 'foo bar', - 'WR 123 SPARSE "foo.vmdk"', - ] - - for bad_line in bad_lines: - # Encode as UTF-8 purely so we can test that anything non-ASCII - # will trigger the decode check - region.data = bad_line.encode('utf-8') - region.length = len(region.data) - self.assertRaisesRegex(format_inspector.ImageFormatError, - 'Invalid VMDK descriptor', - inspector.safety_check) - - # Extents with slashes in the name fail the safety check - region.data = b'RW 123 SPARSE "/etc/shadow"' - region.length = len(region.data) - self.assertFalse(inspector.safety_check()) - - # A descriptor that specifies no extents fails the safety check - region.data = b'# Nothing' - region.length = len(region.data) - self.assertFalse(inspector.safety_check()) - - def test_vmdk_reject_footer(self): - data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0, - format_inspector.VMDKInspector.GD_AT_END) - inspector = format_inspector.VMDKInspector() - inspector.region('header').data = data - inspector.region('header').length = len(data) - self.assertRaisesRegex(format_inspector.ImageFormatError, - 'footer', - inspector.post_process) - def test_vdi(self): self._test_format('vdi') From 1ae4324749595c66f7f0ca836618c7e7c6b5e77b Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 2 Jul 2024 11:53:01 +0100 Subject: [PATCH 9/9] Backport fix ups for Zed --- glance/tests/unit/common/test_format_inspector.py | 2 +- glance/tests/unit/v2/test_tasks_resource.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index bd9011afe6..e9964fd8fa 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -77,7 +77,7 @@ def _create_img(self, fmt, size, subformat=None, options=None, suffix='.%s' % fmt) self._created_files.append(fn) subprocess.check_output( - 'qemu-img create -f %s %s %i' % (fmt, fn, size), + 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), shell=True) return fn diff --git a/glance/tests/unit/v2/test_tasks_resource.py b/glance/tests/unit/v2/test_tasks_resource.py index 754612b11f..15562724b7 100644 --- a/glance/tests/unit/v2/test_tasks_resource.py +++ b/glance/tests/unit/v2/test_tasks_resource.py @@ -415,8 +415,8 @@ def test_create_with_wrong_import_form(self): "non-local source of image data.") else: supported = ['http', ] - msg = ("The given uri is not valid. Please specify a " - "valid uri from the following list of supported uri " + msg = ("The given URI is not valid. Please specify a " + "valid URI from the following list of supported URI " "%(supported)s") % {'supported': supported} self.assertEqual(msg, final_task.message)