diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 4edd47d98013..da0b14660668 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -14,6 +14,7 @@ from textwrap import dedent from unittest import mock +import packaging import pretend import psycopg import pytest @@ -76,25 +77,40 @@ def _get_tar_testdata(compression_type=""): return temp_f.getvalue() -def _get_whl_testdata(name="fake_package", version="1.0"): +def _format_tags(tags: str) -> bytes: + """ + Build a mock WHEEL file from the given tags. + """ + tags = packaging.tags.parse_tag(tags) + formatted = "Wheel-Version: 1.0\n" + "\n".join(f"Tag: {tag}" for tag in tags) + return formatted.encode("utf-8") + + +def _get_whl_testdata( + name="fake_package", + version="1.0", + tags: bytes | None = _format_tags("py3-none-any"), +): temp_f = io.BytesIO() with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr(f"{name}-{version}.dist-info/METADATA", "Fake metadata") + if tags: + zfp.writestr(f"{name}-{version}.dist-info/WHEEL", tags) zfp.writestr(f"{name}-{version}.dist-info/licenses/LICENSE.MIT", "Fake License") zfp.writestr( f"{name}-{version}.dist-info/licenses/LICENSE.APACHE", "Fake License" ) - zfp.writestr( - f"{name}-{version}.dist-info/RECORD", - dedent( - f"""\ - {name}-{version}.dist-info/METADATA, - {name}-{version}.dist-info/licenses/LICENSE.MIT, - {name}-{version}.dist-info/licenses/LICENSE.APACHE, - {name}-{version}.dist-info/RECORD, - """, - ), - ) + record = dedent( + f"""\ + {name}-{version}.dist-info/METADATA, + {name}-{version}.dist-info/licenses/LICENSE.MIT, + {name}-{version}.dist-info/licenses/LICENSE.APACHE, + {name}-{version}.dist-info/RECORD, + """, + ) + if tags: + record += f"{name}-{version}.dist-info/WHEEL,\n" + zfp.writestr(f"{name}-{version}.dist-info/RECORD", record) return temp_f.getvalue() @@ -349,37 +365,101 @@ def test_zipfile_exceeds_compression_threshold(self, tmpdir): "File exceeds compression ratio of 50", ) - def test_wheel_no_wheel_file(self, tmpdir): - f = str(tmpdir.join("test-1.0-py3-none-any.whl")) - - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") + @pytest.mark.parametrize( + ("tags_filename", "tags_file", "error"), + [ + ( + "py3-none-macosx_11_0_arm64", + b"Tag: py3-none-macosx_13_0_arm64", + "Wheel filename and WHEEL file tags mismatch: " + + "'py3-none-macosx_11_0_arm64' vs. 'py3-none-macosx_13_0_arm64'", + ), + ( + "py2-none-any", + b"Tag: py2-none-any\nTag: py3-none-any", + "WHEEL file has tags not in wheel filename: 'py3-none-any'", + ), + ( + "py2.py3-none-any", + b"Tag: py3-none-any", + "Wheel filename has tags not in WHEEL file: 'py2-none-any'", + ), + ( + "py2.py3-none-any", + b"Tag: py2.py3-none-any", + "Tags in WHEEL file must be expanded, not compressed: " + + "'py2.py3-none-any'", + ), + ( + "py3-none-macosx_11_0_arm64", + b"\xff\x00\xff", + "WHEEL file is not UTF-8 encoded", + ), + ( + "py3-none-macosx_11_0_arm64", + None, + "WHEEL not found at -1.0.dist-info/WHEEL", + ), + ], + ) + def test_upload_wheel_tag_mismatches( + self, + pyramid_config, + db_request, + tags_filename, + tags_file, + error, + ): + user = UserFactory.create() + EmailFactory.create(user=user) + project = ProjectFactory.create() + release = ReleaseFactory.create(project=project, version="1.0") + RoleFactory.create(user=user, project=project) - assert legacy._is_valid_dist_file(f, "bdist_wheel") == ( - False, - "WHEEL not found at test-1.0.dist-info/WHEEL", + name = project.normalized_name.replace("-", "_") + filename = f"{name}-{release.version}-{tags_filename}.whl" + data = _get_whl_testdata( + name=name, + version="1.0", + tags=tags_file, ) + digest = hashlib.md5(data).hexdigest() - def test_wheel_has_wheel_file(self, tmpdir): - f = str(tmpdir.join("test-1.0-py3-none-any.whl")) - - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") - zfp.writestr("test-1.0.dist-info/WHEEL", b"this is the package info") + pyramid_config.testing_securitypolicy(identity=user) + db_request.user = user + db_request.user_agent = "warehouse-tests/6.6.6" + db_request.POST = MultiDict( + { + "metadata_version": "2.4", + "name": project.name, + "version": "1.0", + "summary": "This is my summary!", + "filetype": "bdist_wheel", + "md5_digest": digest, + "content": pretend.stub( + filename=filename, + file=io.BytesIO(data), + type="application/zip", + ), + } + ) + db_request.POST.extend([("pyversion", "py3")]) - assert legacy._is_valid_dist_file(f, "bdist_wheel") == (True, None) + storage_service = pretend.stub(store=lambda path, filepath, meta: None) + db_request.find_service = lambda svc, name=None, context=None: { + IFileStorage: storage_service, + }.get(svc) - def test_invalid_wheel_filename(self, tmpdir): - f = str(tmpdir.join("cheese.whl")) + with pytest.raises(HTTPBadRequest) as excinfo: + legacy.file_upload(db_request) - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") - zfp.writestr("test-1.0.dist-info/WHEEL", b"this is the package info") + resp = excinfo.value - assert legacy._is_valid_dist_file(f, "bdist_wheel") == ( - False, - "Unable to parse name and version from wheel filename", + assert resp.status_code == 400 + rendered_error = f"400 Invalid distribution file. {error}".replace( + "", name ) + assert resp.status == rendered_error def test_incorrect_number_of_top_level_directories_sdist_tar(self, tmpdir): tar_fn = str(tmpdir.join("test.tar.gz")) @@ -2875,7 +2955,9 @@ def test_upload_succeeds_with_wheel( project.normalized_name.replace("-", "_"), release.version, plat ) filebody = _get_whl_testdata( - name=project.normalized_name.replace("-", "_"), version=release.version + name=project.normalized_name.replace("-", "_"), + version=release.version, + tags=_format_tags(f"cp34-none-{plat}"), ) filestoragehash = _storage_hash(filebody) @@ -3225,7 +3307,9 @@ def test_upload_succeeds_with_wheel_after_sdist( release.version, ) filebody = _get_whl_testdata( - name=project.normalized_name.replace("-", "_"), version=release.version + name=project.normalized_name.replace("-", "_"), + version=release.version, + tags=_format_tags("cp34-none-any"), ) filestoragehash = _storage_hash(filebody) @@ -3524,6 +3608,10 @@ def test_upload_warns_with_mismatched_wheel_and_zip_contents( with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr("some_file", "some_data") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", f"{project_name}-{release.version}.dist-info/RECORD,", @@ -3599,11 +3687,16 @@ def test_upload_record_does_not_warn_with_zip_dir( zfp.mkdir(f"{project_name}-{release.version}.dist-info/") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", dedent( f"""\ {project_name}-{release.version}.dist-info/METADATA, + {project_name}-{release.version}.dist-info/WHEEL, {project_name}-{release.version}.dist-info/RECORD, """, ), @@ -3669,11 +3762,16 @@ def test_upload_record_does_not_warn_windows_path_separators( project_name = project.normalized_name.replace("-", "_") with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", dedent( f"""\ {project_name}-{release.version}.dist-info\\METADATA, + {project_name}-{release.version}.dist-info\\WHEEL, {project_name}-{release.version}.dist-info\\RECORD, """, ), @@ -3744,6 +3842,10 @@ def test_upload_record_check_does_not_include_jws_p7s( zfp.mkdir(f"{project_name}-{release.version}.dist-info/") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/{exempt_filename}", "" ) @@ -3752,6 +3854,7 @@ def test_upload_record_check_does_not_include_jws_p7s( dedent( f"""\ {project_name}-{release.version}.dist-info/METADATA, + {project_name}-{release.version}.dist-info/WHEEL, {project_name}-{release.version}.dist-info/RECORD, """, ), @@ -3818,6 +3921,10 @@ def test_upload_record_check_exclusions_only_in_dist_info( with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr("some_file", "some_data") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr(f"not-dist-info/{exempt_filename}", "") zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index c6d1f4677036..292387ebf31c 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -1,4 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +import email.parser +import email.policy import hashlib import hmac import os.path @@ -11,6 +13,7 @@ import packaging.requirements import packaging.specifiers +import packaging.tags import packaging.utils import packaging.version import packaging_legacy.version @@ -280,7 +283,7 @@ def _validate_filename(filename, filetype): ) -def _is_valid_dist_file(filename, filetype): +def _is_valid_dist_file(filename: str, filetype: str): """ Perform some basic checks to see whether the indicated file could be a valid distribution file. @@ -335,19 +338,6 @@ def _is_valid_dist_file(filename, filetype): zfp.read(target_file) except KeyError: return False, f"PKG-INFO not found at {target_file}" - if filename.endswith(".whl"): - try: - name, version, _ = os.path.basename(filename).split("-", 2) - except ValueError: - return ( - False, - "Unable to parse name and version from wheel filename", - ) - target_file = os.path.join(f"{name}-{version}.dist-info", "WHEEL") - try: - zfp.read(target_file) - except KeyError: - return False, f"WHEEL not found at {target_file}" # Check the ZIP file record framing # to avoid parser differentials. @@ -390,6 +380,55 @@ def _is_valid_dist_file(filename, filetype): return True, None +def _validate_wheel_file(contents: bytes, tags: frozenset[packaging.tags.Tag]): + """ + Check whether the WHEEL file matches the wheel filename tags. + + A mismatch can happen when manually editing the wheel filename without also editing + the dist-info WHEEL file. + """ + filename_tags = {str(tag) for tag in tags} + dist_info_tags = set() + + try: + content = contents.decode("utf-8") + except UnicodeDecodeError: + raise ValueError("WHEEL file is not UTF-8 encoded") + + data = email.parser.HeaderParser(policy=email.policy.compat32).parsestr(content) + for key, value in data.items(): + if key != "Tag": + continue + if "." in value: + raise ValueError( + f"Tags in WHEEL file must be expanded, not compressed: '{value}'", + ) + dist_info_tags.add(value) + + # Handle the case that there's no overlap, usually two different tags. + if not (dist_info_tags & filename_tags): + formatted_filename_tags = "', '".join(sorted(filename_tags)) + formatted_dist_info_tags = "', '".join(sorted(dist_info_tags)) + raise ValueError( + "Wheel filename and WHEEL file tags mismatch: " + + f"'{formatted_filename_tags}' vs. '{formatted_dist_info_tags}'", + ) + + only_dist_info_tags = dist_info_tags - filename_tags + if only_dist_info_tags: + formatted_tags = "', '".join(sorted(only_dist_info_tags)) + raise ValueError( + f"WHEEL file has tags not in wheel filename: '{formatted_tags}'", + ) + + only_filename_tags = filename_tags - dist_info_tags + if only_filename_tags: + formatted_tags = "', '".join(sorted(only_filename_tags)) + raise ValueError( + f"Wheel filename has tags not in WHEEL file: '{formatted_tags}'", + ) + + def _is_duplicate_file(db_session, filename, hashes): """ Check to see if file already exists, and if it's content matches. @@ -1500,6 +1539,35 @@ def file_upload(request): k: h.hexdigest().lower() for k, h in metadata_file_hashes.items() } + with zipfile.ZipFile(temporary_filename) as zfp: + target_file = os.path.join(f"{name}-{version}.dist-info", "WHEEL") + try: + wheel_file = zfp.read(target_file) + _validate_wheel_file(wheel_file, tags) + except KeyError: + request.metrics.increment( + "warehouse.upload.failed", + tags=[ + "reason:invalid-distribution-file", + f"filetype:{form.filetype.data}", + ], + ) + raise _exc_with_message( + HTTPBadRequest, + f"Invalid distribution file. WHEEL not found at {target_file}", + ) + except ValueError as reason: + request.metrics.increment( + "warehouse.upload.failed", + tags=[ + "reason:invalid-distribution-file", + f"filetype:{form.filetype.data}", + ], + ) + raise _exc_with_message( + HTTPBadRequest, f"Invalid distribution file. {reason}" + ) + # If the user provided attestations, verify them # We persist these attestations subsequently, only after the # release file is persisted.