diff --git a/morgan/__init__.py b/morgan/__init__.py index 755fed9..6597210 100644 --- a/morgan/__init__.py +++ b/morgan/__init__.py @@ -1,7 +1,7 @@ import argparse import configparser import hashlib -import json +import inspect import os import os.path import re @@ -20,13 +20,13 @@ from morgan import configurator, metadata, server from morgan.__about__ import __version__ -from morgan.utils import Cache, to_single_dash +from morgan.utils import RCACHE, Cache, to_single_dash PYPI_ADDRESS = "https://pypi.org/simple/" PREFERRED_HASH_ALG = "sha256" -class Mirrorer: +class Mirrorer: # pylint: disable=too-few-public-methods """ Mirrorer is a class that implements the mirroring capabilities of Morgan. A class is used to maintain state, as the mirrorer needs to keep track of @@ -34,7 +34,7 @@ class Mirrorer: them again as dependencies. """ - def __init__(self, args: argparse.Namespace): + def __init__(self, args: argparse.Namespace, config: str): """ The constructor only needs to path to the package index. """ @@ -45,7 +45,7 @@ def __init__(self, args: argparse.Namespace): self.index_url = args.index_url self.mirror_all_versions: bool = args.mirror_all_versions self.config = configparser.ConfigParser() - self.config.read(args.config) + self.config.read(config) self.envs = {} self._supported_pyversions = [] self._supported_platforms = [] @@ -101,26 +101,6 @@ def mirror(self, requirement_string: str): next_deps.update(more_deps) deps = next_deps.copy() - def copy_server(self): - """ - Copy the server script to the package index. This method will first - attempt to find the server file directly, and if that fails, it will - use the inspect module to get the source code. - """ - - print("Copying server script") - thispath = os.path.realpath(__file__) - serverpath = os.path.join(os.path.dirname(thispath), "server.py") - outpath = os.path.join(self.index_path, "server.py") - if os.path.exists(serverpath): - with open(serverpath, "rb") as inp, open(outpath, "wb") as out: - out.write(inp.read()) - else: - import inspect - - with open(outpath, "w") as out: - out.write(inspect.getsource(server)) - def _mirror( self, requirement: packaging.requirements.Requirement, @@ -134,33 +114,10 @@ def _mirror( else: print("{}".format(requirement)) - data: dict = None - - # get information about this package from the Simple API in JSON - # format as per PEP 691 - request = urllib.request.Request( - "{}{}/".format(self.index_url, requirement.name), - headers={ - "Accept": "application/vnd.pypi.simple.v1+json", - }, - ) - - response_url = "" - with urllib.request.urlopen(request) as response: - data = json.load(response) - response_url = str(response.url) - - # check metadata version ~1.0 - v_str = data["meta"]["api-version"] - if not v_str: - v_str = "1.0" - v_int = [int(i) for i in v_str.split(".")[:2]] - if v_int[0] != 1: - raise Exception(f"Unsupported metadata version {v_str}, only support 1.x") + data: dict = RCACHE.get(self.index_url, requirement.name) + response_url = data['response_url'] files = data["files"] - if files is None or not isinstance(files, list): - raise Exception("Expected response to contain a list of 'files'") # filter and enrich files files = self._filter_files(requirement, required_by, files) @@ -172,7 +129,7 @@ def _mirror( # for any of our environments and don't return an error return None - if len(files) == 0: + if not files: raise Exception(f"No files match requirement {requirement}") # download all files @@ -200,51 +157,6 @@ def _filter_files( required_by: packaging.requirements.Requirement, files: Iterable[dict], ) -> Iterable[dict]: - # remove files with unsupported extensions - files = list( - filter( - lambda file: re.search(r"\.(whl|zip|tar.gz)$", file["filename"]), files - ) - ) - - # parse versions and platform tags for each file - for file in files: - try: - if re.search(r"\.whl$", file["filename"]): - _, file["version"], ___, file["tags"] = ( - packaging.utils.parse_wheel_filename(file["filename"]) - ) - file["is_wheel"] = True - elif re.search(r"\.(tar\.gz|zip)$", file["filename"]): - _, file["version"] = packaging.utils.parse_sdist_filename( - # fix: selenium-2.0-dev-9429.tar.gz -> 9429 - to_single_dash(file["filename"]) - ) - file["is_wheel"] = False - file["tags"] = None - except (packaging.version.InvalidVersion, - packaging.utils.InvalidSdistFilename, - packaging.utils.InvalidWheelFilename): - # old versions - # expandvars-0.6.0-macosx-10.15-x86_64.tar.gz - - # ignore files with invalid version, PyPI no longer allows - # packages with special versioning schemes, and we assume we - # can ignore such files - continue - except Exception: - print("\tSkipping file {}, exception caught".format(file["filename"])) - traceback.print_exc() - continue - - # sort all files by version in reverse order, and ignore yanked files - files = list( - filter( - lambda file: "version" in file and not file.get("yanked", False), files - ) - ) - files.sort(key=lambda file: file["version"], reverse=True) - # keep only files of the latest version that satisfies the # requirement (if requirement doesn't have any version specifiers, # take latest available version) @@ -255,15 +167,15 @@ def _filter_files( ) ) - if len(files) == 0: + if not files: print(f"Skipping {requirement}, no version matches requirement") return None # Now we only have files that satisfy the requirement, and we need to # filter out files that do not match our environments. - files = list(filter(lambda file: self._matches_environments(file), files)) + files = list(filter(self._matches_environments, files)) - if len(files) == 0: + if not files: print(f"Skipping {requirement}, no file matches environments") return None @@ -276,7 +188,8 @@ def _filter_files( return files def _matches_environments(self, fileinfo: dict) -> bool: - if req := fileinfo.get("requires-python", None): + req = fileinfo.get("requires-python", None) + if req: # The Python versions in all of our environments must be supported # by this file in order to match. # Some packages specify their required Python versions with a simple @@ -312,10 +225,7 @@ def _matches_environments(self, fileinfo: dict) -> bool: # check if the version matches any of the supported Pythons, and # only skip it if it does not match any. intrp_ver_matched = any( - map( - lambda supported_python: intrp_set.contains(supported_python), - self._supported_pyversions, - ) + map(intrp_set.contains, self._supported_pyversions) ) if ( @@ -495,25 +405,48 @@ def mirror(args: argparse.Namespace): times on the same index path, files are only downloaded if necessary. """ - m = Mirrorer(args) - for package in m.config["requirements"]: - reqs = m.config["requirements"][package].splitlines() - if not reqs: - # empty requirements - # morgan = - m.mirror(f"{package}") - else: - # multiline requirements - # urllib3 = - # <1.27 - # >=2 - # [brotli] - for req in reqs: - req = req.strip() - m.mirror(f"{package}{req}") + for c in args.config: + print('-----------------------------------------------') + print(f'config: {c}') + print('-----------------------------------------------') + m = Mirrorer(args, c) + for package in m.config["requirements"]: + reqs = m.config["requirements"][package].splitlines() + if not reqs: + # empty requirements + # morgan = + m.mirror(f"{package}") + else: + # multiline requirements + # urllib3 = + # <1.27 + # >=2 + # [brotli] + for req in reqs: + req = req.strip() + m.mirror(f"{package}{req}") if not args.skip_server_copy: - m.copy_server() + copy_server(args.index_path) + + +def copy_server(index_path: str): + """ + Copy the server script to the package index. This method will first + attempt to find the server file directly, and if that fails, it will + use the inspect module to get the source code. + """ + + print("Copying server script") + thispath = os.path.realpath(__file__) + serverpath = os.path.join(os.path.dirname(thispath), "server.py") + outpath = os.path.join(index_path, "server.py") + if os.path.exists(serverpath): + with open(serverpath, "rb") as inp, open(outpath, "wb") as out: + out.write(inp.read()) + else: + with open(outpath, "w") as out: + out.write(inspect.getsource(server)) def main(): @@ -550,12 +483,14 @@ def my_url(arg): type=my_url, help="Base URL of the Python Package Index", ) + + # one request cache for all configs parser.add_argument( "-c", "--config", dest="config", - nargs="?", - help="Config file (default: /morgan.ini)", + nargs="*", + help="Config files (default: /morgan.ini)", ) parser.add_argument( "--skip-server-copy", @@ -610,16 +545,19 @@ def my_url(arg): return if not args.config: - args.config = os.path.join(args.index_path, "morgan.ini") - if not os.path.isfile(args.config): - # If a file named in filenames cannot be opened, that file will be ignored - # https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read - raise argparse.ArgumentTypeError(f"Invalid config: {args.config}") + args.config = [ + os.path.join(args.index_path, "morgan.ini"), + ] + for c in args.config: + if not os.path.isfile(c): + # If a file named in filenames cannot be opened, that file will be ignored + # https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read + raise argparse.ArgumentTypeError(f"Invalid config: {c}") if args.command == "mirror": mirror(args) elif args.command == "copy_server": - Mirrorer(args).copy_server() + copy_server(args.index_path) if __name__ == "__main__": diff --git a/morgan/utils.py b/morgan/utils.py index 425efc1..3d7c307 100644 --- a/morgan/utils.py +++ b/morgan/utils.py @@ -1,6 +1,18 @@ +import json import re +import urllib.parse +import urllib.request +from dataclasses import dataclass, field +from typing import Dict, List from packaging.requirements import Requirement +from packaging.utils import ( + InvalidSdistFilename, + InvalidWheelFilename, + parse_sdist_filename, + parse_wheel_filename, +) +from packaging.version import InvalidVersion def to_single_dash(filename): @@ -42,3 +54,93 @@ def is_simple_case(self, req): if all(spec.operator in ('>', '>=') for spec in specifier._specs): return True return False + + +@dataclass +class RequestCache: # pylint: disable=too-few-public-methods + d: Dict[str, Dict] = field(default_factory=dict) # name: data + + def get(self, url: str, name: str) -> dict: + if name in self.d: + return self.d[name] + + if not url.endswith('/'): + url += '/' + + # get information about this package from the Simple API in JSON + # format as per PEP 691 + request = urllib.request.Request( + f"{url}{name}/", + headers={ + "Accept": "application/vnd.pypi.simple.v1+json", + }, + ) + + with urllib.request.urlopen(request) as response: + data = json.load(response) + data['response_url'] = str(response.url) + + # check metadata version ~1.0 + v_str = data["meta"]["api-version"] # 1.4 + if not v_str: + v_str = "1.0" + v_int = [int(i) for i in v_str.split(".")[:2]] + if v_int[0] != 1: + raise ValueError(f"Unsupported metadata version {v_str}, only support 1.x") + + files = data["files"] + if files is None or not isinstance(files, list): + raise ValueError("Expected response to contain a list of 'files'") + + data["files"] = enrich_files(files) + self.d[name] = data + return data + + +def enrich_files(files: List[Dict]) -> List[Dict]: + ''' + 1) remove files with unsupported extensions or yanked + 2) parse versions and platform tags for each file + (file["version"], file["tags"]) + ''' + + def _ext(file: dict) -> bool: + 'remove files with unsupported extensions or yanked' + f = file['filename'].endswith + y = file.get("yanked", False) + return not y and (f('.whl') or f('.zip') or f('.tar.gz')) + + def _parse(file: dict) -> bool: + 'parse versions and platform tags for each file' + name = file['filename'] + f = name.endswith + try: + if f('.whl'): + _, file["version"], _, file["tags"] = parse_wheel_filename(name) + file["is_wheel"] = True + elif f('.zip') or f('.tar.gz'): + _, file["version"] = parse_sdist_filename( + # fix: selenium-2.0-dev-9429.tar.gz -> 9429 + to_single_dash(name) + ) + file["is_wheel"] = False + file["tags"] = None + except (InvalidVersion, InvalidSdistFilename, InvalidWheelFilename): + # old versions + # expandvars-0.6.0-macosx-10.15-x86_64.tar.gz + + # ignore files with invalid version, PyPI no longer allows + # packages with special versioning schemes, and we assume we + # can ignore such files + return False + return True + + filter1 = (file for file in files if _ext(file)) + filter2 = (file for file in filter1 if _parse(file)) + + files2 = list(filter2) + files2.sort(key=lambda file: file["version"], reverse=True) + return files2 + + +RCACHE = RequestCache() diff --git a/tests/test_init.py b/tests/test_init.py index 7a164c6..ae44dfa 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -6,7 +6,8 @@ import packaging.requirements import pytest -from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server +from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server, copy_server +from morgan.utils import enrich_files class TestParseInterpreter: @@ -89,7 +90,7 @@ def test_mirrorer_initialization(self, temp_index_path): mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) assert mirrorer.index_path == temp_index_path assert mirrorer.index_url == "https://pypi.org/simple/" @@ -106,9 +107,9 @@ def test_server_file_copying(self, temp_index_path): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) - mirrorer.copy_server() + copy_server(args.index_path) expected_serverpath = os.path.join(temp_index_path, "server.py") assert os.path.exists( @@ -129,7 +130,7 @@ def test_file_hashing(self, temp_index_path): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) test_data = b"test content for hashing" test_file = os.path.join(temp_index_path, "test_artifact.whl") @@ -177,7 +178,7 @@ def _make_mirrorer(mirror_all_versions): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=mirror_all_versions, ) - return Mirrorer(args) + return Mirrorer(args, args.config) return _make_mirrorer @@ -195,12 +196,12 @@ def make_file(filename, **overrides): @pytest.fixture def sample_files(self): - return [ + return enrich_files([ self.make_file("sample_package-1.6.0.tar.gz"), self.make_file("sample_package-1.5.2.tar.gz"), self.make_file("sample_package-1.5.1.tar.gz"), self.make_file("sample_package-1.4.9.tar.gz"), - ] + ]) @staticmethod def extract_versions(files):