-
-
Notifications
You must be signed in to change notification settings - Fork 34
ENH Add extra_index_urls
and index_strategy
parameters to micropip.install(<...>)
#224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4aa3377
d9a1669
8ff6319
fefed1d
75b89b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -268,10 +268,12 @@ def _select_parser( | |
raise ValueError(f"Unsupported content type: {content_type}") | ||
|
||
|
||
async def query_package( | ||
# TODO: reduce mccabe complexity | ||
async def query_package( # noqa: C901 | ||
name: str, | ||
index_urls: list[str] | str, | ||
fetch_kwargs: dict[str, Any] | None = None, | ||
strategy: str = "first-index", | ||
) -> ProjectInfo: | ||
""" | ||
Query for a package from package indexes. | ||
|
@@ -286,6 +288,9 @@ async def query_package( | |
it finds a package. If no package is found, an error will be raised. | ||
fetch_kwargs | ||
Keyword arguments to pass to the fetch function. | ||
strategy | ||
Index strategy to use when querying multiple indexes. The default is "first-index". | ||
Valid values are: "first-index", "unsafe-first-match", and "unsafe-best-match". | ||
""" | ||
|
||
_fetch_kwargs = fetch_kwargs.copy() if fetch_kwargs else {} | ||
|
@@ -303,36 +308,101 @@ async def query_package( | |
|
||
index_urls = [PYPI_URL if url == PYPI else url for url in index_urls] | ||
|
||
for url in index_urls: | ||
logger.debug("Looping through index urls: %r", url) | ||
if _contain_placeholder(url): | ||
url = url.format(package_name=name) | ||
logger.debug("Formatting url with package name : %r", url) | ||
else: | ||
url = f"{url}/{name}/" | ||
logger.debug("Url has no placeholder, appending package name : %r", url) | ||
try: | ||
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs) | ||
except HttpStatusError as e: | ||
if e.status_code == 404: | ||
logger.debug("NotFound (404) for %r, trying next index.", url) | ||
projects_info = [] | ||
|
||
# With "first-index" strategy, we'll return the first match we find | ||
# without checking the other indexes at all. | ||
if strategy == "first-index": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Each strategy duplicates the same logic, so I think we can split this into multiple functions and extract the common parts. |
||
for url in index_urls: | ||
logger.debug("Looping through index urls: %r", url) | ||
if _contain_placeholder(url): | ||
url = url.format(package_name=name) | ||
logger.debug("Formatting url with package name : %r", url) | ||
else: | ||
url = f"{url}/{name}/" | ||
logger.debug("Url has no placeholder, appending package name : %r", url) | ||
try: | ||
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs) | ||
except HttpStatusError as e: | ||
if e.status_code == 404: | ||
logger.debug("NotFound (404) for %r, trying next index.", url) | ||
continue | ||
logger.debug( | ||
"Error fetching %r (%s), trying next index.", url, e.status_code | ||
) | ||
raise | ||
|
||
content_type = headers.get("content-type", "").lower() | ||
try: | ||
base_url = urlunparse(urlparse(url)._replace(path="")) | ||
parser = _select_parser(content_type, name, index_base_url=base_url) | ||
except ValueError as e: | ||
raise ValueError(f"Error trying to decode url: {url}") from e | ||
return parser(metadata) | ||
# With "unsafe-first-match" or "unsafe-best-match", we need to check all indexes | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is still better to check the strategy string, as people might pass some strange value here. |
||
for url in index_urls: | ||
logger.debug("Looping through index urls: %r", url) | ||
if _contain_placeholder(url): | ||
url = url.format(package_name=name) | ||
logger.debug("Formatting url with package name : %r", url) | ||
else: | ||
url = f"{url}/{name}/" | ||
logger.debug("Url has no placeholder, appending package name : %r", url) | ||
try: | ||
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs) | ||
except HttpStatusError as e: | ||
if e.status_code == 404: | ||
logger.debug("NotFound (404) for %r, trying next index.", url) | ||
continue | ||
logger.debug( | ||
"Error fetching %r (%s), trying next index.", url, e.status_code | ||
) | ||
continue # try the next index instead of raising | ||
|
||
content_type = headers.get("content-type", "").lower() | ||
try: | ||
base_url = urlunparse(urlparse(url)._replace(path="")) | ||
parser = _select_parser(content_type, name, index_base_url=base_url) | ||
projects_info.append(parser(metadata)) | ||
except ValueError: | ||
# Just log and continue with the next index | ||
msg = f"Error trying to decode url: {url}" | ||
logger.debug(msg) | ||
continue | ||
logger.debug( | ||
"Error fetching %r (%s), trying next index.", url, e.status_code | ||
) | ||
raise | ||
|
||
content_type = headers.get("content-type", "").lower() | ||
try: | ||
base_url = urlunparse(urlparse(url)._replace(path="")) | ||
if not projects_info: | ||
raise ValueError( | ||
f"Can't fetch metadata for '{name}'. " | ||
"Please make sure you have entered a correct package name " | ||
"and correctly specified index_urls (if you changed them)." | ||
) | ||
|
||
parser = _select_parser(content_type, name, index_base_url=base_url) | ||
except ValueError as e: | ||
raise ValueError(f"Error trying to decode url: {url}") from e | ||
return parser(metadata) | ||
else: | ||
raise ValueError( | ||
f"Can't fetch metadata for '{name}'. " | ||
"Please make sure you have entered a correct package name " | ||
"and correctly specified index_urls (if you changed them)." | ||
) | ||
# For "unsafe-first-match", return the first project that was fetched successfully (version compatibility is not checked here) | ||
if strategy == "unsafe-first-match": | ||
return projects_info[0] | ||
Comment on lines
+382
to
+383
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it the right implementation? This returns the first fetched project whether or not it has a compatible version: the compatibility check is not handled in this function (at least in the current implementation). So I think to support this strategy, we'll need to modify the Transaction as well. |
||
|
||
# For "unsafe-best-match", we merge information from all indexes. | ||
merged_project = projects_info[0] | ||
for project in projects_info[1:]: | ||
for version, wheels in project.releases.items(): | ||
if version not in merged_project.releases: | ||
merged_project.releases[version] = wheels | ||
else: | ||
# Extend the existing wheels generator with new wheels | ||
# This is a bit tricky with generators, so we'll convert | ||
# to lists temporarily | ||
existing_wheels = list(merged_project.releases[version]) | ||
new_wheels = list(wheels) | ||
merged_project.releases[version] = ( | ||
wheel for wheel in existing_wheels + new_wheels | ||
) | ||
|
||
return merged_project | ||
|
||
# If we get here, we didn't find the package in any index | ||
raise ValueError( | ||
f"Can't fetch metadata for '{name}'. " | ||
"Please make sure you have entered a correct package name " | ||
"and correctly specified index_urls (if you changed them)." | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,8 @@ def __init__(self, compat: type[CompatibilityLayer] | None = None) -> None: | |
compat = compatibility_layer | ||
|
||
self.index_urls = package_index.DEFAULT_INDEX_URLS[:] | ||
self.extra_index_urls: list[str] = [] | ||
self.index_strategy = "first-index" # default strategy | ||
self.compat_layer: type[CompatibilityLayer] = compat | ||
self.constraints: list[str] = [] | ||
|
||
|
@@ -47,6 +49,8 @@ async def install( | |
pre: bool = False, | ||
index_urls: list[str] | str | None = None, | ||
*, | ||
extra_index_urls: list[str] | str | None = None, | ||
index_strategy: str | None = None, | ||
constraints: list[str] | None = None, | ||
verbose: bool | int | None = None, | ||
) -> None: | ||
|
@@ -132,6 +136,29 @@ async def install( | |
- If a list of URLs is provided, micropip will try each URL in order until \ | ||
it finds a package. If no package is found, an error will be raised. | ||
|
||
extra_index_urls: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure if we need to separate `index_urls` and `extra_index_urls`. How about exposing an API to get the current index URLs for micropip instead? For example, `micropip.install("...", extra_index_urls=["A", "B", "C"])` can be replaced with: current_index_urls = micropip.get_index_urls()
micropip.install("...", index_urls=current_index_urls + ["A", "B", "C"]) |
||
|
||
A list of URLs or a single URL to use as additional package indexes when looking | ||
up packages. Unlike `index_urls`, these are used in addition to the default | ||
indexes, not instead of them. This is useful for finding packages that may not | ||
be available in the main package index that is queried by `index_urls`. | ||
|
||
- The format and behaviour of each URL is the same as for `index_urls`. | ||
|
||
index_strategy: | ||
|
||
Determines how package versions are selected when they appear in multiple indexes: | ||
|
||
- ``first-index`` (default): Search for each package across all indexes, limiting \ | ||
the candidate versions to those present in the first index that contains the package. | ||
|
||
- ``unsafe-first-match``: Search for each package across all indexes, but prefer \ | ||
the first index with a compatible version, even if newer versions are available \ | ||
on other indexes. | ||
|
||
- ``unsafe-best-match``: Search for each package across all indexes, and select \ | ||
the best version from the combined set of candidate versions (pip's default). | ||
|
||
constraints: | ||
|
||
A list of requirements with versions/URLs which will be used only if | ||
|
@@ -149,6 +176,26 @@ async def install( | |
with setup_logging().ctx_level(verbose) as logger: | ||
if index_urls is None: | ||
index_urls = self.index_urls | ||
base_index_urls = self.index_urls | ||
else: | ||
base_index_urls = ( | ||
index_urls if isinstance(index_urls, list) else [index_urls] | ||
) | ||
|
||
if extra_index_urls is None: | ||
extra_urls = self.extra_index_urls | ||
else: | ||
extra_urls = ( | ||
extra_index_urls | ||
if isinstance(extra_index_urls, list) | ||
else [extra_index_urls] | ||
) | ||
|
||
combined_index_urls = base_index_urls + extra_urls | ||
|
||
strategy = ( | ||
index_strategy if index_strategy is not None else self.index_strategy | ||
) | ||
|
||
if constraints is None: | ||
constraints = self.constraints | ||
|
@@ -177,8 +224,9 @@ async def install( | |
pre=pre, | ||
fetch_kwargs=fetch_kwargs, | ||
verbose=verbose, | ||
index_urls=index_urls, | ||
index_urls=combined_index_urls, | ||
constraints=constraints, | ||
index_strategy=strategy, | ||
) | ||
await transaction.gather_requirements(requirements) | ||
|
||
|
@@ -232,6 +280,60 @@ async def install( | |
|
||
importlib.invalidate_caches() | ||
|
||
def set_extra_index_urls(self, urls: List[str] | str): # noqa: UP006 | ||
""" | ||
Set the extra index URLs to use when looking up packages. | ||
|
||
These URLs are used in addition to the default index URLs, not instead of them. | ||
This is useful for finding packages that may not be available in the main | ||
package index. | ||
|
||
- The index URL should support the \ | ||
`JSON API <https://warehouse.pypa.io/api-reference/json/>`__ . | ||
|
||
- The index URL may contain the placeholder {package_name} which will be \ | ||
replaced with the package name when looking up a package. If it does not \ | ||
contain the placeholder, the package name will be appended to the URL. | ||
|
||
Parameters | ||
---------- | ||
urls | ||
A list of URLs or a single URL to use as extra package indexes. | ||
""" | ||
|
||
if isinstance(urls, str): | ||
urls = [urls] | ||
|
||
self.extra_index_urls = urls[:] | ||
|
||
def set_index_strategy(self, strategy: str): | ||
""" | ||
Set the index strategy to use when resolving packages from multiple indexes. | ||
|
||
Parameters | ||
---------- | ||
strategy | ||
The index strategy to use. Valid values are: | ||
|
||
- ``first-index``: Search for each package across all indexes, limiting \ | ||
the candidate versions to those present in the first index that contains the package. | ||
|
||
- ``unsafe-first-match``: Search for each package across all indexes, but prefer \ | ||
the first index with a compatible version, even if newer versions are available \ | ||
on other indexes. | ||
|
||
- ``unsafe-best-match``: Search for each package across all indexes, and select \ | ||
the best version from the combined set of candidate versions (pip's default). | ||
""" | ||
valid_strategies = ["first-index", "unsafe-first-match", "unsafe-best-match"] | ||
if strategy not in valid_strategies: | ||
raise ValueError( | ||
f"Invalid index strategy: {strategy}. " | ||
f"Valid strategies are: {', '.join(valid_strategies)}" | ||
) | ||
|
||
self.index_strategy = strategy | ||
|
||
def list_packages(self) -> PackageDict: | ||
"""Get the dictionary of installed packages. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For better typing, how about using
Literal
type?