6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Added the `extra_index_urls` and `index_strategy` parameters to `micropip.install()`
to support lookup and installation from multiple package indexes.
[#224](https://github.com/pyodide/micropip/pull/224)

### Fixed

- micropip now respects the `yanked` flag in the PyPI Simple API.
1 change: 1 addition & 0 deletions micropip/__init__.py
@@ -11,6 +11,7 @@

install = _package_manager_singleton.install
set_index_urls = _package_manager_singleton.set_index_urls
set_extra_index_urls = _package_manager_singleton.set_extra_index_urls
list = _package_manager_singleton.list_packages
set_constraints = _package_manager_singleton.set_constraints
freeze = _package_manager_singleton.freeze
132 changes: 101 additions & 31 deletions micropip/package_index.py
@@ -268,10 +268,12 @@ def _select_parser(
raise ValueError(f"Unsupported content type: {content_type}")


async def query_package(
# TODO: reduce mccabe complexity
async def query_package( # noqa: C901
name: str,
index_urls: list[str] | str,
fetch_kwargs: dict[str, Any] | None = None,
strategy: str = "first-index",
Review comment (Member): For better typing, how about using a `Literal` type?
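A sketch of what that could look like (the `IndexStrategy` alias is hypothetical, not part of this PR):

```python
from typing import Literal

# Hypothetical alias, not in this PR: lets type checkers reject anything
# other than the three supported strategy strings.
IndexStrategy = Literal["first-index", "unsafe-first-match", "unsafe-best-match"]


def check_strategy(strategy: IndexStrategy = "first-index") -> IndexStrategy:
    # Illustrative only: mypy/pyright would flag check_strategy("best-match").
    return strategy
```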

) -> ProjectInfo:
"""
Query for a package from package indexes.
@@ -286,6 +288,9 @@ async def query_package(
it finds a package. If no package is found, an error will be raised.
fetch_kwargs
Keyword arguments to pass to the fetch function.
strategy
Index strategy to use when querying multiple indexes. The default is "first-index".
Valid values are: "first-index", "unsafe-first-match", and "unsafe-best-match".
"""

_fetch_kwargs = fetch_kwargs.copy() if fetch_kwargs else {}
@@ -303,36 +308,101 @@

index_urls = [PYPI_URL if url == PYPI else url for url in index_urls]

for url in index_urls:
logger.debug("Looping through index urls: %r", url)
if _contain_placeholder(url):
url = url.format(package_name=name)
logger.debug("Formatting url with package name : %r", url)
else:
url = f"{url}/{name}/"
logger.debug("Url has no placeholder, appending package name : %r", url)
try:
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs)
except HttpStatusError as e:
if e.status_code == 404:
logger.debug("NotFound (404) for %r, trying next index.", url)
projects_info = []

# With "first-index" strategy, we'll return the first match we find
# without checking the other indexes at all.
if strategy == "first-index":
Review comment (Member): Each strategy has duplicate logic, so I think we can separate this into multiple functions, extracting out the common parts.
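A sketch of how the shared part might be factored out, assuming it lives in `micropip/package_index.py` next to the existing helpers (`_contain_placeholder`, `fetch_string_and_headers`, `_select_parser`); the helper name is hypothetical:

```python
from typing import Any
from urllib.parse import urlparse, urlunparse


async def _fetch_project_info(
    url: str, name: str, fetch_kwargs: dict[str, Any]
) -> ProjectInfo | None:
    """Hypothetical helper: fetch and parse a single index, or return None on 404."""
    if _contain_placeholder(url):
        url = url.format(package_name=name)
    else:
        url = f"{url}/{name}/"
    try:
        metadata, headers = await fetch_string_and_headers(url, fetch_kwargs)
    except HttpStatusError as e:
        if e.status_code == 404:
            return None
        # Note: the two branches currently differ here (raise vs. continue),
        # so the non-404 behaviour would need to become a parameter.
        raise
    content_type = headers.get("content-type", "").lower()
    base_url = urlunparse(urlparse(url)._replace(path=""))
    parser = _select_parser(content_type, name, index_base_url=base_url)
    return parser(metadata)
```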

for url in index_urls:
logger.debug("Looping through index urls: %r", url)
if _contain_placeholder(url):
url = url.format(package_name=name)
logger.debug("Formatting url with package name : %r", url)
else:
url = f"{url}/{name}/"
logger.debug("Url has no placeholder, appending package name : %r", url)
try:
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs)
except HttpStatusError as e:
if e.status_code == 404:
logger.debug("NotFound (404) for %r, trying next index.", url)
continue
logger.debug(
"Error fetching %r (%s), trying next index.", url, e.status_code
)
raise

content_type = headers.get("content-type", "").lower()
try:
base_url = urlunparse(urlparse(url)._replace(path=""))
parser = _select_parser(content_type, name, index_base_url=base_url)
except ValueError as e:
raise ValueError(f"Error trying to decode url: {url}") from e
return parser(metadata)
# With "unsafe-first-match" or "unsafe-best-match", we need to check all indexes
else:
Review comment (Member): I think it is still better to check the strategy string, as people might pass some strange value here. Or, we could check the value early and fail fast.
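A minimal fail-fast check along those lines (placement and wording are assumptions, not part of the PR as shown; it mirrors the validation that `set_index_strategy` in `package_manager.py` already does):

```python
VALID_STRATEGIES = ("first-index", "unsafe-first-match", "unsafe-best-match")


def validate_strategy(strategy: str) -> None:
    # Raise immediately rather than silently treating an unknown value
    # as "anything other than first-index".
    if strategy not in VALID_STRATEGIES:
        raise ValueError(
            f"Invalid index strategy: {strategy!r}. "
            f"Valid strategies are: {', '.join(VALID_STRATEGIES)}"
        )
```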

for url in index_urls:
logger.debug("Looping through index urls: %r", url)
if _contain_placeholder(url):
url = url.format(package_name=name)
logger.debug("Formatting url with package name : %r", url)
else:
url = f"{url}/{name}/"
logger.debug("Url has no placeholder, appending package name : %r", url)
try:
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs)
except HttpStatusError as e:
if e.status_code == 404:
logger.debug("NotFound (404) for %r, trying next index.", url)
continue
logger.debug(
"Error fetching %r (%s), trying next index.", url, e.status_code
)
continue # try the next index instead of raising

content_type = headers.get("content-type", "").lower()
try:
base_url = urlunparse(urlparse(url)._replace(path=""))
parser = _select_parser(content_type, name, index_base_url=base_url)
projects_info.append(parser(metadata))
except ValueError:
# Just log and continue with the next index
msg = f"Error trying to decode url: {url}"
logger.debug(msg)
continue
logger.debug(
"Error fetching %r (%s), trying next index.", url, e.status_code
)
raise

content_type = headers.get("content-type", "").lower()
try:
base_url = urlunparse(urlparse(url)._replace(path=""))
if not projects_info:
raise ValueError(
f"Can't fetch metadata for '{name}'. "
"Please make sure you have entered a correct package name "
"and correctly specified index_urls (if you changed them)."
)

parser = _select_parser(content_type, name, index_base_url=base_url)
except ValueError as e:
raise ValueError(f"Error trying to decode url: {url}") from e
return parser(metadata)
else:
raise ValueError(
f"Can't fetch metadata for '{name}'. "
"Please make sure you have entered a correct package name "
"and correctly specified index_urls (if you changed them)."
)
# For "unsafe-first-match", return the first project that has matching versions
if strategy == "unsafe-first-match":
return projects_info[0]
Review comment (Member), on lines +382 to +383: Is this the right implementation? If `projects_info[0]` does not have a compatible version, I think we should fall back to the other indexes. The compatibility check is not handled in this function (at least in the current implementation), so to support this strategy we'll need to modify the Transaction as well.
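For reference, the fallback the reviewer describes might look roughly like this; it assumes the requirement's specifier is made available at this point, which the current PR does not do, so the helper is purely hypothetical:

```python
from packaging.requirements import Requirement


def _first_compatible(projects_info: list, req: Requirement):
    """Hypothetical: prefer the first index that has a version satisfying req."""
    for project in projects_info:
        if any(
            req.specifier.contains(version, prereleases=True)
            for version in project.releases
        ):
            return project
    # No index has a compatible release; fall back to the current behaviour.
    return projects_info[0]
```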


# For "unsafe-best-match", we merge information from all indexes.
merged_project = projects_info[0]
for project in projects_info[1:]:
for version, wheels in project.releases.items():
if version not in merged_project.releases:
merged_project.releases[version] = wheels
else:
# Extend the existing wheels generator with new wheels
# This is a bit tricky with generators, so we'll convert
# to lists temporarily
existing_wheels = list(merged_project.releases[version])
new_wheels = list(wheels)
merged_project.releases[version] = (
wheel for wheel in existing_wheels + new_wheels
)

return merged_project

# If we get here, we couldn't find the package in any index
raise ValueError(
f"Can't fetch metadata for '{name}'. "
"Please make sure you have entered a correct package name "
"and correctly specified index_urls (if you changed them)."
)
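To make the strategies concrete, a caller of the new `query_package` signature might exercise them like this (illustrative only: the index URLs are placeholders and the fetch layer still needs a Pyodide or compatibility-layer environment to actually run):

```python
import asyncio

from micropip.package_index import query_package


async def main():
    indexes = [
        "https://pypi.org/simple",
        "https://example.org/private-index/simple",  # placeholder URL
    ]

    # Stop at the first index that knows about the package at all.
    first = await query_package("snowballstemmer", indexes, strategy="first-index")

    # Query every index and merge the release lists before choosing.
    best = await query_package("snowballstemmer", indexes, strategy="unsafe-best-match")

    print(sorted(first.releases), sorted(best.releases))


asyncio.run(main())
```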
104 changes: 103 additions & 1 deletion micropip/package_manager.py
@@ -33,6 +33,8 @@ def __init__(self, compat: type[CompatibilityLayer] | None = None) -> None:
compat = compatibility_layer

self.index_urls = package_index.DEFAULT_INDEX_URLS[:]
self.extra_index_urls: list[str] = []
self.index_strategy = "first-index" # default strategy
self.compat_layer: type[CompatibilityLayer] = compat
self.constraints: list[str] = []

@@ -47,6 +49,8 @@ async def install(
pre: bool = False,
index_urls: list[str] | str | None = None,
*,
extra_index_urls: list[str] | str | None = None,
index_strategy: str | None = None,
constraints: list[str] | None = None,
verbose: bool | int | None = None,
) -> None:
@@ -132,6 +136,29 @@ async def install(
- If a list of URLs is provided, micropip will try each URL in order until \
it finds a package. If no package is found, an error will be raised.

extra_index_urls:
Review comment (Member): I am not sure we need a separate `extra_index_urls`, as we already support multiple `index_urls`. How about exposing an API to get the current index URLs for micropip instead? For example,

    micropip.install("...", extra_index_urls=["A", "B", "C"])

can be replaced with

    current_index_urls = micropip.get_index_urls()
    micropip.install("...", index_urls=current_index_urls + ["A", "B", "C"])


A list of URLs or a single URL to use as additional package indexes when looking
up packages. Unlike `index_urls`, these URLs are used in addition to the default
indexes rather than instead of them. This is useful for finding packages that are
not available in the main indexes queried via `index_urls`.

- The format and behaviour of each URL is the same as for `index_urls`.

index_strategy:

Determines how package versions are selected when they appear in multiple indexes:

- ``first-index`` (default): Search for each package across all indexes, limiting \
the candidate versions to those present in the first index that contains the package.

- ``unsafe-first-match``: Search for each package across all indexes, but prefer \
the first index with a compatible version, even if newer versions are available \
on other indexes.

- ``unsafe-best-match``: Search for each package across all indexes, and select \
the best version from the combined set of candidate versions (pip's default).

constraints:

A list of requirements with versions/URLs which will be used only if
@@ -149,6 +176,26 @@
with setup_logging().ctx_level(verbose) as logger:
if index_urls is None:
index_urls = self.index_urls
base_index_urls = self.index_urls
else:
base_index_urls = (
index_urls if isinstance(index_urls, list) else [index_urls]
)

if extra_index_urls is None:
extra_urls = self.extra_index_urls
else:
extra_urls = (
extra_index_urls
if isinstance(extra_index_urls, list)
else [extra_index_urls]
)

combined_index_urls = base_index_urls + extra_urls

strategy = (
index_strategy if index_strategy is not None else self.index_strategy
)

if constraints is None:
constraints = self.constraints
@@ -177,8 +224,9 @@ async def install(
pre=pre,
fetch_kwargs=fetch_kwargs,
verbose=verbose,
index_urls=index_urls,
index_urls=combined_index_urls,
constraints=constraints,
index_strategy=strategy,
)
await transaction.gather_requirements(requirements)

@@ -232,6 +280,60 @@ async def install(

importlib.invalidate_caches()

def set_extra_index_urls(self, urls: List[str] | str): # noqa: UP006
"""
Set the extra index URLs to use when looking up packages.

These URLs are used in addition to the default index URLs, not instead of them.
This is useful for finding packages that may not be available in the main
package index.

- The index URL should support the \
`JSON API <https://warehouse.pypa.io/api-reference/json/>`__ .

- The index URL may contain the placeholder {package_name} which will be \
replaced with the package name when looking up a package. If it does not \
contain the placeholder, the package name will be appended to the URL.

Parameters
----------
urls
A list of URLs or a single URL to use as extra package indexes.
"""

if isinstance(urls, str):
urls = [urls]

self.extra_index_urls = urls[:]

def set_index_strategy(self, strategy: str):
"""
Set the index strategy to use when resolving packages from multiple indexes.

Parameters
----------
strategy
The index strategy to use. Valid values are:

- ``first-index``: Search for each package across all indexes, limiting \
the candidate versions to those present in the first index that contains the package.

- ``unsafe-first-match``: Search for each package across all indexes, but prefer \
the first index with a compatible version, even if newer versions are available \
on other indexes.

- ``unsafe-best-match``: Search for each package across all indexes, and select \
the best version from the combined set of candidate versions (pip's default).
"""
valid_strategies = ["first-index", "unsafe-first-match", "unsafe-best-match"]
if strategy not in valid_strategies:
raise ValueError(
f"Invalid index strategy: {strategy}. "
f"Valid strategies are: {', '.join(valid_strategies)}"
)

self.index_strategy = strategy

def list_packages(self) -> PackageDict:
"""Get the dictionary of installed packages.

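Putting the new manager-level knobs together, user-facing code would look roughly like this (a sketch of the intended API; the package name and index URL are placeholders, and note that `set_index_strategy` is not re-exported from `micropip/__init__.py` in this diff):

```python
import micropip

# Persistent configuration on the shared package manager.
micropip.set_extra_index_urls("https://example.org/simple/{package_name}/")
# Assumed access path, since only set_extra_index_urls is re-exported above.
micropip._package_manager_singleton.set_index_strategy("unsafe-best-match")

# Or override both per call.
await micropip.install(
    "snowballstemmer",
    extra_index_urls=["https://example.org/simple/{package_name}/"],
    index_strategy="first-index",
)
```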
11 changes: 9 additions & 2 deletions micropip/transaction.py
@@ -45,6 +45,7 @@ class Transaction:

verbose: bool | int | None = None
constraints: list[str] | None = None
index_strategy: str = "first-index"

def __post_init__(self) -> None:
# If index_urls is None, pyodide-lock.json have to be searched first.
@@ -231,11 +232,12 @@ async def _add_requirement_from_package_index(self, req: Requirement):
req.name,
self.index_urls,
self.fetch_kwargs,
strategy=self.index_strategy,
)

logger.debug("Transaction: got metadata %r for requirement %r", metadata, req)

wheel = find_wheel(metadata, req)
wheel = find_wheel(metadata, req, self.index_strategy)

logger.debug("Transaction: Selected wheel: %r", wheel)

@@ -319,12 +321,17 @@ async def add_wheel(
self.wheels.append(wheel)


def find_wheel(metadata: ProjectInfo, req: Requirement) -> WheelInfo:
def find_wheel(
metadata: ProjectInfo, req: Requirement, strategy: str = "first-index"
) -> WheelInfo:
"""Parse metadata to find the latest version of pure python wheel.
Parameters
----------
metadata : ProjectInfo
req : Requirement
strategy : str, optional
The index strategy to use when selecting among candidate package versions.
The default is "first-index".

Returns
-------
4 changes: 3 additions & 1 deletion tests/conftest.py
@@ -304,7 +304,9 @@ def add_pkg_version(
self.metadata_map[filename] = metadata
self.top_level_map[filename] = top_level

async def query_package(self, pkgname, index_urls, kwargs):
async def query_package(
self, pkgname, index_urls, extra_index_urls, strategy, **kwargs
):
from micropip.package_index import ProjectInfo

try: