Skip to content

Fix uncached downloads #699

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
11 changes: 6 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,16 @@ jobs:
if: matrix.is-dev-version
- run: ruff check
if: matrix.is-dev-version
- run: pyright --version
- name: Run Pyright
if: matrix.is-dev-version
- run: pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }}
if: matrix.is-dev-version
- run: pytest
run: |
pyright --version
pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }}
- run: pytest --cov bioimageio --cov-report xml --cov-append --capture no
env:
BIOIMAGEIO_CACHE_PATH: bioimageio_cache
RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }}
- run: pytest --cov-append scripts # also test docstrings in scripts for dev-version
- run: pytest --cov bioimageio --cov-report xml --cov-append --capture no scripts # also test docstrings in scripts for dev-version
if: ${{matrix.is-dev-version}}
env:
BIOIMAGEIO_CACHE_PATH: bioimageio_cache
Expand Down
26 changes: 21 additions & 5 deletions bioimageio/spec/_internal/_settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
from pathlib import Path
from typing import Optional, Union

import pooch # pyright: ignore [reportMissingTypeStubs]
from pydantic import Field
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing_extensions import Annotated

Expand All @@ -20,6 +21,11 @@ class Settings(BaseSettings, extra="ignore"):
cache_path: Path = pooch.os_cache("bioimageio")
"""bioimageio cache location"""

@field_validator("cache_path", mode="after")
@classmethod
def _expand_user(cls, value: Path):
    """Return `value` with a leading ``~``/``~user`` replaced by the home directory.

    Runs after pydantic has validated `cache_path` as a `Path`, so that a
    cache location given as e.g. ``~/my_cache`` ends up as an expanded path.
    """
    # go through `os.path.expanduser` on the string form of the path
    expanded = os.path.expanduser(str(value))
    return Path(expanded)

collection_http_pattern: str = (
"https://hypha.aicell.io/bioimage-io/artifacts/{bioimageio_id}/files/rdf.yaml"
)
Expand All @@ -41,19 +47,29 @@ class Settings(BaseSettings, extra="ignore"):
)
"""URL to bioimageio id_map_draft.json to resolve draft IDs ending with '/draft'."""

perform_io_checks: bool = True
"""Whether or not to perform validation that requires file IO,
e.g. downloading remote files.

Existence of any local absolute file paths is still being checked."""

resolve_draft: bool = True
"""Flag to resolve draft resource versions following the pattern
<resource id>/draft.

Note that anyone may stage a new draft and that such a draft version
may not have been reviewed yet.
Set this flag to False to avoid this potential security risk
and disallow loading draft versions."""

perform_io_checks: bool = True
"""Whether or not to perform validation that requires file IO,
e.g. downloading remote files.
memory_limit_per_uncached_file: int = 1024**2
"""Maximum size per uncached file in bytes to keep in memory.

Existence of any local absolute file paths is still being checked."""
Uncached files larger than this size will be written to a temporary file.

Note that file downloads with a known SHA-256 hash will be cached to **cache_path**
regardless of size, unless `ValidationContext.disable_cache` is true.
"""

log_warnings: bool = True
"""Log validation warnings to console."""
Expand Down
14 changes: 12 additions & 2 deletions bioimageio/spec/_internal/common_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@
WarningEntry,
)
from .field_warning import issue_warning
from .io import BioimageioYamlContent
from .io import (
BioimageioYamlContent,
FileDescr,
YamlValue,
extract_file_descrs,
populate_cache,
)
from .io_basics import BIOIMAGEIO_YAML, AbsoluteFilePath, FileName, ZipPath
from .io_utils import write_content_to_zip
from .node import Node
Expand Down Expand Up @@ -208,7 +214,11 @@ def load(
) -> Union[Self, InvalidDescr]:
"""factory method to create a resource description object"""
context = context or get_validation_context()
assert isinstance(data, dict)
if context.perform_io_checks:
# download all cachable files
file_descrs = extract_file_descrs(data)
populate_cache(file_descrs)

with context:
rd, errors, val_warnings = cls._load_impl(deepcopy(data))

Expand Down
Loading
Loading