Skip to content

Commit 48ccefc

Browse files
MAINT Store data in bytes not io.BytesIO (#91)
* Store data in bytes instead of io
* Fix lint
* Fix typo
* Address reviews
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Fix test package config
* Use normalized name
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f975747 commit 48ccefc

8 files changed

+81
-88
lines changed

micropip/_compat_in_pyodide.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from io import BytesIO
2-
from typing import IO
1+
from pathlib import Path
32
from urllib.parse import urlparse
43

54
from pyodide._package_loader import get_dynlibs
@@ -20,15 +19,14 @@
2019
# Otherwise, this is pytest test collection so let it go.
2120

2221

23-
async def fetch_bytes(url: str, kwargs: dict[str, str]) -> IO[bytes]:
22+
async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes:
2423
parsed_url = urlparse(url)
2524
if parsed_url.scheme == "emfs":
26-
return open(parsed_url.path, "rb")
25+
return Path(parsed_url.path).read_bytes()
2726
if parsed_url.scheme == "file":
28-
result_bytes = (await loadBinaryFile(parsed_url.path)).to_bytes()
29-
else:
30-
result_bytes = await (await pyfetch(url, **kwargs)).bytes()
31-
return BytesIO(result_bytes)
27+
return (await loadBinaryFile(parsed_url.path)).to_bytes()
28+
29+
return await (await pyfetch(url, **kwargs)).bytes()
3230

3331

3432
async def fetch_string_and_headers(

micropip/_compat_not_in_pyodide.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import re
2-
from io import BytesIO
32
from pathlib import Path
43
from typing import IO, Any
54

@@ -20,9 +19,8 @@ def _fetch(url: str, kwargs: dict[str, Any]) -> addinfourl:
2019
return urlopen(Request(url, **kwargs))
2120

2221

23-
async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> IO[bytes]:
24-
response = _fetch(url, kwargs=kwargs)
25-
return BytesIO(response.read())
22+
async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes:
23+
return _fetch(url, kwargs=kwargs).read()
2624

2725

2826
async def fetch_string_and_headers(

micropip/wheelinfo.py

+20-26
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import asyncio
22
import hashlib
3+
import io
34
import json
45
import zipfile
56
from dataclasses import dataclass
67
from pathlib import Path
7-
from typing import IO, Any
8+
from typing import Any
89
from urllib.parse import ParseResult, urlparse
910

1011
from packaging.requirements import Requirement
@@ -39,7 +40,7 @@ class WheelInfo:
3940

4041
# Fields below are only available after downloading the wheel, i.e. after calling `download()`.
4142

42-
_data: IO[bytes] | None = None # Wheel file contents.
43+
_data: bytes | None = None # Wheel file contents.
4344
_metadata: Metadata | None = None # Wheel metadata.
4445
_requires: list[Requirement] | None = None # List of requirements.
4546

@@ -109,7 +110,7 @@ async def install(self, target: Path) -> None:
109110
raise RuntimeError(
110111
"Micropip internal error: attempted to install wheel before downloading it?"
111112
)
112-
self._validate()
113+
_validate_sha256_checksum(self._data, self.sha256)
113114
self._extract(target)
114115
await self._load_libraries(target)
115116
self._set_installer()
@@ -119,7 +120,7 @@ async def download(self, fetch_kwargs: dict[str, Any]):
119120
return
120121

121122
self._data = await self._fetch_bytes(fetch_kwargs)
122-
with zipfile.ZipFile(self._data) as zf:
123+
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
123124
metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO
124125
self._metadata = Metadata(zipfile.Path(zf, metadata_path))
125126

@@ -153,20 +154,9 @@ async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]):
153154
"Check if the server is sending the correct 'Access-Control-Allow-Origin' header."
154155
) from e
155156

156-
def _validate(self):
157-
if self.sha256 is None:
158-
# No checksums available, e.g. because installing
159-
# from a different location than PyPI.
160-
return
161-
162-
assert self._data
163-
sha256_actual = _generate_package_hash(self._data)
164-
if sha256_actual != self.sha256:
165-
raise ValueError("Contents don't match hash")
166-
167157
def _extract(self, target: Path) -> None:
168158
assert self._data
169-
with zipfile.ZipFile(self._data) as zf:
159+
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
170160
zf.extractall(target)
171161
self._dist_info = target / wheel_dist_info_dir(zf, self.name)
172162

@@ -198,16 +188,20 @@ async def _load_libraries(self, target: Path) -> None:
198188
TODO: integrate with pyodide's dynamic library loading mechanism.
199189
"""
200190
assert self._data
201-
dynlibs = get_dynlibs(self._data, ".whl", target)
191+
dynlibs = get_dynlibs(io.BytesIO(self._data), ".whl", target)
202192
await asyncio.gather(*map(lambda dynlib: loadDynlib(dynlib, False), dynlibs))
203193

204194

205-
def _generate_package_hash(data: IO[bytes]) -> str:
206-
"""
207-
Generate a SHA256 hash of the package data.
208-
"""
209-
sha256_hash = hashlib.sha256()
210-
data.seek(0)
211-
while chunk := data.read(4096):
212-
sha256_hash.update(chunk)
213-
return sha256_hash.hexdigest()
195+
def _validate_sha256_checksum(data: bytes, expected: str | None = None) -> None:
196+
if expected is None:
197+
# No checksums available, e.g. because installing
198+
# from a different location than PyPI.
199+
return
200+
201+
actual = _generate_package_hash(data)
202+
if actual != expected:
203+
raise RuntimeError(f"Invalid checksum: expected {expected}, got {actual}")
204+
205+
206+
def _generate_package_hash(data: bytes) -> str:
207+
return hashlib.sha256(data).hexdigest()

tests/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def write_file(filename, contents):
257257

258258
tmp.seek(0)
259259

260-
return tmp
260+
return tmp.read()
261261

262262

263263
@pytest.fixture

tests/test_data/test_wheel_uninstall/pyproject.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[project]
2-
name = "test_wheel_uninstall"
2+
name = "test-wheel-uninstall"
33
description = "Test wheel uninstall"
44
requires-python = ">=3.10"
55
version = "1.0.0"
66

77
[tool.setuptools]
8-
packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall"]
8+
packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall", "deep.data"]
99
py-modules = ["top_level"]
1010

1111
[tool.setuptools.package-data]

tests/test_install.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -370,8 +370,6 @@ async def run_test(selenium, url, name, version):
370370

371371
@pytest.mark.asyncio
372372
async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
373-
from io import BytesIO
374-
375373
mock_server_fake_package = mock_package_index_json_api(
376374
pkgs=["fake-pkg-micropip-test"]
377375
)
@@ -381,7 +379,7 @@ async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
381379
async def _mock_fetch_bytes(url, *args):
382380
nonlocal _wheel_url
383381
_wheel_url = url
384-
return BytesIO(b"fake wheel")
382+
return b"fake wheel"
385383

386384
from micropip import wheelinfo
387385

tests/test_uninstall.py

+48-22
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
import pytest
44
from pytest_pyodide import run_in_pyodide, spawn_web_server
55
from conftest import SNOWBALL_WHEEL, TEST_WHEEL_DIR
6-
from packaging.utils import parse_wheel_filename
6+
from packaging.utils import parse_wheel_filename, canonicalize_name
77

88
TEST_PACKAGE_NAME = "test_wheel_uninstall"
9+
TEST_PACKAGE_NAME_NORMALIZED = canonicalize_name(TEST_PACKAGE_NAME)
910

1011

1112
@pytest.fixture(scope="module")
@@ -19,15 +20,15 @@ def test_wheel_url(test_wheel_path):
1920

2021
def test_basic(selenium_standalone_micropip, test_wheel_url):
2122
@run_in_pyodide()
22-
async def run(selenium, pkg_name, wheel_url):
23+
async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
2324
import importlib.metadata
2425
import sys
2526

2627
import micropip
2728

2829
await micropip.install(wheel_url)
2930

30-
assert pkg_name in micropip.list()
31+
assert pkg_name_normalized in micropip.list()
3132
assert pkg_name not in sys.modules
3233

3334
__import__(pkg_name)
@@ -52,7 +53,12 @@ async def run(selenium, pkg_name, wheel_url):
5253
# 3. Check that the module is not available with micropip.list()
5354
assert pkg_name not in micropip.list()
5455

55-
run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
56+
run(
57+
selenium_standalone_micropip,
58+
TEST_PACKAGE_NAME,
59+
TEST_PACKAGE_NAME_NORMALIZED,
60+
test_wheel_url,
61+
)
5662

5763

5864
def test_files(selenium_standalone_micropip, test_wheel_url):
@@ -61,13 +67,13 @@ def test_files(selenium_standalone_micropip, test_wheel_url):
6167
"""
6268

6369
@run_in_pyodide()
64-
async def run(selenium, pkg_name, wheel_url):
70+
async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
6571
import importlib.metadata
6672

6773
import micropip
6874

6975
await micropip.install(wheel_url)
70-
assert pkg_name in micropip.list()
76+
assert pkg_name_normalized in micropip.list()
7177

7278
dist = importlib.metadata.distribution(pkg_name)
7379
files = dist.files
@@ -86,7 +92,12 @@ async def run(selenium, pkg_name, wheel_url):
8692

8793
assert not dist._path.is_dir(), f"{dist._path} still exists after removal"
8894

89-
run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
95+
run(
96+
selenium_standalone_micropip,
97+
TEST_PACKAGE_NAME,
98+
TEST_PACKAGE_NAME_NORMALIZED,
99+
test_wheel_url,
100+
)
90101

91102

92103
def test_install_again(selenium_standalone_micropip, test_wheel_url):
@@ -95,20 +106,20 @@ def test_install_again(selenium_standalone_micropip, test_wheel_url):
95106
"""
96107

97108
@run_in_pyodide()
98-
async def run(selenium, pkg_name, wheel_url):
109+
async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
99110
import sys
100111

101112
import micropip
102113

103114
await micropip.install(wheel_url)
104115

105-
assert pkg_name in micropip.list()
116+
assert pkg_name_normalized in micropip.list()
106117

107118
__import__(pkg_name)
108119

109120
micropip.uninstall(pkg_name)
110121

111-
assert pkg_name not in micropip.list()
122+
assert pkg_name_normalized not in micropip.list()
112123

113124
del sys.modules[pkg_name]
114125

@@ -121,10 +132,15 @@ async def run(selenium, pkg_name, wheel_url):
121132

122133
await micropip.install(wheel_url)
123134

124-
assert pkg_name in micropip.list()
135+
assert pkg_name_normalized in micropip.list()
125136
__import__(pkg_name)
126137

127-
run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
138+
run(
139+
selenium_standalone_micropip,
140+
TEST_PACKAGE_NAME,
141+
TEST_PACKAGE_NAME_NORMALIZED,
142+
test_wheel_url,
143+
)
128144

129145

130146
def test_warning_not_installed(selenium_standalone_micropip):
@@ -156,7 +172,7 @@ def test_warning_file_removed(selenium_standalone_micropip, test_wheel_url):
156172
"""
157173

158174
@run_in_pyodide()
159-
async def run(selenium, pkg_name, wheel_url):
175+
async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
160176
from importlib.metadata import distribution
161177
import micropip
162178
import contextlib
@@ -165,17 +181,17 @@ async def run(selenium, pkg_name, wheel_url):
165181
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
166182
await micropip.install(wheel_url)
167183

168-
assert pkg_name in micropip.list()
184+
assert pkg_name_normalized in micropip.list()
169185

170-
dist = distribution(pkg_name)
186+
dist = distribution(pkg_name_normalized)
171187
files = dist.files
172188
file1 = files[0]
173189
file2 = files[1]
174190

175191
file1.locate().unlink()
176192
file2.locate().unlink()
177193

178-
micropip.uninstall(pkg_name)
194+
micropip.uninstall(pkg_name_normalized)
179195

180196
captured = buf.getvalue()
181197
logs = captured.strip().split("\n")
@@ -184,7 +200,12 @@ async def run(selenium, pkg_name, wheel_url):
184200
assert "does not exist" in logs[-1]
185201
assert "does not exist" in logs[-2]
186202

187-
run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
203+
run(
204+
selenium_standalone_micropip,
205+
TEST_PACKAGE_NAME,
206+
TEST_PACKAGE_NAME_NORMALIZED,
207+
test_wheel_url,
208+
)
188209

189210

190211
def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url):
@@ -193,28 +214,33 @@ def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url):
193214
"""
194215

195216
@run_in_pyodide()
196-
async def run(selenium, pkg_name, wheel_url):
217+
async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
197218
from importlib.metadata import distribution
198219
import micropip
199220
import contextlib
200221
import io
201222

202223
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
203224
await micropip.install(wheel_url)
204-
assert pkg_name in micropip.list()
225+
assert pkg_name_normalized in micropip.list()
205226

206-
pkg_dir = distribution(pkg_name)._path.parent / "deep"
227+
pkg_dir = distribution(pkg_name_normalized)._path.parent / "deep"
207228
(pkg_dir / "extra-file.txt").touch()
208229

209-
micropip.uninstall(pkg_name)
230+
micropip.uninstall(pkg_name_normalized)
210231

211232
captured = buf.getvalue()
212233
logs = captured.strip().split("\n")
213234

214235
assert len(logs) == 1
215236
assert "is not empty after uninstallation" in logs[0]
216237

217-
run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
238+
run(
239+
selenium_standalone_micropip,
240+
TEST_PACKAGE_NAME,
241+
TEST_PACKAGE_NAME_NORMALIZED,
242+
test_wheel_url,
243+
)
218244

219245

220246
def test_pyodide_repodata(selenium_standalone_micropip):

0 commit comments

Comments
 (0)