Skip to content

Commit d8bab7f

Browse files
committed
Initial commit
0 parents  commit d8bab7f

File tree

11 files changed

+356
-0
lines changed

11 files changed

+356
-0
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
venv/
2+
__pycache__/
3+
.idea/
4+
*.egg-info/

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 Laurie Opperman
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# PyPI mirror
2+
Local PyPI mirror cache
3+
4+
## Installation
5+
```bash
6+
pip install pypi-mirror
7+
```
8+
9+
## Usage
10+
```bash
11+
FLASK_APP=pypi_mirror flask run
12+
```
13+
14+
```bash
15+
pip install --index-url http://127.0.0.1:5000/index/ simplejson
16+
```
17+
18+
### Environment variables
19+
* `PIP_INDEX_URL`: root index URL, default: https://pypi.org/simple/
20+
* `INDEX_TTL`: root index time-to-live (aka cache time-out) in seconds, default: 30
21+
minutes
22+
* `PIP_EXTRA_INDEX_URL`: extra index URLs (white-space separated)
23+
* `EXTRA_INDEX_TTL`: corresponding extra index times-to-live in seconds (white-space
24+
separated), default: 3 minutes

setup.cfg

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[metadata]
2+
name = pypi-mirror
3+
version = 0.1.0a0.dev0
4+
url = https://github.com/EpicWink/pypi-mirror
5+
author = Laurie O
6+
author_email = [email protected]
7+
license = MIT
8+
description = Local PyPI mirror cache
9+
long_description = file: README.md
10+
long_description_content_type = text/markdown
11+
keywords = pypi, index, mirror, cache
12+
classifiers =
13+
Environment :: Console
14+
Intended Audience :: Developers
15+
Programming Language :: Python :: 3 :: Only
16+
Natural Language :: English
17+
Operating System :: POSIX :: Linux
18+
Operating System :: Microsoft :: Windows
19+
20+
[options]
21+
# Runtime dependencies. `beautifulsoup4` provides the `bs4` module imported
# by `pypi_mirror.data` to parse upstream index pages.
install_requires =
    flask
    requests
    beautifulsoup4
24+
packages = find:
25+
package_dir =
26+
=src
27+
28+
[options.packages.find]
# The `find:` directive takes its search root from `where`; other keys in
# this section are ignored, which would leave the `src/` layout undiscovered.
where = src
30+
31+
[options.package_data]
32+
pypi_mirror = templates/*.html

setup.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Install script for ``pypi-mirror``."""
2+
3+
import setuptools
4+
5+
# Called without arguments: no metadata or options are passed from this script.
setuptools.setup()

src/pypi_mirror/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Local PyPI mirror cache."""
2+
3+
from . import routes
4+
from .config import app

src/pypi_mirror/config.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Local PyPI mirror cache."""
2+
3+
import os
4+
5+
import flask
6+
import jinja2
7+
8+
# Upstream index locations. The extra index URLs are white-space separated.
INDEX_URL = os.getenv("PIP_INDEX_URL", "https://pypi.org/simple/")
EXTRA_INDEX_URL = os.getenv("PIP_EXTRA_INDEX_URL", "")

# Cache times-to-live, in seconds. Values read from the environment are
# strings while the root default is an int. The extra-index default is one
# "180" entry per configured extra index URL, joined by single spaces.
INDEX_TTL = os.getenv("INDEX_TTL", 1800)
EXTRA_INDEX_TTL = os.getenv(
    "EXTRA_INDEX_TTL", " ".join(["180"] * len(EXTRA_INDEX_URL.split()))
)

# Flask application; templates are loaded from the installed package.
app = flask.Flask("pypi_mirror")
app.jinja_loader = jinja2.PackageLoader("pypi_mirror")

src/pypi_mirror/data.py

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
"""Local PyPI mirror cache."""
2+
3+
import os
4+
import re
5+
import time
6+
import shutil
7+
import tempfile
8+
import threading
9+
import collections
10+
import typing as t
11+
from urllib import parse as urllib_parse
12+
13+
import bs4
14+
import requests
15+
16+
from . import config
17+
18+
# Captures the value of a ``sha256=`` parameter that follows ``#`` or ``&``.
_sha_fragment_re = re.compile("[#&]sha256=([^&]*)")

# A distribution file link: its link text, its target URL and the SHA-256
# value taken from that URL (``None`` when the URL carries none).
File = collections.namedtuple("File", ["name", "url", "sha"])
20+
21+
22+
class NotFound(ValueError):
    """Raised when a requested package or file is not present in an index."""
24+
25+
26+
class _IndexCache:
    """Cache of a single upstream package index.

    The package listing and per-package file listings are kept in memory;
    downloaded files are written to a private temporary directory. Each
    cached item is considered fresh for ``ttl`` seconds after it was fetched.

    Args:
        index_url: URL of the upstream index root page.
        ttl: freshness window in seconds for every cached item.
    """

    def __init__(self, index_url: str, ttl: int):
        self.index_url = index_url
        self.ttl = ttl
        self._package_dir = tempfile.mkdtemp()
        self._index_t = None  # monotonic time of the last index fetch
        self._packages_t = {}  # package name -> time of last file-list fetch
        self._files_t = {}  # package name -> file name -> time of last download
        self._index = {}  # package name -> href of the package page
        self._packages = {}  # package name -> file name -> File
        # package name -> file name -> local path, upstream URL, or the
        # Thread currently downloading the file
        self._files = {}

    def __del__(self):
        # ``__init__`` may have failed before the directory was created.
        package_dir = getattr(self, "_package_dir", None)
        if package_dir and os.path.isdir(package_dir):
            shutil.rmtree(package_dir, ignore_errors=True)

    def _is_fresh(self, fetched_at: t.Optional[float]) -> bool:
        """Return whether something fetched at ``fetched_at`` is within TTL."""
        return fetched_at is not None and (time.monotonic() - fetched_at) < self.ttl

    def _list_packages(self):
        """Refresh the package listing from upstream if it has expired.

        Raises:
            requests.HTTPError: upstream answered with an error status.
        """
        if self._is_fresh(self._index_t):
            return

        response = requests.get(self.index_url)
        # Do not parse an error page as if it were the index.
        response.raise_for_status()
        self._index_t = time.monotonic()

        soup = bs4.BeautifulSoup(response.text, "html.parser")
        # Build a fresh mapping so packages removed upstream do not linger;
        # anchors without plain text or without a target are skipped.
        self._index = {
            link.string: link["href"]
            for link in soup.find_all("a", href=True)
            if link.string is not None
        }

    def list_packages(self) -> t.Iterable[str]:
        """Return the names of all packages listed by the index."""
        self._list_packages()
        return tuple(self._index)

    def _list_files(self, package_name: str):
        """Refresh the file listing of ``package_name`` if it has expired.

        Raises:
            NotFound: the package is not listed by the index, or its page
                does not exist upstream.
            requests.HTTPError: upstream answered with another error status.
        """
        if self._is_fresh(self._packages_t.get(package_name)):
            return

        self._list_packages()
        if package_name not in self._index:
            raise NotFound(package_name)

        page_url = urllib_parse.urljoin(self.index_url, self._index[package_name])
        response = requests.get(page_url)
        if response.status_code == 404:
            raise NotFound(package_name)
        response.raise_for_status()
        self._packages_t[package_name] = time.monotonic()

        soup = bs4.BeautifulSoup(response.text, "html.parser")
        files = {}
        for link in soup.find_all("a", href=True):
            name = link.string
            if name is None:
                continue
            # Links may be relative to the page; resolve them against the
            # final (post-redirect) page URL so they can be fetched later.
            file_url = urllib_parse.urljoin(response.url, link["href"])
            match = _sha_fragment_re.search(file_url)
            sha = match.group(1) if match else None
            files[name] = File(name, file_url, sha)
        # Replace rather than update so files removed upstream are dropped.
        self._packages[package_name] = files

    def list_files(self, package_name: str) -> t.Iterable[File]:
        """Return the files available for ``package_name``.

        Raises:
            NotFound: the package is unknown to this index.
        """
        self._list_files(package_name)
        return tuple(self._packages[package_name].values())

    @staticmethod
    def _download_file(
        url: str,
        path: str,
        get_callback: t.Callable[[], t.Any],
        done_callback: t.Callable[[], t.Any],
    ):
        """Stream ``url`` into ``path``.

        ``get_callback`` is invoked once a successful response has started,
        ``done_callback`` once the whole body has been written.

        Raises:
            requests.HTTPError: upstream answered with an error status; in
                that case nothing is written and no callback is invoked.
        """
        with requests.get(url, stream=True) as response:
            # Never store an error page on disk as if it were the package.
            response.raise_for_status()
            get_callback()
            with open(path, "wb") as f:
                for chunk in response.iter_content(None):
                    f.write(chunk)
        done_callback()

    def _get_file(self, package_name: str, file_name: str):
        """Ensure a download of the file has been started or is still fresh.

        The download runs in a background thread. The caller waits up to
        0.9 seconds for it; if no local copy exists by then, the upstream
        URL is recorded instead so the client can be sent there.

        Raises:
            NotFound: the package or the file is unknown to this index.
        """
        if self._is_fresh(self._files_t.get(package_name, {}).get(file_name)):
            return

        self._list_files(package_name)
        if file_name not in self._packages[package_name]:
            raise NotFound(file_name)

        package_files = self._files.setdefault(package_name, {})
        pending = package_files.get(file_name)
        # Only wait on a download that is actually running; a dead thread
        # left behind by a failed download must not block a retry.
        if isinstance(pending, threading.Thread) and pending.is_alive():
            pending.join(0.9)
            time.sleep(0.01)  # give control to original master
            return

        path = os.path.join(self._package_dir, package_name + "_" + file_name)
        url = self._packages[package_name][file_name].url

        def get_callback():
            self._files_t.setdefault(package_name, {})[file_name] = time.monotonic()

        def done_callback():
            package_files[file_name] = path

        thread = threading.Thread(
            target=self._download_file, args=(url, path, get_callback, done_callback)
        )
        package_files[file_name] = thread
        thread.start()
        thread.join(0.9)
        if package_files.get(file_name) is thread:
            # Still downloading, or the download failed: hand out the
            # upstream URL. A later ``done_callback`` replaces it with the path.
            package_files[file_name] = url

    def get_file(self, package_name: str, file_name: str) -> str:
        """Return a local path or an upstream URL for the requested file.

        Raises:
            NotFound: the package or the file is unknown to this index.
        """
        self._get_file(package_name, file_name)
        location = self._files[package_name][file_name]
        if isinstance(location, threading.Thread):
            # A download is in flight (or died) and no location has been
            # recorded yet; never leak the Thread object to the caller.
            return self._packages[package_name][file_name].url
        return location
134+
135+
136+
class Cache:
    """Package cache in front of a root index and optional extra indexes.

    Lookups try the root index first and fall back to the extra indexes in
    order when the root raises ``NotFound``.

    Args:
        root_cache: cache of the root index.
        extra_caches: caches of the extra indexes, if any.
    """

    _index_cache_cls = _IndexCache

    def __init__(
        self,
        root_cache: _IndexCache,
        extra_caches: t.Optional[t.List[_IndexCache]] = None,
    ):
        self.root_cache = root_cache
        self.extra_caches = extra_caches or []
        self._packages = {}
        self._list_dt = None
        self._package_list_dt = {}

    @classmethod
    def from_config(cls):
        """Build a cache from the values in the ``config`` module.

        Raises:
            ValueError: the number of extra index TTLs differs from the
                number of extra index URLs, or a TTL is not an integer.
        """
        root_cache = cls._index_cache_cls(config.INDEX_URL, int(config.INDEX_TTL))
        extra_index_urls = config.EXTRA_INDEX_URL.split()
        extra_ttls = [int(s) for s in config.EXTRA_INDEX_TTL.split()]
        # Explicit check instead of ``assert``: assertions are stripped
        # under ``python -O`` and ``zip`` would then silently truncate.
        if len(extra_index_urls) != len(extra_ttls):
            raise ValueError(
                f"Got {len(extra_index_urls)} extra index URLs "
                f"but {len(extra_ttls)} extra index TTLs"
            )
        extra_caches = [
            cls._index_cache_cls(url, ttl)
            for url, ttl in zip(extra_index_urls, extra_ttls)
        ]
        return cls(root_cache, extra_caches=extra_caches)

    def _first_found(self, lookup: t.Callable[[_IndexCache], t.Any]) -> t.Any:
        """Return ``lookup(cache)`` from the first index that does not raise.

        The root index is tried first, then each extra index in order. When
        every index raises ``NotFound``, the root index's error is re-raised.
        """
        root_error = None
        for cache in (self.root_cache, *self.extra_caches):
            try:
                return lookup(cache)
            except NotFound as e:
                if root_error is None:
                    root_error = e
        raise root_error

    def list_packages(self) -> t.Iterable[str]:
        """Return the sorted union of package names across all indexes."""
        packages = set(self.root_cache.list_packages())
        for cache in self.extra_caches:
            packages.update(cache.list_packages())
        return sorted(packages)

    def list_files(self, package_name: str) -> t.Iterable[File]:
        """Return the files of ``package_name`` from the first index having it.

        Raises:
            NotFound: no index knows the package.
        """
        return self._first_found(lambda cache: cache.list_files(package_name))

    def get_file(self, package_name: str, file_name: str) -> str:
        """Return a local path or upstream URL for the requested file.

        Raises:
            NotFound: no index knows the package or the file.
        """
        return self._first_found(
            lambda cache: cache.get_file(package_name, file_name)
        )

src/pypi_mirror/routes.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Local PyPI mirror cache."""
2+
3+
from urllib import parse as urllib_parse
4+
5+
import flask
6+
import collections
7+
8+
from . import config
9+
from . import data
10+
11+
# Name/URL pair passed to the HTML templates for each rendered link.
Item = collections.namedtuple("Item", ["name", "url"])

# Cache shared by every route; listing the packages here populates the
# package index when this module is imported.
cache = data.Cache.from_config()
cache.list_packages()
14+
15+
16+
@config.app.route("/index/")
def list_packages():
    """Render the page linking to every package known to the cache."""
    packages = []
    for name in cache.list_packages():
        packages.append(Item(name, f"/index/{name}/"))
    return flask.render_template("packages.html", packages=packages)
21+
22+
23+
@config.app.route("/index/<package_name>/")
def list_files(package_name: str):
    """Render the page linking to every file of ``package_name``.

    Responds with HTTP 404 when no index knows the package.
    """
    try:
        files = cache.list_files(package_name)
    except data.NotFound:
        flask.abort(404)

    items = []
    for f in files:
        url = f"/index/{package_name}/{f.name}"
        # Only advertise a hash when upstream supplied one; a literal
        # "#sha256=None" fragment would be read as a (wrong) hash value.
        if f.sha:
            url += f"#sha256={f.sha}"
        items.append(Item(f.name, url))
    return flask.render_template("files.html", package_name=package_name, files=items)
30+
31+
32+
@config.app.route("/index/<package_name>/<file_name>")
def get_file(package_name: str, file_name: str):
    """Serve a package file from the local cache or redirect to upstream.

    Responds with HTTP 404 when no index knows the package or the file.
    """
    try:
        path = cache.get_file(package_name, file_name)
    except data.NotFound:
        flask.abort(404)

    # Redirect only for real web URLs. Testing for "any scheme" would
    # misread a Windows drive letter ("C:\\...") as a URL scheme.
    scheme = urllib_parse.urlparse(path).scheme
    if scheme in ("http", "https"):
        return flask.redirect(path)
    return flask.send_file(path)

src/pypi_mirror/templates/files.html

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>{{ package_name }}</title>
6+
</head>
7+
<body>
8+
{%- for file in files %}
9+
<a href="{{ file.url }}">{{ file.name }}</a><br>
10+
{%- endfor %}
11+
</body>
12+
</html>
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Index</title>
6+
</head>
7+
<body>
8+
{%- for package in packages %}
9+
<a href="{{ package.url }}">{{ package.name }}</a><br>
10+
{%- endfor %}
11+
</body>
12+
</html>

0 commit comments

Comments
 (0)