Skip to content

Commit 8d9caa3

Browse files
fix memory leak #59 (#62)
* fix memory leak #59 by limiting both _write_buffer of RequestHandler, as well as _write_buffer of IOStream * fix memory leak #59 set config=True and timeout for while loop * Add configuration documentation Co-authored-by: Frédéric Collonval <[email protected]>
1 parent ffa8a28 commit 8d9caa3

File tree

3 files changed

+86
-4
lines changed

3 files changed

+86
-4
lines changed

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,27 @@ Features:
2020

2121
![jupyter-archive in action](https://raw.githubusercontent.com/jupyterlab-contrib/jupyter-archive/master/archive.gif)
2222

23+
## Configuration
24+
25+
The server extension has some [configuration settings](https://jupyter-server.readthedocs.io/en/latest/users/configuration.html) --
26+
the values below are the default ones:
27+
28+
```json5
29+
{
30+
"JupyterArchive": {
31+
"stream_max_buffer_size": 104857600, // The max size of tornado IOStream buffer
32+
"handler_max_buffer_length": 10240, // The max length of chunks in tornado RequestHandler
33+
"archive_download_flush_delay": 100 // The delay in ms at which we send the chunk of data to the client.
34+
}
35+
}
36+
```
37+
38+
You can also set new values with the following environment variables:
39+
40+
- `JA_IOSTREAM_MAX_BUFFER_SIZE`
41+
- `JA_HANDLER_MAX_BUFFER_LENGTH`
42+
- `JA_ARCHIVE_DOWNLOAD_FLUSH_DELAY`
43+
2344
## Requirements
2445

2546
- JupyterLab >= 3.0

jupyter_archive/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import json
22
import pathlib
3+
import os
4+
5+
from traitlets.config import Configurable
6+
from traitlets import Int, default
37

48
from ._version import __version__
59
from .handlers import setup_handlers
@@ -14,6 +18,31 @@ def _jupyter_labextension_paths():
1418
return [{"src": "labextension", "dest": data["name"]}]
1519

1620

21+
class JupyterArchive(Configurable):
    """Configurable settings for the jupyter-archive server extension.

    Each trait is declared with ``config=True`` so it can be set through
    the Jupyter configuration system; when unset, the default is read
    from a ``JA_*`` environment variable.
    """

    stream_max_buffer_size = Int(
        help="The max size of tornado IOStream buffer",
        config=True,
    )

    @default("stream_max_buffer_size")
    def _default_stream_max_buffer_size(self):
        # 100 * 1024 * 1024 bytes, i.e. 100M.
        return int(os.environ.get("JA_IOSTREAM_MAX_BUFFER_SIZE", 100 * 1024 * 1024))

    handler_max_buffer_length = Int(
        help="The max length of chunks in tornado RequestHandler",
        config=True,
    )

    @default("handler_max_buffer_length")
    def _default_handler_max_buffer_length(self):
        # At 8K per chunk, 10240 chunks amounts to roughly 80M.
        return int(os.environ.get("JA_HANDLER_MAX_BUFFER_LENGTH", 10240))

    archive_download_flush_delay = Int(
        help="The delay in ms at which we send the chunk of data to the client.",
        config=True,
    )

    @default("archive_download_flush_delay")
    def _default_archive_download_flush_delay(self):
        return int(os.environ.get("JA_ARCHIVE_DOWNLOAD_FLUSH_DELAY", 100))
44+
45+
1746
def _jupyter_server_extension_points():
1847
return [{"module": "jupyter_archive"}]
1948

@@ -26,6 +55,8 @@ def _load_jupyter_server_extension(server_app):
2655
server_app: jupyterlab.labapp.LabApp
2756
JupyterLab application instance
2857
"""
58+
config = JupyterArchive(config=server_app.config)
59+
server_app.web_app.settings["jupyter_archive"] = config
2960
setup_handlers(server_app.web_app)
3061

3162

jupyter_archive/handlers.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
import os
22
import pathlib
33
import tarfile
4+
import time
45
import zipfile
56

67
from jupyter_server.base.handlers import JupyterHandler
78
from jupyter_server.utils import url2path, url_path_join
89
from tornado import ioloop, web
910

10-
# The delay in ms at which we send the chunk of data
11-
# to the client.
12-
ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100
1311
SUPPORTED_FORMAT = [
1412
"zip",
1513
"tgz",
@@ -29,6 +27,18 @@ def __init__(self, handler):
2927
self.position = 0
3028

3129
def write(self, data):
    """Forward *data* to the tornado handler, applying back-pressure.

    If the handler already buffers more chunks than
    ``handler_max_buffer_length``, wait one flush cycle at a time until
    the buffer drains, the download is canceled, or ~600 seconds elapse.

    Raises:
        ValueError: when the download was canceled, or the buffer did
            not drain before the timeout.
    """
    if self.handler.canceled:
        raise ValueError("File download canceled")
    # Budget roughly 600 seconds of waiting, counted in flush cycles.
    cycles_left = 600 * 1000 / self.handler.archive_download_flush_delay
    while len(self.handler._write_buffer) > self.handler.handler_max_buffer_length:
        # The handler's write buffer is over the limit: pause for one
        # flush cycle, then re-check.
        time.sleep(self.handler.archive_download_flush_delay / 1000)
        if self.handler.canceled:
            raise ValueError("File download canceled")
        cycles_left -= 1
        if cycles_left <= 0:
            raise ValueError("Time out for writing into tornado buffer")
    self.position += len(data)
    self.handler.write(data)
    del data
@@ -78,6 +88,26 @@ def make_reader(archive_path):
7888

7989

8090
class DownloadArchiveHandler(JupyterHandler):
91+
92+
@property
def stream_max_buffer_size(self):
    """Max size of the tornado IOStream buffer, from the extension config."""
    config = self.settings["jupyter_archive"]
    return config.stream_max_buffer_size
95+
96+
@property
def handler_max_buffer_length(self):
    """Max number of buffered chunks in the handler, from the extension config."""
    config = self.settings["jupyter_archive"]
    return config.handler_max_buffer_length
99+
100+
@property
def archive_download_flush_delay(self):
    """Flush delay in ms for download chunks, from the extension config."""
    config = self.settings["jupyter_archive"]
    return config.archive_download_flush_delay
103+
104+
def flush(self, include_footers=False):
    """Flush buffered output to the client.

    Skips this flush cycle when the underlying IOStream's write buffer
    already exceeds ``stream_max_buffer_size``; the periodic flush
    callback will try again later.
    """
    pending = self.request.connection.stream._write_buffer
    if pending and len(pending) > self.stream_max_buffer_size:
        # Stream buffer is too large: drop this cycle instead of growing it.
        return
    return super(DownloadArchiveHandler, self).flush(include_footers)
110+
81111
@web.authenticated
82112
async def get(self, archive_path, include_body=False):
83113

@@ -120,7 +150,7 @@ async def get(self, archive_path, include_body=False):
120150
self.set_header("content-disposition", "attachment; filename={}".format(archive_filename))
121151

122152
self.canceled = False
123-
self.flush_cb = ioloop.PeriodicCallback(self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY)
153+
self.flush_cb = ioloop.PeriodicCallback(self.flush, self.archive_download_flush_delay)
124154
self.flush_cb.start()
125155

126156
args = (

0 commit comments

Comments
 (0)