Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,5 @@ data/
.DS_Store

# vscode settings
.vscode/
.vscode/
.idea
54 changes: 44 additions & 10 deletions python/nwm_client/src/hydrotools/nwm_client/FileDownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
import aiohttp
import aiofiles
from pathlib import Path
from typing import List, Tuple, Union
from typing import List, Tuple, Union, Final, Optional
import warnings
from http import HTTPStatus


# Default number of seconds (900 s = 15 min) to wait on a network call;
# used as the default value of FileDownloader's ``timeout`` argument.
DEFAULT_TIMEOUT_SECONDS: Final[int] = 900

class FileDownloader:
"""Provides a convenient interface to download a list of files
asynchronously using HTTP.
Expand All @@ -27,8 +30,9 @@ def __init__(
output_directory: Union[str, Path] = Path("."),
create_directory: bool = False,
ssl_context: ssl.SSLContext = ssl.create_default_context(),
limit: int = 10
) -> None:
limit: int = 10,
timeout: int = DEFAULT_TIMEOUT_SECONDS
) -> None:
"""Initialize File Downloader object with specified output directory.

Parameters
Expand All @@ -42,6 +46,8 @@ def __init__(
SSL configuration context.
limit: int, optional, default 10
Number of simultaneous connections.
timeout: int, optional, default 900
Default number of seconds to wait for each network call. Used whenever a method is called without its own timeout.

Returns
-------
Expand All @@ -59,12 +65,16 @@ def __init__(
# Set limit
self.limit = limit

# Set timeout
self.timeout: int = timeout

async def get_file(
self,
url: str,
filename: str,
session: aiohttp.ClientSession
) -> None:
session: aiohttp.ClientSession,
timeout: Optional[int] = None
) -> None:
"""Download a single file.

Parameters
Expand All @@ -76,13 +86,18 @@ async def get_file(
to self.output_directory/filename
session: aiohttp.ClientSession, required
Session object used for retrieval.
timeout: int, optional
The number of seconds to wait for the network call. If omitted (or falsy), self.timeout is used.

Returns
-------
None
"""
if not timeout:
timeout = self.timeout

# Retrieve a single file
async with session.get(url, ssl=self.ssl_context, timeout=900) as response:
async with session.get(url, ssl=self.ssl_context, timeout=timeout) as response:
# Warn if unable to locate file
if response.status != HTTPStatus.OK:
status = HTTPStatus(response.status)
Expand All @@ -106,7 +121,11 @@ async def get_file(
break
await fo.write(chunk)

async def get_files(self, src_dst_list: List[Tuple[str,str]]) -> None:
async def get_files(
self,
src_dst_list: List[Tuple[str,str]],
timeout: Optional[int] = None
) -> None:
"""Asynchronously download multiple files.

Parameters
Expand All @@ -115,17 +134,27 @@ async def get_files(self, src_dst_list: List[Tuple[str,str]]) -> None:
List of tuples containing two strings. The first string is the
source URL from which to retrieve a file, the second string is the
local filename where the file will be saved.
timeout: int, optional
The number of seconds to wait on each network call. If omitted (or falsy), self.timeout is used.

Returns
-------
None
"""
if not timeout:
timeout = self.timeout

# Retrieve each file
connector = aiohttp.TCPConnector(limit=self.limit)
async with aiohttp.ClientSession(connector=connector) as session:
await asyncio.gather(*[self.get_file(url, filename, session) for url, filename in src_dst_list])
await asyncio.gather(*[self.get_file(url, filename, session, timeout) for url, filename in src_dst_list])

def get(self, src_dst_list: List[Tuple[str,str]], overwrite: bool = False) -> None:
def get(
self,
src_dst_list: List[Tuple[str,str]],
overwrite: bool = False,
timeout: Optional[int] = None
) -> None:
"""Setup event loop and asynchronously download multiple files. If
self.create_directory is True, an output directory will be
created if needed.
Expand All @@ -139,6 +168,8 @@ def get(self, src_dst_list: List[Tuple[str,str]], overwrite: bool = False) -> No
overwrite: bool, optional, default False
If True will overwrite destination file, if it exists. If False,
download of this file is skipped.
timeout: int, optional
The number of seconds to wait for each network call. If omitted (or falsy), self.timeout is used.

Returns
-------
Expand All @@ -154,6 +185,9 @@ def get(self, src_dst_list: List[Tuple[str,str]], overwrite: bool = False) -> No
>>> [("https://pandas.pydata.org/docs/user_guide/index.html","index.html")]
>>> )
"""
if not timeout:
timeout = self.timeout

# Shorten list to files that do not exist
if not overwrite:
short = []
Expand All @@ -174,7 +208,7 @@ def get(self, src_dst_list: List[Tuple[str,str]], overwrite: bool = False) -> No
raise FileNotFoundError(message)

# Start event loop to retrieve files
asyncio.run(self.get_files(src_dst_list))
asyncio.run(self.get_files(src_dst_list, timeout=timeout))

@property
def output_directory(self) -> Path:
Expand Down
2 changes: 1 addition & 1 deletion python/nwm_client/src/hydrotools/nwm_client/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "9.1.1"
__version__ = "9.2.0"
Loading