-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Big objects #51
base: main
Are you sure you want to change the base?
Big objects #51
Changes from all commits
1feaba8
5434267
3eab9f6
68707c0
d402290
706c6d2
bcb94fb
6f59034
5c53cd1
6d6e0a8
4014f58
ef106b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import pytest | ||
import logging | ||
from utils.utils import create_big_file, convert_unit | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. acho que convert_unit não é usada neste arquivo |
||
from utils.crud import fixture_bucket_with_name, fixture_upload_multipart_file | ||
from boto3.s3.transfer import TransferConfig | ||
import uuid | ||
from tqdm import tqdm | ||
import os | ||
|
||
|
||
size_list = [
    {'size': 10, 'unit': 'mb'},
    {'size': 100, 'unit': 'mb'},
    {'size': 1, 'unit': 'gb'},
    {'size': 5, 'unit': 'gb'},
    {'size': 10, 'unit': 'gb'},
]

# Human-readable test ids, e.g. "10mb", "1gb"
ids_list = [f"{s['size']}{s['unit']}" for s in size_list]

upload_params = [
    {
        'file_path': f"./tmp_files/big_file_download{size['size']}{size['unit']}",
        'file_size': size,
        'object_key': "big-object-" + uuid.uuid4().hex[:6],
    }
    for size in size_list
]


# ## Test multipart download while implicitly testing the upload and delete of big objects
@pytest.mark.parametrize(
    'params, fixture_upload_multipart_file',
    # Same dict goes to the test (direct) and to the upload fixture (indirect)
    [(p, p) for p in upload_params],
    ids=ids_list,
    indirect=['fixture_upload_multipart_file'],
)
@pytest.mark.slow
@pytest.mark.big_objects
def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_multipart_file, params):
    """
    Test the download of a big object from an S3 bucket using multipart download.

    The indirect fixture performs the multipart upload and the bucket fixture's
    teardown deletes the object, so upload and delete are exercised implicitly.

    :param s3_client: fixture of boto3 s3 client
    :param fixture_bucket_with_name: fixture to create a bucket with a unique name
    :param fixture_upload_multipart_file: fixture that uploads the file and returns its size in bytes
    :param params: dict: 'file_path': str, 'file_size': dict, 'object_key': str
    :return: None
    """
    # Unpacking params
    file_path = params.get('file_path')
    download_path = file_path + "_downloaded"
    object_key = params.get('object_key')

    bucket_name = fixture_bucket_with_name
    total_size = create_big_file(file_path, params.get('file_size'))

    # Config for multithreading of boto3 building multipart upload/download
    config = TransferConfig(
        multipart_threshold=40 * 1024 * 1024,
        max_concurrency=10,
        multipart_chunksize=8 * 1024 * 1024,
        use_threads=True,
    )

    # Uploading the big file is done by the indirect fixture; it returns the
    # object's size as reported by the bucket.
    uploaded_file_size = fixture_upload_multipart_file

    # Test download file from s3 bucket
    try:
        # Progress bar for the download
        with tqdm(total=total_size,
                  desc=bucket_name,
                  bar_format="Download| {percentage:.1f}%|{bar:25} | {rate_fmt} | Time: {elapsed} | {desc}",
                  unit='B',
                  unit_scale=True, unit_divisor=1024) as pbar:
            s3_client.download_file(Bucket=bucket_name, Key=object_key, Filename=download_path, Config=config, Callback=pbar.update)
    except Exception as e:
        # Fail loudly: the previous version logged and swallowed the error
        # (including AssertionError), letting a broken download pass silently.
        logging.error(f"Error downloading object {object_key}: {e}")
        pytest.fail(f"Download of object {object_key} failed: {e}")

    # Retrieving sizes
    downloaded_file_size = os.path.getsize(download_path)

    # The test is successful only if the size reported by the bucket equals the downloaded size
    assert downloaded_file_size == uploaded_file_size, f"Downloaded size doesn't match: {downloaded_file_size} with Upload size: {uploaded_file_size}"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
import logging | ||
import pytest | ||
from concurrent.futures import ThreadPoolExecutor, as_completed | ||
from utils.utils import generate_valid_bucket_name | ||
from utils.utils import generate_valid_bucket_name, convert_unit | ||
from boto3.s3.transfer import TransferConfig | ||
import os | ||
from tqdm import tqdm | ||
|
||
### Functions | ||
|
||
|
@@ -222,6 +224,7 @@ def fixture_bucket_with_name(s3_client, request): | |
# This fixtures automatically creates a bucket based on the name of the test that called it and then returns its name | ||
# Lastly, teardown the bucket by deleting it and its objects | ||
|
||
# request.node get the name of the test currently running | ||
bucket_name = generate_valid_bucket_name(request.node.name.replace("_", "-")) | ||
create_bucket(s3_client, bucket_name) | ||
|
||
|
@@ -249,3 +252,39 @@ def fixture_upload_multiple_objects(s3_client, fixture_bucket_with_name, request | |
objects_names = [{"key": f"multiple-object'-{i}", "path": path} for i in range(qnt)] | ||
return upload_objects_multithreaded(s3_client, fixture_bucket_with_name, objects_names) | ||
|
||
@pytest.fixture
def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) -> int:
    """
    Upload a big file in multiple chunks (multipart) to an S3 bucket.

    :param s3_client: boto3 s3 client
    :param fixture_bucket_with_name: pytest fixture which sets up and tears down the bucket
    :param request: indirect parametrization; request.param is a dict with
        'file_path', 'file_size' and 'object_key'
    :return int: size in bytes of the object as stored on the bucket
    """
    bucket_name = fixture_bucket_with_name
    file_path = request.param.get('file_path')
    file_size = convert_unit(request.param.get('file_size'))
    object_key = request.param.get('object_key')

    # Config for multithreading of boto3 building multipart upload/download
    config = TransferConfig(
        multipart_threshold=8 * 1024 * 1024,  # Minimum size to start multipart upload
        max_concurrency=10,
        multipart_chunksize=8 * 1024 * 1024,
        use_threads=True,
    )

    # Upload progress bar with time stamp
    with tqdm(total=file_size,
              desc=bucket_name,
              bar_format="Upload| {percentage:.1f}%|{bar:25}| {rate_fmt} | Time: {elapsed} | {desc}",
              unit='B',
              unit_scale=True, unit_divisor=1024) as pbar:
        # upload_file returns None; the unused 'response'/'elapsed' locals were removed
        s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update)

    # Checking if the object was uploaded by asking the bucket for its stored size
    object_size = s3_client.get_object(Bucket=bucket_name, Key=object_key).get('ContentLength', 0)

    return object_size  # size in bytes
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
o que vc acha de usar o path
/tmp
do próprio linux? ao invés de uma pasta local no projeto.