From 1feaba8e2730d88a31a7ae75f1a394c505bd41bf Mon Sep 17 00:00:00 2001 From: luis Date: Fri, 17 Jan 2025 13:50:30 -0300 Subject: [PATCH 01/12] initial commit --- docs/big_objects_test.py | 27 +++++++++++++++++++++++++++ docs/utils/utils.py | 36 +++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 docs/big_objects_test.py diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py new file mode 100644 index 0000000..ecbfd8f --- /dev/null +++ b/docs/big_objects_test.py @@ -0,0 +1,27 @@ +import pytest +import logging +from utils.utils import create_big_file +from utils.crud import (upload_objects_multithreaded, + bucket_with_name) + + + +@pytest.mark.turtle +@pytest.mark.big_objects +def multipart_upload(s3_client, bucket_with_name): + """ + Test to upload a big object to an S3 bucket using multipart upload + :param s3_client: fixture of boto3 s3 client + :param bucket_with_name: fixture to create a bucket with a unique name + :return: None + """ + + bucket_name = bucket_with_name + object_key = "big-object" + file_path = "/tmp/big_file" + create_big_file(file_path, size=100, unit='MB') + + try: + response = multipart_upload(s3_client, bucket_name, object_key, file_path) + except Exception as e: + logging.error(f"Error uploading object {object_key}: {e}") \ No newline at end of file diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 783a51a..6dfae84 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -1,4 +1,5 @@ import uuid +import os # Function is responsible to check and format bucket names into valid ones @@ -25,4 +26,37 @@ def generate_valid_bucket_name(base_name="my-unique-bucket"): new_name.append(char) - return "".join(new_name) \ No newline at end of file + return "".join(new_name) + +# Function which will be using to create mock files with different sizes + +def create_big_file(file_path, size = 1, unit='MB'): + """ + Create a big file with the specified size in the specified path + :param file_path: str: path to the file to be created + :param size: int: size of the file to be created + :param unit: str: unit of the size, default is MB + :return: None + """ + + size = 1024 + + units = { + 'kb': 1024, + 'mb': 1024 * 1024, + 'mb': 1024 * 1024 * 1024, + } + + if unit.lower() not in units: + raise Exception(f"Invalid unit: {unit}") + + # Creating a file of size * unit + size = size * units[unit.lower()] + with open(file_path, 'wb') as file: + file.write(b'a' * size) + + # yielding to the calling function + try: + yield file_path + finally: + os.remove(file_path) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1595151..305714d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ markers = [ "bucket_versioning: Bucket Versioning", "cli: Tests using CLI", "multiple_objects: Tests involving operations with multiple objects on the same bucket", + "big_objects": Tests with files bigger than 5 mb, "rapid: quick expected execution magnitude", "regular: regular time expected execution magnitude", "slow: slow expected execution magnitude", From 54342673972119c6fe8f1b3bd1c642f079de868b Mon Sep 17 00:00:00 2001 From: luis Date: Fri, 17 Jan 2025 16:13:52 -0300 Subject: [PATCH 02/12] changed utils --- docs/big_objects_test.py | 29 +++++++++++++++++++++-------- docs/utils/utils.py | 4 ++-- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index ecbfd8f..fbe1167 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -1,9 +1,9 @@ import pytest import logging -from utils.utils import create_big_file -from utils.crud import (upload_objects_multithreaded, - bucket_with_name) - +from utils.utils import (create_big_file) +from utils.crud import bucket_with_name +from boto3.s3.transfer import TransferConfig +import uuid @pytest.mark.turtle @@ -15,13 +15,26 @@ def multipart_upload(s3_client, bucket_with_name): :param bucket_with_name: fixture to create a bucket with a unique name :return: None """ + file_path = ".../bin/big_file" + bucket_name = bucket_with_name + + size = create_big_file(file_path, size=100, unit='MB') + + object_key = "big_object" + size + uuid.uuid4().hex[:6] + + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, + max_concurrency=10, + multipart_chunksize=8 * 1024 * 1024, + use_threads=True + ) bucket_name = bucket_with_name - object_key = "big-object" - file_path = "/tmp/big_file" create_big_file(file_path, size=100, unit='MB') try: - response = multipart_upload(s3_client, bucket_name, object_key, file_path) + response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config) except Exception as e: - logging.error(f"Error uploading object {object_key}: {e}") \ No newline at end of file + logging.error(f"Error uploading object {object_key}: {e}") + + assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" \ No newline at end of file diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 6dfae84..86b64e5 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -36,7 +36,7 @@ def create_big_file(file_path, size = 1, unit='MB'): :param file_path: str: path to the file to be created :param size: int: size of the file to be created :param unit: str: unit of the size, default is MB - :return: None + :return: int: size of the file created """ size = 1024 @@ -57,6 +57,6 @@ def create_big_file(file_path, size = 1, unit='MB'): # yielding to the calling function try: - yield file_path + yield size finally: os.remove(file_path) \ No newline at end of file From 3eab9f699952e018bf4383b80208fa8bc5bfbcb9 Mon Sep 17 00:00:00 2001 From: luis Date: Mon, 20 Jan 2025 12:47:46 -0300 Subject: [PATCH 03/12] added multipart download test --- docs/big_objects_test.py | 47 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index fbe1167..8c0d51a 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -8,7 +8,7 @@ @pytest.mark.turtle @pytest.mark.big_objects -def multipart_upload(s3_client, bucket_with_name): +def test_multipart_upload(s3_client, bucket_with_name): """ Test to upload a big object to an S3 bucket using multipart upload :param s3_client: fixture of boto3 s3 client @@ -19,9 +19,12 @@ def multipart_upload(s3_client, bucket_with_name): bucket_name = bucket_with_name size = create_big_file(file_path, size=100, unit='MB') - object_key = "big_object" + size + uuid.uuid4().hex[:6] + bucket_name = bucket_with_name + + create_big_file(file_path, size=100, unit='MB') + # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( multipart_threshold=8 * 1024 * 1024, max_concurrency=10, @@ -29,12 +32,50 @@ def multipart_upload(s3_client, bucket_with_name): use_threads=True ) + try: + response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config) + except Exception as e: + logging.error(f"Error uploading object {object_key}: {e}") + + assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" + + +def test_multipart_download(s3_client, bucket_with_name): + """ + Test to download a big object to an S3 bucket using multipart download + :param s3_client: fixture of boto3 s3 client + :param bucket_with_name: fixture to create a bucket with a unique name + :return: None + """ + file_path = ".../bin/big_file" + bucket_name = bucket_with_name + + size = create_big_file(file_path, size=100, unit='MB') + object_key = "big_object" + size + uuid.uuid4().hex[:6] + + bucket_name = bucket_with_name create_big_file(file_path, size=100, unit='MB') + # Config for multhreading of boto3 building multipart upload/download + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, + max_concurrency=10, + multipart_chunksize=8 * 1024 * 1024, + use_threads=True + ) + + # upload object to s3 try: response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config) except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") - assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" \ No newline at end of file + # Test download file from s3 bucket + try: + response = s3_client.download_file(file_path, bucket_name, object_key, Config=config) + except Exception as e: + logging.error(f"Error uploading object {object_key}: {e}") + + assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" + \ No newline at end of file From 68707c00124a4a6afcdad9bf62b5c2ec882eea2c Mon Sep 17 00:00:00 2001 From: luis Date: Mon, 20 Jan 2025 14:32:16 -0300 Subject: [PATCH 04/12] added parametrization to the big objects tests --- docs/big_objects_test.py | 30 ++++++++++++++++++++++++++---- docs/utils/crud.py | 1 + docs/utils/utils.py | 11 +++++------ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index 8c0d51a..b2bc832 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -5,14 +5,27 @@ from boto3.s3.transfer import TransferConfig import uuid +size_list = [{'size': 10, 'unit': 'mb'}, + {'size': 100, 'unit': 'mb'}, + {'size': 1, 'unit': 'gb'}, + {'size': 5, 'unit': 'gb'}, + {'size': 10, 'unit': 'gb'}, +] + +@pytest.mark.parametrize( + 'size', + (s for s in size_list), + ids=[f"{s['size']}{s['unit']}" for s in size_list] +) @pytest.mark.turtle @pytest.mark.big_objects -def test_multipart_upload(s3_client, bucket_with_name): +def test_multipart_upload(s3_client, bucket_with_name, size): """ Test to upload a big object to an S3 bucket using multipart upload :param s3_client: fixture of boto3 s3 client :param bucket_with_name: fixture to create a bucket with a unique name + :param size: dict: value containing an int size and a string unit :return: None """ file_path = ".../bin/big_file" @@ -39,23 +52,32 @@ def test_multipart_upload(s3_client, bucket_with_name): assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" +@pytest.mark.parametrize( + 'size', + (s for s in size_list), + ids=[f"{s['size']}{s['unit']}" for s in size_list] +) + -def test_multipart_download(s3_client, bucket_with_name): +@pytest.mark.turtle +@pytest.mark.big_objects +def test_multipart_download(s3_client, bucket_with_name, size): """ Test to download a big object to an S3 bucket using multipart download :param s3_client: fixture of boto3 s3 client :param bucket_with_name: fixture to create a bucket with a unique name + :param size: dict: value containing an int size and a string unit :return: None """ file_path = ".../bin/big_file" bucket_name = bucket_with_name - size = create_big_file(file_path, size=100, unit='MB') + size = create_big_file(file_path, size) object_key = "big_object" + size + uuid.uuid4().hex[:6] bucket_name = bucket_with_name - create_big_file(file_path, size=100, unit='MB') + create_big_file(file_path, size) # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( diff --git a/docs/utils/crud.py b/docs/utils/crud.py index bc75d6b..37781a7 100644 --- a/docs/utils/crud.py +++ b/docs/utils/crud.py @@ -222,6 +222,7 @@ def fixture_bucket_with_name(s3_client, request): # This fixtures automatically creates a bucket based on the name of the test that called it and then returns its name # Lastly, teardown the bucket by deleting it and its objects + # request.node get the name of the test currently running bucket_name = generate_valid_bucket_name(request.node.name.replace("_", "-")) create_bucket(s3_client, bucket_name) diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 86b64e5..4155762 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -30,12 +30,11 @@ def generate_valid_bucket_name(base_name="my-unique-bucket"): # Function which will be using to create mock files with different sizes -def create_big_file(file_path, size = 1, unit='MB'): +def create_big_file(file_path, size = {'size': 100, 'unit': 'mb'}): """ Create a big file with the specified size in the specified path :param file_path: str: path to the file to be created - :param size: int: size of the file to be created - :param unit: str: unit of the size, default is MB + :param size: dict: value containing the an int sie and a stirng unit :return: int: size of the file created """ @@ -47,11 +46,11 @@ def create_big_file(file_path, size = 1, unit='MB'): 'mb': 1024 * 1024 * 1024, } - if unit.lower() not in units: - raise Exception(f"Invalid unit: {unit}") + if size['size'].lower() not in units: + raise Exception(f"Invalid unit: {size['unit']}") # Creating a file of size * unit - size = size * units[unit.lower()] + size = size * units[size['unit'].lower()] with open(file_path, 'wb') as file: file.write(b'a' * size) From d40229017ccd05eb7a362fafb09a512f51125a96 Mon Sep 17 00:00:00 2001 From: luis Date: Mon, 20 Jan 2025 23:06:20 -0300 Subject: [PATCH 05/12] trying to create big files --- docs/big_objects_test.py | 118 +++++++++++++++++++-------------------- docs/utils/utils.py | 44 ++++++++++++--- 2 files changed, 91 insertions(+), 71 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index b2bc832..d32c6e4 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -6,15 +6,15 @@ import uuid size_list = [{'size': 10, 'unit': 'mb'}, - {'size': 100, 'unit': 'mb'}, - {'size': 1, 'unit': 'gb'}, - {'size': 5, 'unit': 'gb'}, - {'size': 10, 'unit': 'gb'}, + # {'size': 100, 'unit': 'mb'}, +# {'size': 1, 'unit': 'gb'}, +# {'size': 5, 'unit': 'gb'}, +# {'size': 10, 'unit': 'gb'}, ] @pytest.mark.parametrize( 'size', - (s for s in size_list), + [s for s in size_list], ids=[f"{s['size']}{s['unit']}" for s in size_list] ) @@ -28,57 +28,12 @@ def test_multipart_upload(s3_client, bucket_with_name, size): :param size: dict: value containing an int size and a string unit :return: None """ - file_path = ".../bin/big_file" bucket_name = bucket_with_name + big_file = next(create_big_file(size)) + logging.info(f"Uploading {big_file} to bucket {bucket_name}") - size = create_big_file(file_path, size=100, unit='MB') - object_key = "big_object" + size + uuid.uuid4().hex[:6] - bucket_name = bucket_with_name + object_key = "big_object" + uuid.uuid4().hex[:6] - create_big_file(file_path, size=100, unit='MB') - - # Config for multhreading of boto3 building multipart upload/download - config = TransferConfig( - multipart_threshold=8 * 1024 * 1024, - max_concurrency=10, - multipart_chunksize=8 * 1024 * 1024, - use_threads=True - ) - - try: - response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config) - except Exception as e: - logging.error(f"Error uploading object {object_key}: {e}") - - assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" - -@pytest.mark.parametrize( - 'size', - (s for s in size_list), - ids=[f"{s['size']}{s['unit']}" for s in size_list] -) - - -@pytest.mark.turtle -@pytest.mark.big_objects -def test_multipart_download(s3_client, bucket_with_name, size): - """ - Test to download a big object to an S3 bucket using multipart download - :param s3_client: fixture of boto3 s3 client - :param bucket_with_name: fixture to create a bucket with a unique name - :param size: dict: value containing an int size and a string unit - :return: None - """ - file_path = ".../bin/big_file" - bucket_name = bucket_with_name - - size = create_big_file(file_path, size) - object_key = "big_object" + size + uuid.uuid4().hex[:6] - - - bucket_name = bucket_with_name - create_big_file(file_path, size) - # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( multipart_threshold=8 * 1024 * 1024, @@ -87,17 +42,56 @@ def test_multipart_download(s3_client, bucket_with_name, size): use_threads=True ) - # upload object to s3 try: - response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config) + response = s3_client.upload_fileobj(big_file, bucket_name, object_key, Config=config) + assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") - # Test download file from s3 bucket - try: - response = s3_client.download_file(file_path, bucket_name, object_key, Config=config) - except Exception as e: - logging.error(f"Error uploading object {object_key}: {e}") - assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" - \ No newline at end of file +#@pytest.mark.parametrize( +# 'size', +# [s for s in size_list], +# ids=[f"{s['size']}{s['unit']}" for s in size_list] +#) +# +# +#@pytest.mark.turtle +#@pytest.mark.big_objects +#def test_multipart_download(s3_client, bucket_with_name, size): +# """ +# Test to download a big object to an S3 bucket using multipart download +# :param s3_client: fixture of boto3 s3 client +# :param bucket_with_name: fixture to create a bucket with a unique name +# :param size: dict: value containing an int size and a string unit +# :return: None +# """ +# +# bucket_name = bucket_with_name +# big_file_path = create_big_file(size) +# object_key = "big_object" + uuid.uuid4().hex[:6] +# +# # Config for multhreading of boto3 building multipart upload/download +# config = TransferConfig( +# multipart_threshold=8 * 1024 * 1024, +# max_concurrency=10, +# multipart_chunksize=8 * 1024 * 1024, +# use_threads=True +# ) +# +# # upload object to s3 +# try: +# response = s3_client.upload_file(big_file_path.name, bucket_name, object_key, Config=config) +# assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" +# +# except Exception as e: +# logging.error(f"Error uploading object {object_key}: {e}") +# +# # Test download file from s3 bucket +# try: +# response = s3_client.download_file(big_file_path.name, bucket_name, object_key, Config=config) +# assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" +# except Exception as e: +# logging.error(f"Error uploading object {object_key}: {e}") +# +# \ No newline at end of file diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 4155762..16a7480 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -1,5 +1,6 @@ import uuid import os +import tempfile # Function is responsible to check and format bucket names into valid ones @@ -47,15 +48,40 @@ def create_big_file(file_path, size = {'size': 100, 'unit': 'mb'}): } if size['size'].lower() not in units: - raise Exception(f"Invalid unit: {size['unit']}") + raise ValueError(f"Invalid unit: {size['unit']}") # Creating a file of size * unit - size = size * units[size['unit'].lower()] - with open(file_path, 'wb') as file: - file.write(b'a' * size) + size_bytes = size * units[size['unit'].lower()] + +import os +import tempfile +import logging + +def create_big_file(size={'size': 100, 'unit': 'mb'}): + """ + Create a big file with the specified size using a temporary file. + + :param size: dict: A dictionary containing an int 'size' and a str 'unit'. + :yield: str: Path to the temporary file created. + """ + units = { + 'kb': 1024, + 'mb': 1024 * 1024, + 'gb': 1024 * 1024 * 1024, + } + + # Validate unit + if size['unit'].lower() not in units: + raise ValueError(f"Invalid unit: {size['unit']}") + + # Calculate size in bytes + size_bytes = size['size'] * units[size['unit'].lower()] + logging.error(f"Creating a file of size {size_bytes} bytes") + + # Create a temporary file + temp_file = tempfile.NamedTemporaryFile(delete=True) + temp_file.write(b'a' * size_bytes) + yield temp_file + + temp_file.close() - # yielding to the calling function - try: - yield size - finally: - os.remove(file_path) \ No newline at end of file From 706c6d213305f3a429c19bff83ee2dfb9dca8a4f Mon Sep 17 00:00:00 2001 From: luis Date: Mon, 20 Jan 2025 23:44:56 -0300 Subject: [PATCH 06/12] found a way to check if the file was successfully uploaded --- docs/big_objects_test.py | 11 +++++++---- docs/utils/utils.py | 25 ++++++++++--------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index d32c6e4..7ebae36 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -28,9 +28,9 @@ def test_multipart_upload(s3_client, bucket_with_name, size): :param size: dict: value containing an int size and a string unit :return: None """ + file_path = "./big_file.txt" bucket_name = bucket_with_name - big_file = next(create_big_file(size)) - logging.info(f"Uploading {big_file} to bucket {bucket_name}") + size = create_big_file(file_path, size) object_key = "big_object" + uuid.uuid4().hex[:6] @@ -43,8 +43,11 @@ def test_multipart_upload(s3_client, bucket_with_name, size): ) try: - response = s3_client.upload_fileobj(big_file, bucket_name, object_key, Config=config) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" + response = s3_client.upload_file("./big_file", bucket_name, object_key, Config=config) + response = s3_client.head_object(Bucket=bucket_name, Key=object_key) + logging.error(f"Uploaded object: {object_key} to bucket: {bucket_name}") + object_size = response['ContentLength'] + logging.error(f"Size of the object: {object_size} bytes") except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 16a7480..1307e3c 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -1,6 +1,5 @@ import uuid import os -import tempfile # Function is responsible to check and format bucket names into valid ones @@ -53,11 +52,8 @@ def create_big_file(file_path, size = {'size': 100, 'unit': 'mb'}): # Creating a file of size * unit size_bytes = size * units[size['unit'].lower()] -import os -import tempfile -import logging -def create_big_file(size={'size': 100, 'unit': 'mb'}): +def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}): """ Create a big file with the specified size using a temporary file. @@ -70,18 +66,17 @@ def create_big_file(size={'size': 100, 'unit': 'mb'}): 'gb': 1024 * 1024 * 1024, } - # Validate unit if size['unit'].lower() not in units: raise ValueError(f"Invalid unit: {size['unit']}") - # Calculate size in bytes - size_bytes = size['size'] * units[size['unit'].lower()] - logging.error(f"Creating a file of size {size_bytes} bytes") - - # Create a temporary file - temp_file = tempfile.NamedTemporaryFile(delete=True) - temp_file.write(b'a' * size_bytes) - yield temp_file + if not os.path.exists(file_path): + # Create a file + with open(file_path, 'wb') as f: + f.write(os.urandom(size['size'] * units[size['unit'].lower()])) + + f.close() - temp_file.close() + return size['size'] * units[size['unit'].lower()] + + From bcb94fba9d37ef3faeb2a377846296a72f88c512 Mon Sep 17 00:00:00 2001 From: luis Date: Tue, 21 Jan 2025 12:37:12 -0300 Subject: [PATCH 07/12] improved funcs --- .gitignore | 5 +- docs/big_objects_test.py | 153 ++++++++++++++++++++++++--------------- docs/utils/utils.py | 8 +- 3 files changed, 102 insertions(+), 64 deletions(-) diff --git a/.gitignore b/.gitignore index 76f26c1..49a26e1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ params.yaml **/__pycache__/ # Ignore pytest debug log file -pytestdebug.log \ No newline at end of file +pytestdebug.log + +# Generate Files +/tmp_files/* diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index 7ebae36..a07b900 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -1,16 +1,19 @@ import pytest import logging -from utils.utils import (create_big_file) +from utils.utils import create_big_file from utils.crud import bucket_with_name from boto3.s3.transfer import TransferConfig import uuid +from tqdm import tqdm +import os -size_list = [{'size': 10, 'unit': 'mb'}, - # {'size': 100, 'unit': 'mb'}, -# {'size': 1, 'unit': 'gb'}, -# {'size': 5, 'unit': 'gb'}, -# {'size': 10, 'unit': 'gb'}, -] +size_list = [ + {'size': 10, 'unit': 'mb'}, + #{'size': 100, 'unit': 'mb'}, + #{'size': 1, 'unit': 'gb'}, + #{'size': 5, 'unit': 'gb'}, + #{'size': 10, 'unit': 'gb'}, +] @pytest.mark.parametrize( 'size', @@ -18,6 +21,7 @@ ids=[f"{s['size']}{s['unit']}" for s in size_list] ) + @pytest.mark.turtle @pytest.mark.big_objects def test_multipart_upload(s3_client, bucket_with_name, size): @@ -28,12 +32,62 @@ def test_multipart_upload(s3_client, bucket_with_name, size): :param size: dict: value containing an int size and a string unit :return: None """ - file_path = "./big_file.txt" + file_path = f"./tmp_files/big_file_upload_{size['size']}{size['unit']}" bucket_name = bucket_with_name - size = create_big_file(file_path, size) + size_file = create_big_file(file_path, size) object_key = "big_object" + uuid.uuid4().hex[:6] + # Config for multhreading of boto3 building multipart upload/download + config = TransferConfig( + multipart_threshold=100 * 1024 * 1024, + max_concurrency=10, + multipart_chunksize=100 * 1024 * 1024, + use_threads=True + ) + + try: + # Function to show a progress bar of the upload + with tqdm(total=size_file, + desc=bucket_name, + bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + unit='B', + unit_scale=True, unit_divisor=1024) as pbar: + + response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) + + response = s3_client.head_object(Bucket=bucket_name, Key=object_key) + object_size = response.get('ContentLength', 0) + assert object_size == size_file, "Uploaded object size doesn't match" + + logging.error(f"Uploaded object: {object_key} to bucket: {bucket_name}") + logging.error(f"Size of the object: {size_file}/{object_size} bytes") + except Exception as e: + logging.error(f"Error uploading object {object_key}: {e}") + + +@pytest.mark.parametrize( + 'size', + [s for s in size_list], + ids=[f"{s['size']}{s['unit']}" for s in size_list] +) + +@pytest.mark.turtle +@pytest.mark.big_objects +def test_multipart_download(s3_client, bucket_with_name, size): + """ + Test to download a big object to an S3 bucket using multipart download + :param s3_client: fixture of boto3 s3 client + :param bucket_with_name: fixture to create a bucket with a unique name + :param size: dict: value containing an int size and a string unit + :return: None + """ + + file_path = f"./tmp_files/big_file_download{size['size']}{size['unit']}" + bucket_name = bucket_with_name + size_file = create_big_file(file_path, size) + object_key = "big_object" + uuid.uuid4().hex[:6] + # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( multipart_threshold=8 * 1024 * 1024, @@ -42,59 +96,38 @@ def test_multipart_upload(s3_client, bucket_with_name, size): use_threads=True ) + # upload object to s3 try: - response = s3_client.upload_file("./big_file", bucket_name, object_key, Config=config) - response = s3_client.head_object(Bucket=bucket_name, Key=object_key) - logging.error(f"Uploaded object: {object_key} to bucket: {bucket_name}") - object_size = response['ContentLength'] - logging.error(f"Size of the object: {object_size} bytes") + with tqdm(total=size_file, + desc=bucket_name, + bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + unit='B', + unit_scale=True, unit_divisor=1024) as pbar: + + s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") -#@pytest.mark.parametrize( -# 'size', -# [s for s in size_list], -# ids=[f"{s['size']}{s['unit']}" for s in size_list] -#) -# -# -#@pytest.mark.turtle -#@pytest.mark.big_objects -#def test_multipart_download(s3_client, bucket_with_name, size): -# """ -# Test to download a big object to an S3 bucket using multipart download -# :param s3_client: fixture of boto3 s3 client -# :param bucket_with_name: fixture to create a bucket with a unique name -# :param size: dict: value containing an int size and a string unit -# :return: None -# """ -# -# bucket_name = bucket_with_name -# big_file_path = create_big_file(size) -# object_key = "big_object" + uuid.uuid4().hex[:6] -# -# # Config for multhreading of boto3 building multipart upload/download -# config = TransferConfig( -# multipart_threshold=8 * 1024 * 1024, -# max_concurrency=10, -# multipart_chunksize=8 * 1024 * 1024, -# use_threads=True -# ) -# -# # upload object to s3 -# try: -# response = s3_client.upload_file(big_file_path.name, bucket_name, object_key, Config=config) -# assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" -# -# except Exception as e: -# logging.error(f"Error uploading object {object_key}: {e}") -# -# # Test download file from s3 bucket -# try: -# response = s3_client.download_file(big_file_path.name, bucket_name, object_key, Config=config) -# assert response['ResponseMetadata']['HTTPStatusCode'] == 200, "Expected a 200 response code" -# except Exception as e: -# logging.error(f"Error uploading object {object_key}: {e}") -# -# \ No newline at end of file + + # Test download file from s3 bucket + try: + download_path = file_path + '_downloaded' + + with tqdm(total=size_file, + desc=bucket_name, + bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + unit='B', + unit_scale=True, unit_divisor=1024) as pbar: + + response = s3_client.download_file(Bucket=bucket_name, Key=object_key, Filename = download_path, Config=config, Callback=pbar.update) + try: + assert os.path.getsize(download_path) == size_file, "Download size doesnt match file size" + finally: + if os.path.exists(download_path): + os.remove(download_path) + + except Exception as e: + logging.error(f"Error uploading object {object_key}: {e}") + + \ No newline at end of file diff --git a/docs/utils/utils.py b/docs/utils/utils.py index 1307e3c..fd176a7 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -53,7 +53,7 @@ def create_big_file(file_path, size = {'size': 100, 'unit': 'mb'}): size_bytes = size * units[size['unit'].lower()] -def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}): +def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}) -> int: """ Create a big file with the specified size using a temporary file. @@ -69,6 +69,10 @@ def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}): if size['unit'].lower() not in units: raise ValueError(f"Invalid unit: {size['unit']}") + if not os.path.exists('./tmp_files'): + os.mkdir('./tmp_files') + + if not os.path.exists(file_path): # Create a file with open(file_path, 'wb') as f: @@ -78,5 +82,3 @@ def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}): return size['size'] * units[size['unit'].lower()] - - From 6f59034db2e8a0636dd0a29716e8b3e67d8f6fd0 Mon Sep 17 00:00:00 2001 From: luis Date: Tue, 21 Jan 2025 14:12:40 -0300 Subject: [PATCH 08/12] utilities --- docs/big_objects_test.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index a07b900..6cac6a0 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -9,10 +9,10 @@ size_list = [ {'size': 10, 'unit': 'mb'}, - #{'size': 100, 'unit': 'mb'}, - #{'size': 1, 'unit': 'gb'}, - #{'size': 5, 'unit': 'gb'}, - #{'size': 10, 'unit': 'gb'}, + {'size': 100, 'unit': 'mb'}, + {'size': 1, 'unit': 'gb'}, + {'size': 5, 'unit': 'gb'}, + {'size': 10, 'unit': 'gb'}, ] @pytest.mark.parametrize( @@ -36,7 +36,7 @@ def test_multipart_upload(s3_client, bucket_with_name, size): bucket_name = bucket_with_name size_file = create_big_file(file_path, size) - object_key = "big_object" + uuid.uuid4().hex[:6] + object_key = "big_object_" + uuid.uuid4().hex[:6] # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( @@ -47,21 +47,21 @@ def test_multipart_upload(s3_client, bucket_with_name, size): ) try: - # Function to show a progress bar of the upload + # Upload Progress Bar with time stamp with tqdm(total=size_file, desc=bucket_name, - bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | Time: {elapsed} | {desc}", unit='B', unit_scale=True, unit_divisor=1024) as pbar: - + response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) + elapsed = pbar.format_dict['elapsed'] response = s3_client.head_object(Bucket=bucket_name, Key=object_key) object_size = response.get('ContentLength', 0) assert object_size == size_file, "Uploaded object size doesn't match" - logging.error(f"Uploaded object: {object_key} to bucket: {bucket_name}") - logging.error(f"Size of the object: {size_file}/{object_size} bytes") + logging.error(f"Object: {object_key}, size: {object_size}, bucket: {bucket_name}") except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") @@ -96,7 +96,7 @@ def test_multipart_download(s3_client, bucket_with_name, size): use_threads=True ) - # upload object to s3 + # upload object to s3 try: with tqdm(total=size_file, desc=bucket_name, From 5c53cd144d2d0048a1f99ed52676ac6375413466 Mon Sep 17 00:00:00 2001 From: luis Date: Wed, 22 Jan 2025 17:47:15 -0300 Subject: [PATCH 09/12] added multipart uplaod fixture and combined two big files tests in one --- docs/big_objects_test.py | 139 ++++++++++++++------------------------- docs/utils/crud.py | 40 ++++++++++- docs/utils/utils.py | 47 ++++++------- pyproject.toml | 2 +- 4 files changed, 110 insertions(+), 118 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index 6cac6a0..709516a 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -1,92 +1,60 @@ import pytest import logging -from utils.utils import create_big_file -from utils.crud import bucket_with_name +from utils.utils import create_big_file, convert_unit +from utils.crud import fixture_bucket_with_name, fixture_upload_multipart_file from boto3.s3.transfer import TransferConfig import uuid from tqdm import tqdm import os + size_list = [ - {'size': 10, 'unit': 'mb'}, - {'size': 100, 'unit': 'mb'}, - {'size': 1, 'unit': 'gb'}, - {'size': 5, 'unit': 'gb'}, - {'size': 10, 'unit': 'gb'}, -] + {'size': 10, 'unit': 'mb'}, + {'size': 100, 'unit': 'mb'}, + # {'size': 1, 'unit': 'gb'}, + # {'size': 5, 'unit': 'gb'}, + # {'size': 10, 'unit': 'gb'}, +] + +ids_list = [f"{s['size']}{s['unit']}" for s in size_list] + +upload_params = [ + { + 'file_path': f"./tmp_files/big_file_download{size['size']}{size['unit']}", + 'file_size': size, + 'object_key': "big-object-" + uuid.uuid4().hex[:6], + } + for size in size_list +] @pytest.mark.parametrize( - 'size', - [s for s in size_list], - ids=[f"{s['size']}{s['unit']}" for s in size_list] + 'params, fixture_upload_multipart_file', + [(p, p) for p in upload_params], + ids=ids_list, + indirect=['fixture_upload_multipart_file'] ) +# ## Test multipart download while implicitly tests the upload and delete of big objects -@pytest.mark.turtle +@pytest.mark.slow @pytest.mark.big_objects -def test_multipart_upload(s3_client, bucket_with_name, size): +def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_multipart_file, params): """ - Test to upload a big object to an S3 bucket using multipart upload + Test to download a big object to an S3 bucket using multipart download :param s3_client: fixture of boto3 s3 client - :param bucket_with_name: fixture to create a bucket with a unique name + :param fixture_bucket_with_name: fixture to create a bucket with a unique name :param size: dict: value containing an int size and a string unit :return: None """ - file_path = f"./tmp_files/big_file_upload_{size['size']}{size['unit']}" - bucket_name = bucket_with_name - size_file = create_big_file(file_path, size) - object_key = "big_object_" + uuid.uuid4().hex[:6] - - # Config for multhreading of boto3 building multipart upload/download - config = TransferConfig( - multipart_threshold=100 * 1024 * 1024, - max_concurrency=10, - multipart_chunksize=100 * 1024 * 1024, - use_threads=True - ) + # Unpacking params + file_path = params.get('file_path') + download_path = file_path + "_downloaded" + object_key = params.get('object_key') - try: - # Upload Progress Bar with time stamp - with tqdm(total=size_file, - desc=bucket_name, - bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | Time: {elapsed} | {desc}", - unit='B', - unit_scale=True, unit_divisor=1024) as pbar: - - response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) - elapsed = pbar.format_dict['elapsed'] - - response = s3_client.head_object(Bucket=bucket_name, Key=object_key) - object_size = response.get('ContentLength', 0) - assert object_size == size_file, "Uploaded object size doesn't match" - - logging.error(f"Object: {object_key}, size: {object_size}, bucket: {bucket_name}") - except Exception as e: - logging.error(f"Error uploading object {object_key}: {e}") - - -@pytest.mark.parametrize( - 'size', - [s for s in size_list], - ids=[f"{s['size']}{s['unit']}" for s in size_list] -) - -@pytest.mark.turtle -@pytest.mark.big_objects -def test_multipart_download(s3_client, bucket_with_name, size): - """ - Test to download a big object to an S3 bucket using multipart download - :param s3_client: fixture of boto3 s3 client - :param bucket_with_name: fixture to create a bucket with a unique name - :param size: dict: value containing an int size and a string unit - :return: None - """ + bucket_name = fixture_bucket_with_name + total_size = create_big_file(file_path, params.get('file_size')) - file_path = f"./tmp_files/big_file_download{size['size']}{size['unit']}" - bucket_name = bucket_with_name - size_file = create_big_file(file_path, size) - object_key = "big_object" + uuid.uuid4().hex[:6] # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( @@ -96,38 +64,29 @@ def test_multipart_download(s3_client, bucket_with_name, size): use_threads=True ) - # upload object to s3 - try: - with tqdm(total=size_file, - desc=bucket_name, - bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", - unit='B', - unit_scale=True, unit_divisor=1024) as pbar: - - s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) - except Exception as e: - logging.error(f"Error uploading object {object_key}: {e}") - + # Uploading the big file + uploaded_file_size = fixture_upload_multipart_file # Test download file from s3 bucket try: - download_path = file_path + '_downloaded' - - with tqdm(total=size_file, + with tqdm(total=total_size, desc=bucket_name, - bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + bar_format="Download| {percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", unit='B', unit_scale=True, unit_divisor=1024) as pbar: - response = s3_client.download_file(Bucket=bucket_name, Key=object_key, Filename = download_path, Config=config, Callback=pbar.update) - try: - assert os.path.getsize(download_path) == size_file, "Download size doesnt match file size" - finally: - if os.path.exists(download_path): - os.remove(download_path) + s3_client.download_file(Bucket=bucket_name, Key=object_key, Filename = download_path, Config=config, Callback=pbar.update) + # Retrieving sizes + downloaded_file_size = os.path.getsize(download_path) + + # The test was successful only if the size on the bucket size is equal to the ones uploaded and downloaded + assert downloaded_file_size == uploaded_file_size, f"Downloaded size doesn't match: {downloaded_file_size} with Upload size: {uploaded_file_size}" except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") - \ No newline at end of file + + ## Regular download x multipart download time + + diff --git a/docs/utils/crud.py b/docs/utils/crud.py index 37781a7..8f9db8f 100644 --- a/docs/utils/crud.py +++ b/docs/utils/crud.py @@ -1,8 +1,10 @@ import logging import pytest from concurrent.futures import ThreadPoolExecutor, as_completed -from utils.utils import generate_valid_bucket_name +from utils.utils import generate_valid_bucket_name, convert_unit +from boto3.s3.transfer import TransferConfig import os +from tqdm import tqdm ### Functions @@ -250,3 +252,39 @@ def fixture_upload_multiple_objects(s3_client, fixture_bucket_with_name, request objects_names = [{"key": f"multiple-object'-{i}", "path": path} for i in range(qnt)] return upload_objects_multithreaded(s3_client, fixture_bucket_with_name, objects_names) +@pytest.fixture +def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) -> int: + """ + Uploads a big file into multiple chunks to s3 bucket + :param s3_client: boto3 s3 client + :param fixture_bucket_with_name: pytest.fixture which setup and tears down bucket + :param request: dict: contains file_path, file_size and object_key + :return int: size of the file in the bucket + """ + bucket_name = fixture_bucket_with_name + file_path = request.param.get('file_path') + file_size = convert_unit(request.param.get('file_size')) + object_key = request.param.get('object_key') + + # Config for multhreading of boto3 building multipart upload/download + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, # Minimum size to start multipart upload + max_concurrency=10, + multipart_chunksize=8 * 1024 * 1024, + use_threads=True + ) + + # Upload Progress Bar with time stamp + with tqdm(total=file_size, + desc=bucket_name, + bar_format="Upload| {percentage:.1f}%|{bar:25}| {rate_fmt} | Time: {elapsed} | {desc}", + unit='B', + unit_scale=True, unit_divisor=1024) as pbar: + + response = s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update) + elapsed = pbar.format_dict['elapsed'] + + # Checking if the object was uploaded + object_size = s3_client.get_object(Bucket=bucket_name, Key=object_key).get('ContentLength', 0) + + return object_size #return size \ No newline at end of file diff --git a/docs/utils/utils.py b/docs/utils/utils.py index fd176a7..e4889b1 100644 --- a/docs/utils/utils.py +++ b/docs/utils/utils.py @@ -1,5 +1,6 @@ import uuid import os +import logging # Function is responsible to check and format bucket names into valid ones @@ -28,46 +29,40 @@ def generate_valid_bucket_name(base_name="my-unique-bucket"): return "".join(new_name) -# Function which will be using to create mock files with different sizes -def create_big_file(file_path, size = {'size': 100, 'unit': 'mb'}): + +def convert_unit(size = {'size': 100, 'unit': 'mb'}) -> int: """ - Create a big file with the specified size in the specified path - :param file_path: str: path to the file to be created - :param size: dict: value containing the an int sie and a stirng unit - :return: int: size of the file created + Converts a dict containing a int and a str into a int representing the size in bytes + :param size: dict: {'size': int, 'unit': ('kb', 'mb', 'gb')} + :return: int: value in bytes of size """ - size = 1024 - - units = { + units_dict = { 'kb': 1024, 'mb': 1024 * 1024, - 'mb': 1024 * 1024 * 1024, + 'gb': 1024 * 1024 * 1024, } + + unit = size['unit'].lower() - if size['size'].lower() not in units: + # Check if it is a valid unit to be converted + if unit not in units_dict: raise ValueError(f"Invalid unit: {size['unit']}") - - # Creating a file of size * unit - size_bytes = size * units[size['unit'].lower()] - -def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}) -> int: + return size['size'] * units_dict.get(unit) + + + +def create_big_file(file_path: str, size={'size': 100, 'unit': 'mb'}) -> int: """ Create a big file with the specified size using a temporary file. :param size: dict: A dictionary containing an int 'size' and a str 'unit'. :yield: str: Path to the temporary file created. """ - units = { - 'kb': 1024, - 'mb': 1024 * 1024, - 'gb': 1024 * 1024 * 1024, - } - if size['unit'].lower() not in units: - raise ValueError(f"Invalid unit: {size['unit']}") + total_size = convert_unit(size) if not os.path.exists('./tmp_files'): os.mkdir('./tmp_files') @@ -76,9 +71,9 @@ def create_big_file(file_path, size={'size': 100, 'unit': 'mb'}) -> int: if not os.path.exists(file_path): # Create a file with open(file_path, 'wb') as f: - f.write(os.urandom(size['size'] * units[size['unit'].lower()])) - + f.write(os.urandom(total_size)) f.close() - return size['size'] * units[size['unit'].lower()] + return total_size + diff --git a/pyproject.toml b/pyproject.toml index 305714d..03e4de7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ markers = [ "bucket_versioning: Bucket Versioning", "cli: Tests using CLI", "multiple_objects: Tests involving operations with multiple objects on the same bucket", - "big_objects": Tests with files bigger than 5 mb, + "big_objects: Tests with files bigger than 5 mb", "rapid: quick expected execution magnitude", "regular: regular time expected execution magnitude", "slow: slow expected execution magnitude", From 6d6e0a87245ce3a683798d0ce8e48546341bd79f Mon Sep 17 00:00:00 2001 From: luis Date: Thu, 23 Jan 2025 10:30:41 -0300 Subject: [PATCH 10/12] added commentaries --- docs/big_objects_test.py | 12 ++++-------- docs/utils/crud.py | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index 709516a..20f1629 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -43,7 +43,7 @@ def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_ Test to download a big object to an S3 bucket using multipart download :param s3_client: fixture of boto3 s3 client :param fixture_bucket_with_name: fixture to create a bucket with a unique name - :param size: dict: value containing an int size and a string unit + :param params: dict: 'file_path': str, 'file_size': dict, 'object_key': str :return: None """ @@ -58,7 +58,7 @@ def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_ # Config for multhreading of boto3 building multipart upload/download config = TransferConfig( - multipart_threshold=8 * 1024 * 1024, + multipart_threshold=40 * 1024 * 1024, max_concurrency=10, multipart_chunksize=8 * 1024 * 1024, use_threads=True @@ -70,9 +70,10 @@ def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_ # Test download file from s3 bucket try: + # Graphing the download progress with tqdm(total=total_size, desc=bucket_name, - bar_format="Download| {percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", + bar_format="Download| {percentage:.1f}%|{bar:25} | {rate_fmt} | Time: {elapsed} | {desc}", unit='B', unit_scale=True, unit_divisor=1024) as pbar: @@ -85,8 +86,3 @@ def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_ assert downloaded_file_size == uploaded_file_size, f"Downloaded size doesn't match: {downloaded_file_size} with Upload size: {uploaded_file_size}" except Exception as e: logging.error(f"Error uploading object {object_key}: {e}") - - - ## Regular download x multipart download time - - diff --git a/docs/utils/crud.py b/docs/utils/crud.py index 8f9db8f..87e5fe4 100644 --- a/docs/utils/crud.py +++ b/docs/utils/crud.py @@ -259,7 +259,7 @@ def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) :param s3_client: boto3 s3 client :param fixture_bucket_with_name: pytest.fixture which setup and tears down bucket :param request: dict: contains file_path, file_size and object_key - :return int: size of the file in the bucket + :return int: size in bytes of the obejct """ bucket_name = fixture_bucket_with_name file_path = request.param.get('file_path') @@ -287,4 +287,4 @@ def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) # Checking if the object was uploaded object_size = s3_client.get_object(Bucket=bucket_name, Key=object_key).get('ContentLength', 0) - return object_size #return size \ No newline at end of file + return object_size #return int of size in bytes \ No newline at end of file From 4014f58631abbc6ef3a977ae1b227aa95f5785bd Mon Sep 17 00:00:00 2001 From: luis Date: Thu, 23 Jan 2025 10:49:54 -0300 Subject: [PATCH 11/12] added tqdm as depedency --- docs/big_objects_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/big_objects_test.py b/docs/big_objects_test.py index 20f1629..6c0fce2 100644 --- a/docs/big_objects_test.py +++ b/docs/big_objects_test.py @@ -11,9 +11,9 @@ size_list = [ {'size': 10, 'unit': 'mb'}, {'size': 100, 'unit': 'mb'}, - # {'size': 1, 'unit': 'gb'}, - # {'size': 5, 'unit': 'gb'}, - # {'size': 10, 'unit': 'gb'}, + {'size': 1, 'unit': 'gb'}, + {'size': 5, 'unit': 'gb'}, + {'size': 10, 'unit': 'gb'}, ] ids_list = [f"{s['size']}{s['unit']}" for s in size_list] From ef106b1e311dc95272f6e3666c356c90feac1de7 Mon Sep 17 00:00:00 2001 From: luis Date: Thu, 23 Jan 2025 10:53:56 -0300 Subject: [PATCH 12/12] uv dependeccy reinstalled --- pyproject.toml | 1 + uv.lock | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 03e4de7..5acf31b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "pytest-rerunfailures<16.0,>=15.0", "pytest-xdist<4.0.0,>=3.6.1", "pytest-repeat<1.0.0,>=0.9.3", + "tqdm>=4.67.1", ] name = "s3-specs" version = "0.1.0" diff --git a/uv.lock b/uv.lock index 644b876..26d908b 100644 --- a/uv.lock +++ b/uv.lock @@ -267,7 +267,7 @@ name = "ipykernel" version = "6.29.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "appnope", marker = "platform_system == 'Darwin'" }, + { name = "appnope", marker = "sys_platform == 'darwin'" }, { name = "comm" }, { name = "debugpy" }, { name = "ipython" }, @@ -958,6 +958,7 @@ dependencies = [ { name = "pytest-rerunfailures" }, { name = "pytest-xdist" }, { name = "requests" }, + { name = "tqdm" }, { name = "uuid" }, ] @@ -978,6 +979,7 @@ requires-dist = [ { name = "pytest-rerunfailures", specifier = ">=15.0,<16.0" }, { name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" }, { name = "requests", specifier = ">=2.32.3,<3.0.0" }, + { name = "tqdm", specifier = ">=4.67.1" }, { name = "uuid", specifier = ">=1.30,<2.0" }, ] @@ -1062,6 +1064,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/cc/58b1adeb1bb46228442081e746fcdbc4540905c87e8add7c277540934edb/tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38", size = 438907 }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + [[package]] name = "traitlets" version = "5.14.3"