
Big objects #51

Open · wants to merge 12 commits into main
5 changes: 4 additions & 1 deletion .gitignore
@@ -7,4 +7,7 @@ params.yaml
**/__pycache__/

# Ignore pytest debug log file
pytestdebug.log

# Generated files
Collaborator:

What do you think about using Linux's own /tmp path, instead of a local folder in the project?

/tmp_files/*
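
A minimal sketch of the reviewer's suggestion (hypothetical; uses the stdlib tempfile module, so the base path resolves to /tmp on Linux, and the "s3_specs_big_objects" subfolder name is illustrative):

import os
import tempfile

# Hypothetical: keep scratch files under the OS temp dir instead of the repo
TMP_DIR = os.path.join(tempfile.gettempdir(), "s3_specs_big_objects")
os.makedirs(TMP_DIR, exist_ok=True)

file_path = os.path.join(TMP_DIR, "big_file_download10mb")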
88 changes: 88 additions & 0 deletions docs/big_objects_test.py
@@ -0,0 +1,88 @@
import pytest
import logging
from utils.utils import create_big_file, convert_unit
Collaborator:

I think convert_unit is not used in this file.
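
If that suggestion is accepted, the import would simply shrink to:

from utils.utils import create_big_file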

from utils.crud import fixture_bucket_with_name, fixture_upload_multipart_file
from boto3.s3.transfer import TransferConfig
import uuid
from tqdm import tqdm
import os


size_list = [
{'size': 10, 'unit': 'mb'},
{'size': 100, 'unit': 'mb'},
{'size': 1, 'unit': 'gb'},
{'size': 5, 'unit': 'gb'},
{'size': 10, 'unit': 'gb'},
]

ids_list = [f"{s['size']}{s['unit']}" for s in size_list]

upload_params = [
{
'file_path': f"./tmp_files/big_file_download{size['size']}{size['unit']}",
'file_size': size,
'object_key': "big-object-" + uuid.uuid4().hex[:6],
}
for size in size_list
]

@pytest.mark.parametrize(
'params, fixture_upload_multipart_file',
[(p, p) for p in upload_params],
Collaborator:

You pass the same parameters both to the fixture (indirect) and to the test that uses the fixture. Couldn't you use just the fixture? Make it return all 3 things (path, size, key) instead of only the size. (A sketch of this appears after the fixture in docs/utils/crud.py below.)

ids=ids_list,
indirect=['fixture_upload_multipart_file']
)

# Tests multipart download; implicitly exercises the upload and deletion of big objects

@pytest.mark.slow
@pytest.mark.big_objects
def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_multipart_file, params):
Collaborator:

Shall we make this test executable via Jupyter notebook? I think you can follow the example of how the other tests are set up, but I can also show you in the meeting.

"""
Test downloading a big object from an S3 bucket using multipart download
:param s3_client: fixture of boto3 s3 client
:param fixture_bucket_with_name: fixture to create a bucket with a unique name
:param params: dict: 'file_path': str, 'file_size': dict, 'object_key': str
Collaborator:

Don't forget to adjust this here if you accept the earlier suggestion.

:return: None
"""

# Unpacking params
file_path = params.get('file_path')
download_path = file_path + "_downloaded"
object_key = params.get('object_key')

bucket_name = fixture_bucket_with_name
total_size = create_big_file(file_path, params.get('file_size'))


# Config for boto3 multithreading of multipart upload/download
config = TransferConfig(
multipart_threshold=40 * 1024 * 1024,
max_concurrency=10,
multipart_chunksize=8 * 1024 * 1024,
use_threads=True
)

# Uploading the big file
uploaded_file_size = fixture_upload_multipart_file


# Test download file from s3 bucket
try:
# Graphing the download progress
with tqdm(total=total_size,
desc=bucket_name,
bar_format="Download| {percentage:.1f}%|{bar:25} | {rate_fmt} | Time: {elapsed} | {desc}",
unit='B',
unit_scale=True, unit_divisor=1024) as pbar:

s3_client.download_file(Bucket=bucket_name, Key=object_key, Filename=download_path, Config=config, Callback=pbar.update)

# Retrieving sizes
downloaded_file_size = os.path.getsize(download_path)

# The test succeeds only if the downloaded size equals the uploaded size
assert downloaded_file_size == uploaded_file_size, f"Downloaded size {downloaded_file_size} doesn't match uploaded size {uploaded_file_size}"
except Exception as e:
logging.error(f"Error uploading object {object_key}: {e}")
raise  # Re-raise so the test fails instead of passing silently
Collaborator:

I think this message should say downloading, right?
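
The one-word fix the reviewer is pointing at would be:

logging.error(f"Error downloading object {object_key}: {e}")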

41 changes: 40 additions & 1 deletion docs/utils/crud.py
@@ -1,8 +1,10 @@
import logging
import pytest
from concurrent.futures import ThreadPoolExecutor, as_completed
from utils.utils import generate_valid_bucket_name
from utils.utils import generate_valid_bucket_name, convert_unit
from boto3.s3.transfer import TransferConfig
import os
from tqdm import tqdm

### Functions

@@ -222,6 +224,7 @@ def fixture_bucket_with_name(s3_client, request):
# This fixture automatically creates a bucket named after the test that requested it, then returns its name
# Lastly, it tears down the bucket by deleting it and its objects

# request.node holds the name of the test currently running
bucket_name = generate_valid_bucket_name(request.node.name.replace("_", "-"))
create_bucket(s3_client, bucket_name)

@@ -249,3 +252,39 @@ def fixture_upload_multiple_objects(s3_client, fixture_bucket_with_name, request
objects_names = [{"key": f"multiple-object'-{i}", "path": path} for i in range(qnt)]
return upload_objects_multithreaded(s3_client, fixture_bucket_with_name, objects_names)

@pytest.fixture
def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) -> int:
"""
Uploads a big file into multiple chunks to s3 bucket
:param s3_client: boto3 s3 client
:param fixture_bucket_with_name: pytest.fixture which setup and tears down bucket
:param request: dict: contains file_path, file_size and object_key
:return int: size in bytes of the obejct
"""
bucket_name = fixture_bucket_with_name
file_path = request.param.get('file_path')
file_size = convert_unit(request.param.get('file_size'))
object_key = request.param.get('object_key')

# Config for boto3 multithreading of multipart upload/download
config = TransferConfig(
multipart_threshold=8 * 1024 * 1024, # Minimum size to start multipart upload
max_concurrency=10,
multipart_chunksize=8 * 1024 * 1024,
use_threads=True
)

# Upload Progress Bar with time stamp
with tqdm(total=file_size,
desc=bucket_name,
bar_format="Upload| {percentage:.1f}%|{bar:25}| {rate_fmt} | Time: {elapsed} | {desc}",
unit='B',
unit_scale=True, unit_divisor=1024) as pbar:

s3_client.upload_file(file_path, bucket_name, object_key, Config=config, Callback=pbar.update)

# Checking if the object was uploaded
object_size = s3_client.get_object(Bucket=bucket_name, Key=object_key).get('ContentLength', 0)

return object_size  # int size in bytes
Collaborator:

Here is what I mentioned about returning the 3 values.
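
A minimal sketch of that suggestion (hypothetical; the fixture returns a tuple, the test parametrizes only the fixture, and the duplicated params argument disappears):

@pytest.fixture
def fixture_upload_multipart_file(s3_client, fixture_bucket_with_name, request) -> tuple:
    # ... same upload logic as above ...
    return file_path, object_size, object_key  # path, size in bytes, key

@pytest.mark.parametrize('fixture_upload_multipart_file', upload_params, ids=ids_list, indirect=True)
def test_multipart_download(s3_client, fixture_bucket_with_name, fixture_upload_multipart_file):
    file_path, uploaded_file_size, object_key = fixture_upload_multipart_file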

53 changes: 52 additions & 1 deletion docs/utils/utils.py
@@ -1,4 +1,6 @@
import uuid
import os
import logging

# Function responsible for checking and formatting bucket names into valid ones

@@ -25,4 +27,53 @@ def generate_valid_bucket_name(base_name="my-unique-bucket"):
new_name.append(char)


return "".join(new_name)
return "".join(new_name)



def convert_unit(size = {'size': 100, 'unit': 'mb'}) -> int:
"""
Converts a dict containing an int and a str into an int representing the size in bytes
:param size: dict: {'size': int, 'unit': ('kb', 'mb', 'gb')}
:return: int: value in bytes of size
"""

units_dict = {
'kb': 1024,
'mb': 1024 * 1024,
'gb': 1024 * 1024 * 1024,
}

unit = size['unit'].lower()

# Check if it is a valid unit to be converted
if unit not in units_dict:
raise ValueError(f"Invalid unit: {size['unit']}")

return size['size'] * units_dict.get(unit)
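
As a quick sanity check, the multipliers above imply, for example:

assert convert_unit({'size': 10, 'unit': 'mb'}) == 10 * 1024 ** 2
assert convert_unit({'size': 5, 'unit': 'gb'}) == 5 * 1024 ** 3  # 5368709120 bytes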



def create_big_file(file_path: str, size={'size': 100, 'unit': 'mb'}) -> int:
"""
Create a big file of the specified size, if it does not already exist.

:param file_path: str: path where the file will be created
:param size: dict: containing an int 'size' and a str 'unit'
:return: int: size of the created file in bytes
"""

total_size = convert_unit(size)

if not os.path.exists('./tmp_files'):
os.mkdir('./tmp_files')


if not os.path.exists(file_path):
    # Write in chunks so multi-GB files do not have to fit in memory at once
    chunk_size = 64 * 1024 * 1024
    with open(file_path, 'wb') as f:
        remaining = total_size
        while remaining > 0:
            n = min(chunk_size, remaining)
            f.write(os.urandom(n))
            remaining -= n

return total_size


2 changes: 2 additions & 0 deletions pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
"pytest-rerunfailures<16.0,>=15.0",
"pytest-xdist<4.0.0,>=3.6.1",
"pytest-repeat<1.0.0,>=0.9.3",
"tqdm>=4.67.1",
]
name = "s3-specs"
version = "0.1.0"
@@ -50,6 +51,7 @@ markers = [
"bucket_versioning: Bucket Versioning",
"cli: Tests using CLI",
"multiple_objects: Tests involving operations with multiple objects on the same bucket",
"big_objects: Tests with files bigger than 5 mb",
"rapid: quick expected execution magnitude",
"regular: regular time expected execution magnitude",
"slow: slow expected execution magnitude",
16 changes: 15 additions & 1 deletion uv.lock

(Generated file; diff not rendered.)