1111from typing import IO , Any , Dict , List , Tuple , cast
1212from pathlib import Path
1313from functools import partial
14- from concurrent .futures import ThreadPoolExecutor , as_completed
14+ from concurrent .futures import Future , ThreadPoolExecutor , as_completed
1515
1616import httpx
1717from tqdm import tqdm
@@ -463,6 +463,22 @@ def _initiate_upload(
463463 body = response .text ,
464464 )
465465
466+ def _submit_part (
467+ self ,
468+ executor : ThreadPoolExecutor ,
469+ file_handle : IO [bytes ],
470+ part_info : Dict [str , Any ],
471+ part_size : int ,
472+ ) -> Tuple [Future [str ], int ]:
473+ """Submit a single part for upload and return its future and part number."""
474+
475+ part_number = part_info .get ("PartNumber" , part_info .get ("part_number" , 1 ))
476+ file_handle .seek ((part_number - 1 ) * part_size )
477+ part_data = file_handle .read (part_size )
478+
479+ future = executor .submit (self ._upload_single_part , part_info , part_data )
480+ return future , part_number
481+
466482 def _upload_parts_concurrent (self , file : Path , upload_info : Dict [str , Any ], part_size : int ) -> List [Dict [str , Any ]]:
467483 """Upload file parts concurrently with progress tracking"""
468484
@@ -471,25 +487,32 @@ def _upload_parts_concurrent(self, file: Path, upload_info: Dict[str, Any], part
471487
472488 with ThreadPoolExecutor (max_workers = self .max_concurrent_parts ) as executor :
473489 with tqdm (total = len (parts ), desc = "Uploading parts" , unit = "part" , disable = bool (DISABLE_TQDM )) as pbar :
474- future_to_part : Dict [Any , int ] = {}
475-
476490 with open (file , "rb" ) as f :
477- for part_info in parts :
478- part_number = part_info .get ("PartNumber" , part_info .get ("part_number" , 1 ))
479- f .seek ((part_number - 1 ) * part_size )
480- part_data = f .read (part_size )
491+ future_to_part : Dict [Future [str ], int ] = {}
492+ part_index = 0
481493
482- future = executor .submit (self ._upload_single_part , part_info , part_data )
494+ while part_index < len (parts ) and len (future_to_part ) < self .max_concurrent_parts :
495+ part_info = parts [part_index ]
496+ future , part_number = self ._submit_part (executor , f , part_info , part_size )
483497 future_to_part [future ] = part_number
498+ part_index += 1
499+
500+ while future_to_part :
501+ done_future = next (as_completed (future_to_part ))
502+ part_number = future_to_part .pop (done_future )
484503
485- for future in as_completed (future_to_part ):
486- part_number = future_to_part [future ]
487- try :
488- etag = future .result ()
489- completed_parts .append ({"part_number" : part_number , "etag" : etag })
490- pbar .update (1 )
491- except Exception as e :
492- raise Exception (f"Failed to upload part { part_number } : { e } " ) from e
504+ try :
505+ etag = done_future .result ()
506+ completed_parts .append ({"part_number" : part_number , "etag" : etag })
507+ pbar .update (1 )
508+ except Exception as e :
509+ raise Exception (f"Failed to upload part { part_number } : { e } " ) from e
510+
511+ if part_index < len (parts ):
512+ part_info = parts [part_index ]
513+ future , next_part_number = self ._submit_part (executor , f , part_info , part_size )
514+ future_to_part [future ] = next_part_number
515+ part_index += 1
493516
494517 completed_parts .sort (key = lambda x : x ["part_number" ])
495518 return completed_parts
@@ -834,25 +857,46 @@ async def _upload_parts_concurrent(
834857
835858 with ThreadPoolExecutor (max_workers = self .max_concurrent_parts ) as executor :
836859 with tqdm (total = len (parts ), desc = "Uploading parts" , unit = "part" , disable = bool (DISABLE_TQDM )) as pbar :
837- # Submit all upload tasks
838- futures : List [Tuple [Any , int ]] = []
839860 with open (file , "rb" ) as f :
840- for part_info in parts :
861+ future_to_part : Dict [asyncio .Future [str ], int ] = {}
862+ part_index = 0
863+
864+ while part_index < len (parts ) and len (future_to_part ) < self .max_concurrent_parts :
865+ part_info = parts [part_index ]
841866 part_number = part_info .get ("PartNumber" , part_info .get ("part_number" , 1 ))
842867 f .seek ((part_number - 1 ) * part_size )
843868 part_data = f .read (part_size )
844869
845870 future = loop .run_in_executor (executor , self ._upload_single_part_sync , part_info , part_data )
846- futures .append ((future , part_number ))
847-
848- # Collect results
849- for future , part_number in futures :
850- try :
851- etag = await future
852- completed_parts .append ({"part_number" : part_number , "etag" : etag })
853- pbar .update (1 )
854- except Exception as e :
855- raise Exception (f"Failed to upload part { part_number } : { e } " ) from e
871+ future_to_part [future ] = part_number
872+ part_index += 1
873+
874+ while future_to_part :
875+ done , _ = await asyncio .wait (
876+ tuple (future_to_part .keys ()),
877+ return_when = asyncio .FIRST_COMPLETED ,
878+ )
879+
880+ for done_future in done :
881+ part_number = future_to_part .pop (done_future )
882+
883+ try :
884+ etag = await done_future
885+ completed_parts .append ({"part_number" : part_number , "etag" : etag })
886+ pbar .update (1 )
887+ except Exception as e :
888+ raise Exception (f"Failed to upload part { part_number } : { e } " ) from e
889+
890+ if part_index < len (parts ):
891+ part_info = parts [part_index ]
892+ next_part_number = part_info .get ("PartNumber" , part_info .get ("part_number" , 1 ))
893+ f .seek ((next_part_number - 1 ) * part_size )
894+ part_data = f .read (part_size )
895+ future = loop .run_in_executor (
896+ executor , self ._upload_single_part_sync , part_info , part_data
897+ )
898+ future_to_part [future ] = next_part_number
899+ part_index += 1
856900
857901 completed_parts .sort (key = lambda x : x ["part_number" ])
858902 return completed_parts
0 commit comments