Skip to content

Commit fa27ec6

Browse files
committed
Fix handling of files with URL-encoded names and/or invalid characters
1 parent db09f3e commit fa27ec6

File tree

1 file changed

+33
-19
lines changed

1 file changed

+33
-19
lines changed

download/ia-download.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from concurrent.futures import ThreadPoolExecutor
3030
from tqdm import tqdm
3131
import sys
32+
import urllib.parse
3233

3334
# Get the URL from the command-line arguments
3435
if len(sys.argv) < 2:
@@ -55,33 +56,46 @@
5556
# Create the download directory if needed. ``exist_ok=True`` avoids the
# check-then-create race of a separate ``os.path.exists`` test.
os.makedirs(download_dir, exist_ok=True)
5758

59+
# Function to sanitize filenames by replacing invalid characters
60+
# Maps every character that must not appear in a local file name to '-':
# the reserved punctuation ``<>:"/\|?*`` plus the ASCII control characters
# (code points 0-31), which percent-decoding can introduce (e.g. %00, %0A).
_INVALID_FILENAME_TABLE = str.maketrans(
    {char: '-' for char in '<>:"/\\|?*' + ''.join(map(chr, range(32)))}
)


def sanitize_filename(filename):
    """Return *filename* with characters unsafe for file names replaced by '-'.

    Args:
        filename: Candidate file name (a single path component).

    Returns:
        A string of the same length in which each of ``<>:"/\\|?*`` and each
        ASCII control character has been replaced by ``-``. Every other
        character, including non-ASCII text, is left untouched.
    """
    # One translate() pass replaces the chained per-character replace() calls.
    return filename.translate(_INVALID_FILENAME_TABLE)
65+
5866
# Define a function to download a single file with progress bar
5967
def download_file(file_url):
    """Download *file_url* into ``download_dir`` under a sanitized local name.

    The local name is the last path segment of the URL, percent-decoded and
    then passed through ``sanitize_filename``. The target file is opened for
    binary writing and the transfer itself is delegated to
    ``download_and_write_file``.

    Args:
        file_url: URL of the file to fetch.
    """
    # Split on '/' *before* decoding: decoding first would turn an encoded
    # slash (%2F) inside a file name into a path separator, truncating the
    # name to whatever follows it.
    encoded_name = file_url.rsplit('/', 1)[-1]
    filename = sanitize_filename(urllib.parse.unquote(encoded_name))

    filepath = os.path.join(download_dir, filename)
    with open(filepath, 'wb') as f:
        download_and_write_file(f, file_url, filename)
75+
76+
def download_and_write_file(file_handle, file_url, filename):
    """Stream *file_url* into *file_handle* while showing a progress bar.

    Args:
        file_handle: Writable binary file object that receives the data.
        file_url: URL to fetch; passed to ``requests.get`` unchanged.
        filename: Label displayed next to the progress bar.

    Raises:
        requests.HTTPError: If the server responds with an error status, so
            that an error page is not written out as if it were the file.
    """
    block_size = 1024
    # The context managers release the streamed connection and close the
    # progress bar on every exit path, including exceptions.
    with requests.get(file_url, stream=True) as response:
        response.raise_for_status()
        # Falls back to 0 when the server sends no Content-Length header.
        total_size = int(response.headers.get('content-length', 0))
        with tqdm(total=total_size, unit='iB', unit_scale=True,
                  desc=filename) as progress_bar:
            try:
                for data in response.iter_content(block_size):
                    # NOTE(review): ``tqdm._instances`` is a private tqdm
                    # attribute; this check is kept from the original as the
                    # "progress bar was closed" signal -- confirm it is
                    # still needed.
                    if not tqdm._instances:
                        break
                    progress_bar.update(len(data))
                    file_handle.write(data)
            except KeyboardInterrupt:
                # Interrupted (Ctrl+C): tidy up the bar and stop.
                progress_bar.close()
                print('\nDownload interrupted.')
                sys.exit()
8094

8195
# Download the files on a pool of worker threads. Each future is kept so that
# an exception raised inside a worker is reported instead of being discarded.
pending = []
with ThreadPoolExecutor(max_workers=10) as executor:
    for link in file_links:
        file_url = url + link.get('href')
        pending.append((file_url, executor.submit(download_file, file_url)))

# Leaving the ``with`` block waits for every submitted task, so each future
# is finished by the time it is inspected here.
failed = 0
for file_url, future in pending:
    error = future.exception()
    if error is not None:
        failed += 1
        print(f'Failed to download {file_url}: {error}', file=sys.stderr)

if failed:
    print(f'{failed} of {len(pending)} {file_type[1:]} files failed to download.')
else:
    print(f'All {file_type[1:]} files downloaded.')

0 commit comments

Comments
 (0)