Skip to content

Commit 669c634

Browse files
committed
feat: Added benchmark setup script (WIP)
1 parent 0d2a2a4 commit 669c634

File tree

4 files changed

+159
-2
lines changed

4 files changed

+159
-2
lines changed

scribblebench/setup_benchmark.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import argparse
88
from git import Repo
99
import os
10+
from utils.download_kits23 import download_dataset
11+
from natsort import natsorted
1012

1113

1214
def setup_word_dataset(dataset_dir):
@@ -146,7 +148,7 @@ def setup_mscmr_dataset(dataset_dir):
146148
zip_ref.extractall(mscmr_preprocessed_dir)
147149

148150
####################################################################################################################
149-
#### Preprocess WORD dataset
151+
#### Preprocess MSCMR dataset
150152
####################################################################################################################
151153

152154
print("Preprocessing MSCMR dataset...")
@@ -171,7 +173,7 @@ def setup_mscmr_dataset(dataset_dir):
171173

172174
names = [path.name[:-7] for path in (mscmr_raw_dir / "TestSet" / "labels").rglob("*.nii.gz")]
173175
for name in names:
174-
shutil.move(mscmr_raw_dir / "TestSet" / "labels" / f"{name}.nii.gz", mscmr_preprocessed_dir / "labelsTs" / f"{name}_0000.nii.gz")
176+
shutil.move(mscmr_raw_dir / "TestSet" / "labels" / f"{name}.nii.gz", mscmr_preprocessed_dir / "labelsTs" / f"{name}.nii.gz")
175177

176178
# These two images have no dense GT so it is not possible to generate scribbles for them
177179
os.remove(mscmr_preprocessed_dir / "imagesTr" / "subject2_DE_0000.nii.gz")
@@ -194,14 +196,90 @@ def setup_mscmr_dataset(dataset_dir):
194196
print("Finished setting up MSCMR dataset.")
195197

196198

199+
def setup_kits_dataset(dataset_dir):
    """Download and preprocess the KiTS2023 dataset for ScribbleBench.

    Clones the official kits23 repository, downloads all imaging volumes,
    sorts the cases into a fixed train/test split using the nnU-Net style
    imagesTr/imagesTs/labelsTr/labelsTs layout, fetches the matching
    dataset.json and finally removes the raw download.

    Args:
        dataset_dir: Root directory used for setting up ScribbleBench.
    """
    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    raw_dir = dataset_dir / "raw"
    kits_raw_dir = raw_dir / "KiTS2023" / "dataset"
    preprocessed_dir = dataset_dir
    kits_preprocessed_dir = preprocessed_dir / "KiTS2023"
    preprocessed_dir.mkdir(parents=True, exist_ok=True)
    kits_preprocessed_dir.mkdir(parents=True, exist_ok=True)

    # Fixed test split. A set gives O(1) membership checks in the move loop
    # below (the original list made each check O(n) over ~150 entries).
    test_set = {'case_00007', 'case_00013', 'case_00003', 'case_00018', 'case_00019', 'case_00016', 'case_00026', 'case_00014',
                'case_00038', 'case_00000', 'case_00045', 'case_00050', 'case_00061', 'case_00070', 'case_00069', 'case_00074',
                'case_00087', 'case_00090', 'case_00086', 'case_00084', 'case_00059', 'case_00106', 'case_00105', 'case_00102',
                'case_00112', 'case_00117', 'case_00114', 'case_00121', 'case_00101', 'case_00096', 'case_00115', 'case_00126',
                'case_00127', 'case_00120', 'case_00124', 'case_00116', 'case_00133', 'case_00145', 'case_00147', 'case_00152',
                'case_00144', 'case_00118', 'case_00132', 'case_00135', 'case_00141', 'case_00146', 'case_00164', 'case_00167',
                'case_00172', 'case_00179', 'case_00181', 'case_00184', 'case_00192', 'case_00194', 'case_00195', 'case_00165',
                'case_00199', 'case_00210', 'case_00188', 'case_00191', 'case_00211', 'case_00212', 'case_00222', 'case_00217',
                'case_00221', 'case_00227', 'case_00232', 'case_00236', 'case_00231', 'case_00234', 'case_00214', 'case_00238',
                'case_00223', 'case_00237', 'case_00240', 'case_00216', 'case_00266', 'case_00269', 'case_00268', 'case_00275',
                'case_00279', 'case_00253', 'case_00273', 'case_00282', 'case_00287', 'case_00286', 'case_00281', 'case_00284',
                'case_00291', 'case_00283', 'case_00276', 'case_00404', 'case_00411', 'case_00416', 'case_00418', 'case_00422',
                'case_00423', 'case_00290', 'case_00424', 'case_00295', 'case_00426', 'case_00428', 'case_00293', 'case_00429',
                'case_00433', 'case_00441', 'case_00443', 'case_00437', 'case_00444', 'case_00452', 'case_00449', 'case_00453',
                'case_00463', 'case_00468', 'case_00476', 'case_00483', 'case_00485', 'case_00480', 'case_00491', 'case_00474',
                'case_00486', 'case_00496', 'case_00494', 'case_00492', 'case_00503', 'case_00442', 'case_00518', 'case_00521',
                'case_00522', 'case_00525', 'case_00515', 'case_00533', 'case_00532', 'case_00539', 'case_00517', 'case_00546',
                'case_00550', 'case_00554', 'case_00558', 'case_00557', 'case_00544', 'case_00567', 'case_00574', 'case_00555',
                'case_00576', 'case_00575', 'case_00564'}

    ####################################################################################################################
    #### Download KiTS2023 dataset
    ####################################################################################################################

    print("Downloading KiTS2023 dataset...")
    repo_url = "https://github.com/neheller/kits23.git"
    # Clone into raw/KiTS2023 so the volumes land in raw/KiTS2023/dataset.
    Repo.clone_from(repo_url, str(kits_raw_dir.parent))
    download_dataset(kits_raw_dir)

    ####################################################################################################################
    #### Preprocess KiTS2023 dataset
    ####################################################################################################################

    print("Preprocessing KiTS2023 dataset...")

    (kits_preprocessed_dir / "imagesTr").mkdir(parents=True, exist_ok=True)
    (kits_preprocessed_dir / "imagesTs").mkdir(parents=True, exist_ok=True)
    (kits_preprocessed_dir / "labelsTr").mkdir(parents=True, exist_ok=True)
    (kits_preprocessed_dir / "labelsTs").mkdir(parents=True, exist_ok=True)

    # One sub-directory per case; natsorted keeps case_00002 before case_00010.
    names = natsorted([p.name for p in kits_raw_dir.iterdir() if p.is_dir()])
    for name in names:
        postfix = "Tr" if name not in test_set else "Ts"
        # nnU-Net convention: images get a "_0000" channel suffix, labels do not.
        shutil.move(kits_raw_dir / name / "imaging.nii.gz", kits_preprocessed_dir / f"images{postfix}" / f"{name}_0000.nii.gz")
        shutil.move(kits_raw_dir / name / "segmentation.nii.gz", kits_preprocessed_dir / f"labels{postfix}" / f"{name}.nii.gz")

    dataset_json_url = "https://syncandshare.desy.de/index.php/s/Cfpwyg5dmi9a2Df/download/dataset.json"
    response = requests.get(dataset_json_url)
    response.raise_for_status()  # Raise an error on bad status
    with open(kits_preprocessed_dir / "dataset.json", "wb") as f:
        f.write(response.content)

    ####################################################################################################################
    #### Delete raw dataset files
    ####################################################################################################################

    print("Deleting raw dataset files...")
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up KiTS2023 dataset.")
269+
270+
197271
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', "--dataset_dir", required=True, type=str, help="Path to the dir used for setting up ScribbleBench.")
    parser.add_argument('--word', required=False, default=False, action="store_true", help="Download and preprocess the WORD dataset for ScribbleBench.")
    parser.add_argument('--mscmr', required=False, default=False, action="store_true", help="Download and preprocess the MSCMR dataset for ScribbleBench.")
    parser.add_argument('--kits', required=False, default=False, action="store_true", help="Download and preprocess the KiTS2023 dataset for ScribbleBench.")
    args = parser.parse_args()

    if args.word:
        setup_word_dataset(args.dataset_dir)
    # Bug fix: MSCMR setup was gated on --word (copy-paste of the check
    # above), so --mscmr alone did nothing and --word ran both setups.
    if args.mscmr:
        setup_mscmr_dataset(args.dataset_dir)
    if args.kits:
        setup_kits_dataset(args.dataset_dir)
285+

scribblebench/utils/__init__.py

Whitespace-only changes.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""A script to download the KiTS23 dataset into this repository"""
2+
import sys
3+
from tqdm import tqdm
4+
from pathlib import Path
5+
import urllib.request
6+
import shutil
7+
from time import sleep
8+
9+
# The 489 downloadable training case numbers: 0-299 and 400-588
# (numbers 300-399 are intentionally skipped).
TRAINING_CASE_NUMBERS = list(range(300)) + list(range(400, 589))
10+
11+
12+
def get_destination(case_id: str, save_dir, create: bool = False):
    """Return the path of a case's imaging file under *save_dir*.

    Args:
        case_id: Case identifier, e.g. "case_00007".
        save_dir: Root directory that holds one sub-directory per case.
        create: When True, create the case's sub-directory if missing.

    Returns:
        Path to ``<save_dir>/<case_id>/imaging.nii.gz``.
    """
    case_dir = save_dir / case_id
    if create:
        case_dir.mkdir(exist_ok=True)
    return case_dir / "imaging.nii.gz"
17+
18+
19+
def cleanup(tmp_pth: Path, e: Exception):
    """Remove a leftover partial download, then abort.

    If *e* is None the abort came from a user interrupt: print a notice and
    exit the process. Otherwise *e* is re-raised to the caller.
    """
    # Drop the temp file so no truncated download is left behind.
    if tmp_pth.exists():
        tmp_pth.unlink()

    if e is not None:
        raise e
    print("\nInterrupted.\n")
    sys.exit()
27+
28+
29+
def download_case(case_num: int, save_dir, pbar: tqdm, retry=True):
    """Download one KiTS23 imaging volume into *save_dir*.

    The file is fetched to a hidden ".partial.*" temp name and only moved to
    its final location once the transfer finished, so an interrupted download
    never leaves a truncated "imaging.nii.gz" behind.

    Args:
        case_num: Numeric case id (e.g. 7 -> "case_00007").
        save_dir: Root directory holding one sub-directory per case.
        pbar: Progress bar to close before aborting on an error.
        retry: Retry once after a 5 s pause before giving up.
    """
    remote_name = f"master_{case_num:05d}.nii.gz"
    # NOTE(review): volumes are hosted on the kits19 bucket — presumably
    # intentional for KiTS23 as well; confirm against the upstream repo.
    url = f"https://kits19.sfo2.digitaloceanspaces.com/{remote_name}"
    destination = get_destination(f"case_{case_num:05d}", save_dir, True)
    tmp_pth = destination.parent / f".partial.{destination.name}"
    try:
        urllib.request.urlretrieve(url, str(tmp_pth))
        shutil.move(str(tmp_pth), str(destination))
    except KeyboardInterrupt:
        pbar.close()
        # Keep retrying the cleanup even if the user hammers Ctrl-C;
        # cleanup() exits the process once the partial file is gone.
        while True:
            try:
                sleep(0.1)
                cleanup(tmp_pth, None)
            except KeyboardInterrupt:
                pass
    except Exception as e:
        if retry:
            print(f"\nFailed to download case_{case_num:05d}. Retrying...")
            sleep(5)
            # Bug fix: the original retry call dropped save_dir and passed
            # the progress bar in its place, and it then fell through to the
            # cleanup below (re-raising e) even when the retry succeeded.
            download_case(case_num, save_dir, pbar, retry=False)
            return
        pbar.close()
        # cleanup() re-raises e; shield it from Ctrl-C until it gets through.
        while True:
            try:
                cleanup(tmp_pth, e)
            except KeyboardInterrupt:
                pass
56+
57+
58+
def download_dataset(save_dir):
    """Download every KiTS23 training case that is not already present.

    Args:
        save_dir: Directory that will contain one sub-directory per case,
            each holding an "imaging.nii.gz". Created if missing.
    """
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    # Determine which cases still need to be downloaded (comprehension
    # instead of the original quadratic `list = list + [x]` rebuild).
    left_to_download = [
        case_num
        for case_num in TRAINING_CASE_NUMBERS
        if not get_destination(f"case_{case_num:05d}", save_dir).exists()
    ]

    # Show progressbar as cases are downloaded
    print(f"\nFound {len(left_to_download)} cases to download\n")
    for case_num in (pbar := tqdm(left_to_download)):
        # Typo fix: was "Dowloading".
        pbar.set_description(f"Downloading case_{case_num:05d}...")
        download_case(case_num, save_dir, pbar)
76+
77+
if __name__ == "__main__":
    # Bug fix: download_dataset() requires a save_dir argument, so running
    # this script directly raised a TypeError. Accept an optional target
    # directory, defaulting to a "dataset" folder next to this script.
    save_dir = sys.argv[1] if len(sys.argv) > 1 else Path(__file__).parent / "dataset"
    download_dataset(save_dir)

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ install_requires =
3636
medvol
3737
gdown
3838
GitPython
39+
natsort
3940
python_requires = >=3.8
4041
include_package_data = True
4142
; package_dir =

0 commit comments

Comments
 (0)