77import argparse
88from git import Repo
99import os
10+ from utils .download_kits23 import download_dataset
11+ from natsort import natsorted
1012
1113
1214def setup_word_dataset (dataset_dir ):
@@ -146,7 +148,7 @@ def setup_mscmr_dataset(dataset_dir):
146148 zip_ref .extractall (mscmr_preprocessed_dir )
147149
148150 ####################################################################################################################
149- #### Preprocess WORD dataset
151+ #### Preprocess MSCMR dataset
150152 ####################################################################################################################
151153
152154 print ("Preprocessing MSCMR dataset..." )
@@ -171,7 +173,7 @@ def setup_mscmr_dataset(dataset_dir):
171173
172174 names = [path .name [:- 7 ] for path in (mscmr_raw_dir / "TestSet" / "labels" ).rglob ("*.nii.gz" )]
173175 for name in names :
174- shutil .move (mscmr_raw_dir / "TestSet" / "labels" / f"{ name } .nii.gz" , mscmr_preprocessed_dir / "labelsTs" / f"{ name } _0000 .nii.gz" )
176+ shutil .move (mscmr_raw_dir / "TestSet" / "labels" / f"{ name } .nii.gz" , mscmr_preprocessed_dir / "labelsTs" / f"{ name } .nii.gz" )
175177
176178 # These two images have no dense GT so it is not possible to generate scribbles for them
177179 os .remove (mscmr_preprocessed_dir / "imagesTr" / "subject2_DE_0000.nii.gz" )
@@ -194,14 +196,90 @@ def setup_mscmr_dataset(dataset_dir):
194196 print ("Finished setting up MSCMR dataset." )
195197
196198
# Cases held out as the KiTS2023 test split; every other case directory is
# treated as training data. Kept as a frozenset so the per-case membership
# check in setup_kits_dataset is a constant-time lookup.
_KITS_TEST_CASES = frozenset({
    'case_00007', 'case_00013', 'case_00003', 'case_00018', 'case_00019', 'case_00016', 'case_00026', 'case_00014',
    'case_00038', 'case_00000', 'case_00045', 'case_00050', 'case_00061', 'case_00070', 'case_00069', 'case_00074',
    'case_00087', 'case_00090', 'case_00086', 'case_00084', 'case_00059', 'case_00106', 'case_00105', 'case_00102',
    'case_00112', 'case_00117', 'case_00114', 'case_00121', 'case_00101', 'case_00096', 'case_00115', 'case_00126',
    'case_00127', 'case_00120', 'case_00124', 'case_00116', 'case_00133', 'case_00145', 'case_00147', 'case_00152',
    'case_00144', 'case_00118', 'case_00132', 'case_00135', 'case_00141', 'case_00146', 'case_00164', 'case_00167',
    'case_00172', 'case_00179', 'case_00181', 'case_00184', 'case_00192', 'case_00194', 'case_00195', 'case_00165',
    'case_00199', 'case_00210', 'case_00188', 'case_00191', 'case_00211', 'case_00212', 'case_00222', 'case_00217',
    'case_00221', 'case_00227', 'case_00232', 'case_00236', 'case_00231', 'case_00234', 'case_00214', 'case_00238',
    'case_00223', 'case_00237', 'case_00240', 'case_00216', 'case_00266', 'case_00269', 'case_00268', 'case_00275',
    'case_00279', 'case_00253', 'case_00273', 'case_00282', 'case_00287', 'case_00286', 'case_00281', 'case_00284',
    'case_00291', 'case_00283', 'case_00276', 'case_00404', 'case_00411', 'case_00416', 'case_00418', 'case_00422',
    'case_00423', 'case_00290', 'case_00424', 'case_00295', 'case_00426', 'case_00428', 'case_00293', 'case_00429',
    'case_00433', 'case_00441', 'case_00443', 'case_00437', 'case_00444', 'case_00452', 'case_00449', 'case_00453',
    'case_00463', 'case_00468', 'case_00476', 'case_00483', 'case_00485', 'case_00480', 'case_00491', 'case_00474',
    'case_00486', 'case_00496', 'case_00494', 'case_00492', 'case_00503', 'case_00442', 'case_00518', 'case_00521',
    'case_00522', 'case_00525', 'case_00515', 'case_00533', 'case_00532', 'case_00539', 'case_00517', 'case_00546',
    'case_00550', 'case_00554', 'case_00558', 'case_00557', 'case_00544', 'case_00567', 'case_00574', 'case_00555',
    'case_00576', 'case_00575', 'case_00564',
})


def setup_kits_dataset(dataset_dir):
    """Download KiTS2023 and arrange it in the ScribbleBench folder layout.

    The kits23 repository is cloned into ``<dataset_dir>/ScribbleBench/raw/KiTS2023``
    and ``download_dataset`` is then called on its ``dataset`` sub-directory.
    For every case directory found there, ``imaging.nii.gz`` and
    ``segmentation.nii.gz`` are moved into ``imagesTr``/``labelsTr`` or
    ``imagesTs``/``labelsTs`` under ``<dataset_dir>/ScribbleBench/KiTS2023``,
    depending on whether the case is listed in ``_KITS_TEST_CASES``. Finally a
    ``dataset.json`` is downloaded next to those folders and the ``raw``
    directory is removed.

    Args:
        dataset_dir: Base directory; a ``ScribbleBench`` sub-directory is
            created inside it if it does not exist yet.

    Raises:
        requests.HTTPError: If the ``dataset.json`` download returns an error
            status.
        requests.Timeout: If the ``dataset.json`` server does not respond
            within the timeout.
    """
    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    raw_dir = dataset_dir / "raw"
    kits_raw_dir = raw_dir / "KiTS2023" / "dataset"
    preprocessed_dir = dataset_dir
    kits_preprocessed_dir = preprocessed_dir / "KiTS2023"
    preprocessed_dir.mkdir(parents=True, exist_ok=True)
    kits_preprocessed_dir.mkdir(parents=True, exist_ok=True)

    ####################################################################################################################
    #### Download KiTS2023 dataset
    ####################################################################################################################

    print("Downloading KiTS2023 dataset...")
    repo_url = "https://github.com/neheller/kits23.git"
    # The repository is cloned into the parent of kits_raw_dir, so the
    # "dataset" folder that download_dataset receives lives inside the clone.
    Repo.clone_from(repo_url, str(kits_raw_dir.parent))
    download_dataset(kits_raw_dir)

    ####################################################################################################################
    #### Preprocess KiTS2023 dataset
    ####################################################################################################################

    print("Preprocessing KiTS2023 dataset...")

    for subdir in ("imagesTr", "imagesTs", "labelsTr", "labelsTs"):
        (kits_preprocessed_dir / subdir).mkdir(parents=True, exist_ok=True)

    # Every sub-directory of the raw dataset folder is treated as one case.
    names = natsorted(p.name for p in kits_raw_dir.iterdir() if p.is_dir())
    for name in names:
        postfix = "Ts" if name in _KITS_TEST_CASES else "Tr"
        # Images get the "_0000" suffix; labels keep the plain case name.
        shutil.move(kits_raw_dir / name / "imaging.nii.gz", kits_preprocessed_dir / f"images{postfix}" / f"{name}_0000.nii.gz")
        shutil.move(kits_raw_dir / name / "segmentation.nii.gz", kits_preprocessed_dir / f"labels{postfix}" / f"{name}.nii.gz")

    dataset_json_url = "https://syncandshare.desy.de/index.php/s/Cfpwyg5dmi9a2Df/download/dataset.json"
    # Without an explicit timeout requests waits indefinitely on a stalled
    # server, which would hang the whole setup after the heavy work is done.
    response = requests.get(dataset_json_url, timeout=60)
    response.raise_for_status()  # Raise an error on bad status
    with open(kits_preprocessed_dir / "dataset.json", "wb") as f:
        f.write(response.content)

    ####################################################################################################################
    #### Delete raw dataset files
    ####################################################################################################################

    print("Deleting raw dataset files...")
    # NOTE: this removes the entire "raw" directory, not only raw/KiTS2023.
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up KiTS2023 dataset.")
269+
270+
if __name__ == '__main__':
    # Command-line entry point: each dataset flag independently triggers the
    # matching setup function, so several datasets can be set up in one run.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', "--dataset_dir", required=True, type=str, help="Path to the dir used for setting up ScribbleBench.")
    parser.add_argument('--word', required=False, default=False, action="store_true", help="Download and preprocess the WORD dataset for ScribbleBench.")
    parser.add_argument('--mscmr', required=False, default=False, action="store_true", help="Download and preprocess the MSCMR dataset for ScribbleBench.")
    parser.add_argument('--kits', required=False, default=False, action="store_true", help="Download and preprocess the KiTS2023 dataset for ScribbleBench.")
    args = parser.parse_args()

    if args.word:
        setup_word_dataset(args.dataset_dir)
    # The MSCMR setup must be gated on --mscmr. It was previously gated on
    # --word, so --mscmr was ignored and --word also ran the MSCMR setup.
    if args.mscmr:
        setup_mscmr_dataset(args.dataset_dir)
    if args.kits:
        setup_kits_dataset(args.dataset_dir)
285+
0 commit comments