@@ -48,7 +48,7 @@ def _download_cell_line(self, cell_line_source: Literal["DepMap", "Cancerrxgene"
4848 depmap_cell_line_path = Path (settings .cachedir ) / "depmap_23Q4_info.csv"
4949 if not Path (depmap_cell_line_path ).exists ():
5050 _download (
51- url = "https://ndownloader.figshare. com/files/43746708 " ,
51+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/depmap_23Q4_info.csv " ,
5252 output_file_name = "depmap_23Q4_info.csv" ,
5353 output_path = settings .cachedir ,
5454 block_size = 4096 ,
@@ -59,52 +59,24 @@ def _download_cell_line(self, cell_line_source: Literal["DepMap", "Cancerrxgene"
5959 else :
6060 # Download cell line metadata from The Genomics of Drug Sensitivity in Cancer Project
6161 # Source: https://www.cancerrxgene.org/celllines
62- cancerxgene_cell_line_path = Path (settings .cachedir ) / "cell_line_cancer_project.csv"
6362 transformed_cancerxgene_cell_line_path = Path (settings .cachedir ) / "cancerrxgene_info.csv"
64-
65- if not Path (transformed_cancerxgene_cell_line_path ).exists ():
66- if not Path (cancerxgene_cell_line_path ).exists ():
67- _download (
68- url = "https://www.cancerrxgene.org/api/celllines?list=all&sEcho=1&iColumns=7&sColumns=&"
69- "iDisplayStart=0&iDisplayLength=25&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&"
70- "mDataProp_4=4&mDataProp_5=5&mDataProp_6=6&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&"
71- "bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&"
72- "bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&"
73- "bSearchable_4=true&sSearch_5=&bRegex_5=false&bSearchable_5=true&sSearch_6=&bRegex_6=false&"
74- "bSearchable_6=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&"
75- "bSortable_2=true&bSortable_3=true&bSortable_4=true&bSortable_5=true&bSortable_6=true&export=csv" ,
76- output_file_name = "cell_line_cancer_project.csv" ,
77- output_path = settings .cachedir ,
78- block_size = 4096 ,
79- is_zip = False ,
80- )
81- self .cancerxgene = pd .read_csv (cancerxgene_cell_line_path )
82- self .cancerxgene .columns = self .cancerxgene .columns .str .strip ()
83- self .cancerxgene ["stripped_cell_line_name" ] = (
84- self .cancerxgene ["Cell line Name" ]
85- .str .replace (r"\-|\." , "" , regex = True )
86- .str .upper ()
87- .astype ("category" )
63+ if not transformed_cancerxgene_cell_line_path .exists ():
64+ _download (
65+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws.com/pertpy/cancerrxgene_info.csv" ,
66+ output_file_name = "cancerrxgene_info.csv" ,
67+ output_path = settings .cachedir ,
68+ block_size = 4096 ,
69+ is_zip = False ,
8870 )
89- # pivot the data frame so that each cell line has only one row of metadata
90- index_col = set (self .cancerxgene .columns ) - {
91- "Datasets" ,
92- "number of drugs" ,
93- }
94- self .cancerxgene = self .cancerxgene .pivot (index = index_col , columns = "Datasets" , values = "number of drugs" )
95- self .cancerxgene .columns .name = None
96- self .cancerxgene = self .cancerxgene .reset_index ().rename (columns = {"Cell line Name" : "cell_line_name" })
97- self .cancerxgene .to_csv (transformed_cancerxgene_cell_line_path )
98- else :
99- self .cancerxgene = pd .read_csv (transformed_cancerxgene_cell_line_path , index_col = 0 )
71+ self .cancerxgene = pd .read_csv (transformed_cancerxgene_cell_line_path , index_col = 0 )
10072
10173 def _download_gene_annotation (self ) -> None :
10274 # Download metadata for driver genes from DepMap.Sanger
10375 # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Gene annotation)
10476 gene_annotation_file_path = Path (settings .cachedir ) / "genes_info.csv"
10577 if not Path (gene_annotation_file_path ).exists ():
10678 _download (
107- url = "https://cog.sanger.ac.uk/cmp/download/gene_identifiers_20191101 .csv" ,
79+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws.com/pertpy/genes_info .csv" ,
10880 output_file_name = "genes_info.csv" ,
10981 output_path = settings .cachedir ,
11082 block_size = 4096 ,
@@ -121,7 +93,7 @@ def _download_bulk_rna(self, cell_line_source: Literal["broad", "sanger"] = "bro
12193 bulk_rna_sanger_file_path = Path (settings .cachedir ) / "rnaseq_sanger_info.csv"
12294 if not Path (bulk_rna_sanger_file_path ).exists ():
12395 _download (
124- url = "https://figshare. com/ndownloader/files/42467103 " ,
96+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/rnaseq_sanger_info.csv " ,
12597 output_file_name = "rnaseq_sanger_info.csv" ,
12698 output_path = settings .cachedir ,
12799 block_size = 4096 ,
@@ -134,7 +106,7 @@ def _download_bulk_rna(self, cell_line_source: Literal["broad", "sanger"] = "bro
134106 bulk_rna_broad_file_path = Path (settings .cachedir ) / "rnaseq_depmap_info.csv"
135107 if not Path (bulk_rna_broad_file_path ).exists ():
136108 _download (
137- url = "https://figshare. com/ndownloader/files/34989922 " ,
109+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/rnaseq_depmap_info.csv " ,
138110 output_file_name = "rnaseq_depmap_info.csv" ,
139111 output_path = settings .cachedir ,
140112 block_size = 4096 ,
@@ -148,7 +120,7 @@ def _download_proteomics(self) -> None:
148120 proteomics_file_path = Path (settings .cachedir ) / "proteomics_info.csv"
149121 if not Path (proteomics_file_path ).exists ():
150122 _download (
151- url = "https://figshare. com/ndownloader/files/42468393 " ,
123+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/proteomics_info.csv " ,
152124 output_file_name = "proteomics_info.csv" ,
153125 output_path = settings .cachedir ,
154126 block_size = 4096 ,
@@ -164,7 +136,7 @@ def _download_gdsc(self, gdsc_dataset: Literal[1, 2] = 1) -> None:
164136 drug_response_gdsc1_file_path = Path (settings .cachedir ) / "gdsc1_info.csv"
165137 if not Path (drug_response_gdsc1_file_path ).exists ():
166138 _download (
167- url = "https://figshare. com/ndownloader/files/43757235 " ,
139+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/gdsc1_info.csv " ,
168140 output_file_name = "gdsc1_info.csv" ,
169141 output_path = settings .cachedir ,
170142 block_size = 4096 ,
@@ -175,7 +147,7 @@ def _download_gdsc(self, gdsc_dataset: Literal[1, 2] = 1) -> None:
175147 drug_response_gdsc2_file_path = Path (settings .cachedir ) / "gdsc2_info.csv"
176148 if not Path (drug_response_gdsc2_file_path ).exists ():
177149 _download (
178- url = "https://figshare. com/ndownloader/files/43757232 " ,
150+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/gdsc2_info.csv " ,
179151 output_file_name = "gdsc2_info.csv" ,
180152 output_path = settings .cachedir ,
181153 block_size = 4096 ,
@@ -189,7 +161,7 @@ def _download_prism(self) -> None:
189161 drug_response_prism_file_path = Path (settings .cachedir ) / "prism_info.csv"
190162 if not Path (drug_response_prism_file_path ).exists ():
191163 _download (
192- url = "https://figshare. com/ndownloader/files/20237739 " ,
164+ url = "https://scverse-exampledata.s3.eu-west-1.amazonaws. com/pertpy/prism_info.csv " ,
193165 output_file_name = "prism_info.csv" ,
194166 output_path = settings .cachedir ,
195167 block_size = 4096 ,
@@ -253,7 +225,7 @@ def annotate(
253225 query_id = "stripped_cell_line_name"
254226 logger .error (
255227 "`stripped_cell_line_name` is used as reference and query identifier to annotate cell line metadata from Cancerrxgene. "
256- "Ensure that stripped cell line names are available in 'adata.obs.' or use the DepMap as `cell_line_source` to annotate the cell line first."
228+ "Ensure that stripped cell line names are available in 'adata.obs.' or use the DepMap as `cell_line_source` first."
257229 )
258230 if self .cancerxgene is None :
259231 self ._download_cell_line (cell_line_source = "Cancerrxgene" )
0 commit comments