66by genomes.yml that have already been executed and appear in the target
77installed data table configuration.
88"""
9+ import json
910import logging
1011import os
1112import re
3839from ._config_models import (
3940 DataManager ,
4041 DataManagers ,
42+ DictOrValue ,
4143 read_data_managers ,
4244)
4345from .common_parser import get_common_args
@@ -96,8 +98,7 @@ def tool_id_for(indexer: str, data_managers: DataManagers, mode: str) -> str:
9698class RunDataManager (BaseModel ):
9799 id : str
98100 items : Optional [List [Any ]] = None
99- params : Optional [List [Any ]] = None
100- data_table_reload : Optional [List [str ]] = None
101+ params : Optional [DictOrValue ] = None
101102
102103
103104class RunDataManagers (BaseModel ):
@@ -172,36 +173,34 @@ def walk_over_incomplete_runs(split_options: SplitOptions):
172173 if do_fetch and not split_options .is_build_complete (build_id , fetch_indexer ):
173174 log .info (f"Fetching: { build_id } " )
174175 fetch_tool_id = tool_id_for (fetch_indexer , data_managers , split_options .tool_id_mode )
175- fetch_params = []
176- fetch_params .append ({"dbkey_source|dbkey_source_selector" : "new" })
177- fetch_params .append ({"dbkey_source|dbkey" : genome ["id" ]})
178176 description = genome .get ("description" )
177+ fetch_params = {
178+ "dbkey_source" : {"dbkey_source_selector" : "new" , "dbkey" : genome ["id" ]},
179+ "sequence_id" : genome ["id" ],
180+ "sequence_name" : description ,
181+ }
179182 source = genome .get ("source" )
180183 if source == "ucsc" :
181184 if not description :
182- description = ucsc_description_for_build (genome ["id" ])
183- fetch_params .append ({"reference_source|reference_source_selector" : "ucsc" })
184- fetch_params .append ({"reference_source|requested_dbkey" : genome ["id" ]})
185- fetch_params .append ({"sequence_name" : description })
185+ fetch_params ["sequence_name" ] = ucsc_description_for_build (genome ["id" ])
186+ fetch_params ["reference_source" ] = {
187+ "reference_source_selector" : "ucsc" ,
188+ "requested_dbkey" : genome ["id" ],
189+ }
186190 elif re .match ("^[A-Z_]+[0-9.]+" , source ):
187- fetch_params . append ({ "reference_source|reference_source_selector" : "ncbi" })
188- fetch_params . append ({ "reference_source|requested_identifier " : source })
189- fetch_params . append ({ "sequence_name " : genome [ "description" ]})
190- fetch_params . append ({ "sequence.id" : genome [ "id" ]})
191+ fetch_params [ "reference_source" ] = {
192+ "reference_source_selector " : "ncbi" ,
193+ "requested_identifier " : source ,
194+ }
191195 elif re .match ("^http" , source ):
192- fetch_params .append ({"reference_source|reference_source_selector" : "url" })
193- fetch_params .append ({"reference_source|user_url" : source })
194- fetch_params .append ({"sequence_name" : genome ["description" ]})
195- fetch_params .append ({"sequence.id" : genome ["id" ]})
196+ fetch_params ["reference_source" ] = {"reference_source_selector" : "url" , "user_url" : source }
196197
197198 if description :
198- fetch_params . append ({ "dbkey_source| dbkey_name" : description })
199+ fetch_params [ "dbkey_source" ][ " dbkey_name"] = description
199200
200201 fetch_run_data_manager = RunDataManager (
201202 id = fetch_tool_id ,
202203 params = fetch_params ,
203- # Not needed according to Marius
204- # data_table_reload=["all_fasta", "__dbkeys__"],
205204 )
206205 yield (build_id , fetch_indexer , fetch_run_data_manager )
207206 else :
@@ -223,18 +222,17 @@ def walk_over_incomplete_runs(split_options: SplitOptions):
223222
224223 tool_id = tool_id_for (indexer , data_managers , split_options .tool_id_mode )
225224 data_manager = data_managers .__root__ [indexer ]
226- params = data_manager .parameters
225+ params = {}
226+ if data_manager .parameters :
227+ params = json .loads (data_manager .parameters .json ()) or {}
228+ genome_params = genome .pop ("parameters" , None ) or {}
229+ params .update (genome_params )
227230 if params is None :
228- params = [
229- {"all_fasta_source" : "{{ item.id }}" },
230- {"sequence_name" : "{{ item.name }}" },
231- {"sequence_id" : "{{ item.id }}" },
232- ]
233- # why is this not pulled from the data managers conf? -nate
234- if re .search ("bwa" , tool_id ):
235- params .append ({"index_algorithm" : "bwtsw" })
236- if re .search ("color_space" , tool_id ):
237- continue
231+ params = {
232+ "all_fasta_source" : "{{ item.id }}" ,
233+ "sequence_name" : "{{ item.name }}" ,
234+ "sequence_id" : "{{ item.id }}" ,
235+ }
238236
239237 item = deepcopy (genome )
240238 item .pop ("indexers" , None )
0 commit comments