11import argparse
22import json
33import os
4+ import re
45import subprocess
5- from typing import Dict
6+ import time
7+ from typing import Dict , List
68
79import requests
810import yaml
3335)
3436
3537
36- def read_existing_yaml (workflows_path ) :
38+ def read_existing_yaml (workflows_path : str ) -> Dict [ str , Workflow ] :
3739 if os .path .exists (workflows_path ):
3840 with open (workflows_path ) as fh :
3941 workflows = Workflows .model_validate (yaml .safe_load (fh )).workflows
@@ -44,7 +46,9 @@ def read_existing_yaml(workflows_path):
4446 return by_trs_id
4547
4648
47- def get_workflow_categories_from_collections (collections ):
49+ def get_workflow_categories_from_collections (
50+ collections : List [str ],
51+ ) -> List [WorkflowCategoryId ]:
4852 return sorted (
4953 list (
5054 set (
@@ -57,10 +61,10 @@ def get_workflow_categories_from_collections(collections):
5761 )
5862
5963
60- def get_input_types (workflow_definition ) :
64+ def get_input_types (workflow_definition : dict ) -> List [ WorkflowParameter ] :
6165 # get all input types
6266 INPUT_TYPES = ["data_input" , "data_collection_input" , "parameter_input" ]
63- inputs : list [WorkflowParameter ] = []
67+ inputs : List [WorkflowParameter ] = []
6468 for step in workflow_definition ["steps" ].values ():
6569 step_label = step ["label" ]
6670 step_type = step ["type" ]
@@ -94,18 +98,74 @@ def get_input_types(workflow_definition):
9498 return inputs
9599
96100
97- def generate_current_workflows ():
# Definitive Dockstore answers (HTTP 200 -> True, HTTP 404 -> False) keyed by the
# full versioned TRS ID. The same ID is checked more than once during a run, so
# remembering the answer saves a network round trip and the throttle sleep.
_TRS_VERSION_CACHE: Dict[str, bool] = {}


def verify_trs_version_exists(trs_id: str, skip_validation: bool = False) -> bool:
    """Check if a workflow version exists on Dockstore via TRS API.

    Args:
        trs_id: Versioned TRS ID of the form
            ``#workflow/github.com/iwc-workflows/<repo>/<name>/versions/v<version>``.
        skip_validation: When true, no lookup is performed and ``True`` is returned.

    Returns:
        ``False`` only when Dockstore answers 404 for the version. ``True`` when
        Dockstore answers 200, and also whenever the check cannot be carried out
        (unparseable ID, unexpected status code, request error) so that such
        problems never cause a workflow to be dropped.
    """
    if skip_validation:
        return True

    cached = _TRS_VERSION_CACHE.get(trs_id)
    if cached is not None:
        return cached

    # Parse the TRS ID to extract components
    match = re.match(
        r"#workflow/github\.com/iwc-workflows/([^/]+)/([^/]+)/versions/v(.+)", trs_id
    )
    if not match:
        print(f"Warning: Cannot parse TRS ID for validation: {trs_id}")
        return True  # We can't look this up, but someone put it in -- don't fail

    repo, workflow_name, version = match.groups()

    # The workflow ID format for Dockstore is the full TRS ID without the version part
    workflow_id = f"#workflow/github.com/iwc-workflows/{repo}/{workflow_name}"
    # URL encode the workflow ID and version; safe="" so "#" and "/" are escaped too
    encoded_id = requests.utils.quote(workflow_id, safe="")
    encoded_version = requests.utils.quote(f"v{version}", safe="")

    dockstore_url = f"https://dockstore.org/api/ga4gh/trs/v2/tools/{encoded_id}/versions/{encoded_version}"

    try:
        response = requests.get(dockstore_url, timeout=10)
        if response.status_code == 200:
            _TRS_VERSION_CACHE[trs_id] = True
            return True
        elif response.status_code == 404:
            _TRS_VERSION_CACHE[trs_id] = False
            return False
        else:
            # Not cached: an unexpected status may be transient, so a later
            # call for the same ID is allowed to ask again.
            print(
                f"Warning: Unexpected status {response.status_code} checking {trs_id} at Dockstore"
            )
            return True  # Don't drop workflows on weirdness
    except requests.RequestException as e:
        print(f"Warning: Error checking version {trs_id}: {e}")
        return True
    finally:
        # Don't slam dockstore
        time.sleep(0.1)
141+
142+
143+ def generate_current_workflows (skip_validation : bool = False ) -> Dict [str , Workflow ]:
98144 manifest_data = requests .get (URL ).json ()
99145 by_trs_id : Dict [str , Workflow ] = {}
146+ version_warnings = []
147+
100148 for repo in manifest_data :
101149 for workflow in repo ["workflows" ]:
102150 if "tests" not in workflow :
103151 # probably fixed on main branch of iwc ?
104152 # this branch is pretty out of date
105153 continue
154+
155+ trs_id = (
156+ f"{ workflow ['trsID' ]} /versions/v{ workflow ['definition' ]['release' ]} "
157+ )
158+
159+ if not verify_trs_version_exists (trs_id , skip_validation ):
160+ # This is just informational - we'll keep the workflow with whatever
161+ # version is already in workflows.yml (handled in merge_into_existing)
162+ version_warnings .append (
163+ f"Info: IWC manifest has v{ workflow ['definition' ]['release' ]} for { workflow ['trsID' ]} but it's not on Dockstore yet"
164+ )
165+
106166 workflow_input = Workflow (
107167 active = False ,
108- trs_id = f" { workflow [ 'trsID' ] } /versions/v { workflow [ 'definition' ][ 'release' ] } " ,
168+ trs_id = trs_id ,
109169 workflow_name = workflow ["definition" ]["name" ],
110170 categories = get_workflow_categories_from_collections (
111171 workflow ["collections" ]
@@ -118,6 +178,12 @@ def generate_current_workflows():
118178 parameters = get_input_types (workflow ["definition" ]),
119179 )
120180 by_trs_id [workflow ["trsID" ]] = workflow_input
181+
182+ if version_warnings and not skip_validation :
183+ print ("\n Version status notes:" )
184+ for warning in version_warnings :
185+ print (f" { warning } " )
186+
121187 return by_trs_id
122188
123189
@@ -149,29 +215,82 @@ def add_missing_parameters(
149215 existing_workflow_input .parameters .append (param )
150216
151217
def merge_into_existing(
    workflows_path: str, skip_validation: bool = False
) -> Dict[str, Workflow]:
    """Merge workflows from the IWC manifest into those already in the YAML file.

    Workflows that are not yet in the file are taken from the manifest as-is.
    For workflows present in both, the existing entry is kept and only the keys
    listed in ``MANIFEST_SOURCE_OF_TRUTH`` are overwritten from the manifest.
    Before that, the versioned TRS IDs of both sides are checked with
    ``verify_trs_version_exists`` and the manifest-side ``trs_id`` is replaced
    by the existing one when the manifest version is not found.

    Args:
        workflows_path: Path of the existing workflows YAML file.
        skip_validation: Passed through to the version checks; when true no
            lookups are made and the summary of kept versions is not printed.

    Returns:
        Mapping from versionless TRS ID to the merged ``Workflow``.
    """
    existing = read_existing_yaml(workflows_path)
    current = generate_current_workflows(skip_validation)
    merged: Dict[str, Workflow] = {}
    invalid_versions: List[str] = []
    versions_kept: List[str] = []

    for versionless_trs_id, current_workflow_input in current.items():
        existing_workflow_input = existing.get(versionless_trs_id)
        if not existing_workflow_input:
            merged[versionless_trs_id] = current_workflow_input
            continue

        iwc_version_valid = verify_trs_version_exists(
            current_workflow_input.trs_id, skip_validation
        )
        if existing_workflow_input.trs_id == current_workflow_input.trs_id:
            # Same version on both sides: reuse the answer rather than asking
            # about the identical ID a second time.
            existing_version_valid = iwc_version_valid
        else:
            existing_version_valid = verify_trs_version_exists(
                existing_workflow_input.trs_id, skip_validation
            )

        # Decide which version to use
        if not iwc_version_valid and existing_version_valid:
            # IWC version not on Dockstore yet, but existing version is valid
            versions_kept.append(
                f"Keeping {existing_workflow_input.trs_id} (IWC has newer unreleased version)"
            )
            current_workflow_input.trs_id = existing_workflow_input.trs_id
        elif not existing_version_valid:
            # Existing version is invalid (manually edited to bad version)
            if iwc_version_valid:
                print(
                    f"Error: Invalid version {existing_workflow_input.trs_id} doesn't exist on Dockstore"
                )
                print(f"  -> Reverting to IWC version: {current_workflow_input.trs_id}")
                invalid_versions.append(existing_workflow_input.trs_id)
            else:
                # Both versions are invalid - this shouldn't happen often
                print(
                    f"Error: Neither existing nor IWC version exists on Dockstore for {versionless_trs_id}"
                )
                # Keep what we have
                current_workflow_input.trs_id = existing_workflow_input.trs_id

        # Build the merged workflow: start from what is in the file and only
        # overwrite the manifest-controlled fields.
        existing_dict = existing_workflow_input.model_dump()
        new_dict = current_workflow_input.model_dump()

        # Update manifest-controlled fields
        for key in MANIFEST_SOURCE_OF_TRUTH:
            existing_dict[key] = new_dict[key]

        ensure_parameters_exist(current_workflow_input, existing_workflow_input)
        updated_existing_workflow = Workflow(**existing_dict)
        add_missing_parameters(current_workflow_input, updated_existing_workflow)
        merged[versionless_trs_id] = updated_existing_workflow

    if versions_kept and not skip_validation:
        print(
            f"\nKept {len(versions_kept)} existing versions (newer IWC versions not on Dockstore yet)"
        )
        for msg in versions_kept:
            print(f"  {msg}")

    if invalid_versions:
        print(f"\nFixed {len(invalid_versions)} invalid versions in workflows.yml")

    return merged
171288
172289
173- def to_workflows_yaml (workflows_path : str , exclude_other : bool ):
174- by_trs_id = merge_into_existing (workflows_path )
290+ def to_workflows_yaml (
291+ workflows_path : str , exclude_other : bool , skip_validation : bool = False
292+ ):
293+ by_trs_id = merge_into_existing (workflows_path , skip_validation )
175294 # sort by trs id, should play nicer with git diffs
176295 sorted_workflows = list (dict (sorted (by_trs_id .items ())).values ())
177296 if exclude_other :
@@ -209,5 +328,14 @@ def to_workflows_yaml(workflows_path: str, exclude_other: bool):
209328 action = "store_true" ,
210329 help = "Exclude other items from processing." ,
211330 )
331+ parser .add_argument (
332+ "--skip-validation" ,
333+ action = "store_true" ,
334+ help = "Skip validation of workflow versions against TRS API." ,
335+ )
212336 args = parser .parse_args ()
213- to_workflows_yaml (args .workflows_path , exclude_other = args .exclude_other )
337+ to_workflows_yaml (
338+ args .workflows_path ,
339+ exclude_other = args .exclude_other ,
340+ skip_validation = args .skip_validation ,
341+ )
0 commit comments