@@ -28,8 +28,7 @@ def granary_data(tmp_path: Path):
2828 f"{ granary_key_prefix } /manifest_14.json" ,
2929 f"{ granary_key_prefix } /manifest_21.json" ,
3030 f"{ granary_key_prefix } /manifest_26.json" ,
31- f"{ granary_key_prefix } /manifest_34.json" ,
32- f"{ granary_key_prefix } /manifest_39.json" ,
31+ f"{ granary_key_prefix } /manifest_41.json" ,
3332 f"{ granary_key_prefix } /audio/zCW0Pa0BI4Q.wav" ,
3433 f"{ granary_key_prefix } /audio/zHWk3Ae7qJ0.wav" ,
3534 f"{ granary_key_prefix } /audio/zHtFdl5K8qg.wav" ,
@@ -45,12 +44,7 @@ def granary_data(tmp_path: Path):
4544 rel_path = file_key .replace (granary_key_prefix + "/" , "" )
4645 dest_path = tmp_path / rel_path
4746 dest_path .parent .mkdir (parents = True , exist_ok = True )
48-
49- try :
50- s3 .download_file (bucket , file_key , str (dest_path ))
51- except ClientError as e :
52- code = e .response .get ("Error" , {}).get ("Code" , "" )
53- pytest .skip (f"Cannot download s3://{ bucket } /{ file_key } ({ code } )." )
47+ s3 .download_file (bucket , file_key , str (dest_path ))
5448
5549 if file_key .endswith (".wav" ):
5650 f .write (json .dumps ({"source_audio_filepath" : str (dest_path )}) + "\n " )
@@ -70,34 +64,15 @@ def test_granary_pipeline_end_to_end(granary_data):
7064 cfg .sdp_dir = Path (__file__ ).parents [1 ]
7165
7266 #disable some processors
73- ## step 3: FasterWhisperInference
74- cfg .processors [3 ].should_run = False
75- cfg .processors [4 ].input_manifest_file = os .path .join (granary_data , "manifest_03.json" )
76-
77- ## step 14: FasterWhisperInference
78- cfg .processors [6 ].should_run = False
79- cfg .processors [7 ].input_manifest_file = os .path .join (granary_data , "manifest_06.json" )
80-
81- ## step 21: FasterWhisperInference
82- cfg .processors [14 ].should_run = False
83- cfg .processors [15 ].input_manifest_file = os .path .join (granary_data , "manifest_14.json" )
84-
85- ## step 21: vLLMInference
86- cfg .processors [21 ].should_run = False
87- cfg .processors [22 ].input_manifest_file = os .path .join (granary_data , "manifest_21.json" )
88-
89- ## step 26: vLLMInference
90- cfg .processors [26 ].should_run = False
91- cfg .processors [27 ].input_manifest_file = os .path .join (granary_data , "manifest_26.json" )
67+ processors_to_disable = [3 , 6 , 14 , # FasterWhisperInference
68+ 21 , 26 , # vLLMInference
69+ 41 , # CometoidWMTQualityEstimation
70+ ]
9271
93- ## steps 33-34: CharacterHistogramLangValidator
94- cfg .processors [33 ].should_run = False
95- cfg .processors [34 ].should_run = False
96- cfg .processors [35 ].input_manifest_file = os .path .join (granary_data , "manifest_34.json" )
97-
98- ## step 39: CometoidWMTQualityEstimation
99- cfg .processors [39 ].should_run = False
100- cfg .processors [40 ].input_manifest_file = os .path .join (granary_data , "manifest_39.json" )
72+ for processor_idx in processors_to_disable :
73+ processor_id = str (processor_idx ).zfill (2 )
74+ cfg .processors [processor_idx ].should_run = False
75+ cfg .processors [processor_idx + 1 ].input_manifest_file = os .path .join (granary_data , f"manifest_{ processor_id } .json" )
10176
10277 run_processors (cfg )
10378
0 commit comments