Skip to content

Commit 4c6dfeb

Browse files
committed
Test tests s3
Signed-off-by: Sasha Meister <ameister@nvidia.com>
1 parent 2064127 commit 4c6dfeb

File tree

4 files changed

+13
-42
lines changed

4 files changed

+13
-42
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ jobs:
9393
sudo cp incommon-rsa-ca2.pem /usr/local/share/ca-certificates/incommon-rsa-server-ca-2.crt # [cert for CORAL]
9494
sudo update-ca-certificates # [cert for CORAL]
9595
set -o pipefail # this will make sure next line returns non-0 exit code if tests fail
96-
python -m pytest tests/ --junitxml=pytest.xml --ignore=tests/test_tts_sdp_end_to_end.py --cov-report=term-missing:skip-covered --cov=sdp --durations=30 -rs | tee pytest-coverage.txt
96+
python -m pytest tests/test_granary_pipeline_end_to_end.py tests/test_data_to_data.py --junitxml=pytest.xml --ignore=tests/test_tts_sdp_end_to_end.py --cov-report=term-missing:skip-covered --cov=sdp --durations=30 -rs | tee pytest-coverage.txt
9797
9898
9999
# TODO: add some way to see if e2e tests were skipped

dataset_configs/multilingual/granary/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ processors:
562562
model_name_or_path: cometoid-wmt23
563563
device_type: gpu
564564
num_devices: -1
565-
chunksize: 10
565+
chunksize: 100
566566

567567
- _target_: sdp.processors.PreserveByValue
568568
output_manifest_file: ${output_dir}/${params.source_lang}/manifest_42.json

tests/test_data_to_data.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,7 @@ def en_hist_dir(tmp_path_factory):
306306
local_path = tmp_dir / "en"
307307

308308
if not local_path.exists():
309-
try:
310-
s3.download_file(bucket, key, str(local_path))
311-
except ClientError as e:
312-
code = e.response.get("Error", {}).get("Code", "")
313-
pytest.skip(f"Cannot download s3://{bucket}/{key} ({code}).")
309+
s3.download_file(bucket, key, str(local_path))
314310

315311
assert local_path.exists(), "Histogram file was not downloaded"
316312
return str(tmp_dir)

tests/test_granary_pipeline_end_to_end.py

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ def granary_data(tmp_path: Path):
2828
f"{granary_key_prefix}/manifest_14.json",
2929
f"{granary_key_prefix}/manifest_21.json",
3030
f"{granary_key_prefix}/manifest_26.json",
31-
f"{granary_key_prefix}/manifest_34.json",
32-
f"{granary_key_prefix}/manifest_39.json",
31+
f"{granary_key_prefix}/manifest_41.json",
3332
f"{granary_key_prefix}/audio/zCW0Pa0BI4Q.wav",
3433
f"{granary_key_prefix}/audio/zHWk3Ae7qJ0.wav",
3534
f"{granary_key_prefix}/audio/zHtFdl5K8qg.wav",
@@ -45,12 +44,7 @@ def granary_data(tmp_path: Path):
4544
rel_path = file_key.replace(granary_key_prefix + "/", "")
4645
dest_path = tmp_path / rel_path
4746
dest_path.parent.mkdir(parents=True, exist_ok=True)
48-
49-
try:
50-
s3.download_file(bucket, file_key, str(dest_path))
51-
except ClientError as e:
52-
code = e.response.get("Error", {}).get("Code", "")
53-
pytest.skip(f"Cannot download s3://{bucket}/{file_key} ({code}).")
47+
s3.download_file(bucket, file_key, str(dest_path))
5448

5549
if file_key.endswith(".wav"):
5650
f.write(json.dumps({"source_audio_filepath": str(dest_path)}) + "\n")
@@ -70,34 +64,15 @@ def test_granary_pipeline_end_to_end(granary_data):
7064
cfg.sdp_dir = Path(__file__).parents[1]
7165

7266
# Disable some processors; the processor after each one reads a pre-downloaded manifest instead
73-
## step 3: FasterWhisperInference
74-
cfg.processors[3].should_run = False
75-
cfg.processors[4].input_manifest_file = os.path.join(granary_data, "manifest_03.json")
76-
77-
## step 14: FasterWhisperInference
78-
cfg.processors[6].should_run = False
79-
cfg.processors[7].input_manifest_file = os.path.join(granary_data, "manifest_06.json")
80-
81-
## step 21: FasterWhisperInference
82-
cfg.processors[14].should_run = False
83-
cfg.processors[15].input_manifest_file = os.path.join(granary_data, "manifest_14.json")
84-
85-
## step 21: vLLMInference
86-
cfg.processors[21].should_run = False
87-
cfg.processors[22].input_manifest_file = os.path.join(granary_data, "manifest_21.json")
88-
89-
## step 26: vLLMInference
90-
cfg.processors[26].should_run = False
91-
cfg.processors[27].input_manifest_file = os.path.join(granary_data, "manifest_26.json")
67+
processors_to_disable = [3, 6, 14, # FasterWhisperInference
68+
21, 26, # vLLMInference
69+
41, # CometoidWMTQualityEstimation
70+
]
9271

93-
## steps 33-34: CharacterHistogramLangValidator
94-
cfg.processors[33].should_run = False
95-
cfg.processors[34].should_run = False
96-
cfg.processors[35].input_manifest_file = os.path.join(granary_data, "manifest_34.json")
97-
98-
## step 39: CometoidWMTQualityEstimation
99-
cfg.processors[39].should_run = False
100-
cfg.processors[40].input_manifest_file = os.path.join(granary_data, "manifest_39.json")
72+
for processor_idx in processors_to_disable:
73+
processor_id = str(processor_idx).zfill(2)
74+
cfg.processors[processor_idx].should_run = False
75+
cfg.processors[processor_idx + 1].input_manifest_file = os.path.join(granary_data, f"manifest_{processor_id}.json")
10176

10277
run_processors(cfg)
10378

0 commit comments

Comments
 (0)