Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions checks/apps/pytorch/pytorch_allreduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: BSD-3-Clause

import pathlib
import re
import re # noqa: F401
import sys

import reframe as rfm
Expand All @@ -26,22 +26,27 @@ class PyTorchNCCLAllReduce(rfm.RunOnlyRegressionTest, ContainerEngineMixin):
valid_prog_environs = ['builtin']
num_nodes = variable(int, value=8)
sourcesdir = None
curated_images = ['nvcr.io#nvidia/pytorch:24.12-py3']
curated_images = ['nvcr.io#nvidia/pytorch:25.06-py3']

# NOTE: only the "-py3" image is supported by the test
supported_flavors = ["-py3"]


pytorch_tags = nvidia_image_tags('pytorch')
latest_tags = []

for flavor in supported_flavors:
versions = []
for tag in pytorch_tags:
if re.match(rf'^\d+\.\d+{flavor}$', tag):
versions.append(tag[:-len(flavor)])
if versions:
latest_version = max(versions)
latest_tags += [f'{latest_version+flavor}']
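# FIXME: Image versions 25.08-py3 and above use CUDA 13, so the automatic
# selection of the latest tags below is disabled for now; see:
# https://jira.cscs.ch/browse/VCUE-1039
# NOTE(review): the disabled loop below references `latest_version`, which
# it never assigns (the loop variable is `v`); fix this before re-enabling.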

# for flavor in supported_flavors:
# versions = []
# for tag in pytorch_tags:
# if re.match(rf'^\d+\.\d+{flavor}$', tag):
# versions.append(tag[:-len(flavor)])
# if versions:
# versions.sort(reverse=True)
# for v in versions:
# latest_tags += [f'{latest_version+flavor}']

latest_images = [f'nvcr.io#nvidia/pytorch:{tag}' for tag in latest_tags]
image = parameter(curated_images + latest_images)
Expand Down
23 changes: 13 additions & 10 deletions checks/apps/pytorch/pytorch_nvidia.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re
import re # noqa: F401
import sys
import pathlib
import reframe as rfm
Expand Down Expand Up @@ -38,23 +38,26 @@ class PyTorchDdpCeNv(PyTorchTestBase, ContainerEngineMixin):
'NVIDIA NGC')
valid_systems = ['+ce +nvgpu']
aws_ofi_nccl = parameter([True])
curated_images = ['nvcr.io#nvidia/pytorch:24.01-py3']
curated_images = ['nvcr.io#nvidia/pytorch:25.06-py3']

# NOTE: only the "-py3" image is supported by the test
supported_flavors = ["-py3"]

pytorch_tags = nvidia_image_tags('pytorch')
latest_tags = []

for flavor in supported_flavors:
versions = []
for tag in pytorch_tags:
if re.match(rf'^\d+\.\d+{flavor}$', tag):
versions.append(tag[:-len(flavor)])
# FIXME: Image versions 25.08-py3 and above use CUDA 13, so the automatic
# selection of the latest tag below is disabled for now; see:
# https://jira.cscs.ch/browse/VCUE-1039

if versions:
latest_version = max(versions)
latest_tags += [f'{latest_version}{flavor}']
# for flavor in supported_flavors:
# versions = []
# for tag in pytorch_tags:
# if re.match(rf'^\d+\.\d+{flavor}$', tag):
# versions.append(tag[:-len(flavor)])

# if versions:
# latest_version = max(versions)
# latest_tags += [f'{latest_version}{flavor}']

latest_images = [f'nvcr.io#nvidia/pytorch:{tag}' for tag in latest_tags]
image = parameter(curated_images + latest_images)
Expand Down