Skip to content

Commit b3da347

Browse files
Test FSx performance tuning
Signed-off-by: Hanwen <[email protected]>
1 parent b0c2e7d commit b3da347

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

tests/integration-tests/tests/efa/test_efa.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@
1414

1515
import pytest
1616
from assertpy import assert_that
17-
from remote_command_executor import RemoteCommandExecutor
18-
from utils import get_compute_nodes_instance_ids
17+
from remote_command_executor import RemoteCommandExecutor, RemoteCommandExecutionError
18+
from utils import get_compute_nodes_instance_ids, get_instance_info
1919

2020
from tests.common.assertions import assert_no_errors_in_logs
2121
from tests.common.mpi_common import _test_mpi
@@ -66,6 +66,8 @@ def test_efa(
6666

6767
run_system_analyzer(cluster, scheduler_commands_factory, request, partition="efa-enabled")
6868

69+
_test_fsx_performance_tuning_for_large_instances(instance, remote_command_executor)
70+
6971
if instance in osu_benchmarks_instances:
7072
benchmark_failures = []
7173

@@ -138,6 +140,23 @@ def _test_efa_installation(scheduler_commands, remote_command_executor, efa_inst
138140
assert_that(result.stdout).does_not_contain("1d0f:efa")
139141

140142

143+
def _test_fsx_performance_tuning_for_large_instances(instance_type, remote_command_executor):
    """Verify that the Lustre client RPC tuning matches the instance size.

    The instance's default vCPU count is looked up through ``get_instance_info``.
    For instance types with more than 64 vCPUs, each Lustre parameter below must
    report the tuned value. For smaller instance types the same checks are
    expected to fail, i.e. the tuning must not have been applied.

    Only the vCPU-driven parameters are checked here; memory-driven tuning is
    not covered by this helper.

    :param instance_type: EC2 instance type name passed to ``get_instance_info``.
    :param remote_command_executor: executor used to run ``lctl`` on the cluster.
    :raises RemoteCommandExecutionError: presumably propagated from
        ``run_remote_command`` when a tuned value is missing on a large
        instance -- confirm the executor raises on a non-zero exit status.
    """
    instance_info = get_instance_info(instance_type)
    vcpu = instance_info.get("VCpuInfo").get("DefaultVCpus")

    # Each command pipes the parameter listing through grep, so the pipeline
    # only succeeds when a line ends with the expected tuned value.
    tuning_checks = (
        "lctl get_param osc.*OST*.max_rpcs_in_flight | grep 32$",
        "lctl get_param mdc.*.max_rpcs_in_flight | grep 64$",
        "lctl get_param mdc.*.max_mod_rpcs_in_flight | grep 50$",
    )

    for command in tuning_checks:
        if vcpu > 64:
            # Large instance: the tuned value must be present.
            remote_command_executor.run_remote_command(command)
        else:
            # Small instance: the tuned value must be absent, so the command
            # is expected to fail.
            with pytest.raises(RemoteCommandExecutionError):
                remote_command_executor.run_remote_command(command)
158+
159+
141160
def _test_osu_benchmarks_pt2pt(
142161
mpi_version, remote_command_executor, scheduler_commands, test_datadir, instance, slots_per_instance, partition=None
143162
):

tests/integration-tests/tests/efa/test_efa/test_efa/pcluster.config.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,3 +36,8 @@ SharedStorage:
3636
- MountDir: /shared
3737
Name: name1
3838
StorageType: Ebs
39+
- MountDir: /fsx
40+
Name: name2
41+
StorageType: FsxLustre
42+
FsxLustreSettings:
43+
StorageCapacity: 1200

0 commit comments

Comments
 (0)