|
14 | 14 |
|
15 | 15 | import pytest |
16 | 16 | from assertpy import assert_that |
17 | | -from remote_command_executor import RemoteCommandExecutor |
18 | | -from utils import get_compute_nodes_instance_ids |
| 17 | +from remote_command_executor import RemoteCommandExecutor, RemoteCommandExecutionError |
| 18 | +from utils import get_compute_nodes_instance_ids, get_instance_info |
19 | 19 |
|
20 | 20 | from tests.common.assertions import assert_no_errors_in_logs |
21 | 21 | from tests.common.mpi_common import _test_mpi |
@@ -66,6 +66,8 @@ def test_efa( |
66 | 66 |
|
67 | 67 | run_system_analyzer(cluster, scheduler_commands_factory, request, partition="efa-enabled") |
68 | 68 |
|
| 69 | + _test_fsx_performance_tuning_for_large_instances(instance, remote_command_executor) |
| 70 | + |
69 | 71 | if instance in osu_benchmarks_instances: |
70 | 72 | benchmark_failures = [] |
71 | 73 |
|
@@ -138,6 +140,23 @@ def _test_efa_installation(scheduler_commands, remote_command_executor, efa_inst |
138 | 140 | assert_that(result.stdout).does_not_contain("1d0f:efa") |
139 | 141 |
|
140 | 142 |
|
def _test_fsx_performance_tuning_for_large_instances(instance_type, remote_command_executor):
    """Verify that the Lustre client RPC tuning is applied only on instances with more than 64 vCPUs.

    The default vCPU count of ``instance_type`` is read from ``get_instance_info``. Three
    ``lctl get_param`` values are then checked on the remote host by piping them through ``grep``:

    * ``osc.*OST*.max_rpcs_in_flight`` ending in ``32``
    * ``mdc.*.max_rpcs_in_flight`` ending in ``64``
    * ``mdc.*.max_mod_rpcs_in_flight`` ending in ``50``

    With more than 64 vCPUs every command is simply run. Otherwise every command is required to
    raise ``RemoteCommandExecutionError``, i.e. the tuned values must not be present.
    Only the vCPU count gates these checks; instance memory is not taken into account.

    :param instance_type: instance type name passed to ``get_instance_info``.
    :param remote_command_executor: executor whose ``run_remote_command`` runs each check.
    """
    # Listed once so that both branches always check exactly the same parameters and values.
    # `grep <value>$` only matches when the reported value ends with the tuned setting.
    tuning_checks = (
        "lctl get_param osc.*OST*.max_rpcs_in_flight | grep 32$",
        "lctl get_param mdc.*.max_rpcs_in_flight | grep 64$",
        "lctl get_param mdc.*.max_mod_rpcs_in_flight | grep 50$",
    )

    vcpu = get_instance_info(instance_type).get("VCpuInfo").get("DefaultVCpus")
    tuning_expected = vcpu > 64

    for command in tuning_checks:
        if tuning_expected:
            # NOTE(review): relies on run_remote_command raising when grep finds no match
            # (non-zero exit status) -- confirm against RemoteCommandExecutor.
            remote_command_executor.run_remote_command(command)
        else:
            with pytest.raises(RemoteCommandExecutionError):
                remote_command_executor.run_remote_command(command)
| 158 | + |
| 159 | + |
141 | 160 | def _test_osu_benchmarks_pt2pt( |
142 | 161 | mpi_version, remote_command_executor, scheduler_commands, test_datadir, instance, slots_per_instance, partition=None |
143 | 162 | ): |
|
0 commit comments