@@ -1260,6 +1260,7 @@ def install_rccl_tests(
12601260 with_mpi = False ,
12611261 mpi_home = None ,
12621262 rccl_home = None ,
1263+ rocm_home = None ,
12631264):
12641265 """Install rccl-tests from ROCm/rocm-systems monorepo.
12651266
@@ -1270,16 +1271,23 @@ def install_rccl_tests(
12701271 repo_branch (str, optional): Git branch to clone. Defaults to 'develop'
12711272 with_mpi (bool, optional): Build with MPI support. Defaults to False
12721273 mpi_home (str, optional): Path to MPI installation. Required if with_mpi=True
1273- rccl_home (str, optional): Path to RCCL installation. Required if with_mpi=True
1274+ rccl_home (str, optional): Path to RCCL library install (e.g. /opt/rocm). Defaults to the resolved rocm_home when omitted
1275+ rocm_home (str, optional): Path to ROCm root for hipcc/amdclang (e.g. /opt/rocm).
1276+ Defaults to /opt/rocm when not given (rccl_home is not used as a fallback). Must not be the rccl-tests work dir.
12741277
12751278 Returns:
12761279 str: Path to the install directory containing all *_perf binaries
12771280
12781281 Raises:
12791282 Exception: If clone, build, or verification fails
12801283 """
1281- if with_mpi and (not mpi_home or not rccl_home ):
1282- raise ValueError ("mpi_home and rccl_home are required when with_mpi=True" )
1284+ rocm = (rocm_home or "/opt/rocm" ).rstrip ("/" )
1285+ if rccl_home is None :
1286+ rccl = rocm
1287+ else :
1288+ rccl = str (rccl_home ).rstrip ("/" )
1289+ if with_mpi and not mpi_home :
1290+ raise ValueError ("mpi_home is required when with_mpi=True" )
12831291 sparse_dir = "projects/rccl-tests"
12841292 clone_root = f"{ install_path } /rocm-systems"
12851293
@@ -1308,13 +1316,13 @@ def install_rccl_tests(
13081316 raise Exception (f"Failed to clone rccl-tests: { e } " )
13091317
13101318 # Build rccl-tests using install.sh script
1311- gpu_target = "$(/opt/ rocm/bin/rocm_agent_enumerator | grep -v gfx000 | head -1)"
1319+ gpu_target = f "$({ rocm } /bin/rocm_agent_enumerator | grep -v gfx000 | head -1)"
13121320
13131321 if with_mpi :
1314- build_cmd = f"cd { clone_root } /{ sparse_dir } && ./install.sh --mpi --mpi_home { mpi_home } --rccl_home { rccl_home } --rocm_home { rccl_home } --gpu_targets { gpu_target } "
1322+ build_cmd = f"cd { clone_root } /{ sparse_dir } && ./install.sh --mpi --mpi_home { mpi_home } --rccl_home { rccl } --rocm_home { rocm } --gpu_targets { gpu_target } "
13151323 log .info ("[rccl-tests] Building rccl-tests with MPI support using install.sh in shared storage..." )
13161324 else :
1317- build_cmd = f"cd { clone_root } /{ sparse_dir } && ./install.sh --rccl_home { rccl_home } --rocm_home { rccl_home } --gpu_targets { gpu_target } "
1325+ build_cmd = f"cd { clone_root } /{ sparse_dir } && ./install.sh --rccl_home { rccl } --rocm_home { rocm } --gpu_targets { gpu_target } "
13181326 log .info ("[rccl-tests] Building rccl-tests without MPI using install.sh in shared storage..." )
13191327
13201328 try :
0 commit comments