2323# containing Derecho-specific torch & MPI bits.
2424# (install torchmetrics at this point too, installing it later
2525# via pip risks an undesirable torch update.)
26- ml conda
27-
26+ module load conda
27+ module list
2828topdir=$( git rev-parse --show-toplevel)
2929CREDIT_ENV_NAME=${CREDIT_ENV_NAME:- " credit-derecho" }
3030yml=$( mktemp --tmpdir=${topdir} credit-derecho-tmp-XXXXXXXXXX.yml)
@@ -49,7 +49,6 @@ dependencies:
4949 - torchmetrics
5050 - pip:
5151 - pipdeptree
52- - -e .
5352EOF
5453
5554# create the environment
@@ -73,12 +72,23 @@ find ${CONDA_PREFIX} -name "libnccl.*"
7372# (echo-opt -> xgboost -> nvidia-nccl-cu12 -> problem.)
7473pip uninstall -y $( pip list | grep nvidia-nccl | awk ' {print $1}' ) || true
7574
75+ # -----------------------------------------------------------
76+ # install credit (editable) with constraints to prevent pip
77+ # from overwriting the conda-installed torch/torchvision/torchmetrics.
78+ constraint_file=$( mktemp --tmpdir=${topdir} credit-constraints-XXXXXXXXXX.txt)
79+ pip list --format=freeze | grep -iE " ^(torch|torchvision|torchmetrics)==" > " ${constraint_file} "
80+ echo " Using pip constraints:"
81+ cat " ${constraint_file} "
82+ pip install --constraint " ${constraint_file} " -e .
83+ rm -f " ${constraint_file} "
84+
7685conda-tree deptree --small
7786pipdeptree --depth 3
7887
7988echo " NCCLs - after cleanup:"
8089find ${CONDA_PREFIX} -name " libnccl.*"
8190
91+ python -c " import torch; print('torch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
8292python -c " import credit"
8393
8494echo
0 commit comments