# Patch summary (descriptive preamble placed before the first "diff --git" header; it is not part of any hunk).
#
# Removes the CUDA device-id plumbing from the GPU support hook and trims what is mounted into the container:
#   * extra/CI/integration/test_gpu_support.py: _GPU_BINS is reduced to "nvidia-smi".
#   * src/activate_gpu_support.sh:
#       - the cuda_devices positional argument is dropped; the expected argument count goes from 4 to 3
#         (container_root_dir=$1, container_site_resources=$2, verbose=$3);
#       - nvidia_binaries is reduced to "nvidia-smi";
#       - "Could not find library/binary" is logged at INFO instead of WARNING;
#       - load_nvidia_uvm_if_necessary (ran "nvidia-modprobe -u -c=0" when /dev/nvidia-uvm was missing) and its call are removed.
#   * src/gpu_support.h / src/gpu_support.c: the gpu_ids member is removed from struct gpu_support_config;
#     parse_gpu_env only sets is_gpu_support_enabled; the hook argv becomes
#     args[2]=udiMountPoint, args[3]=siteResources, args[4]=verbose flag, args[5]=NULL;
#     fprint_gpu_support_config no longer prints gpu_ids; free_gpu_support_config is left with an empty body.
#   * src/setupRoot.c: the local gpu_config variable is removed and
#     udiConfig.gpu_config.is_gpu_support_enabled is set to 1 right after the udiRoot configuration parse check.
#   * src/test/test_gpu_support.cpp: the CHECKs on config.gpu_ids are removed.
#
# NOTE(review): setupRoot.c now sets is_gpu_support_enabled unconditionally, while parse_gpu_env still derives it
#   from CUDA_VISIBLE_DEVICES; whether parse_gpu_env still runs on this path is not visible in these hunks -- confirm.
# NOTE(review): args is still declared as char* args[8] although only indices 0..5 are written after this change.
# NOTE(review): unchanged context lines in the shell script use unquoted expansions ([ -z $bin_host ],
#   parse_command_line_arguments $*); presumably paths never contain whitespace -- verify before relying on it.
diff --git a/extra/CI/integration/test_gpu_support.py b/extra/CI/integration/test_gpu_support.py index 5e60392f..a2fea4e8 100755 --- a/extra/CI/integration/test_gpu_support.py +++ b/extra/CI/integration/test_gpu_support.py @@ -21,11 +21,7 @@ class TestGPUSupport(unittest.TestCase): "libnvidia-ml.so", "libnvidia-fatbinaryloader.so", "libnvidia-opencl.so" } - _GPU_BINS = { "nvidia-cuda-mps-control", - "nvidia-cuda-mps-server", - "nvidia-debugdump", - "nvidia-persistenced", - "nvidia-smi"} + _GPU_BINS = { "nvidia-smi" } _GPU_ENV_LD_LIB_PATH = {"/opt/shifter/site-resources/gpu/lib", "/opt/shifter/site-resources/gpu/lib64"} _GPU_ENV_PATH = {"/opt/shifter/site-resources/gpu/bin"} diff --git a/src/activate_gpu_support.sh b/src/activate_gpu_support.sh index 92968bf1..ccb20d43 100755 --- a/src/activate_gpu_support.sh +++ b/src/activate_gpu_support.sh @@ -4,7 +4,6 @@ #this script with an empty environment export PATH=/usr/local/bin:/usr/bin:/bin:/sbin -cuda_devices= container_root_dir= container_site_resources= is_verbose_active= @@ -22,11 +21,7 @@ nvidia_compute_libs="cuda \ nvidia-opencl" #the NVIDIA binaries that will be bind mounted into the container -nvidia_binaries="nvidia-cuda-mps-control \ - nvidia-cuda-mps-server \ - nvidia-debugdump \ - nvidia-persistenced \ - nvidia-smi" +nvidia_binaries="nvidia-smi" log() { @@ -72,19 +67,18 @@ bind_mount_file_into_container() parse_command_line_arguments() { - if [ ! $# -eq 4 ]; then + if [ ! 
$# -eq 3 ]; then log ERROR "Internal error: received bad number of command line arguments" exit 1 fi - cuda_devices=$1 - container_root_dir=$2 - container_site_resources=$3 + container_root_dir=$1 + container_site_resources=$2 container_bin_path=$container_site_resources/gpu/bin container_lib_path=$container_site_resources/gpu/lib container_lib64_path=$container_site_resources/gpu/lib64 - local verbose=$4 + local verbose=$3 if [ $verbose = "verbose-on" ]; then is_verbose_active=true elif [ $verbose = "verbose-off" ]; then @@ -121,7 +115,7 @@ add_nvidia_compute_libs_to_container() for lib in $nvidia_compute_libs; do local libs_host=$( ldconfig -p | grep "lib${lib}.so" | awk '{print $4}' ) if [ -z "$libs_host" ]; then - log WARNING "Could not find library: $lib" + log INFO "Could not find library: $lib" continue fi @@ -145,7 +139,7 @@ add_nvidia_binaries_to_container() for bin in $nvidia_binaries; do local bin_host="$( which $bin )" if [ -z $bin_host ]; then - log WARNING "Could not find binary: $bin" + log INFO "Could not find binary: $bin" continue fi local bin_container=$container_bin_path/$bin @@ -153,21 +147,9 @@ add_nvidia_binaries_to_container() done } -load_nvidia_uvm_if_necessary() -{ - # /dev/nvidia-uvm is available when the NVIDIA UVM kernel module is correctly loaded. - # Load the kernel module through nvidia-modprobe if /dev/nvidia-uvm doesn't exist. - if [ ! -e /dev/nvidia-uvm ]; then - log INFO "/dev/nvidia-uvm doesn't exist. Creating it with nvidia-modprobe." - nvidia-modprobe -u -c=0 - exit_if_previous_command_failed "Cannot nvidia-modprobe -u -c=0" - fi -} - parse_command_line_arguments $* validate_command_line_arguments -log INFO "Activating support for CUDA devices $cuda_devices." 
+log INFO "Activating GPU support" check_prerequisites add_nvidia_compute_libs_to_container add_nvidia_binaries_to_container -load_nvidia_uvm_if_necessary diff --git a/src/gpu_support.c b/src/gpu_support.c index 46dbf5b7..6f92ac79 100644 --- a/src/gpu_support.c +++ b/src/gpu_support.c @@ -13,11 +13,9 @@ int parse_gpu_env(struct gpu_support_config* config) { if( cuda_visible_devices != NULL && strcmp(cuda_visible_devices, "") != 0 && strcmp(cuda_visible_devices, "NoDevFiles") != 0) { - config->gpu_ids = strdup(cuda_visible_devices); config->is_gpu_support_enabled = 1; } else { - config->gpu_ids = NULL; config->is_gpu_support_enabled = 0; } return 0; @@ -37,11 +35,10 @@ int execute_hook_to_activate_gpu_support(int verbose, const UdiRootConfig* udiCo char* args[8]; args[0] = strdup("/bin/bash"); args[1] = script_path; - args[2] = strdup(udiConfig->gpu_config.gpu_ids); - args[3] = strdup(udiConfig->udiMountPoint); - args[4] = strdup(udiConfig->siteResources); - args[5] = verbose ? strdup("verbose-on") : strdup("verbose-off"); - args[6] = NULL; + args[2] = strdup(udiConfig->udiMountPoint); + args[3] = strdup(udiConfig->siteResources); + args[4] = verbose ? 
strdup("verbose-on") : strdup("verbose-off"); + args[5] = NULL; ret = forkAndExecv(args); @@ -58,12 +55,9 @@ int fprint_gpu_support_config(FILE* fp, const struct gpu_support_config* config) { size_t written = 0; written += fprintf(fp, "***** GPU support config *****\n"); - written += fprintf(fp, "gpu_ids = %s\n", config->gpu_ids); written += fprintf(fp, "is_gpu_support_enabled = %d\n", config->is_gpu_support_enabled); return written; } void free_gpu_support_config(struct gpu_support_config* config) { - free(config->gpu_ids); - config->gpu_ids = NULL; } diff --git a/src/gpu_support.h b/src/gpu_support.h index b449145b..f4e5d992 100644 --- a/src/gpu_support.h +++ b/src/gpu_support.h @@ -13,7 +13,6 @@ extern "C" { struct _UdiRootConfig; struct gpu_support_config { - char* gpu_ids; int is_gpu_support_enabled; }; diff --git a/src/setupRoot.c b/src/setupRoot.c index 708e5a90..52236355 100644 --- a/src/setupRoot.c +++ b/src/setupRoot.c @@ -95,7 +95,6 @@ int main(int argc, char **argv) { UdiRootConfig udiConfig; SetupRootConfig config; ImageData image; - struct gpu_support_config gpu_config = {}; memset(&udiConfig, 0, sizeof(UdiRootConfig)); memset(&config, 0, sizeof(SetupRootConfig)); @@ -112,6 +111,9 @@ int main(int argc, char **argv) { fprintf(stderr, "FAILED to parse udiRoot configuration. 
Exiting.\n"); exit(1); } + + udiConfig.gpu_config.is_gpu_support_enabled = 1; //always attempt to enable GPU support + udiConfig.target_uid = config.uid; udiConfig.target_gid = config.gid; udiConfig.auxiliary_gids = shifter_getgrouplist(config.user, udiConfig.target_gid, &(udiConfig.nauxiliary_gids)); diff --git a/src/test/test_gpu_support.cpp b/src/test/test_gpu_support.cpp index d68f8c63..70bdf1e6 100644 --- a/src/test/test_gpu_support.cpp +++ b/src/test/test_gpu_support.cpp @@ -15,7 +15,6 @@ TEST(GPUSupportTestGroup, parseGPUenv_test) { { parse_gpu_env(&config); CHECK(config.is_gpu_support_enabled == 0); - CHECK(config.gpu_ids == NULL); free_gpu_support_config(&config); } // CUDA_VISIBLE_DEVICES= (no value) @@ -23,7 +22,6 @@ TEST(GPUSupportTestGroup, parseGPUenv_test) { setenv("CUDA_VISIBLE_DEVICES", "", 1); parse_gpu_env(&config); CHECK(config.is_gpu_support_enabled == 0); - CHECK(config.gpu_ids == NULL); free_gpu_support_config(&config); } // CUDA_VISIBLE_DEVICES=NoDevFiles @@ -31,7 +29,6 @@ TEST(GPUSupportTestGroup, parseGPUenv_test) { setenv("CUDA_VISIBLE_DEVICES", "NoDevFiles", 1); parse_gpu_env(&config); CHECK(config.is_gpu_support_enabled == 0); - CHECK(config.gpu_ids == NULL); free_gpu_support_config(&config); } // CUDA_VISIBLE_DEVICES=0 @@ -39,7 +36,6 @@ TEST(GPUSupportTestGroup, parseGPUenv_test) { setenv("CUDA_VISIBLE_DEVICES", "0", 1); parse_gpu_env(&config); CHECK(config.is_gpu_support_enabled == 1); - CHECK(config.gpu_ids == std::string("0")); free_gpu_support_config(&config); } // CUDA_VISIBLE_DEVICES=0,1 @@ -47,7 +43,6 @@ TEST(GPUSupportTestGroup, parseGPUenv_test) { setenv("CUDA_VISIBLE_DEVICES", "0,1", 1); parse_gpu_env(&config); CHECK(config.is_gpu_support_enabled == 1); - CHECK(config.gpu_ids == std::string("0,1")); free_gpu_support_config(&config); } }