@@ -244,14 +244,28 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
244244# prepare arguments to eessi_container.sh specific to build step
245245BUILD_STEP_ARGS+=(" --save" " ${TARBALL_TMP_BUILD_STEP_DIR} " )
246246BUILD_STEP_ARGS+=(" --storage" " ${STORAGE} " )
247+
247248# add options required to handle NVIDIA support
248249if command_exists " nvidia-smi" ; then
249- echo " Command 'nvidia-smi' found, using available GPU"
250- BUILD_STEP_ARGS+=(" --nvidia" " all" )
250+ # 'nvidia-smi --version' may fail even though the command exists: temporarily disable errexit so its exit code can be captured
251+ set +e
252+ nvidia-smi --version
253+ ec=$?
254+ set -e
255+ if [ ${ec} -eq 0 ]; then
256+ echo " Command 'nvidia-smi' found, using available GPU"
257+ BUILD_STEP_ARGS+=(" --nvidia" " all" )
258+ else
259+ echo " Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run successfully."
260+ echo " This script now assumes this is NOT a GPU node."
261+ echo " If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
262+ BUILD_STEP_ARGS+=(" --nvidia" " install" )
263+ fi
251264else
252265 echo " No 'nvidia-smi' found, no available GPU but allowing overriding this check"
253266 BUILD_STEP_ARGS+=(" --nvidia" " install" )
254267fi
268+
255269# Retain location for host injections so we don't reinstall CUDA
256270# (Always need to run the driver installation as available driver may change)
257271if [[ ! -z ${SHARED_FS_PATH} ]]; then
0 commit comments