diff --git a/README.md b/README.md index d7b9167..e4cdf29 100755 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ git clone https://github.com/hadim/docker-tensorflow-builder.git - Edit the `build.sh` file to modify TensorFlow compilation parameters. Then launch the build: ```bash -LINUX_DISTRO="ubuntu-16.04" +# LINUX_DISTRO="ubuntu-16.04" +LINUX_DISTRO="ubuntu-18.04" # or LINUX_DISTRO="ubuntu-18.10" # or LINUX_DISTRO="centos-7.4" # or LINUX_DISTRO="centos-6.6" @@ -52,7 +53,8 @@ docker-compose run tf - Edit the `build.sh` file to modify TensorFlow compilation parameters. Then launch the build: ```bash -LINUX_DISTRO="ubuntu-16.04" +# LINUX_DISTRO="ubuntu-16.04" +LINUX_DISTRO="ubuntu-18.04" # or LINUX_DISTRO="ubuntu-18.10" # or LINUX_DISTRO="centos-7.4" # or LINUX_DISTRO="centos-6.6" @@ -81,6 +83,7 @@ docker-compose run tf --- - Refer to [tested build configurations](https://www.tensorflow.org/install/source#tested_build_configurations) to know which `BAZEL_VERSION` you need. +- Refer to [the NVIDIA CUDA GPUs compute capability table](https://developer.nvidia.com/cuda-gpus#compute) to check the compute capability of your GPU, and set the `TF_CUDA_COMPUTE_CAPABILITIES` variable in the `build.sh` script accordingly. - Be patient, the compilation can be long. - Enjoy your Python wheels in the `wheels/` folder. 
- *Don't forget to remove the container to free the space after the build: `docker-compose rm --force`.*
diff --git a/tensorflow/ubuntu-18.04/Dockerfile b/tensorflow/ubuntu-18.04/Dockerfile
new file mode 100755
index 0000000..62f8d39
--- /dev/null
+++ b/tensorflow/ubuntu-18.04/Dockerfile
@@ -0,0 +1,27 @@
+FROM ubuntu:18.04
+
+RUN apt update && apt install -y \
+    build-essential \
+    curl \
+    git \
+    wget \
+    libjpeg-dev \
+    openjdk-8-jdk \
+    gcc-7 \
+    g++-7 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda (build.sh uses conda to create the Python build environment)
+WORKDIR /
+RUN wget "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O "miniconda.sh" && \
+    bash "miniconda.sh" -b -p "/conda" && \
+    rm miniconda.sh && \
+    echo PATH='/conda/bin:$PATH' >> /root/.bashrc && \
+    /conda/bin/conda config --add channels conda-forge && \
+    /conda/bin/conda update --yes -n base conda && \
+    /conda/bin/conda update --all --yes
+
+COPY build.sh /build.sh
+COPY cuda.sh /cuda.sh
+
+CMD bash build.sh
diff --git a/tensorflow/ubuntu-18.04/build.sh b/tensorflow/ubuntu-18.04/build.sh
new file mode 100755
index 0000000..3ab8b72
--- /dev/null
+++ b/tensorflow/ubuntu-18.04/build.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+set -ex
+
+export PATH="/conda/bin:/usr/bin:$PATH"
+
+if [ "$USE_GPU" -eq "1" ]; then
+    export CUDA_HOME="/usr/local/cuda"
+    sudo() { "$@"; }  # cuda.sh may call sudo; aliases are not expanded in non-interactive bash
+    source cuda.sh
+    cuda.install "$CUDA_VERSION" "$CUDNN_VERSION" "$NCCL_VERSION"
+    cd /
+fi
+
+# Set correct GCC version
+GCC_VERSION="7"
+update-alternatives --install /usr/bin/gcc gcc "/usr/bin/gcc-$GCC_VERSION" 10
+update-alternatives --install /usr/bin/g++ g++ "/usr/bin/g++-$GCC_VERSION" 10
+update-alternatives --set gcc "/usr/bin/gcc-$GCC_VERSION"
+update-alternatives --set g++ "/usr/bin/g++-$GCC_VERSION"
+gcc --version
+
+# Install an appropriate Python environment
+conda config --add channels conda-forge
+conda create --yes -n tensorflow "python==$PYTHON_VERSION"
+source activate tensorflow
+conda install --yes numpy wheel "bazel==$BAZEL_VERSION"
+pip install keras-applications keras-preprocessing
+
+# Compile TensorFlow
+
+# Here you can change the TensorFlow version you want to build.
+# You can also tweak the optimizations and various parameters for the build compilation.
+# See https://www.tensorflow.org/install/source for more details.
+
+cd /
+rm -fr tensorflow/
+git clone --depth 1 --branch "$TF_VERSION_GIT_TAG" "https://github.com/tensorflow/tensorflow.git"
+
+TF_ROOT=/tensorflow
+cd "$TF_ROOT"
+
+# Python path options
+export PYTHON_BIN_PATH="$(command -v python)"
+export PYTHON_LIB_PATH="$($PYTHON_BIN_PATH -c 'import site; print(site.getsitepackages()[0])')"
+export PYTHONPATH=${TF_ROOT}/lib
+export PYTHON_ARG=${TF_ROOT}/lib
+
+# Compilation parameters
+export TF_NEED_CUDA=0
+export TF_NEED_GCP=1
+export TF_CUDA_COMPUTE_CAPABILITIES=6.1,5.2,3.5
+export TF_NEED_HDFS=1
+export TF_NEED_OPENCL=0
+export TF_NEED_JEMALLOC=1 # Need to be disabled on CentOS 6.6
+export TF_ENABLE_XLA=0
+export TF_NEED_VERBS=0
+export TF_CUDA_CLANG=0
+export TF_DOWNLOAD_CLANG=0
+export TF_NEED_MKL=0
+export TF_DOWNLOAD_MKL=0
+export TF_NEED_MPI=0
+export TF_NEED_S3=1
+export TF_NEED_KAFKA=1
+export TF_NEED_GDR=0
+export TF_NEED_OPENCL_SYCL=0
+export TF_SET_ANDROID_WORKSPACE=0
+export TF_NEED_AWS=0
+export TF_NEED_IGNITE=0
+export TF_NEED_ROCM=0
+
+# Compiler options
+export GCC_HOST_COMPILER_PATH="$(command -v gcc)"
+
+# Here you can edit this variable to set any optimizations you want.
+export CC_OPT_FLAGS="-march=native"
+
+if [ "$USE_GPU" -eq "1" ]; then
+    # Cuda parameters
+    export CUDA_TOOLKIT_PATH=$CUDA_HOME
+    export CUDNN_INSTALL_PATH=$CUDA_HOME
+    export TF_CUDA_VERSION="$CUDA_VERSION"
+    export TF_CUDNN_VERSION="$CUDNN_VERSION"
+    export TF_NEED_CUDA=1
+    export TF_NEED_TENSORRT=0
+    export TF_NCCL_VERSION=$NCCL_VERSION
+    # cuda.sh installs the NCCL headers and libraries inside CUDA_HOME.
+    export NCCL_INSTALL_PATH=$CUDA_HOME
+
+    # Those two lines are important for the linking step.
+    export LD_LIBRARY_PATH="$CUDA_TOOLKIT_PATH/lib64:${LD_LIBRARY_PATH}"
+    ldconfig
+fi
+
+# Compilation
+./configure
+
+if [ "$USE_GPU" -eq "1" ]; then
+
+    bazel build --config=opt \
+        --config=cuda \
+        --linkopt="-lrt" \
+        --linkopt="-lm" \
+        --host_linkopt="-lrt" \
+        --host_linkopt="-lm" \
+        --action_env="LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
+        //tensorflow/tools/pip_package:build_pip_package
+
+    PACKAGE_NAME=tensorflow-gpu
+    SUBFOLDER_NAME="${TF_VERSION_GIT_TAG}-py${PYTHON_VERSION}-cuda${TF_CUDA_VERSION}-cudnn${TF_CUDNN_VERSION}"
+
+else
+
+    bazel build --config=opt \
+        --linkopt="-lrt" \
+        --linkopt="-lm" \
+        --host_linkopt="-lrt" \
+        --host_linkopt="-lm" \
+        --action_env="LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
+        //tensorflow/tools/pip_package:build_pip_package
+
+    PACKAGE_NAME=tensorflow
+    SUBFOLDER_NAME="${TF_VERSION_GIT_TAG}-py${PYTHON_VERSION}"
+fi
+
+mkdir -p "/wheels/$SUBFOLDER_NAME"
+
+bazel-bin/tensorflow/tools/pip_package/build_pip_package "/wheels/$SUBFOLDER_NAME" --project_name "$PACKAGE_NAME"
+
+# Use the following for TF <= 1.8
+# bazel-bin/tensorflow/tools/pip_package/build_pip_package "/wheels/$SUBFOLDER_NAME"
+
+# Fix wheel folder permissions
+chmod -R 777 /wheels/
diff --git a/tensorflow/ubuntu-18.04/cuda.sh b/tensorflow/ubuntu-18.04/cuda.sh
new file mode 100755
index 0000000..4c20c52
--- /dev/null
+++ b/tensorflow/ubuntu-18.04/cuda.sh
@@ -0,0 +1,471 @@
+#!/usr/bin/env bash
+
+# Cuda and friends installation done right.
+# Switch default Cuda version using symbolic link: cuda.switch 9.2
+# Install Cuda: cuda.install.cuda 10.0
+# Install cuDNN to CUDA_HOME: cuda.install.cudnn 7.5
+# Install NCCL to CUDA_HOME: cuda.install.nccl 2.4
+# Install Cuda, cuDNN and NCCL: cuda.install 10.0 7.5 2.4
+
+# Author: Hadrien Mary
+# License: MIT License
+# Date: 11/03/2019
+
+# Return 0 when CUDA_HOME is set and non-empty; otherwise print a hint and return 1.
+is_cuda_home_set() {
+    if [ -z "$CUDA_HOME" ]; then
+        echo "CUDA_HOME is not set. Please set it:"
+        echo 'export CUDA_HOME="/usr/local/cuda/"'
+        return 1
+    fi
+    return 0
+}
+
+# Return 0 when CUDA_HOME is a symbolic link, or is neither a directory nor a regular file; else return 1.
+is_cuda_home_symbolic_link() {
+    if [[ -e "${CUDA_HOME}" && -L "${CUDA_HOME}" ]]; then
+        return 0
+    elif [[ ! -d "${CUDA_HOME}" && ! -f "${CUDA_HOME}" ]]; then
+        return 0
+    else
+        echo "CUDA_HOME is not a symbolic link."
+        echo "Please make it a symbolic link."
+        return 1
+    fi
+}
+
+# Print the Cuda version as MAJOR.MINOR, taken from the third field of $CUDA_HOME/version.txt.
+guess_cuda_version() {
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    if ! is_cuda_home_symbolic_link; then
+        return 1
+    fi
+
+    POSSIBLE_CUDA_VERSION=$(cut -d' ' -f 3 "$CUDA_HOME/version.txt" | cut -d'.' -f 1-2)
+    echo "$POSSIBLE_CUDA_VERSION"
+}
+
+# List the parent directory of CUDA_HOME (where the versioned Cuda folders are created).
+cuda.see() {
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    PARENT_BASE_DIR=$(dirname "$CUDA_HOME")
+    ls -l "$PARENT_BASE_DIR"
+    return 0
+}
+
+# Point the CUDA_HOME symbolic link at $CUDA_HOME-<version>. Usage: cuda.switch CUDA_VERSION
+cuda.switch() {
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    if ! is_cuda_home_symbolic_link; then
+        return 1
+    fi
+
+    if [ -z "$1" ]; then
+        echo "Please specify a Cuda version."
+        echo "Usage: cuda.switch CUDA_VERSION"
+        echo "Cuda version available: 9.0, 9.1, 9.2, 10.0, 10.1"
+        return 1
+    fi
+
+    NEW_CUDA_VERSION="$1"
+    NEW_CUDA_HOME="$CUDA_HOME-$NEW_CUDA_VERSION"
+
+    if [ ! -d "$NEW_CUDA_HOME" ]; then
+        echo "Cuda $NEW_CUDA_VERSION doesn't exist at $NEW_CUDA_HOME."
+        return 1
+    fi
+
+    PARENT_BASE_DIR=$(dirname "$CUDA_HOME")
+    if [ ! -w "$PARENT_BASE_DIR" ]; then
+        sudo rm -f "$CUDA_HOME"
+        sudo ln -s "$NEW_CUDA_HOME" "$CUDA_HOME"
+    else
+        rm -f "$CUDA_HOME"
+        ln -s "$NEW_CUDA_HOME" "$CUDA_HOME"
+    fi
+    echo "Default Cuda version is now $NEW_CUDA_VERSION at $NEW_CUDA_HOME"
+}
+
+# Install Cuda, cuDNN and NCCL. Usage: cuda.install CUDA_VERSION CUDNN_VERSION [NCCL_VERSION]
+cuda.install() {
+    cuda.install.cuda "$1"
+    cuda.install.cudnn "$2"
+    cuda.install.nccl "$3"
+}
+
+# Download the Cuda installer, install the toolkit into $CUDA_HOME-<version> and switch to it.
+cuda.install.cuda() {
+
+    CUDA_VERSION="$1"
+    if [ -z "$CUDA_VERSION" ]; then
+        echo "Please specify a Cuda version."
+        echo "Usage: cuda.install.cuda CUDA_VERSION"
+        echo "Example: cuda.install.cuda 10.0"
+        echo "Cuda version available: 9.0, 9.1, 9.2, 10.0, 10.1."
+        return 1
+    fi
+
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    if ! is_cuda_home_symbolic_link; then
+        return 1
+    fi
+
+    CUDA_PATH="$CUDA_HOME-$CUDA_VERSION"
+    if [ -d "$CUDA_PATH" ]; then
+        echo "$CUDA_PATH exists. Please remove the previous Cuda folder first."
+        return 1
+    fi
+
+    # Setup Cuda URL
+    if [ "$CUDA_VERSION" = "9.0" ]; then
+        CUDA_URL="https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_384.81_linux-run"
+    elif [ "$CUDA_VERSION" = "9.1" ]; then
+        CUDA_URL="https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux"
+    elif [ "$CUDA_VERSION" = "9.2" ]; then
+        CUDA_URL="https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda_9.2.88_396.26_linux"
+    elif [ "$CUDA_VERSION" = "10.0" ]; then
+        CUDA_URL="https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux"
+    elif [ "$CUDA_VERSION" = "10.1" ]; then
+        CUDA_URL="https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run"
+    else
+        echo "Error: You need to set CUDA_VERSION to 9.0, 9.1, 9.2, 10.0 or 10.1."
+        return 1
+    fi
+
+    CUDA_INSTALLER_PATH="/tmp/cuda.run"
+
+    echo "Download Cuda $CUDA_VERSION."
+    wget "$CUDA_URL" -O "$CUDA_INSTALLER_PATH"
+
+    echo "Install Cuda $CUDA_VERSION."
+    PARENT_BASE_DIR=$(dirname "$CUDA_HOME")
+    if [ ! -w "$PARENT_BASE_DIR" ]; then
+        sudo bash "$CUDA_INSTALLER_PATH" --silent --toolkit --override --toolkitpath="$CUDA_PATH"
+    else
+        bash "$CUDA_INSTALLER_PATH" --silent --toolkit --override --toolkitpath="$CUDA_PATH"
+    fi
+    rm -f "$CUDA_INSTALLER_PATH"
+
+    # Set the symbolic link.
+    cuda.switch "$CUDA_VERSION"
+
+    echo "Cuda $CUDA_VERSION is installed at $CUDA_PATH."
+
+    return 0
+}
+
+cuda.install.cudnn() {
+    # Install cuDNN in $CUDA_HOME. Usage: cuda.install.cudnn CUDNN_VERSION
+
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    if ! is_cuda_home_symbolic_link; then
+        return 1
+    fi
+
+    CUDA_VERSION="$(guess_cuda_version)"
+    if [ -z "$CUDA_VERSION" ]; then
+        echo "Can't guess the Cuda version from $CUDA_HOME."
+        return 1
+    fi
+
+    CUDNN_VERSION="$1"
+    if [ -z "$CUDNN_VERSION" ]; then
+        echo "Please specify a cuDNN version."
+        echo "Usage: cuda.install.cudnn CUDNN_VERSION"
+        echo "Example: cuda.install.cudnn 7.5"
+        echo "cuDNN version available: 7.0, 7.1, 7.4, 7.5."
+        return 1
+    fi
+
+    # cuDNN 7.0
+    if [ "$CUDNN_VERSION" = "7.0" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            CUDNN_VERSION_DETAILED="7.0.5.15"
+        elif [ "$CUDA_VERSION" = "9.1" ]; then
+            CUDNN_VERSION_DETAILED="7.0.5.15"
+        elif [ -n "$CUDNN_VERSION" ]; then
+            echo "Error: cuDNN $CUDNN_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # cuDNN 7.1
+    elif [ "$CUDNN_VERSION" = "7.1" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            CUDNN_VERSION_DETAILED="7.1.4.18"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            CUDNN_VERSION_DETAILED="7.1.4.18"
+        elif [ -n "$CUDNN_VERSION" ]; then
+            echo "Error: cuDNN $CUDNN_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # cuDNN 7.4
+    elif [ "$CUDNN_VERSION" = "7.4" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            CUDNN_VERSION_DETAILED="7.4.2.24"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            CUDNN_VERSION_DETAILED="7.4.2.24"
+        elif [ "$CUDA_VERSION" = "10.0" ]; then
+            CUDNN_VERSION_DETAILED="7.4.2.24"
+        elif [ -n "$CUDNN_VERSION" ]; then
+            echo "Error: cuDNN $CUDNN_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # cuDNN 7.5
+    elif [ "$CUDNN_VERSION" = "7.5" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            CUDNN_VERSION_DETAILED="7.5.0.56"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            CUDNN_VERSION_DETAILED="7.5.0.56"
+        elif [ "$CUDA_VERSION" = "10.0" ]; then
+            CUDNN_VERSION_DETAILED="7.5.0.56"
+        elif [ "$CUDA_VERSION" = "10.1" ]; then
+            CUDNN_VERSION_DETAILED="7.5.0.56"
+        elif [ -n "$CUDNN_VERSION" ]; then
+            echo "Error: cuDNN $CUDNN_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    elif [ -n "$CUDNN_VERSION" ]; then
+        echo "Error: You need to set CUDNN_VERSION to 7.0, 7.1, 7.4 or 7.5."
+        return 1
+    fi
+
+    # Setup URLs
+    CUDNN_URL="https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_${CUDNN_VERSION_DETAILED}-1+cuda${CUDA_VERSION}_amd64.deb"
+    CUDNN_URL_DEV="https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_${CUDNN_VERSION_DETAILED}-1+cuda${CUDA_VERSION}_amd64.deb"
+
+    # Setup temporary paths
+    CUDNN_TMP_PATH="/tmp/cudnn.deb"
+    CUDNN_DEV_TMP_PATH="/tmp/cudnn-dev.deb"
+
+    CUDNN_TMP_DIR_PATH="/tmp/cudnn"
+    CUDNN_DEV_TMP_DIR_PATH="/tmp/cudnn-dev"
+
+    echo "Download binaries."
+    wget "$CUDNN_URL" -O "$CUDNN_TMP_PATH"
+    wget "$CUDNN_URL_DEV" -O "$CUDNN_DEV_TMP_PATH"
+
+    mkdir -p "$CUDNN_TMP_DIR_PATH"
+    mkdir -p "$CUDNN_DEV_TMP_DIR_PATH"
+
+    echo "Extract binaries."
+    cd "$CUDNN_TMP_DIR_PATH"
+    ar x "$CUDNN_TMP_PATH"
+    tar -xJf data.tar.xz
+    cd "$CUDNN_DEV_TMP_DIR_PATH"
+    ar x "$CUDNN_DEV_TMP_PATH"
+    tar -xJf data.tar.xz
+
+    echo "Install cuDNN files."
+
+    PARENT_BASE_DIR=$(dirname "$CUDA_HOME")
+    if [ ! -w "$PARENT_BASE_DIR" ]; then
+        sudo mv "$CUDNN_TMP_DIR_PATH"/usr/lib/x86_64-linux-gnu/libcudnn* "$CUDA_HOME/lib64/"
+        sudo mv "$CUDNN_DEV_TMP_DIR_PATH/usr/include/x86_64-linux-gnu/cudnn_v7.h" "$CUDA_HOME/include/"
+        sudo mv "$CUDNN_DEV_TMP_DIR_PATH/usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a" "$CUDA_HOME/lib64/"
+
+        sudo rm -f "$CUDA_HOME/include/cudnn.h"
+        sudo rm -f "$CUDA_HOME/lib64/libcudnn_static.a"
+
+        sudo ln -s "$CUDA_HOME/include/cudnn_v7.h" "$CUDA_HOME/include/cudnn.h"
+        sudo ln -s "$CUDA_HOME/lib64/libcudnn_static_v7.a" "$CUDA_HOME/lib64/libcudnn_static.a"
+    else
+        mv "$CUDNN_TMP_DIR_PATH"/usr/lib/x86_64-linux-gnu/libcudnn* "$CUDA_HOME/lib64/"
+        mv "$CUDNN_DEV_TMP_DIR_PATH/usr/include/x86_64-linux-gnu/cudnn_v7.h" "$CUDA_HOME/include/"
+        mv "$CUDNN_DEV_TMP_DIR_PATH/usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a" "$CUDA_HOME/lib64/"
+
+        rm -f "$CUDA_HOME/include/cudnn.h"
+        rm -f "$CUDA_HOME/lib64/libcudnn_static.a"
+
+        ln -s "$CUDA_HOME/include/cudnn_v7.h" "$CUDA_HOME/include/cudnn.h"
+        ln -s "$CUDA_HOME/lib64/libcudnn_static_v7.a" "$CUDA_HOME/lib64/libcudnn_static.a"
+    fi
+
+    echo "Cleanup files."
+    rm -fr "$CUDNN_TMP_DIR_PATH"
+    rm -fr "$CUDNN_DEV_TMP_DIR_PATH"
+    rm -f "$CUDNN_TMP_PATH"
+    rm -f "$CUDNN_DEV_TMP_PATH"
+
+    echo "cuDNN $CUDNN_VERSION is installed at $CUDA_HOME."
+}
+
+cuda.install.nccl() {
+    # Install NCCL in $CUDA_HOME. Usage: cuda.install.nccl [NCCL_VERSION]
+
+    if ! is_cuda_home_set; then
+        return 1
+    fi
+
+    if ! is_cuda_home_symbolic_link; then
+        return 1
+    fi
+
+    CUDA_VERSION="$(guess_cuda_version)"
+    if [ -z "$CUDA_VERSION" ]; then
+        echo "Can't guess the Cuda version from $CUDA_HOME."
+        return 1
+    fi
+
+    NCCL_VERSION="$1"
+    if [ -z "$NCCL_VERSION" ]; then
+        # echo "Please specify a NCCL version."
+        # echo "Usage: cuda.install.nccl NCCL_VERSION"
+        # echo "Example: cuda.install.nccl 2.4"
+        # echo "NCCL version available: 2.1, 2.2, 2.3 and 2.4"
+        # return 1
+        # Default NCCL version
+        NCCL_VERSION="2.4"
+    fi
+
+    # NCCL 2.1
+    if [ "$NCCL_VERSION" = "2.1" ]; then
+
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            NCCL_VERSION_DETAILED="2.1.15-1"
+        elif [ "$CUDA_VERSION" = "9.1" ]; then
+            NCCL_VERSION_DETAILED="2.1.15-1"
+        elif [ -n "$NCCL_VERSION" ]; then
+            echo "Error: NCCL $NCCL_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # NCCL 2.2
+    elif [ "$NCCL_VERSION" = "2.2" ]; then
+
+        # Pick the package build that matches the installed Cuda version.
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            NCCL_VERSION_DETAILED="2.2.13-1"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            NCCL_VERSION_DETAILED="2.2.13-1"
+        elif [ -n "$NCCL_VERSION" ]; then
+            echo "Error: NCCL $NCCL_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # NCCL 2.3
+    elif [ "$NCCL_VERSION" = "2.3" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            NCCL_VERSION_DETAILED="2.3.7-1"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            NCCL_VERSION_DETAILED="2.3.7-1"
+        elif [ "$CUDA_VERSION" = "10.0" ]; then
+            NCCL_VERSION_DETAILED="2.3.7-1"
+        elif [ -n "$NCCL_VERSION" ]; then
+            echo "Error: NCCL $NCCL_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    # NCCL 2.4
+    elif [ "$NCCL_VERSION" = "2.4" ]; then
+
+        if [ "$CUDA_VERSION" = "9.0" ]; then
+            NCCL_VERSION_DETAILED="2.4.2-1"
+        elif [ "$CUDA_VERSION" = "9.2" ]; then
+            NCCL_VERSION_DETAILED="2.4.2-1"
+        elif [ "$CUDA_VERSION" = "10.0" ]; then
+            NCCL_VERSION_DETAILED="2.4.2-1"
+        elif [ "$CUDA_VERSION" = "10.1" ]; then
+            NCCL_VERSION_DETAILED="2.4.2-1"
+        elif [ -n "$NCCL_VERSION" ]; then
+            echo "Error: NCCL $NCCL_VERSION is not compatible with Cuda $CUDA_VERSION."
+            return 1
+        fi
+
+    elif [ -n "$NCCL_VERSION" ]; then
+        echo "Error: You need to set NCCL_VERSION to 2.1, 2.2, 2.3 and 2.4."
+        return 1
+    fi
+
+    # Setup URLs
+    NCCL_URL="https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libnccl2_${NCCL_VERSION_DETAILED}+cuda${CUDA_VERSION}_amd64.deb"
+    NCCL_URL_DEV="https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libnccl-dev_${NCCL_VERSION_DETAILED}+cuda${CUDA_VERSION}_amd64.deb"
+
+    # Setup temporary paths
+    NCCL_TMP_PATH="/tmp/nccl.deb"
+    NCCL_DEV_TMP_PATH="/tmp/nccl-dev.deb"
+
+    NCCL_TMP_DIR_PATH="/tmp/nccl"
+    NCCL_DEV_TMP_DIR_PATH="/tmp/nccl-dev"
+
+    echo "Download binaries."
+    wget "$NCCL_URL" -O "$NCCL_TMP_PATH"
+    wget "$NCCL_URL_DEV" -O "$NCCL_DEV_TMP_PATH"
+
+    mkdir -p "$NCCL_TMP_DIR_PATH"
+    mkdir -p "$NCCL_DEV_TMP_DIR_PATH"
+
+    echo "Extract binaries."
+    cd "$NCCL_TMP_DIR_PATH"
+    ar x "$NCCL_TMP_PATH"
+    tar -xJf data.tar.xz
+    cd "$NCCL_DEV_TMP_DIR_PATH"
+    ar x "$NCCL_DEV_TMP_PATH"
+    tar -xJf data.tar.xz
+
+    echo "Install NCCL files."
+
+    PARENT_BASE_DIR=$(dirname "$CUDA_HOME")
+    if [ ! -w "$PARENT_BASE_DIR" ]; then
+        sudo mv "$NCCL_TMP_DIR_PATH"/usr/lib/x86_64-linux-gnu/libnccl* "$CUDA_HOME/lib64/"
+        sudo rm -f "$CUDA_HOME/include/nccl.h"
+        sudo mv "$NCCL_DEV_TMP_DIR_PATH/usr/include/nccl.h" "$CUDA_HOME/include/nccl.h"
+        sudo rm -f "$CUDA_HOME/lib64/libnccl_static.a"
+        sudo mv "$NCCL_DEV_TMP_DIR_PATH/usr/lib/x86_64-linux-gnu/libnccl_static.a" "$CUDA_HOME/lib64/libnccl_static.a"
+    else
+        mv "$NCCL_TMP_DIR_PATH"/usr/lib/x86_64-linux-gnu/libnccl* "$CUDA_HOME/lib64/"
+        rm -f "$CUDA_HOME/include/nccl.h"
+        mv "$NCCL_DEV_TMP_DIR_PATH/usr/include/nccl.h" "$CUDA_HOME/include/nccl.h"
+        rm -f "$CUDA_HOME/lib64/libnccl_static.a"
+        mv "$NCCL_DEV_TMP_DIR_PATH/usr/lib/x86_64-linux-gnu/libnccl_static.a" "$CUDA_HOME/lib64/libnccl_static.a"
+    fi
+
+    echo "Cleanup files."
+    rm -fr "$NCCL_TMP_DIR_PATH"
+    rm -fr "$NCCL_DEV_TMP_DIR_PATH"
+    rm -f "$NCCL_TMP_PATH"
+    rm -f "$NCCL_DEV_TMP_PATH"
+
+    echo "NCCL $NCCL_VERSION is installed at $CUDA_HOME."
+}
+
+# Install gcc/g++ <version> with apt and select them via update-alternatives. Usage: cuda.gcc.install GCC_VERSION
+cuda.gcc.install() {
+
+    if [ -z "$1" ]; then
+        echo "Please specify a GCC version."
+        return 1
+    fi
+    export GCC_VERSION="$1"
+
+    sudo apt install --yes "gcc-$GCC_VERSION" "g++-$GCC_VERSION"
+
+    sudo update-alternatives --install /usr/bin/gcc gcc "/usr/bin/gcc-$GCC_VERSION" 10
+    sudo update-alternatives --install /usr/bin/g++ g++ "/usr/bin/g++-$GCC_VERSION" 10
+
+    sudo update-alternatives --set gcc "/usr/bin/gcc-$GCC_VERSION"
+    sudo update-alternatives --set g++ "/usr/bin/g++-$GCC_VERSION"
+}
diff --git a/tensorflow/ubuntu-18.04/docker-compose.yml b/tensorflow/ubuntu-18.04/docker-compose.yml
new file mode 100755
index 0000000..3c383ea
--- /dev/null
+++ b/tensorflow/ubuntu-18.04/docker-compose.yml
@@ -0,0 +1,16 @@
+version: '3'
+services:
+  tf:
+    build: .
+    stdin_open: true
+    tty: true
+    volumes:
+      - ../../wheels:/wheels
+    environment:
+      - TF_VERSION_GIT_TAG=${TF_VERSION_GIT_TAG:?TF_VERSION_GIT_TAG}
+      - PYTHON_VERSION=${PYTHON_VERSION:?PYTHON_VERSION}
+      - BAZEL_VERSION=${BAZEL_VERSION:?BAZEL_VERSION}
+      - USE_GPU=${USE_GPU-0}
+      - CUDA_VERSION=${CUDA_VERSION-10.0}
+      - CUDNN_VERSION=${CUDNN_VERSION-7.5}
+      - NCCL_VERSION=${NCCL_VERSION-2.4}