Review notes for this revision of the patch (text before the first
"diff --git" line is ignored by `git apply` and `patch`):

  * NCCL is cloned at a pinned tag (ARG NCCL_VERSION, overridable with
    --build-arg) into /tmp/nccl, built with a bounded job count, and the
    source tree is removed in the same layer.
  * The Open MPI lib directory is registered in /etc/ld.so.conf.d before
    ldconfig runs, and its bin directory is put on PATH.
  * The backend build stays disabled as in the reviewed change, but is marked
    with a TODO and its MPI_HOME now matches the Open MPI --prefix.
  * NCCL_LAUNCH_MODE, WORKSPACE, WORKDIR /workspace and the sshd X11 setup are
    no longer commented out; they do not depend on the backend build.
  * The commented-out HPC-X download stanza was dropped.
  * NOTE(review): the reviewed copy had its line breaks collapsed, so the
    indentation and blank context lines below are reconstructed, and the
    Dockerfile "index" header is omitted because the post-image changed.
    Run `git apply --check`; if it fails, regenerate the hunks from the
    working tree.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f86ee8a..d2ec963 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -114,7 +114,7 @@ else()
   FetchContent_Declare(
     repo-ft
     GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git
-    GIT_TAG 23b37c7b158d05f6206119b1ef831c8e63cc1eb9
+    GIT_TAG 1342ef60836bd480995501df2be2d3a5c5750ba2
     GIT_SHALLOW ON
   )
 endif()
diff --git a/docker/Dockerfile b/docker/Dockerfile
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG TRITON_VERSION=23.05
+ARG TRITON_VERSION=23.06
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
 FROM ${BASE_IMAGE}
 
+USER root
+
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         autoconf \
@@ -39,32 +41,58 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.0.1+cu118 && \
-    pip3 install --no-cache-dir --extra-index-url https://pypi.ngc.nvidia.com regex fire tritonclient[all] && \
+RUN pip3 install --no-cache-dir --extra-index-url https://pypi.ngc.nvidia.com regex fire tritonclient[all] && \
     pip3 install --no-cache-dir accelerate transformers huggingface_hub tokenizers SentencePiece sacrebleu datasets tqdm omegaconf rouge_score && \
     pip3 install --no-cache-dir cmake==3.24.3
 
+# Remove /opt/hpcx; NCCL and Open MPI are built from source below.
+RUN rm -rf /opt/hpcx
+
+# Build NCCL from a pinned release tag so image rebuilds are reproducible.
+ARG NCCL_VERSION=v2.18.3-1
+RUN git clone --depth 1 --branch "${NCCL_VERSION}" https://github.com/NVIDIA/nccl.git /tmp/nccl && \
+    cd /tmp/nccl && \
+    make -j"$(nproc)" src.build CUDA_HOME=/usr/local/cuda BUILDDIR=/usr && \
+    rm -rf /tmp/nccl
+
+# Build Open MPI into its own prefix and register it with the loader and PATH.
+RUN mkdir -p /tmp/openmpi && \
+    cd /tmp/openmpi && \
+    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz && \
+    tar zxf openmpi-4.1.5.tar.gz && \
+    cd openmpi-4.1.5 && \
+    ./configure --enable-orterun-prefix-by-default --prefix=/usr/local/opt/openmpi && \
+    make -j "$(nproc)" all && \
+    make install && \
+    echo /usr/local/opt/openmpi/lib > /etc/ld.so.conf.d/openmpi.conf && \
+    ldconfig && \
+    rm -rf /tmp/openmpi
+ENV PATH=/usr/local/opt/openmpi/bin:${PATH}
+
 # backend build
 ADD . /workspace/build/fastertransformer_backend
 RUN mkdir -p /workspace/build/fastertransformer_backend/build
 
 WORKDIR /workspace/build/fastertransformer_backend/build
-ARG FORCE_BACKEND_REBUILD=0
-RUN CUDAFLAGS="-include stdio.h" cmake \
-      -D CMAKE_EXPORT_COMPILE_COMMANDS=1 \
-      -D CMAKE_BUILD_TYPE=Release \
-      -D ENABLE_FP8=OFF \
-      -D CMAKE_INSTALL_PREFIX=/opt/tritonserver \
-      -D TRITON_COMMON_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
-      -D TRITON_CORE_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
-      -D TRITON_BACKEND_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
-      .. && \
-    cd _deps/repo-ft-src/ && \
-    git log | head -n 3 2>&1 | tee /workspace/build/fastertransformer_backend/FT_version.txt && \
-    cd /workspace/build/fastertransformer_backend/build && \
-    CUDAFLAGS="-include stdio.h" make -O -j"$(grep -c ^processor /proc/cpuinfo)" install && \
-    rm /workspace/build/fastertransformer_backend/build/bin/*_example -rf && \
-    rm /workspace/build/fastertransformer_backend/build/lib/lib*Backend.so -rf
+# TODO(review): the backend build is disabled by this change; re-enable it
+# before this image is used to serve the FasterTransformer backend.
+# ARG FORCE_BACKEND_REBUILD=0
+# RUN CUDAFLAGS="-include stdio.h" cmake \
+#       -D CMAKE_EXPORT_COMPILE_COMMANDS=1 \
+#       -D CMAKE_BUILD_TYPE=Release \
+#       -D ENABLE_FP8=OFF \
+#       -D MPI_HOME=/usr/local/opt/openmpi \
+#       -D CMAKE_INSTALL_PREFIX=/opt/tritonserver \
+#       -D TRITON_COMMON_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
+#       -D TRITON_CORE_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
+#       -D TRITON_BACKEND_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
+#       .. && \
+#     cd _deps/repo-ft-src/ && \
+#     git log | head -n 3 2>&1 | tee /workspace/build/fastertransformer_backend/FT_version.txt && \
+#     cd /workspace/build/fastertransformer_backend/build && \
+#     CUDAFLAGS="-include stdio.h" make -O -j"$(grep -c ^processor /proc/cpuinfo)" install && \
+#     rm /workspace/build/fastertransformer_backend/build/bin/*_example -rf && \
+#     rm /workspace/build/fastertransformer_backend/build/lib/lib*Backend.so -rf
 
 ENV NCCL_LAUNCH_MODE=GROUP
 ENV WORKSPACE /workspace