From be979f4d4ab31d83c0ae9b8cbafff8215ce25b35 Mon Sep 17 00:00:00 2001 From: Robert Starmer Date: Wed, 12 Jul 2023 18:07:27 +0000 Subject: [PATCH 1/4] Add bash based build on Ubuntu for faiss library and prerequisites add_dev.sh: ensure disk space for build and python prerequisites build-prereqs.sh: install prerequisites (intel, nvidia) build-faiss.sh: build faiss libs and python (and python swig) libs Produces a python .whl Produces a .tar.gz of the generated libraries --- .gitignore | 4 +- platform/build-faiss/README.md | 32 +++++++++++++ platform/build-faiss/add_dev.sh | 29 ++++++++++++ platform/build-faiss/build-faiss.sh | 66 +++++++++++++++++++++++++++ platform/build-faiss/build-prereqs.sh | 46 +++++++++++++++++++ 5 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 platform/build-faiss/README.md create mode 100644 platform/build-faiss/add_dev.sh create mode 100644 platform/build-faiss/build-faiss.sh create mode 100644 platform/build-faiss/build-prereqs.sh diff --git a/.gitignore b/.gitignore index a5309e6..82286b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -build*/ +build/ +faiss* +faiss/ diff --git a/platform/build-faiss/README.md b/platform/build-faiss/README.md new file mode 100644 index 0000000..982697e --- /dev/null +++ b/platform/build-faiss/README.md @@ -0,0 +1,32 @@ +#! Set up an Ubuntu 22.04 machine to build FAISS + +Assumptions: + +/dev/nvme0n1 exists and can be reformatted +NVIDIA GPU installed + +## base setup + +Add python prerequisites +Mount /dev/nvme0n1 on /models +Link .cache and .local from ubuntu to /models + +```sh +bash add_dev.sh +``` + +## Build prerequisites + +Add Nvidia and Intel oneAPI libraries needed to build FAISS + +```sh +bash build-prereqs.sh +``` + +## Build FAISS + +Download the git repository and build it! 
+ +```sh +bash build-faiss.sh +``` diff --git a/platform/build-faiss/add_dev.sh b/platform/build-faiss/add_dev.sh new file mode 100644 index 0000000..11d309b --- /dev/null +++ b/platform/build-faiss/add_dev.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +sudo apt-get update && sudo apt-get dist-upgrade -y + +# Install python and build essentials and essential libraries +sudo apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev + +sudo pip install -U pip setuptools + +# mount nvme disk on /models +sudo mkdir /models +sudo mkfs.xfs /dev/nvme0n1 +echo '/dev/nvme0n1 /models xfs defaults 0 2' | sudo tee -a /etc/fstab +sudo mount -a +sudo chmod 777 /models + +# Add pointers to large data dirs into the 'ubuntu' user $HOME +mkdir /models/cache +mv ~/.cache ~/.cache.orig +ln -s /models/cache ~/.cache +mkdir /models/dev +ln -s /models/dev ~/dev +mkdir /models/local +ln -s /models/local ~/.local + +echo 'export PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc + + + diff --git a/platform/build-faiss/build-faiss.sh b/platform/build-faiss/build-faiss.sh new file mode 100644 index 0000000..5b0414b --- /dev/null +++ b/platform/build-faiss/build-faiss.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +git clone https://github.com/facebookresearch/faiss +cd faiss || exit 1 + +# Configure paths and set environment variables +export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin +source /opt/intel/oneapi/setvars.sh + +#export CC=gcc-12 +#export CXX=g++-12 +# Configure using cmake + +LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.1.0/ CXX=g++-11 cmake -B build \ + -DBUILD_SHARED_LIBS=ON \ + -DBUILD_TESTING=ON \ + -DFAISS_ENABLE_GPU=ON \ + -DFAISS_OPT_LEVEL=avx2 \ + -DFAISS_ENABLE_C_API=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DBLA_VENDOR=Intel10_64_dyn -Wno-dev . +#cmake -B build . 
\ +# -DBUILD_SHARED_LIBS=ON \ +# -DFAISS_ENABLE_GPU=ON \ +# -DFAISS_ENABLE_PYTHON=ON \ +# -DFAISS_ENABLE_RAFT=OFF \ +# -DBUILD_TESTING=ON \ +# -DBUILD_SHARED_LIBS=ON \ +# -DFAISS_ENABLE_C_API=ON \ +# -DCMAKE_BUILD_TYPE=Release \ +# -DFAISS_OPT_LEVEL=avx2 -Wno-dev + +# Now build faiss + +make -C build -j$(nproc) faiss +make -C build -j$(nproc) swigfaiss +pushd build/faiss/python;python3 setup.py bdist_wheel;popd + +# and install it. NOTE: this will install into the pyenv virtualenv 'aw' from the beginning of the script + +sudo -E make -C build -j$(nproc) install +pip install --force-reinstall build/faiss/python/dist/faiss-*.whl +cp build/faiss/python/dist/faiss-*.whl ../ + +# add libraries to /usr/local/lib +mkdir -p faiss-libs + +for n in build/faiss/python/*so build/faiss/*so + do + sudo cp "$n" /usr/local/lib/ + cp "$n" faiss-libs/ + done +tar cfz ../faiss-libs.tgz faiss-libs/* +rm -rf faiss-libs + +# Add ldconfig settings for intel and faiss libraries + +echo '/opt/intel/oneapi/mkl/2023.1.0/lib/intel64' | sudo tee /etc/ld.so.conf.d/aw_intel.conf +echo '/usr/local/lib' | sudo tee /etc/ld.so.conf.d/aw_faiss.conf + +# Update the ld cache + +sudo -E ldconfig + +cd .. 
+rm -rf faiss diff --git a/platform/build-faiss/build-prereqs.sh b/platform/build-faiss/build-prereqs.sh new file mode 100644 index 0000000..96bb130 --- /dev/null +++ b/platform/build-faiss/build-prereqs.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + + +# Add a couple Python prerequisites +pip install -U pip setuptools wheel +pip install numpy swig torch + +export DEBIAN_FRONTEND=noninteractive + +# Get Intel OneAPI for BLAS support +# From: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2023-0/apt.html + +# download the key to system keyring +wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ +| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + +# add signed entry to apt sources and configure the APT client to use Intel repository: +echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + + +sudo -E apt update +sudo -E apt install dkms intel-basekit -y + +## Get CUDA and install it + +curl -sLO https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run +sudo bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page + +# ensure we're using the latest cmake +wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null + +echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null + +# add the cuda tools to build against + +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo -E apt-get update +sudo -E apt-get install cmake cuda-toolkit -y + +#Verify python and pytorch work + +python3 -c 'import 
torch; print(f"Is CUDA Available: {torch.cuda.is_available()}")' + From 1e3c079449191ccd65fbdd0d57bf107c7b2fd2d9 Mon Sep 17 00:00:00 2001 From: Robert Starmer Date: Sun, 16 Jul 2023 06:06:48 +0000 Subject: [PATCH 2/4] Add docker build.sh build.sh - build as "root" configured for docker/podman README.md - instructions for running podman build add_dev/build-prereqs.sh - shuffle python install --- .gitignore | 1 + platform/build-faiss/README.md | 27 ++++++- platform/build-faiss/add_dev.sh | 8 -- platform/build-faiss/build-prereqs.sh | 21 +++-- platform/build-faiss/build.sh | 106 ++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 15 deletions(-) create mode 100644 platform/build-faiss/build.sh diff --git a/.gitignore b/.gitignore index 82286b1..e77f55e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ build/ faiss* faiss/ +cuda* diff --git a/platform/build-faiss/README.md b/platform/build-faiss/README.md index 982697e..645411f 100644 --- a/platform/build-faiss/README.md +++ b/platform/build-faiss/README.md @@ -1,5 +1,30 @@ -#! Set up an Ubuntu 22.04 machine to build FAISS +# Set up an Ubuntu 22.04 machine to build FAISS +## Setup for build in Ubuntu 22.04 with podman + +Add podman + +```sh +sudo apt install podman -y +``` + +Run build in podman: + +```sh +podman run --rm -it -v ${PWD}/dev/origin/:/origin ubuntu:22.04 /bin/bash /origin/platform/build-faiss/build.sh +``` + +This should produce two files: + +* faiss-*.whl + + a python wheel for faiss deployment + +* faiss-libs.tgz + + a set of libraries for FAISS. Note Intel libraries are still required as well. 
+ +## Setup for Ubuntu 22.04 bare metal in OCI Assumptions: /dev/nvme0n1 exists and can be reformatted diff --git a/platform/build-faiss/add_dev.sh b/platform/build-faiss/add_dev.sh index 11d309b..76671e5 100644 --- a/platform/build-faiss/add_dev.sh +++ b/platform/build-faiss/add_dev.sh @@ -2,11 +2,6 @@ sudo apt-get update && sudo apt-get dist-upgrade -y -# Install python and build essentials and essential libraries -sudo apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev - -sudo pip install -U pip setuptools - # mount nvme disk on /models sudo mkdir /models sudo mkfs.xfs /dev/nvme0n1 @@ -24,6 +19,3 @@ mkdir /models/local ln -s /models/local ~/.local echo 'export PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc - - - diff --git a/platform/build-faiss/build-prereqs.sh b/platform/build-faiss/build-prereqs.sh index 96bb130..d7a1568 100644 --- a/platform/build-faiss/build-prereqs.sh +++ b/platform/build-faiss/build-prereqs.sh @@ -1,6 +1,15 @@ #!/bin/bash set -e +export PATH=$HOME/.local/bin:$PATH +export DEBIAN_FRONTEND=noninteractive + +sudo -E apt-get update && sudo -E apt-get dist-upgrade -y + +# Install python and build essentials and essential libraries +sudo -E apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev lib +xml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev neovim curl git wget + # Add a couple Python prerequisites @@ -14,10 +23,10 @@ export DEBIAN_FRONTEND=noninteractive # download the key to system keyring wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ -| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null +| gpg --dearmor | sudo -E tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null # add signed entry to apt sources and configure the APT client to use Intel repository: -echo "deb 
[signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list +echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo -E tee /etc/apt/sources.list.d/oneAPI.list sudo -E apt update @@ -26,17 +35,17 @@ sudo -E apt install dkms intel-basekit -y ## Get CUDA and install it curl -sLO https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run -sudo bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page +sudo -E bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page # ensure we're using the latest cmake -wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null +wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo -E tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null -echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null +echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo -E tee /etc/apt/sources.list.d/kitware.list >/dev/null # add the cuda tools to build against wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb -sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo -E dpkg -i cuda-keyring_1.1-1_all.deb sudo -E apt-get update sudo -E apt-get install cmake cuda-toolkit -y diff --git a/platform/build-faiss/build.sh b/platform/build-faiss/build.sh new file mode 100644 index 0000000..c438795 --- /dev/null +++ b/platform/build-faiss/build.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +if [ -d /origin ]; then + cd /origin/platform/build-faiss +else + echo 
"artificialwisdomai/origin project needs to exist" + exit 1 +fi + +if [[ ! `id -u` -eq 0 ]]; then + echo "This needs to run as root" + exit 1 +fi + +export PATH=$HOME/.local/bin:$PATH +export DEBIAN_FRONTEND=noninteractive + +apt-get update && apt-get dist-upgrade -y + +# Install python and build essentials and essential libraries +apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev neovim curl git wget + +# Update Setuptools +python3 -m pip install -U pip setuptools wheel + +# Add a couple Python prerequisites +pip install numpy swig torch + +# Get Intel OneAPI for BLAS support +# From: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2023-0/apt.html + +# download the key to system keyring +wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ +| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + +# add signed entry to apt sources and configure the APT client to use Intel repository: +echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list + +apt update +apt install dkms intel-basekit -y + +## Get CUDA and install it + +curl -sLO https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run +bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page + +# ensure we're using the latest cmake +wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null + +echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null + +# add the cuda tools to build against + +wget 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb +dpkg -i cuda-keyring_1.1-1_all.deb +apt-get update +apt-get install cmake cuda-toolkit -y + +#Verify python and pytorch work + +python3 -c 'import torch; print(f"Is CUDA Available: {torch.cuda.is_available()}")' + +git clone https://github.com/facebookresearch/faiss +cd faiss || exit 1 + +# Configure paths and set environment variables +export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin +source /opt/intel/oneapi/setvars.sh + +# Configure using cmake + +LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.2.0/ CXX=g++-11 cmake -B build \ + -DBUILD_SHARED_LIBS=ON \ + -DBUILD_TESTING=ON \ + -DFAISS_ENABLE_GPU=ON \ + -DFAISS_OPT_LEVEL=avx2 \ + -DFAISS_ENABLE_C_API=ON \ + -DFAISS_ENABLE_PYTHON=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DFAISS_ENABLE_RAFT=OFF \ + -DBLA_VENDOR=Intel10_64_dyn -Wno-dev . + +# Now build faiss + +make -C build -j$(nproc) faiss +make -C build -j$(nproc) swigfaiss +pushd build/faiss/python;python3 setup.py bdist_wheel;popd + +# and install the C/C++ libraries. NOTE: the wheel is only copied out below, not pip-installed (see the commented-out line) + +make -C build -j$(nproc) install +#pip install --force-reinstall build/faiss/python/dist/faiss-1.7.4-py3-none-any.whl +cp build/faiss/python/dist/faiss-*.whl ../ + +# collect the built libraries in ../faiss-libs and package them as ../faiss-libs.tgz +mkdir -p ../faiss-libs + +for n in build/faiss/python/*so build/faiss/*so + do + cp "$n" ../faiss-libs/ + done +tar cfz ../faiss-libs.tgz -C .. faiss-libs +rm -rf ../faiss-libs + +cd .. 
+#rm -rf faiss From 998f97adbc7fd096bc5f95cd2763949f8120bfa2 Mon Sep 17 00:00:00 2001 From: Robert Starmer Date: Sat, 22 Jul 2023 19:16:53 +0000 Subject: [PATCH 3/4] Fix erroneous linebreak in prereqs --- platform/build-faiss/build-prereqs.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/platform/build-faiss/build-prereqs.sh b/platform/build-faiss/build-prereqs.sh index d7a1568..8c8a9f5 100644 --- a/platform/build-faiss/build-prereqs.sh +++ b/platform/build-faiss/build-prereqs.sh @@ -7,8 +7,7 @@ export DEBIAN_FRONTEND=noninteractive sudo -E apt-get update && sudo -E apt-get dist-upgrade -y # Install python and build essentials and essential libraries -sudo -E apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev lib -xml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev neovim curl git wget +sudo -E apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev neovim curl git wget From 6aa2974782293211bf02e142bc363ee1d43c9084 Mon Sep 17 00:00:00 2001 From: Robert Starmer Date: Sat, 22 Jul 2023 21:54:02 +0000 Subject: [PATCH 4/4] Update build and preqs add run_training --- platform/build-faiss/build-faiss.sh | 3 ++- platform/build-faiss/build-prereqs.sh | 38 ++++++++++----------------- retrieval/run_training.sh | 18 +++++++++++++ retrieval/train.py | 4 +-- 4 files changed, 36 insertions(+), 27 deletions(-) create mode 100644 retrieval/run_training.sh diff --git a/platform/build-faiss/build-faiss.sh b/platform/build-faiss/build-faiss.sh index 5b0414b..8a6aa27 100644 --- a/platform/build-faiss/build-faiss.sh +++ b/platform/build-faiss/build-faiss.sh @@ -10,8 +10,9 @@ source /opt/intel/oneapi/setvars.sh #export CC=gcc-12 #export CXX=g++-12 # Configure using cmake +#export CXX=g++-11 -LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.1.0/ CXX=g++-11 cmake -B 
build \ +LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.2.0/ cmake -B build \ -DBUILD_SHARED_LIBS=ON \ -DBUILD_TESTING=ON \ -DFAISS_ENABLE_GPU=ON \ diff --git a/platform/build-faiss/build-prereqs.sh b/platform/build-faiss/build-prereqs.sh index 8c8a9f5..636080b 100644 --- a/platform/build-faiss/build-prereqs.sh +++ b/platform/build-faiss/build-prereqs.sh @@ -4,49 +4,39 @@ set -e export PATH=$HOME/.local/bin:$PATH export DEBIAN_FRONTEND=noninteractive +cat < /dev/null - -# add signed entry to apt sources and configure the APT client to use Intel repository: echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo -E tee /etc/apt/sources.list.d/oneAPI.list - -sudo -E apt update -sudo -E apt install dkms intel-basekit -y - -## Get CUDA and install it - -curl -sLO https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run -sudo -E bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page - # ensure we're using the latest cmake wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo -E tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null - echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo -E tee /etc/apt/sources.list.d/kitware.list >/dev/null # add the cuda tools to build against - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb sudo -E dpkg -i cuda-keyring_1.1-1_all.deb + +# Update and install MKL, Cmake, and Cuda-toolkit sudo -E apt-get update -sudo -E apt-get install cmake cuda-toolkit -y +sudo -E apt install intel-oneapi-mkl cmake cuda-11-8 -y #Verify python and pytorch work diff --git a/retrieval/run_training.sh b/retrieval/run_training.sh new file mode 100644 index 0000000..1de8303 --- /dev/null +++ b/retrieval/run_training.sh @@ -0,0 +1,18 @@ 
+#!/bin/bash + +[ -d /tmp/istio ] || git clone --depth 1 https://github.com/istio/istio.io.git /tmp/istio/ +[ -d ./en ] || mv /tmp/istio/content/en ./en + +rm -rf chunks ./*json +mkdir -p ./chunks + +if [ -d .venv ]; then + source .venv/bin/activate +else + python3 -m venv .venv + source .venv/bin/activate + pip install -U pip wheel -r requirements.txt + pip install ../platform/build-faiss/faiss-*.whl +fi + +python3 train.py diff --git a/retrieval/train.py b/retrieval/train.py index 3e520a5..d6ebf1d 100644 --- a/retrieval/train.py +++ b/retrieval/train.py @@ -80,7 +80,7 @@ retro=retro, knn=2, chunk_size=64, - documents_path="/home/sdake/en", + documents_path="./en", # models/RedPajama-Data-1T-Sample", glob="**/*.md", chunks_memmap_path="./chunks/train.chunks.dat", @@ -132,7 +132,7 @@ " [aw.a]•[/aw.a] [aw.b]retrieval_model[/aw.b][aw.a]=[/aw.a][aw.b]artificialwisdomai[/aw.b][aw.a]/[/aw.a][aw.b]retroformer [aw.a]•[/aw.a] [aw.b]foundation_model[/aw.b][aw.a]=[/aw.a][aw.b]mosaicml[/aw.b][aw.a]/[/aw.a][aw.b]mpt30b[/aw.b] [aw.a]•[/aw.a] " ) for epoch in range(EPOCH_MAX): - dataloader = iter(wrapper.get_dataloader(batch_size=4, shuffle=True)) + dataloader = iter(wrapper.get_dataloader(batch_size=2, shuffle=True)) task_id = progress_bar.add_task( description="Epoch {}".format(epoch), loss="loss=nil", total=len(dataloader) )