diff --git a/.github/workflows/self-hosted.yml b/.github/workflows/self-hosted.yml new file mode 100644 index 0000000..fdd39fe --- /dev/null +++ b/.github/workflows/self-hosted.yml @@ -0,0 +1,107 @@ +name: Self-Hosted + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + workflow_dispatch: + +env: + build_dir: "build" + +jobs: + build: + name: Build ${{ matrix.os }} GCC ${{ matrix.gcc }} CUDA ${{ matrix.cuda }} + runs-on: self-hosted + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-24.04 + cuda: "12.8" + gcc: 13 + env: + config: "Release" + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set environment variables + run: | + echo "CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}" >> $GITHUB_ENV + echo "/usr/local/cuda-${{ matrix.cuda }}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=/usr/local/cuda-${{ matrix.cuda }}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV + echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> $GITHUB_ENV + + - name: Configure CMake build + run: | + cmake . 
-B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -DCUBOOL_BUILD_TESTS=ON + + - name: Build library sources + run: | + cmake --build ${{ env.build_dir }} --target all --verbose -j `nproc` + + test: + name: Test GPU ${{ matrix.gpu }} CUDA ${{ matrix.cuda }} + needs: build + runs-on: self-hosted + strategy: + fail-fast: false + matrix: + include: + - gpu: NVIDIA-GeForce-GT-1030 + cuda: "12.9" + env: + unit-test-file: gpu_test_all.log + regression-test-file: gpu_test_regression.log + + steps: + - name: Run unit-tests + working-directory: ${{ env.build_dir }} + run: | + bash scripts/run_tests_all.sh | tee ${{ env.unit-test-file }} + + - name: Upload unit tests results + uses: actions/upload-artifact@v4 + with: + name: ${{ env.unit-test-file }} + path: ${{ env.build_dir }}/${{ env.unit-test-file }} + + - name: Check for unit tests results + working-directory: ${{ env.build_dir }} + run: | + ! grep -q "FAILED" ${{ env.unit-test-file }} + + - name: Run regression-tests + working-directory: ${{ env.build_dir }}/python + run: | + bash run_tests.sh 2>&1 | tee ${{ env.regression-test-file }} + + - name: Upload regression tests results + uses: actions/upload-artifact@v4 + with: + name: ${{ env.regression-test-file }} + path: ${{ env.build_dir }}/python/${{ env.regression-test-file }} + + - name: Check for regression tests results + working-directory: ${{ env.build_dir }}/python + run: | + ! 
grep -q "FAILED" ${{ env.regression-test-file }} + + clean: + name: Cleanup workspace + needs: test + if: always() + runs-on: self-hosted + + steps: + - name: Cleanup workspace + run: | + rm -rf ${{ github.workspace }}/* + rm -rf ${{ github.workspace }}/.* diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index cd33dd3..bf22ee3 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -1,5 +1,3 @@ -# Original script from https://github.com/ptheywood/cuda-cmake-github-actions - name: Ubuntu on: @@ -9,74 +7,97 @@ on: branches: [ master ] workflow_dispatch: +env: + build_dir: "build" + artifact: "cubool-ubuntu-build.tar.xz" + jobs: build: - name: Build ${{ matrix.os }} GCC ${{ matrix.gcc }} CUDA ${{ matrix.cuda }} + name: Build ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - - os: ubuntu-18.04 - cuda: "10.1" - gcc: 8 + - os: ubuntu-22.04 env: - build_dir: "build" config: "Release" - artifact: "cubool-ubuntu-build.tar.xz" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - name: Install CUDA - env: - cuda: ${{ matrix.cuda }} + - name: Configure CMake build run: | - source ./scripts/install_cuda_ubuntu.sh - if [[ $? -eq 0 ]]; then - # Set paths for subsequent steps, using ${CUDA_PATH} - echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" - echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV - echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - fi - shell: bash - - - name: Install and configure GCC and GXX + cmake . 
-B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -DCUBOOL_BUILD_TESTS=ON -DCUBOOL_WITH_CUDA=OFF + + - name: Build library sources run: | - sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} - echo "СС=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV - echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV - echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV + cmake --build ${{ env.build_dir }} --target all --verbose -j `nproc` - - name: Configure CMake build - run: cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -DCUBOOL_BUILD_TESTS=YES + - name: Prepare upload binary + run: | + tar cfz ${{ env.artifact }} ${{ env.build_dir }} - - name: Build library sources + - name: Upload binary + uses: actions/upload-artifact@v4 + with: + name: ${{ env.artifact }} + path: ${{ env.artifact }} + + test: + name: Test CPU ${{ matrix.cpu }} + needs: build + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + cpu: AMD-EPYC-7763 + env: + unit-test-file: cpu_test_all.log + regression-test-file: cpu_test_regression.log + + steps: + - uses: actions/download-artifact@v4 + with: + name: ${{ env.artifact }} + + - name: Unarchive artifact + run: | + tar xzf ${{ env.artifact }} + rm ${{ env.artifact }} + + - name: Run unit-tests working-directory: ${{ env.build_dir }} - run: cmake --build . --target all --verbose -j `nproc` + run: | + bash scripts/run_tests_all.sh | tee ${{ env.unit-test-file }} + + - name: Upload unit tests resutls + uses: actions/upload-artifact@v4 + with: + name: ${{ env.unit-test-file }} + path: ${{ env.build_dir }}/${{ env.unit-test-file }} - - name: Run unit-tests (sequential backend) + - name: Check for unit tests results working-directory: ${{ env.build_dir }} - run: bash scripts/run_tests_fallback.sh - shell: bash + run: | + ! 
grep -q "FAILED" ${{ env.unit-test-file }} - - name: Run regression-tests (sequential backend) + - name: Run regression-tests working-directory: ${{ env.build_dir }}/python - run: bash run_tests.sh - shell: bash - - - name: Prepare upload binary - shell: bash - run: tar cfz ${{ env.artifact }} ${{ env.build_dir }} + run: | + bash run_tests.sh 2>&1 | tee ${{ env.regression-test-file }} - - name: Upload binary - uses: actions/upload-artifact@v2 + - name: Upload regression tests resutls + uses: actions/upload-artifact@v4 with: - name: ${{ env.artifact }} - path: ${{ env.artifact }} \ No newline at end of file + name: ${{ env.regression-test-file }} + path: ${{ env.build_dir }}/python/${{ env.regression-test-file }} + + - name: Check for regression tests results + working-directory: ${{ env.build_dir }}/python + run: | + ! grep -q "FAILED" ${{ env.regression-test-file }} diff --git a/.gitmodules b/.gitmodules index 3adb3ec..f98d82d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "deps/gtest"] path = deps/gtest url = https://github.com/google/googletest.git -[submodule "deps/cub"] - path = deps/cub - url = https://github.com/NVIDIA/cub.git diff --git a/CMakeLists.txt b/CMakeLists.txt index fb9212a..a6404f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,8 @@ option(CUBOOL_WITH_NAIVE "Build library with naive and naive-shared dens option(CUBOOL_BUILD_TESTS "Build project unit-tests with gtest" ON) option(CUBOOL_COPY_TO_PY_PACKAGE "Copy compiled shared library into python package folder (for package use purposes)" ON) +option(CUBOOL_USE_NSPARSE_MERGE_FUNCTOR "Use nsparse optimiztion for matrix addition function" ON) + set(CUBOOL_VERSION_MAJOR 1) set(CUBOOL_VERSION_MINOR 0) set(CUBOOL_VERSION_SUB 0) @@ -32,14 +34,6 @@ endif() # Configure cuda dependencies if (CUBOOL_WITH_CUDA) - message(STATUS "Add cub as cuda utility") - set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE) - set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE) - 
set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) - add_subdirectory(deps/cub) - add_library(cub INTERFACE IMPORTED) - target_link_libraries(cub INTERFACE CUB::CUB) - message(STATUS "Add nsparse library as crs matrix multiplication backend") add_subdirectory(deps/nsparse-um) endif() @@ -61,4 +55,4 @@ add_subdirectory(cubool) file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) # Copy python related stuff -file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) \ No newline at end of file +file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) diff --git a/README.md b/README.md index 160a767..e0bcb23 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ $ bash ./scripts/run_tests_all.sh By default, the following cmake options will be automatically enabled: - `CUBOOL_WITH_CUDA` - build library with actual cuda backend -- `CUBOOL_WITH_SEQUENTIAL` - build library witt cpu based backend +- `CUBOOL_WITH_SEQUENTIAL` - build library with cpu based backend - `CUBOOL_WITH_TESTS` - build library unit-tests collection > Note: in order to provide correct GCC version for CUDA sources compiling, diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 092504b..ed3cb41 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -87,6 +87,7 @@ set(CUBOOL_C_API_SOURCES sources/cuBool_Matrix_Reduce2.cpp sources/cuBool_Matrix_EWiseAdd.cpp sources/cuBool_Matrix_EWiseMult.cpp + sources/cuBool_Matrix_EWiseMultInverted.cpp sources/cuBool_Vector_New.cpp sources/cuBool_Vector_Build.cpp sources/cuBool_Vector_SetElement.cpp @@ -125,6 +126,7 @@ if (CUBOOL_WITH_CUDA) sources/cuda/cuda_matrix.cu sources/cuda/cuda_matrix_ewiseadd.cu sources/cuda/cuda_matrix_ewisemult.cu + sources/cuda/cuda_matrix_ewisemult_inverted.cu sources/cuda/cuda_matrix_kronecker.cu sources/cuda/cuda_matrix_multiply.cu sources/cuda/cuda_matrix_transpose.cu @@ -147,6 +149,7 @@ if (CUBOOL_WITH_CUDA) sources/cuda/kernels/spgemv_t.cuh sources/cuda/kernels/spewiseadd.cuh sources/cuda/kernels/spewisemult.cuh + 
sources/cuda/kernels/spewisemultinverted.cuh sources/cuda/kernels/sptranspose.cuh sources/cuda/kernels/sptranspose2.cuh sources/cuda/kernels/spkron.cuh @@ -173,6 +176,8 @@ if (CUBOOL_WITH_SEQUENTIAL) sources/sequential/sq_ewiseadd.hpp sources/sequential/sq_ewisemult.cpp sources/sequential/sq_ewisemult.hpp + sources/sequential/sq_ewisemultinverted.cpp + sources/sequential/sq_ewisemultinverted.hpp sources/sequential/sq_spgemm.cpp sources/sequential/sq_spgemm.hpp sources/sequential/sq_spgemv.cpp @@ -201,11 +206,9 @@ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MAJOR=${CUBOOL_VERSION_ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MINOR=${CUBOOL_VERSION_MINOR}) target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_SUB=${CUBOOL_VERSION_SUB}) -target_compile_features(cubool PUBLIC cxx_std_14) +target_compile_definitions(cubool PRIVATE CUBOOL_USE_NSPARSE_MERGE_FUNCTOR=$) -target_compile_options(cubool PRIVATE $<$: -Wall>) -target_compile_options(cubool PRIVATE $<$,$>: -O2>) -target_compile_options(cubool PRIVATE $<$,$>: -O0>) +target_compile_features(cubool PUBLIC cxx_std_17) set_target_properties(cubool PROPERTIES CXX_STANDARD 17) set_target_properties(cubool PROPERTIES CXX_STANDARD_REQUIRED ON) @@ -219,7 +222,6 @@ endforeach() if (CUBOOL_WITH_CUDA) set_target_properties(cubool PROPERTIES CUDA_STANDARD 14) set_target_properties(cubool PROPERTIES CUDA_STANDARD_REQUIRED ON) - set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ @@ -256,4 +258,4 @@ if (CUBOOL_COPY_TO_PY_PACKAGE) "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}" "${CMAKE_BINARY_DIR}/python/pycubool" COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder") -endif() \ No newline at end of file +endif() diff --git a/cubool/include/cubool/cubool.h b/cubool/include/cubool/cubool.h index cd16bb3..b5f7885 100644 --- a/cubool/include/cubool/cubool.h +++ 
b/cubool/include/cubool/cubool.h @@ -925,4 +925,30 @@ CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Kronecker( cuBool_Hints hints ); +/** + * Performs result = matrix * ~mask, where + * '*' is element-wise boolean 'and' operation + * '~' is operation for invert matrix (0 swaps to 1 and 1 to 0) + * + * @note To perform this operation matrices must have the same dimensions + * dim(matrix) = M x N + * dim(mask) = M x N + * dim(result) = M x N + * + * @note Pass `CUBOOL_HINT_TIME_CHECK` hint to measure operation time + * + * @param result[out] Destination matrix to store result + * @param matrix Source matrix to be multiplied + * @param mask Source matrix to be inverted and multiplied + * @param hints Hints for the operation + * + * @return Error code on this operation + */ +CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted( + cuBool_Matrix result, + cuBool_Matrix matrix, + cuBool_Matrix mask, + cuBool_Hints hints +); + #endif //CUBOOL_CUBOOL_H diff --git a/cubool/sources/backend/matrix_base.hpp index 5332031..8842eb9 100644 --- a/cubool/sources/backend/matrix_base.hpp +++ b/cubool/sources/backend/matrix_base.hpp @@ -49,6 +49,7 @@ namespace cubool { virtual void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; virtual void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; virtual void eWiseMult(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; + virtual void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) = 0; virtual index getNrows() const = 0; virtual index getNcols() const = 0; @@ -59,4 +60,4 @@ } -#endif //CUBOOL_MATRIX_BASE_HPP \ No newline at end of file +#endif //CUBOOL_MATRIX_BASE_HPP diff --git a/cubool/sources/core/matrix.cpp index 571b010..49e35c3 100644 --- a/cubool/sources/core/matrix.cpp +++ b/cubool/sources/core/matrix.cpp @@ -349,6 +349,46 
@@ namespace cubool { mHnd->eWiseMult(*a->mHnd, *b->mHnd, false); } + void Matrix::eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) { + + const auto* mat = dynamic_cast<const Matrix*>(&matrix); + const auto* msk = dynamic_cast<const Matrix*>(&mask); + + CHECK_RAISE_ERROR(mat != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class"); + CHECK_RAISE_ERROR(msk != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class"); + + index M = mat->getNrows(); + index N = msk->getNcols(); + + CHECK_RAISE_ERROR(M == msk->getNrows(), InvalidArgument, "Passed matrices have incompatible size"); + CHECK_RAISE_ERROR(N == mat->getNcols(), InvalidArgument, "Passed matrices have incompatible size"); + + CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Matrix has incompatible size for operation result"); + CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result"); + + mat->commitCache(); + msk->commitCache(); + this->releaseCache(); + + if (checkTime) { + TIMER_ACTION(timer, mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false)); + + LogStream stream(*Library::getLogger()); + stream << Logger::Level::Info + << "Time: " << timer.getElapsedTimeMs() << " ms " + << "Matrix::eWiseMultInverted: " + << this->getDebugMarker() << " = " + << mat->getDebugMarker() << " * ~" + << msk->getDebugMarker() << LogStream::cmt; + + return; + } + + mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false); + } + + + index Matrix::getNrows() const { return mHnd->getNrows(); } @@ -396,4 +436,4 @@ namespace cubool { // Clear arrays releaseCache(); } -} \ No newline at end of file +} diff --git a/cubool/sources/core/matrix.hpp index 20cb890..6c4d38b 100644 --- a/cubool/sources/core/matrix.hpp +++ b/cubool/sources/core/matrix.hpp @@ -56,6 +56,7 @@ namespace cubool { void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void 
eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; + void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) override; index getNrows() const override; index getNcols() const override; @@ -77,4 +78,4 @@ namespace cubool { } -#endif //CUBOOL_MATRIX_HPP \ No newline at end of file +#endif //CUBOOL_MATRIX_HPP diff --git a/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp new file mode 100644 index 0000000..37fe607 --- /dev/null +++ b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp @@ -0,0 +1,44 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include + +CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted( + cuBool_Matrix result, + cuBool_Matrix matrix, + cuBool_Matrix mask, + cuBool_Hints hints +) { + CUBOOL_BEGIN_BODY + CUBOOL_VALIDATE_LIBRARY + CUBOOL_ARG_NOT_NULL(result) + CUBOOL_ARG_NOT_NULL(matrix) + CUBOOL_ARG_NOT_NULL(mask) + auto resultM = (cubool::Matrix *) result; + auto matrixM = (cubool::Matrix *) matrix; + auto maskM = (cubool::Matrix *) mask; + + resultM->eWiseMultInverted(*matrixM, *maskM, hints & CUBOOL_HINT_TIME_CHECK); + CUBOOL_END_BODY +} diff --git a/cubool/sources/cuda/cuda_matrix.hpp b/cubool/sources/cuda/cuda_matrix.hpp index e19fec1..9f1b7a2 100644 --- a/cubool/sources/cuda/cuda_matrix.hpp +++ b/cubool/sources/cuda/cuda_matrix.hpp @@ -56,6 +56,7 @@ namespace cubool { void kronecker(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; void eWiseAdd(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; + void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; index getNrows() const override; index getNcols() const override; @@ -79,4 +80,4 @@ namespace cubool { }; }; -#endif //CUBOOL_CUDA_MATRIX_HPP \ No newline at end of file +#endif //CUBOOL_CUDA_MATRIX_HPP diff --git a/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu new file mode 100644 index 0000000..2e0fa87 --- /dev/null +++ b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu @@ -0,0 +1,63 @@ 
+/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. 
*/ +/**********************************************************************************/ + +#include +#include + +namespace cubool { + + void CudaMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) { + auto a = dynamic_cast(&aBase); + auto b = dynamic_cast(&bBase); + + CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class"); + CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class"); + + index M = this->getNrows(); + index N = this->getNcols(); + + + assert(a->getNrows() == M); + assert(a->getNcols() == N); + + assert(b->getNrows() == M); + assert(b->getNcols() == N); + + if (a->isMatrixEmpty() || b->isMatrixEmpty()) { + this->clearAndResizeStorageToDim(); + return; + } + + // Ensure csr proper csr format even if empty + a->resizeStorageToDim(); + b->resizeStorageToDim(); + + kernels::SpVectorEWiseMultInverted> spFunctor; + auto result = spFunctor(a->mMatrixImpl, b->mMatrixImpl); + + // Assign the actual impl result to this storage + this->mMatrixImpl = std::move(result); + } + +} diff --git a/cubool/sources/cuda/cuda_vector.cu b/cubool/sources/cuda/cuda_vector.cu index ced56a0..f2fa9b0 100644 --- a/cubool/sources/cuda/cuda_vector.cu +++ b/cubool/sources/cuda/cuda_vector.cu @@ -29,6 +29,8 @@ #include #include +#include + namespace cubool { CudaVector::CudaVector(size_t nrows, CudaInstance &instance) diff --git a/cubool/sources/cuda/kernels/spewiseadd.cuh b/cubool/sources/cuda/kernels/spewiseadd.cuh index f6ad892..d1e1e87 100644 --- a/cubool/sources/cuda/kernels/spewiseadd.cuh +++ b/cubool/sources/cuda/kernels/spewiseadd.cuh @@ -27,6 +27,8 @@ #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spewisemult.cuh b/cubool/sources/cuda/kernels/spewisemult.cuh index a4705de..6d17a46 100644 --- a/cubool/sources/cuda/kernels/spewisemult.cuh +++ b/cubool/sources/cuda/kernels/spewisemult.cuh @@ 
-30,6 +30,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spewisemultinverted.cuh b/cubool/sources/cuda/kernels/spewisemultinverted.cuh new file mode 100644 index 0000000..353bc97 --- /dev/null +++ b/cubool/sources/cuda/kernels/spewisemultinverted.cuh @@ -0,0 +1,106 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. 
*/ +/**********************************************************************************/ + +#ifndef CUBOOL_SPEWISEMULTINVERTED_CUH +#define CUBOOL_SPEWISEMULTINVERTED_CUH + +#include +#include +#include +#include + +#include + +namespace cubool { + namespace kernels { + + template + struct SpVectorEWiseMultInverted { + template + using ContainerType = thrust::device_vector::other>; + using MatrixType = nsparse::matrix; + using LargeIndexType = unsigned long; + + static_assert(sizeof(LargeIndexType) > sizeof(IndexType), "Values intersection index must be larger"); + + static void fillIndices(const MatrixType& m, ContainerType& out) { + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(m.m_vals), + [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(), + outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) { + LargeIndexType row = findNearestRowIdx(valueId, nrows, rowOffset); + LargeIndexType col = colIndex[valueId]; + LargeIndexType index = row * ncols + col; + outIndices[valueId] = index; + }); + } + + MatrixType operator()(const MatrixType& a, const MatrixType& b) { + auto aNvals = a.m_vals; + auto bNvals = b.m_vals; + auto worst = aNvals; + + // Allocate memory for the worst case scenario + ContainerType inputA(aNvals); + ContainerType inputB(bNvals); + + fillIndices(a, inputA); + fillIndices(b, inputB); + + ContainerType intersected(worst); + + auto out = thrust::set_difference(inputA.begin(), inputA.end(), + inputB.begin(), inputB.end(), + intersected.begin()); + + // Count result nvals count + auto nvals = thrust::distance(intersected.begin(), out); + + ContainerType rowOffsetTmp(a.m_rows + 1); + ContainerType colIndex(nvals); + + thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0); + + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(nvals), + [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(), + nrows=a.m_rows, ncols=a.m_cols] __device__ 
(IndexType valueId) { + LargeIndexType i = intersected[valueId]; + LargeIndexType row = i / ncols; + LargeIndexType col = i % ncols; + atomicAdd((rowOffset + row).get(), 1); + colIndex[valueId] = (IndexType) col; + }); + + ContainerType rowOffset(a.m_rows + 1); + thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus()); + + assert(nvals == rowOffset.back()); + + return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, nvals); + } + }; + + } +} + +#endif //CUBOOL_SPEWISEMULT_CUH diff --git a/cubool/sources/cuda/kernels/spgemv.cuh b/cubool/sources/cuda/kernels/spgemv.cuh index 7340174..818ff62 100644 --- a/cubool/sources/cuda/kernels/spgemv.cuh +++ b/cubool/sources/cuda/kernels/spgemv.cuh @@ -32,6 +32,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spgemv_t.cuh b/cubool/sources/cuda/kernels/spgemv_t.cuh index 6225d71..2a5db83 100644 --- a/cubool/sources/cuda/kernels/spgemv_t.cuh +++ b/cubool/sources/cuda/kernels/spgemv_t.cuh @@ -32,6 +32,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh index 8a14f8d..4734e35 100644 --- a/cubool/sources/cuda/kernels/spmerge.cuh +++ b/cubool/sources/cuda/kernels/spmerge.cuh @@ -29,9 +29,13 @@ #include #include +#include +#include + namespace cubool { namespace kernels { +#if CUBOOL_USE_NSPARSE_MERGE_FUNCTOR template class SpMergeFunctor { public: @@ -53,7 +57,7 @@ namespace cubool { assert(a.m_cols == b.m_cols); IndexType rows = a.m_rows; - IndexType cols = a.m_rows; + IndexType cols = a.m_cols; constexpr auto config_merge = make_bin_seq< @@ -77,6 +81,72 @@ namespace cubool { private: nsparse::unique_merge_functor_t uniqueMergeFunctor; }; +#else + template + class SpMergeFunctor { + public: + template + using ContainerType = thrust::device_vector::other>; + using MatrixType = 
nsparse::matrix; + using LargeIndexType = unsigned long; + + static void fillIndices(const MatrixType& m, ContainerType& out) { + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(m.m_vals), + [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(), + outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) { + LargeIndexType row = findNearestRowIdx(valueId, nrows, rowOffset); + LargeIndexType col = colIndex[valueId]; + LargeIndexType index = row * ncols + col; + outIndices[valueId] = index; + }); + } + + MatrixType operator()(const MatrixType& a, const MatrixType& b) { + auto aNvals = a.m_vals; + auto bNvals = b.m_vals; + auto worst = aNvals + bNvals; + + // Allocate memory for the worst case scenario + ContainerType inputA(aNvals); + ContainerType inputB(bNvals); + + fillIndices(a, inputA); + fillIndices(b, inputB); + + ContainerType intersected(worst); + + auto out = thrust::set_union(inputA.begin(), inputA.end(), + inputB.begin(), inputB.end(), + intersected.begin()); + + // Count result nvals count + auto nvals = thrust::distance(intersected.begin(), out); + + ContainerType rowOffsetTmp(a.m_rows + 1); + ContainerType colIndex(nvals); + + thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0); + + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(nvals), + [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(), + nrows=a.m_rows, ncols=a.m_cols] __device__ (IndexType valueId) { + LargeIndexType i = intersected[valueId]; + LargeIndexType row = i / ncols; + LargeIndexType col = i % ncols; + atomicAdd((rowOffset + row).get(), 1); + colIndex[valueId] = (IndexType) col; + }); + + ContainerType rowOffset(a.m_rows + 1); + thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus()); + + assert(nvals == rowOffset.back()); + + return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, 
nvals); + } + }; + +#endif } } diff --git a/cubool/sources/cuda/kernels/spreduce.cuh b/cubool/sources/cuda/kernels/spreduce.cuh index 3a15bb1..cfcf398 100644 --- a/cubool/sources/cuda/kernels/spreduce.cuh +++ b/cubool/sources/cuda/kernels/spreduce.cuh @@ -30,6 +30,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/sptranspose2.cuh b/cubool/sources/cuda/kernels/sptranspose2.cuh index 8c50e45..c686818 100644 --- a/cubool/sources/cuda/kernels/sptranspose2.cuh +++ b/cubool/sources/cuda/kernels/sptranspose2.cuh @@ -29,6 +29,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/sequential/sq_ewisemultinverted.cpp b/cubool/sources/sequential/sq_ewisemultinverted.cpp new file mode 100644 index 0000000..ee810c9 --- /dev/null +++ b/cubool/sources/sequential/sq_ewisemultinverted.cpp @@ -0,0 +1,105 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include +#include + +namespace cubool { + + void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out) { + out.rowOffsets.resize(a.nrows + 1, 0); + + size_t nvals = 0; + + // Count nnz of the result matrix to allocate memory + for (index i = 0; i < a.nrows; i++) { + index ak = a.rowOffsets[i]; + index bk = b.rowOffsets[i]; + index asize = a.rowOffsets[i + 1] - ak; + index bsize = b.rowOffsets[i + 1] - bk; + + const index* ar = &a.colIndices[ak]; + const index* br = &b.colIndices[bk]; + const index* arend = ar + asize; + const index* brend = br + bsize; + + index nvalsInRow = 0; + + while (ar != arend && br != brend) { + if (*ar == *br) { + ar++; + br++; + } + else if (*ar < *br) { + nvalsInRow++; + ar++; + } + else { + br++; + } + } + nvalsInRow += arend - ar; + + nvals += nvalsInRow; + out.rowOffsets[i] = nvalsInRow; + } + + // Eval row offsets + exclusive_scan(out.rowOffsets.begin(), out.rowOffsets.end(), 0); + + // Allocate memory for values + out.nvals = nvals; + out.colIndices.resize(nvals); + + // Fill sorted column indices + size_t k = 0; + for (index i = 0; i < a.nrows; i++) { + const index* ar = &a.colIndices[a.rowOffsets[i]]; + const index* br = &b.colIndices[b.rowOffsets[i]]; + const index* arend = &a.colIndices[a.rowOffsets[i + 1]]; + const index* brend = &b.colIndices[b.rowOffsets[i + 1]]; + + while (ar != arend && br != brend) { + if (*ar == *br) { + ar++; + br++; + } + else if (*ar < *br) { + out.colIndices[k] = *ar; + k++; + ar++; + } + else { + br++; + } + } + + while (ar != arend) { + out.colIndices[k] = *ar; + k++; + ar++; + } + } + } +} diff --git 
a/cubool/sources/sequential/sq_ewisemultinverted.hpp b/cubool/sources/sequential/sq_ewisemultinverted.hpp new file mode 100644 index 0000000..99b2615 --- /dev/null +++ b/cubool/sources/sequential/sq_ewisemultinverted.hpp @@ -0,0 +1,42 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#ifndef CUBOOL_SQ_EWISEMULTINVERTED_HPP +#define CUBOOL_SQ_EWISEMULTINVERTED_HPP + +#include + +namespace cubool { + + /** + * Element-wise multiplication of the matrices `a` and inverted `b`. 
+ * + * @param a Input matrix + * @param b Input matrix, which will be inverted before multiplication + * @param[out] out Where to store the result + */ + void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out); +} + +#endif //CUBOOL_SQ_EWISEMULTINVERTED_HPP diff --git a/cubool/sources/sequential/sq_matrix.cpp b/cubool/sources/sequential/sq_matrix.cpp index df96e4e..00473f1 100644 --- a/cubool/sources/sequential/sq_matrix.cpp +++ b/cubool/sources/sequential/sq_matrix.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,31 @@ namespace cubool { this->mData = std::move(out); } + void SqMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) { + auto a = dynamic_cast(&aBase); + auto b = dynamic_cast(&bBase); + + CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class"); + CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class"); + + assert(a->getNrows() == this->getNrows()); + assert(a->getNcols() == this->getNcols()); + assert(a->getNrows() == b->getNrows()); + assert(a->getNcols() == b->getNcols()); + + CsrData out; + out.nrows = this->getNrows(); + out.ncols = this->getNcols(); + + a->allocateStorage(); + b->allocateStorage(); + sq_ewisemultinverted(a->mData, b->mData, out); + + this->mData = std::move(out); + } + + + index SqMatrix::getNrows() const { return mData.nrows; } diff --git a/cubool/sources/sequential/sq_matrix.hpp b/cubool/sources/sequential/sq_matrix.hpp index 1a3f539..d348232 100644 --- a/cubool/sources/sequential/sq_matrix.hpp +++ b/cubool/sources/sequential/sq_matrix.hpp @@ -51,6 +51,7 @@ namespace cubool { void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, 
bool checkTime) override; + void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; index getNrows() const override; index getNcols() const override; diff --git a/cubool/sources/utils/data_utils.hpp b/cubool/sources/utils/data_utils.hpp index 2356382..b7863b3 100644 --- a/cubool/sources/utils/data_utils.hpp +++ b/cubool/sources/utils/data_utils.hpp @@ -27,6 +27,7 @@ #include #include +#include namespace cubool { diff --git a/cubool/tests/CMakeLists.txt b/cubool/tests/CMakeLists.txt index 0e0f620..84ddc96 100644 --- a/cubool/tests/CMakeLists.txt +++ b/cubool/tests/CMakeLists.txt @@ -34,6 +34,9 @@ target_link_libraries(test_matrix_ewiseadd PUBLIC testing) add_executable(test_matrix_ewisemult test_matrix_ewisemult.cpp) target_link_libraries(test_matrix_ewisemult PUBLIC testing) +add_executable(test_matrix_ewisemult_inverted test_matrix_ewisemult_inverted.cpp) +target_link_libraries(test_matrix_ewisemult_inverted PUBLIC testing) + add_executable(test_vector_misc test_vector_misc.cpp) target_link_libraries(test_vector_misc PUBLIC testing) diff --git a/cubool/tests/test_matrix_ewisemult_inverted.cpp b/cubool/tests/test_matrix_ewisemult_inverted.cpp new file mode 100644 index 0000000..250ba55 --- /dev/null +++ b/cubool/tests/test_matrix_ewisemult_inverted.cpp @@ -0,0 +1,109 @@ +#include "cubool/cubool.h" +#include +#include + +using DataMatrix = std::vector>; + +void testApplyNotMask(const DataMatrix &matrix_data, const DataMatrix &mask_data) { + cuBool_Index nrows, ncols; + nrows = matrix_data.size(); + ncols = matrix_data[0].size(); + testing::Matrix test_matrix = testing::Matrix::generatet(nrows, ncols, + [&matrix_data](cuBool_Index i, cuBool_Index j) { return matrix_data[i][j]; }); + nrows = mask_data.size(); + ncols = mask_data[0].size(); + testing::Matrix test_mask = testing::Matrix::generatet(nrows, ncols, + [&mask_data](cuBool_Index i, cuBool_Index j) { return mask_data[i][j]; }); + + cuBool_Matrix matrix, mask, result; + 
ASSERT_EQ(cuBool_Matrix_New(&matrix, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_New(&mask, test_mask.nrows, test_mask.ncols), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_New(&result, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS); + + ASSERT_EQ(cuBool_Matrix_Build(matrix, test_matrix.rowsIndex.data(), test_matrix.colsIndex.data(), test_matrix.nvals, + CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_Build(mask, test_mask.rowsIndex.data(), test_mask.colsIndex.data(), test_mask.nvals, + CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS); + + cuBool_Matrix_EWiseMulInverted(result, matrix, mask, CUBOOL_HINT_NO); + + // validate value of algorithm + cuBool_Index nvals; + cuBool_Matrix_Nvals(result, &nvals); + std::vector rows(nvals), cols(nvals); + cuBool_Matrix_ExtractPairs(result, rows.data(), cols.data(), &nvals); + + cuBool_Matrix_Free(matrix); + cuBool_Matrix_Free(mask); + cuBool_Matrix_Free(result); + + auto mask_data_inverted = mask_data; + for (auto &row : mask_data_inverted) { + for (int &value : row) { + value = !value; + } + } + + std::vector result_data(matrix_data.size(), std::vector(matrix_data[0].size(), 0)); + for (int i = 0; i < nvals; i++) { + result_data[rows[i]][cols[i]] = 1; + } + + for (int i = 0; i < matrix_data.size(); i++) { + for (int j = 0; j < matrix_data[0].size(); j++) { + ASSERT_EQ(matrix_data[i][j] * mask_data_inverted[i][j], result_data[i][j]); + } + } + +} + +TEST(cuBool_Matrix, ApplyMatrix) { + ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS); + + DataMatrix matrix { + {1, 0, 0}, + {0, 0, 0}, + {0, 1, 0}, + }; + + DataMatrix mask { + {0, 1, 1}, + {1, 0, 1}, + {0, 1, 1}, + }; + // iverted is + // 1 0 0 + // 0 1 0 + // 1 0 0 + // matrix & ~mask must have (0, 0) + + testApplyNotMask(matrix, mask); + + ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS); +} + 
+TEST(cuBool_Matrix, ApplyMatrixRandom) { + ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS); + + for (int i = 0; i < 102; i++) { + int n = rand() % 10 + 1; + int m = rand() % 10 + 1; + + DataMatrix matrix(n, std::vector(m, 0)); + DataMatrix mask(n, std::vector(m, 0)); + + for (int i = 0; i < n; i++) { + for (int j = 0; j < m; j++) { + matrix[i][j] = rand() & 1; + mask[i][j] = rand() & 1; + } + } + + testApplyNotMask(matrix, mask); + } + + ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS); +} + + +CUBOOL_GTEST_MAIN diff --git a/deps/cub b/deps/cub deleted file mode 160000 index b229817..0000000 --- a/deps/cub +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b229817e3963fc942c7cc2c61715a6b2b2c49bed diff --git a/deps/gtest b/deps/gtest index d9c309f..6910c9d 160000 --- a/deps/gtest +++ b/deps/gtest @@ -1 +1 @@ -Subproject commit d9c309fdab807b716c2cf4d4a42989b8c34f712a +Subproject commit 6910c9d9165801d8827d628cb72eb7ea9dd538c5 diff --git a/deps/nsparse-um/CMakeLists.txt b/deps/nsparse-um/CMakeLists.txt index ee5ac0d..91ed084 100644 --- a/deps/nsparse-um/CMakeLists.txt +++ b/deps/nsparse-um/CMakeLists.txt @@ -3,7 +3,7 @@ project(nsparse_um LANGUAGES CXX CUDA) add_library(nsparse_um INTERFACE) target_include_directories(nsparse_um INTERFACE include/) -target_link_libraries(nsparse_um INTERFACE cub) +target_link_libraries(nsparse_um INTERFACE) target_compile_options(nsparse_um INTERFACE $<$: --expt-relaxed-constexpr --expt-extended-lambda>) if (CUBOOL_BUILD_NSPARSE_TESTS) diff --git a/deps/nsparse-um/include/nsparse/unified_allocator.h b/deps/nsparse-um/include/nsparse/unified_allocator.h index ea6a98b..9c3b2f8 100644 --- a/deps/nsparse-um/include/nsparse/unified_allocator.h +++ b/deps/nsparse-um/include/nsparse/unified_allocator.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/deps/nsparse-um/test/CMakeLists.txt b/deps/nsparse-um/test/CMakeLists.txt index 3bece29..6a1aaff 100644 --- 
a/deps/nsparse-um/test/CMakeLists.txt +++ b/deps/nsparse-um/test/CMakeLists.txt @@ -3,7 +3,7 @@ project(nsparse_um_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub) +target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17) diff --git a/deps/nsparse/test/CMakeLists.txt b/deps/nsparse/test/CMakeLists.txt index 9e71d90..5abd415 100644 --- a/deps/nsparse/test/CMakeLists.txt +++ b/deps/nsparse/test/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.11) project(nsparse_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub) +target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17) diff --git a/python/publish_pypi.sh b/python/publish_pypi.sh index 40d4c65..0c78e87 100644 --- a/python/publish_pypi.sh +++ b/python/publish_pypi.sh @@ -1,2 +1,3 @@ +#!/bin/bash python3 setup.py sdist bdist_wheel twine upload --verbose dist/* \ No newline at end of file diff --git a/python/publish_t_pypi.sh b/python/publish_t_pypi.sh index aa781e1..d68cbdd 100644 --- a/python/publish_t_pypi.sh +++ b/python/publish_t_pypi.sh @@ -1,2 +1,3 @@ +#!/bin/bash python3 setup.py sdist bdist_wheel twine upload --verbose --repository testpypi dist/* \ No newline at end of file diff --git a/python/run_tests.sh b/python/run_tests.sh index febf012..0c216b3 100644 --- a/python/run_tests.sh +++ b/python/run_tests.sh @@ -1,3 +1,4 @@ +#!/bin/bash export PYTHONPATH="`pwd`:$PYTHONPATH" cd tests python3 -m unittest discover -v diff --git a/scripts/install_cuda_ubuntu.sh b/scripts/install_cuda_ubuntu.sh index 8c7e5ed..019f2aa 100644 --- 
a/scripts/install_cuda_ubuntu.sh +++ b/scripts/install_cuda_ubuntu.sh @@ -1,8 +1,13 @@ +#!/bin/bash # Original script from https://github.com/ptheywood/cuda-cmake-github-actions CUDA_PACKAGES_IN=( - "command-line-tools" - "libraries-dev" + "cuda-compiler" + "cuda-cudart-dev" + "cuda-nvtx" + "cuda-nvrtc-dev" + "libcurand-dev" # 11-0+ + "cuda-cccl" # 11.4+, provides cub and thrust. On 11.3 known as cuda-thrust-11-3 ) # returns 0 (true) if a >= b @@ -13,7 +18,7 @@ function version_ge() { # returns 0 (true) if a > b function version_gt() { [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 - [ "$1" = "$2" ] && return 1 || version_ge $1 $2 + [ "$1" = "$2" ] && return 1 || version_ge "$1" "$2" } # returns 0 (true) if a <= b function version_le() { @@ -23,15 +28,16 @@ function version_le() { # returns 0 (true) if a < b function version_lt() { [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 - [ "$1" = "$2" ] && return 1 || version_le $1 $2 + [ "$1" = "$2" ] && return 1 || version_le "$1" "$2" } ## Select CUDA version # Get the cuda version from the environment as $cuda. -CUDA_VERSION_MAJOR_MINOR=${cuda} +CUDA_VERSION_MAJOR_MINOR=${cuda:=12.8} # Split the version. +# We (might/probably) don't know PATCH at this point - it depends which version gets installed. CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) @@ -55,7 +61,7 @@ if [ -z "${CUDA_MINOR}" ] ; then exit 1 fi # If we don't know the Ubuntu version, error. -if [ -z ${UBUNTU_VERSION} ]; then +if [ -z "${UBUNTU_VERSION}" ]; then echo "Error: Unknown Ubuntu version. Aborting." exit 1 fi @@ -64,43 +70,82 @@ fi ## Select CUDA packages to install CUDA_PACKAGES="" for package in "${CUDA_PACKAGES_IN[@]}" -do : +do : + # @todo This is not perfect. 
Should probably provide a separate list for diff versions # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y - if [[ "${package}" == "nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then - package="compiler" - elif [[ "${package}" == "compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then - package="nvcc" + if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-compiler" + elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-nvcc" + # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ + elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then + # CUDA cuda-thrust >= 11.4 + if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then + package="cuda-cccl" + # Use cuda-thrust > 11.2 + elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then + package="cuda-thrust" + # Do not include this pacakge < 11.3 + else + continue + fi + fi + # CUDA 11+ includes lib* / lib*-dev packages, which if they existed previously where cuda-cu*- / cuda-cu*-dev- + if [[ ${package} == libcu* ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "11.0" ; then + package="${package/libcu/cuda-cu}" fi # Build the full package name and append to the string. 
- CUDA_PACKAGES+=" cuda-${package}-${CUDA_MAJOR}-${CUDA_MINOR}" + CUDA_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" +CPU_ARCH="x86_64" PIN_FILENAME="cuda-ubuntu${UBUNTU_VERSION}.pin" -PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/${PIN_FILENAME}" -APT_KEY_URL="http://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/7fa2af80.pub" -REPO_URL="http://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/" +PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" +# apt keyring package now available https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ +KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" +KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/${KERYRING_PACKAGE_FILENAME}" +REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/" echo "PIN_FILENAME ${PIN_FILENAME}" echo "PIN_URL ${PIN_URL}" -echo "APT_KEY_URL ${APT_KEY_URL}" +echo "KEYRING_PACKAGE_URL ${KEYRING_PACKAGE_URL}" -sudo rm -r /var/lib/apt/lists/* -sudo apt-get clean -sudo apt-get update +# Detect if the script is being run as root, storing true/false in is_root. +is_root=false +if (( EUID == 0)); then + is_root=true +fi +# Find if sudo is available +has_sudo=false +if command -v sudo &> /dev/null ; then + has_sudo=true +fi +# Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. +if [ "$is_root" = false ] && [ "$has_sudo" = false ]; then + echo "Root or sudo is required. Aborting." 
+ exit 1 +elif [ "$is_root" = false ] ; then + USE_SUDO=sudo +else + USE_SUDO= +fi + +$USE_SUDO rm -r /var/lib/apt/lists/* +$USE_SUDO apt-get clean +$USE_SUDO apt-get update ## Install echo "Adding CUDA Repository" -wget ${PIN_URL} -sudo mv ${PIN_FILENAME} /etc/apt/preferences.d/cuda-repository-pin-600 -sudo apt-key adv --fetch-keys ${APT_KEY_URL} -sudo add-apt-repository "deb ${REPO_URL} /" -sudo apt-get update +wget "${PIN_URL}" +$USE_SUDO mv "${PIN_FILENAME}" /etc/apt/preferences.d/cuda-repository-pin-600 +wget "${KEYRING_PACKAGE_URL}" && ${USE_SUDO} dpkg -i ${KERYRING_PACKAGE_FILENAME} && rm ${KERYRING_PACKAGE_FILENAME} +$USE_SUDO add-apt-repository "deb ${REPO_URL} /" +$USE_SUDO apt-get update echo "Installing CUDA packages ${CUDA_PACKAGES}" -sudo apt-get -y install ${CUDA_PACKAGES} -if [[ $? -ne 0 ]]; then +if ! $USE_SUDO apt-get -y install "${CUDA_PACKAGES}"; then echo "CUDA Installation Error." exit 1 fi @@ -109,7 +154,8 @@ fi CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" export CUDA_PATH=${CUDA_PATH} - export PATH="$CUDA_PATH/bin:$PATH" export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" -nvcc -V \ No newline at end of file +export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" +# Check nvcc is now available. +nvcc -V diff --git a/scripts/run_tests_all.sh b/scripts/run_tests_all.sh index 8f2e7e0..1fd405f 100644 --- a/scripts/run_tests_all.sh +++ b/scripts/run_tests_all.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Runs all tests executables # Invoke this script within build directory ./cubool/tests/test_library_api diff --git a/scripts/run_tests_fallback.sh b/scripts/run_tests_fallback.sh index f468fc1..50a4f84 100644 --- a/scripts/run_tests_fallback.sh +++ b/scripts/run_tests_fallback.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Runs all tests executables # Invoke this script within build directory ./cubool/tests/test_library_api