From ead088f20ef52c77f517a9540b876ec8abc963ca Mon Sep 17 00:00:00 2001 From: Mitya <8863314@gmail.com> Date: Tue, 22 Oct 2024 12:24:56 +0300 Subject: [PATCH 01/10] Compiled cuBool and run tests with CUDA 12.6 --- .gitmodules | 3 --- CMakeLists.txt | 12 ++++++------ cubool/sources/cuda/cuda_vector.cu | 2 ++ cubool/sources/cuda/kernels/spewiseadd.cuh | 2 ++ cubool/sources/cuda/kernels/spewisemult.cuh | 2 ++ cubool/sources/cuda/kernels/spgemv.cuh | 2 ++ cubool/sources/cuda/kernels/spgemv_t.cuh | 2 ++ cubool/sources/cuda/kernels/spreduce.cuh | 2 ++ cubool/sources/cuda/kernels/sptranspose2.cuh | 2 ++ cubool/sources/utils/data_utils.hpp | 1 + deps/cub | 1 - deps/nsparse-um/CMakeLists.txt | 3 ++- deps/nsparse-um/include/nsparse/unified_allocator.h | 2 +- deps/nsparse-um/test/CMakeLists.txt | 3 ++- deps/nsparse/test/CMakeLists.txt | 3 ++- 15 files changed, 28 insertions(+), 14 deletions(-) delete mode 160000 deps/cub diff --git a/.gitmodules b/.gitmodules index 3adb3ec..f98d82d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "deps/gtest"] path = deps/gtest url = https://github.com/google/googletest.git -[submodule "deps/cub"] - path = deps/cub - url = https://github.com/NVIDIA/cub.git diff --git a/CMakeLists.txt b/CMakeLists.txt index fb9212a..e3ab3eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,12 +33,12 @@ endif() # Configure cuda dependencies if (CUBOOL_WITH_CUDA) message(STATUS "Add cub as cuda utility") - set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE) - set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE) - set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) - add_subdirectory(deps/cub) - add_library(cub INTERFACE IMPORTED) - target_link_libraries(cub INTERFACE CUB::CUB) + # set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE) + # set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE) + # set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) + # add_subdirectory(deps/cub) + # add_library(cub INTERFACE IMPORTED) + # target_link_libraries(cub INTERFACE CUB::CUB) message(STATUS "Add nsparse library as crs matrix multiplication backend") add_subdirectory(deps/nsparse-um) diff --git a/cubool/sources/cuda/cuda_vector.cu b/cubool/sources/cuda/cuda_vector.cu index ced56a0..f2fa9b0 100644 --- a/cubool/sources/cuda/cuda_vector.cu +++ b/cubool/sources/cuda/cuda_vector.cu @@ -29,6 +29,8 @@ #include #include +#include + namespace cubool { CudaVector::CudaVector(size_t nrows, CudaInstance &instance) diff --git a/cubool/sources/cuda/kernels/spewiseadd.cuh b/cubool/sources/cuda/kernels/spewiseadd.cuh index f6ad892..d1e1e87 100644 --- a/cubool/sources/cuda/kernels/spewiseadd.cuh +++ b/cubool/sources/cuda/kernels/spewiseadd.cuh @@ -27,6 +27,8 @@ #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spewisemult.cuh b/cubool/sources/cuda/kernels/spewisemult.cuh index a4705de..6d17a46 100644 --- a/cubool/sources/cuda/kernels/spewisemult.cuh +++ b/cubool/sources/cuda/kernels/spewisemult.cuh @@ -30,6 +30,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spgemv.cuh b/cubool/sources/cuda/kernels/spgemv.cuh index 7340174..818ff62 100644 --- a/cubool/sources/cuda/kernels/spgemv.cuh +++ b/cubool/sources/cuda/kernels/spgemv.cuh @@ -32,6 +32,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spgemv_t.cuh b/cubool/sources/cuda/kernels/spgemv_t.cuh index 6225d71..2a5db83 100644 --- a/cubool/sources/cuda/kernels/spgemv_t.cuh +++ b/cubool/sources/cuda/kernels/spgemv_t.cuh @@ -32,6 +32,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/spreduce.cuh b/cubool/sources/cuda/kernels/spreduce.cuh index 3a15bb1..cfcf398 100644 --- a/cubool/sources/cuda/kernels/spreduce.cuh +++ b/cubool/sources/cuda/kernels/spreduce.cuh @@ -30,6 +30,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/cuda/kernels/sptranspose2.cuh b/cubool/sources/cuda/kernels/sptranspose2.cuh index 8c50e45..c686818 100644 --- a/cubool/sources/cuda/kernels/sptranspose2.cuh +++ b/cubool/sources/cuda/kernels/sptranspose2.cuh @@ -29,6 +29,8 @@ #include #include +#include + namespace cubool { namespace kernels { diff --git a/cubool/sources/utils/data_utils.hpp b/cubool/sources/utils/data_utils.hpp index 2356382..b7863b3 100644 --- a/cubool/sources/utils/data_utils.hpp +++ b/cubool/sources/utils/data_utils.hpp @@ -27,6 +27,7 @@ #include #include +#include namespace cubool { diff --git a/deps/cub b/deps/cub deleted file mode 160000 index b229817..0000000 --- a/deps/cub +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b229817e3963fc942c7cc2c61715a6b2b2c49bed diff --git a/deps/nsparse-um/CMakeLists.txt b/deps/nsparse-um/CMakeLists.txt index ee5ac0d..cdc4463 100644 --- a/deps/nsparse-um/CMakeLists.txt +++ b/deps/nsparse-um/CMakeLists.txt @@ -3,7 +3,8 @@ project(nsparse_um LANGUAGES CXX CUDA) add_library(nsparse_um INTERFACE) target_include_directories(nsparse_um INTERFACE include/) -target_link_libraries(nsparse_um INTERFACE cub) +# target_link_libraries(nsparse_um INTERFACE cub) +target_link_libraries(nsparse_um INTERFACE) target_compile_options(nsparse_um INTERFACE $<$: --expt-relaxed-constexpr --expt-extended-lambda>) if (CUBOOL_BUILD_NSPARSE_TESTS) diff --git a/deps/nsparse-um/include/nsparse/unified_allocator.h b/deps/nsparse-um/include/nsparse/unified_allocator.h index ea6a98b..c15f19e 100644 --- a/deps/nsparse-um/include/nsparse/unified_allocator.h +++ b/deps/nsparse-um/include/nsparse/unified_allocator.h @@ -3,7 +3,7 @@ #include #include #include -#include +// #include #include #include diff --git a/deps/nsparse-um/test/CMakeLists.txt b/deps/nsparse-um/test/CMakeLists.txt index 3bece29..6a29690 100644 --- a/deps/nsparse-um/test/CMakeLists.txt +++ b/deps/nsparse-um/test/CMakeLists.txt @@ -3,7 +3,8 @@ project(nsparse_um_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub) +# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub) +target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17) diff --git a/deps/nsparse/test/CMakeLists.txt b/deps/nsparse/test/CMakeLists.txt index 9e71d90..9927725 100644 --- a/deps/nsparse/test/CMakeLists.txt +++ b/deps/nsparse/test/CMakeLists.txt @@ -2,7 +2,8 @@ cmake_minimum_required(VERSION 3.11) project(nsparse_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub) +# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub) +target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17) From 443d78deae3a4ef525e722afba2a5db3f1c08948 Mon Sep 17 00:00:00 2001 From: Mitya <8863314@gmail.com> Date: Tue, 22 Oct 2024 12:47:31 +0300 Subject: [PATCH 02/10] Temporary removed python package support for compiling rpq algorithm --- CMakeLists.txt | 4 ++-- cubool/CMakeLists.txt | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3ab3eb..da9e6d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ endif() add_subdirectory(cubool) # Copy scripts into binary directory -file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) +# file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) # Copy python related stuff -file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) \ No newline at end of file +# file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 092504b..88591de 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -246,14 +246,14 @@ if (CUBOOL_BUILD_TESTS) endif() # Copy cubool library after build if allowed -if (CUBOOL_COPY_TO_PY_PACKAGE) - set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME}) - - add_custom_command( - TARGET cubool POST_BUILD - COMMAND "${CMAKE_COMMAND}" -E - copy - "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}" - "${CMAKE_BINARY_DIR}/python/pycubool" - COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder") -endif() \ No newline at end of file +# if (CUBOOL_COPY_TO_PY_PACKAGE) +# set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME}) +# +# add_custom_command( +# TARGET cubool POST_BUILD +# COMMAND "${CMAKE_COMMAND}" -E +# copy +# "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}" +# "${CMAKE_BINARY_DIR}/python/pycubool" +# COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder") +# endif() From f429420f50c6297171f8cd7c485e1a6379fef2d5 Mon Sep 17 00:00:00 2001 From: Mitya <8863314@gmail.com> Date: Tue, 5 Nov 2024 20:57:12 +0300 Subject: [PATCH 03/10] Fixed cubool tests --- cubool/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 88591de..406510c 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -219,7 +219,7 @@ endforeach() if (CUBOOL_WITH_CUDA) set_target_properties(cubool PROPERTIES CUDA_STANDARD 14) set_target_properties(cubool PROPERTIES CUDA_STANDARD_REQUIRED ON) - set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + # set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ From e434bb145970a835f9470fc4870f1295b669e80b Mon Sep 17 00:00:00 2001 From: Mitya <8863314@gmail.com> Date: Tue, 5 Nov 2024 22:41:40 +0300 Subject: [PATCH 04/10] Implemented applying inverted mask --- cubool/CMakeLists.txt | 5 + cubool/include/cubool/cubool.h | 26 ++++ cubool/sources/backend/matrix_base.hpp | 3 +- cubool/sources/core/matrix.cpp | 44 +++++- cubool/sources/core/matrix.hpp | 3 +- .../cuBool_Matrix_EWiseMultInverted.cpp | 44 ++++++ cubool/sources/cuda/cuda_matrix.hpp | 3 +- .../cuda/cuda_matrix_ewisemult_inverted.cu | 63 ++++++++ .../cuda/kernels/spewisemultinverted.cuh | 106 ++++++++++++++ .../sequential/sq_ewisemultinverted.cpp | 105 ++++++++++++++ .../sequential/sq_ewisemultinverted.hpp | 42 ++++++ cubool/sources/sequential/sq_matrix.cpp | 26 ++++ cubool/sources/sequential/sq_matrix.hpp | 1 + cubool/tests/CMakeLists.txt | 3 + .../tests/test_matrix_ewisemult_inverted.cpp | 134 ++++++++++++++++++ 15 files changed, 604 insertions(+), 4 deletions(-) create mode 100644 cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp create mode 100644 cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu create mode 100644 cubool/sources/cuda/kernels/spewisemultinverted.cuh create mode 100644 cubool/sources/sequential/sq_ewisemultinverted.cpp create mode 100644 cubool/sources/sequential/sq_ewisemultinverted.hpp create mode 100644 cubool/tests/test_matrix_ewisemult_inverted.cpp diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 406510c..cd80032 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -87,6 +87,7 @@ set(CUBOOL_C_API_SOURCES sources/cuBool_Matrix_Reduce2.cpp sources/cuBool_Matrix_EWiseAdd.cpp sources/cuBool_Matrix_EWiseMult.cpp + sources/cuBool_Matrix_EWiseMultInverted.cpp sources/cuBool_Vector_New.cpp sources/cuBool_Vector_Build.cpp sources/cuBool_Vector_SetElement.cpp @@ -125,6 +126,7 @@ if (CUBOOL_WITH_CUDA) sources/cuda/cuda_matrix.cu sources/cuda/cuda_matrix_ewiseadd.cu sources/cuda/cuda_matrix_ewisemult.cu + sources/cuda/cuda_matrix_ewisemult_inverted.cu sources/cuda/cuda_matrix_kronecker.cu sources/cuda/cuda_matrix_multiply.cu sources/cuda/cuda_matrix_transpose.cu @@ -147,6 +149,7 @@ if (CUBOOL_WITH_CUDA) sources/cuda/kernels/spgemv_t.cuh sources/cuda/kernels/spewiseadd.cuh sources/cuda/kernels/spewisemult.cuh + sources/cuda/kernels/spewisemultinverted.cuh sources/cuda/kernels/sptranspose.cuh sources/cuda/kernels/sptranspose2.cuh sources/cuda/kernels/spkron.cuh @@ -173,6 +176,8 @@ if (CUBOOL_WITH_SEQUENTIAL) sources/sequential/sq_ewiseadd.hpp sources/sequential/sq_ewisemult.cpp sources/sequential/sq_ewisemult.hpp + sources/sequential/sq_ewisemultinverted.cpp + sources/sequential/sq_ewisemultinverted.hpp sources/sequential/sq_spgemm.cpp sources/sequential/sq_spgemm.hpp sources/sequential/sq_spgemv.cpp diff --git a/cubool/include/cubool/cubool.h b/cubool/include/cubool/cubool.h index cd16bb3..ef7d9b5 100644 --- a/cubool/include/cubool/cubool.h +++ b/cubool/include/cubool/cubool.h @@ -925,4 +925,30 @@ CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Kronecker( cuBool_Hints hints ); +/** + * Performs result = left + ~right, where + * '+' is boolean semiring 'or' operation + * '~' is operation for invert matrix (0 swaps to 1 and 1 to 0) + * + * @note Matrices must be compatible + * dim(result) = M x N + * dim(left) = M x N + * dim(right) = M x N + * + * @note Pass `CUBOOL_HINT_TIME_CHECK` hint to measure operation time + * + * @param result[out] Destination matrix to store result + * @param left Source matrix to be added + * @param right Source matrix to be inverted and added + * @param hints Hints for the operation + * + * @return Error code on this operation + */ +CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted( + cuBool_Matrix result, + cuBool_Matrix matrix, + cuBool_Matrix mask, + cuBool_Hints hints +); + #endif //CUBOOL_CUBOOL_H diff --git a/cubool/sources/backend/matrix_base.hpp b/cubool/sources/backend/matrix_base.hpp index 5332031..8842eb9 100644 --- a/cubool/sources/backend/matrix_base.hpp +++ b/cubool/sources/backend/matrix_base.hpp @@ -49,6 +49,7 @@ namespace cubool { virtual void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; virtual void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; virtual void eWiseMult(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0; + virtual void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) = 0; virtual index getNrows() const = 0; virtual index getNcols() const = 0; @@ -59,4 +60,4 @@ namespace cubool { } -#endif //CUBOOL_MATRIX_BASE_HPP \ No newline at end of file +#endif //CUBOOL_MATRIX_BASE_HPP diff --git a/cubool/sources/core/matrix.cpp b/cubool/sources/core/matrix.cpp index 571b010..730fe63 100644 --- a/cubool/sources/core/matrix.cpp +++ b/cubool/sources/core/matrix.cpp @@ -349,6 +349,48 @@ namespace cubool { mHnd->eWiseMult(*a->mHnd, *b->mHnd, false); } + void Matrix::eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) { + + const auto* mat = dynamic_cast(&matrix); + const auto* msk = dynamic_cast(&mask); + + CHECK_RAISE_ERROR(mat != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class"); + CHECK_RAISE_ERROR(msk != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class"); + + index M = mat->getNrows(); + index N = msk->getNcols(); + + CHECK_RAISE_ERROR(M == msk->getNrows(), InvalidArgument, "Passed matrices have incompatible size"); + CHECK_RAISE_ERROR(N == msk->getNcols(), InvalidArgument, "Passed matrices have incompatible size"); + + CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Matrix has incompatible size for operation result"); + CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result"); + + mat->commitCache(); + msk->commitCache(); + this->releaseCache(); + + /* + if (checkTime) { + TIMER_ACTION(timer, mHnd->eWiseMult(*a->mHnd, *b->mHnd, false)); + + LogStream stream(*Library::getLogger()); + stream << Logger::Level::Info + << "Time: " << timer.getElapsedTimeMs() << " ms " + << "Matrix::eWiseMult: " + << this->getDebugMarker() << " = " + << a->getDebugMarker() << " + " + << b->getDebugMarker() << LogStream::cmt; + + return; + } + */ + + mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false); + } + + + index Matrix::getNrows() const { return mHnd->getNrows(); } @@ -396,4 +438,4 @@ namespace cubool { // Clear arrays releaseCache(); } -} \ No newline at end of file +} diff --git a/cubool/sources/core/matrix.hpp b/cubool/sources/core/matrix.hpp index 20cb890..6c4d38b 100644 --- a/cubool/sources/core/matrix.hpp +++ b/cubool/sources/core/matrix.hpp @@ -56,6 +56,7 @@ namespace cubool { void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; + void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) override; index getNrows() const override; index getNcols() const override; @@ -77,4 +78,4 @@ namespace cubool { } -#endif //CUBOOL_MATRIX_HPP \ No newline at end of file +#endif //CUBOOL_MATRIX_HPP diff --git a/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp new file mode 100644 index 0000000..37fe607 --- /dev/null +++ b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp @@ -0,0 +1,44 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include + +CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted( + cuBool_Matrix result, + cuBool_Matrix matrix, + cuBool_Matrix mask, + cuBool_Hints hints +) { + CUBOOL_BEGIN_BODY + CUBOOL_VALIDATE_LIBRARY + CUBOOL_ARG_NOT_NULL(result) + CUBOOL_ARG_NOT_NULL(matrix) + CUBOOL_ARG_NOT_NULL(mask) + auto resultM = (cubool::Matrix *) result; + auto matrixM = (cubool::Matrix *) matrix; + auto maskM = (cubool::Matrix *) mask; + + resultM->eWiseMultInverted(*matrixM, *maskM, hints & CUBOOL_HINT_TIME_CHECK); + CUBOOL_END_BODY +} diff --git a/cubool/sources/cuda/cuda_matrix.hpp b/cubool/sources/cuda/cuda_matrix.hpp index e19fec1..9f1b7a2 100644 --- a/cubool/sources/cuda/cuda_matrix.hpp +++ b/cubool/sources/cuda/cuda_matrix.hpp @@ -56,6 +56,7 @@ namespace cubool { void kronecker(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; void eWiseAdd(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; + void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; index getNrows() const override; index getNcols() const override; @@ -79,4 +80,4 @@ namespace cubool { }; }; -#endif //CUBOOL_CUDA_MATRIX_HPP \ No newline at end of file +#endif //CUBOOL_CUDA_MATRIX_HPP diff --git a/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu new file mode 100644 index 0000000..2e0fa87 --- /dev/null +++ b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu @@ -0,0 +1,63 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include +#include + +namespace cubool { + + void CudaMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) { + auto a = dynamic_cast(&aBase); + auto b = dynamic_cast(&bBase); + + CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class"); + CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class"); + + index M = this->getNrows(); + index N = this->getNcols(); + + + assert(a->getNrows() == M); + assert(a->getNcols() == N); + + assert(b->getNrows() == M); + assert(b->getNcols() == N); + + if (a->isMatrixEmpty() || b->isMatrixEmpty()) { + this->clearAndResizeStorageToDim(); + return; + } + + // Ensure csr proper csr format even if empty + a->resizeStorageToDim(); + b->resizeStorageToDim(); + + kernels::SpVectorEWiseMultInverted> spFunctor; + auto result = spFunctor(a->mMatrixImpl, b->mMatrixImpl); + + // Assign the actual impl result to this storage + this->mMatrixImpl = std::move(result); + } + +} diff --git a/cubool/sources/cuda/kernels/spewisemultinverted.cuh b/cubool/sources/cuda/kernels/spewisemultinverted.cuh new file mode 100644 index 0000000..353bc97 --- /dev/null +++ b/cubool/sources/cuda/kernels/spewisemultinverted.cuh @@ -0,0 +1,106 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#ifndef CUBOOL_SPEWISEMULT_CUH +#define CUBOOL_SPEWISEMULT_CUH + +#include +#include +#include +#include + +#include + +namespace cubool { + namespace kernels { + + template + struct SpVectorEWiseMultInverted { + template + using ContainerType = thrust::device_vector::other>; + using MatrixType = nsparse::matrix; + using LargeIndexType = unsigned long; + + static_assert(sizeof(LargeIndexType) > sizeof(IndexType), "Values intersection index must be larger"); + + static void fillIndices(const MatrixType& m, ContainerType& out) { + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(m.m_vals), + [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(), + outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) { + LargeIndexType row = findNearestRowIdx(valueId, nrows, rowOffset); + LargeIndexType col = colIndex[valueId]; + LargeIndexType index = row * ncols + col; + outIndices[valueId] = index; + }); + } + + MatrixType operator()(const MatrixType& a, const MatrixType& b) { + auto aNvals = a.m_vals; + auto bNvals = b.m_vals; + auto worst = aNvals; + + // Allocate memory for the worst case scenario + ContainerType inputA(aNvals); + ContainerType inputB(bNvals); + + fillIndices(a, inputA); + fillIndices(b, inputB); + + ContainerType intersected(worst); + + auto out = thrust::set_difference(inputA.begin(), inputA.end(), + inputB.begin(), inputB.end(), + intersected.begin()); + + // Count result nvals count + auto nvals = thrust::distance(intersected.begin(), out); + + ContainerType rowOffsetTmp(a.m_rows + 1); + ContainerType colIndex(nvals); + + thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0); + + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(nvals), + [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(), + nrows=a.m_rows, ncols=a.m_cols] __device__ (IndexType valueId) { + LargeIndexType i = intersected[valueId]; + LargeIndexType row = i / ncols; + LargeIndexType col = i % ncols; + atomicAdd((rowOffset + row).get(), 1); + colIndex[valueId] = (IndexType) col; + }); + + ContainerType rowOffset(a.m_rows + 1); + thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus()); + + assert(nvals == rowOffset.back()); + + return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, nvals); + } + }; + + } +} + +#endif //CUBOOL_SPEWISEMULT_CUH diff --git a/cubool/sources/sequential/sq_ewisemultinverted.cpp b/cubool/sources/sequential/sq_ewisemultinverted.cpp new file mode 100644 index 0000000..ee810c9 --- /dev/null +++ b/cubool/sources/sequential/sq_ewisemultinverted.cpp @@ -0,0 +1,105 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include +#include + +namespace cubool { + + void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out) { + out.rowOffsets.resize(a.nrows + 1, 0); + + size_t nvals = 0; + + // Count nnz of the result matrix to allocate memory + for (index i = 0; i < a.nrows; i++) { + index ak = a.rowOffsets[i]; + index bk = b.rowOffsets[i]; + index asize = a.rowOffsets[i + 1] - ak; + index bsize = b.rowOffsets[i + 1] - bk; + + const index* ar = &a.colIndices[ak]; + const index* br = &b.colIndices[bk]; + const index* arend = ar + asize; + const index* brend = br + bsize; + + index nvalsInRow = 0; + + while (ar != arend && br != brend) { + if (*ar == *br) { + ar++; + br++; + } + else if (*ar < *br) { + nvalsInRow++; + ar++; + } + else { + br++; + } + } + nvalsInRow += arend - ar; + + nvals += nvalsInRow; + out.rowOffsets[i] = nvalsInRow; + } + + // Eval row offsets + exclusive_scan(out.rowOffsets.begin(), out.rowOffsets.end(), 0); + + // Allocate memory for values + out.nvals = nvals; + out.colIndices.resize(nvals); + + // Fill sorted column indices + size_t k = 0; + for (index i = 0; i < a.nrows; i++) { + const index* ar = &a.colIndices[a.rowOffsets[i]]; + const index* br = &b.colIndices[b.rowOffsets[i]]; + const index* arend = &a.colIndices[a.rowOffsets[i + 1]]; + const index* brend = &b.colIndices[b.rowOffsets[i + 1]]; + + while (ar != arend && br != brend) { + if (*ar == *br) { + ar++; + br++; + } + else if (*ar < *br) { + out.colIndices[k] = *ar; + k++; + ar++; + } + else { + br++; + } + } + + while (ar != arend) { + out.colIndices[k] = *ar; + k++; + ar++; + } + } + } +} diff --git a/cubool/sources/sequential/sq_ewisemultinverted.hpp b/cubool/sources/sequential/sq_ewisemultinverted.hpp new file mode 100644 index 0000000..99b2615 --- /dev/null +++ b/cubool/sources/sequential/sq_ewisemultinverted.hpp @@ -0,0 +1,42 @@ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2020, 2021 JetBrains-Research */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#ifndef CUBOOL_SQ_EWISEMULTINVERTED_HPP +#define CUBOOL_SQ_EWISEMULTINVERTED_HPP + +#include + +namespace cubool { + + /** + * Element-wise multiplication of the matrices `a` and inverted `b`. + * + * @param a Input matrix + * @param b Input matrix, which will be inverted before multiplication + * @param[out] out Where to store the result + */ + void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out); +} + +#endif //CUBOOL_SQ_EWISEMULTINVERTED_HPP diff --git a/cubool/sources/sequential/sq_matrix.cpp b/cubool/sources/sequential/sq_matrix.cpp index df96e4e..00473f1 100644 --- a/cubool/sources/sequential/sq_matrix.cpp +++ b/cubool/sources/sequential/sq_matrix.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,31 @@ namespace cubool { this->mData = std::move(out); } + void SqMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) { + auto a = dynamic_cast(&aBase); + auto b = dynamic_cast(&bBase); + + CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class"); + CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class"); + + assert(a->getNrows() == this->getNrows()); + assert(a->getNcols() == this->getNcols()); + assert(a->getNrows() == b->getNrows()); + assert(a->getNcols() == b->getNcols()); + + CsrData out; + out.nrows = this->getNrows(); + out.ncols = this->getNcols(); + + a->allocateStorage(); + b->allocateStorage(); + sq_ewisemultinverted(a->mData, b->mData, out); + + this->mData = std::move(out); + } + + + index SqMatrix::getNrows() const { return mData.nrows; } diff --git a/cubool/sources/sequential/sq_matrix.hpp b/cubool/sources/sequential/sq_matrix.hpp index 1a3f539..d348232 100644 --- a/cubool/sources/sequential/sq_matrix.hpp +++ b/cubool/sources/sequential/sq_matrix.hpp @@ -51,6 +51,7 @@ namespace cubool { void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override; void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; + void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override; index getNrows() const override; index getNcols() const override; diff --git a/cubool/tests/CMakeLists.txt b/cubool/tests/CMakeLists.txt index 0e0f620..84ddc96 100644 --- a/cubool/tests/CMakeLists.txt +++ b/cubool/tests/CMakeLists.txt @@ -34,6 +34,9 @@ target_link_libraries(test_matrix_ewiseadd PUBLIC testing) add_executable(test_matrix_ewisemult test_matrix_ewisemult.cpp) target_link_libraries(test_matrix_ewisemult PUBLIC testing) +add_executable(test_matrix_ewisemult_inverted test_matrix_ewisemult_inverted.cpp) +target_link_libraries(test_matrix_ewisemult_inverted PUBLIC testing) + add_executable(test_vector_misc test_vector_misc.cpp) target_link_libraries(test_vector_misc PUBLIC testing) diff --git a/cubool/tests/test_matrix_ewisemult_inverted.cpp b/cubool/tests/test_matrix_ewisemult_inverted.cpp new file mode 100644 index 0000000..2e32520 --- /dev/null +++ b/cubool/tests/test_matrix_ewisemult_inverted.cpp @@ -0,0 +1,134 @@ +#include "cubool/cubool.h" +#include +#include + +using DataMatrix = std::vector>; + +static void printTestingMatrix(const testing::Matrix &matrix, std::string name = "") { + if (name != "") { + std::cout << name << std::endl; + } + + for (int i = 0; i < matrix.nvals; i++) { + printf("(%d, %d)\n", matrix.rowsIndex[i], matrix.colsIndex[i]); + } +} + +static void printCuboolMatrix(cuBool_Matrix matrix, std::string name = "") { + if (name != "") { + std::cout << name << std::endl; + } + + cuBool_Index nvals; + cuBool_Matrix_Nvals(matrix, &nvals); + std::vector rows(nvals), cols(nvals); + cuBool_Matrix_ExtractPairs(matrix, rows.data(), cols.data(), &nvals); + + for (int i = 0; i < nvals; i++) { + printf("(%d, %d)\n", rows[i], cols[i]); + } +} + +void testApplyNotMask(const DataMatrix &matrix_data, const DataMatrix &mask_data) { + cuBool_Index nrows, ncols; + nrows = matrix_data.size(); + ncols = matrix_data[0].size(); + testing::Matrix test_matrix = testing::Matrix::generatet(nrows, ncols, + [&matrix_data](cuBool_Index i, cuBool_Index j) { return matrix_data[i][j]; }); + nrows = mask_data.size(); + ncols = mask_data[0].size(); + testing::Matrix test_mask = testing::Matrix::generatet(nrows, ncols, + [&mask_data](cuBool_Index i, cuBool_Index j) { return mask_data[i][j]; }); + + cuBool_Matrix matrix, mask, result; + ASSERT_EQ(cuBool_Matrix_New(&matrix, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_New(&mask, test_mask.nrows, test_mask.ncols), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_New(&result, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS); + + ASSERT_EQ(cuBool_Matrix_Build(matrix, test_matrix.rowsIndex.data(), test_matrix.colsIndex.data(), test_matrix.nvals, + CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS); + ASSERT_EQ(cuBool_Matrix_Build(mask, test_mask.rowsIndex.data(), test_mask.colsIndex.data(), test_mask.nvals, + CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS); + + cuBool_Matrix_EWiseMulInverted(result, matrix, mask, CUBOOL_HINT_NO); + + // validate value of algorithm + cuBool_Index nvals; + cuBool_Matrix_Nvals(result, &nvals); + std::vector rows(nvals), cols(nvals); + cuBool_Matrix_ExtractPairs(result, rows.data(), cols.data(), &nvals); + + cuBool_Matrix_Free(matrix); + cuBool_Matrix_Free(mask); + cuBool_Matrix_Free(result); + + auto mask_data_inverted = mask_data; + for (auto &row : mask_data_inverted) { + for (int &value : row) { + value = !value; + } + } + + std::vector result_data(matrix_data.size(), std::vector(matrix_data[0].size(), 0)); + for (int i = 0; i < nvals; i++) { + result_data[rows[i]][cols[i]] = 1; + } + + for (int i = 0; i < matrix_data.size(); i++) { + for (int j = 0; j < matrix_data[0].size(); j++) { + ASSERT_EQ(matrix_data[i][j] * mask_data_inverted[i][j], result_data[i][j]); + } + } + +} + +TEST(cuBool_Matrix, ApplyMatrix) { + ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS); + + DataMatrix matrix { + {1, 0, 0}, + {0, 0, 0}, + {0, 1, 0}, + }; + + DataMatrix mask { + {0, 1, 1}, + {1, 0, 1}, + {0, 1, 1}, + }; + // iverted is + // 1 0 0 + // 0 1 0 + // 1 0 0 + // matrix & ~mask must have (0, 0) + + testApplyNotMask(matrix, mask); + + ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS); +} + +TEST(cuBool_Matrix, ApplyMatrixRandom) { + ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS); + + for (int i = 0; i < 102; i++) { + int n = rand() % 10 + 1; + int m = rand() % 10 + 1; + + DataMatrix matrix(n, std::vector(m, 0)); + DataMatrix mask(n, std::vector(m, 0)); + + for (int i = 0; i < n; i++) { + for (int j = 0; j < m; j++) { + matrix[i][j] = rand() & 1; + mask[i][j] = rand() & 1; + } + } + + testApplyNotMask(matrix, mask); + } + + ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS); +} + + +CUBOOL_GTEST_MAIN From 2f5daace52f8ff9aab55924e2612fa62aedd1c76 Mon Sep 17 00:00:00 2001 From: Mitya <8863314@gmail.com> Date: Sun, 24 Nov 2024 23:49:19 +0300 Subject: [PATCH 05/10] Fixed mistake in matrix ewise add --- cubool/sources/cuda/kernels/spmerge.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh index 8a14f8d..7a5b62d 100644 --- a/cubool/sources/cuda/kernels/spmerge.cuh +++ b/cubool/sources/cuda/kernels/spmerge.cuh @@ -53,7 +53,7 @@ namespace cubool { assert(a.m_cols == b.m_cols); IndexType rows = a.m_rows; - IndexType cols = a.m_rows; + IndexType cols = a.m_cols; constexpr auto config_merge = make_bin_seq< From 99f1d8ef722a7b88be8fd61b15456f5658799d5e Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Sun, 16 Feb 2025 23:43:32 +0300 Subject: [PATCH 06/10] Fixed errors in cmake: incorrect compile options for NVCC --- cubool/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index cd80032..451efd0 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -206,11 +206,7 @@ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MAJOR=${CUBOOL_VERSION_ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MINOR=${CUBOOL_VERSION_MINOR}) target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_SUB=${CUBOOL_VERSION_SUB}) -target_compile_features(cubool PUBLIC cxx_std_14) - -target_compile_options(cubool PRIVATE $<$: -Wall>) -target_compile_options(cubool PRIVATE $<$,$>: -O2>) -target_compile_options(cubool PRIVATE $<$,$>: -O0>) +target_compile_features(cubool PUBLIC cxx_std_17) set_target_properties(cubool PROPERTIES CXX_STANDARD 17) set_target_properties(cubool PROPERTIES CXX_STANDARD_REQUIRED ON) From b40486326dbd9bacb763afbf4956e23ecc61f9d3 Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Wed, 19 Feb 2025 00:01:52 +0300 Subject: [PATCH 07/10] Added option for using matrix addition optimization from nsparse --- CMakeLists.txt | 2 + cubool/CMakeLists.txt | 2 + cubool/sources/cuda/kernels/spmerge.cuh | 71 +++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index da9e6d9..a432980 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,8 @@ option(CUBOOL_WITH_NAIVE "Build library with naive and naive-shared dens option(CUBOOL_BUILD_TESTS "Build project unit-tests with gtest" ON) option(CUBOOL_COPY_TO_PY_PACKAGE "Copy compiled shared library into python package folder (for package use purposes)" ON) +option(CUBOOL_USE_NSPARSE_MERGE_FUNCTOR "Use nsparse optimiztion for matrix addition function" ON) + set(CUBOOL_VERSION_MAJOR 1) set(CUBOOL_VERSION_MINOR 0) set(CUBOOL_VERSION_SUB 0) diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 451efd0..5434c02 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -206,6 +206,8 @@ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MAJOR=${CUBOOL_VERSION_ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MINOR=${CUBOOL_VERSION_MINOR}) target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_SUB=${CUBOOL_VERSION_SUB}) +target_compile_definitions(cubool PRIVATE CUBOOL_USE_NSPARSE_MERGE_FUNCTOR=$) + target_compile_features(cubool PUBLIC cxx_std_17) set_target_properties(cubool PROPERTIES CXX_STANDARD 17) diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh index 7a5b62d..96805cb 100644 --- a/cubool/sources/cuda/kernels/spmerge.cuh +++ b/cubool/sources/cuda/kernels/spmerge.cuh @@ -29,9 +29,13 @@ #include #include +#include +#include + namespace cubool { namespace kernels { +#if CUBOOL_USE_NSPARSE_MERGE_FUNCTOR template class SpMergeFunctor { public: @@ -77,6 +81,73 @@ namespace cubool { private: nsparse::unique_merge_functor_t uniqueMergeFunctor; }; +#else + template + class SpMergeFunctor { + public: + template + using ContainerType = thrust::device_vector::other>; + using MatrixType = nsparse::matrix; + using LargeIndexType = unsigned long; + + static void fillIndices(const MatrixType& m, ContainerType& out) { + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(m.m_vals), + [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(), + outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) { + LargeIndexType row = findNearestRowIdx(valueId, nrows, rowOffset); + LargeIndexType col = colIndex[valueId]; + LargeIndexType index = row * ncols + col; + outIndices[valueId] = index; + }); + } + + MatrixType operator()(const MatrixType& a, const MatrixType& b) { + auto aNvals = a.m_vals; + auto bNvals = b.m_vals; + // auto worst = std::min(aNvals, bNvals); + auto worst = aNvals + bNvals; + + // Allocate memory for the worst case scenario + ContainerType inputA(aNvals); + ContainerType inputB(bNvals); + + fillIndices(a, inputA); + fillIndices(b, inputB); + + ContainerType intersected(worst); + + auto out = thrust::set_union(inputA.begin(), inputA.end(), + inputB.begin(), inputB.end(), + intersected.begin()); + + // Count result nvals count + auto nvals = thrust::distance(intersected.begin(), out); + + ContainerType rowOffsetTmp(a.m_rows + 1); + ContainerType colIndex(nvals); + + thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0); + + thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(nvals), + [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(), + nrows=a.m_rows, ncols=a.m_cols] __device__ (IndexType valueId) { + LargeIndexType i = intersected[valueId]; + LargeIndexType row = i / ncols; + LargeIndexType col = i % ncols; + atomicAdd((rowOffset + row).get(), 1); + colIndex[valueId] = (IndexType) col; + }); + + ContainerType rowOffset(a.m_rows + 1); + thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus()); + + assert(nvals == rowOffset.back()); + + return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, nvals); + } + }; + +#endif } } From f81ccea03657b19641776d4bdbe215c653c36cd4 Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Sat, 19 Apr 2025 22:48:49 +0300 Subject: [PATCH 08/10] Updated gtest version to last release for compability with last cmake version --- deps/gtest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/gtest b/deps/gtest index d9c309f..6910c9d 160000 --- a/deps/gtest +++ b/deps/gtest @@ -1 +1 @@ -Subproject commit d9c309fdab807b716c2cf4d4a42989b8c34f712a +Subproject commit 6910c9d9165801d8827d628cb72eb7ea9dd538c5 From da6be345f8a3a0962887e7f1f934adc9b07058bb Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Sat, 3 May 2025 15:49:38 +0300 Subject: [PATCH 09/10] Restored python package build --- CMakeLists.txt | 12 ++---------- cubool/CMakeLists.txt | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a432980..a6404f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,14 +34,6 @@ endif() # Configure cuda dependencies if (CUBOOL_WITH_CUDA) - message(STATUS "Add cub as cuda utility") - # set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE) - # set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE) - # set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) - # add_subdirectory(deps/cub) - # add_library(cub INTERFACE IMPORTED) - # target_link_libraries(cub INTERFACE CUB::CUB) - message(STATUS "Add nsparse library as crs matrix multiplication backend") add_subdirectory(deps/nsparse-um) endif() @@ -60,7 +52,7 @@ endif() add_subdirectory(cubool) # Copy scripts into binary directory -# file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) +file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) # Copy python related stuff -# file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) +file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt index 5434c02..ed3cb41 100644 --- a/cubool/CMakeLists.txt +++ b/cubool/CMakeLists.txt @@ -222,7 +222,6 @@ endforeach() if (CUBOOL_WITH_CUDA) set_target_properties(cubool PROPERTIES CUDA_STANDARD 14) set_target_properties(cubool PROPERTIES CUDA_STANDARD_REQUIRED ON) - # set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ @@ -249,14 +248,14 @@ if (CUBOOL_BUILD_TESTS) endif() # Copy cubool library after build if allowed -# if (CUBOOL_COPY_TO_PY_PACKAGE) -# set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME}) -# -# add_custom_command( -# TARGET cubool POST_BUILD -# COMMAND "${CMAKE_COMMAND}" -E -# copy -# "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}" -# "${CMAKE_BINARY_DIR}/python/pycubool" -# COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder") -# endif() +if (CUBOOL_COPY_TO_PY_PACKAGE) + set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME}) + + add_custom_command( + TARGET cubool POST_BUILD + COMMAND "${CMAKE_COMMAND}" -E + copy + "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}" + "${CMAKE_BINARY_DIR}/python/pycubool" + COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder") +endif() From 69d38a67d74cc032ab9c313721f40cba2f79125c Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Sat, 3 May 2025 17:07:42 +0300 Subject: [PATCH 10/10] Removed temporary solutions for preparing PR --- cubool/include/cubool/cubool.h | 14 +++++------ cubool/sources/core/matrix.cpp | 10 +++----- cubool/sources/cuda/kernels/spmerge.cuh | 1 - .../tests/test_matrix_ewisemult_inverted.cpp | 25 ------------------- deps/nsparse-um/CMakeLists.txt | 1 - .../include/nsparse/unified_allocator.h | 1 - deps/nsparse-um/test/CMakeLists.txt | 1 - deps/nsparse/test/CMakeLists.txt | 1 - 8 files changed, 11 insertions(+), 43 deletions(-) diff --git a/cubool/include/cubool/cubool.h b/cubool/include/cubool/cubool.h index ef7d9b5..2859542 100644 --- a/cubool/include/cubool/cubool.h +++ b/cubool/include/cubool/cubool.h @@ -926,20 +926,20 @@ CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Kronecker( ); /** - * Performs result = left + ~right, where - * '+' is boolean semiring 'or' operation + * Performs result = left * ~right, where + * '*' is boolean semiring 'and' operation * '~' is operation for invert matrix (0 swaps to 1 and 1 to 0) * - * @note Matrices must be compatible + * @note To perform this operation matrices must be compatible + * dim(left) = M x T + * dim(right) = T x N * dim(result) = M x N - * dim(left) = M x N - * dim(right) = M x N * * @note Pass `CUBOOL_HINT_TIME_CHECK` hint to measure operation time * * @param result[out] Destination matrix to store result - * @param left Source matrix to be added - * @param right Source matrix to be inverted and added + * @param left Source matrix to be multiplied + * @param right Source matrix to be inverted and multiplied * @param hints Hints for the operation * * @return Error code on this operation diff --git a/cubool/sources/core/matrix.cpp b/cubool/sources/core/matrix.cpp index 730fe63..49e35c3 100644 --- a/cubool/sources/core/matrix.cpp +++ b/cubool/sources/core/matrix.cpp @@ -370,21 +370,19 @@ namespace cubool { msk->commitCache(); this->releaseCache(); - /* if (checkTime) { - TIMER_ACTION(timer, mHnd->eWiseMult(*a->mHnd, *b->mHnd, false)); + TIMER_ACTION(timer, mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false)); LogStream stream(*Library::getLogger()); stream << Logger::Level::Info << "Time: " << timer.getElapsedTimeMs() << " ms " - << "Matrix::eWiseMult: " + << "Matrix::eWiseMultInverted: " << this->getDebugMarker() << " = " - << a->getDebugMarker() << " + " - << b->getDebugMarker() << LogStream::cmt; + << mat->getDebugMarker() << " + " + << msk->getDebugMarker() << LogStream::cmt; return; } - */ mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false); } diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh index 96805cb..4734e35 100644 --- a/cubool/sources/cuda/kernels/spmerge.cuh +++ b/cubool/sources/cuda/kernels/spmerge.cuh @@ -104,7 +104,6 @@ namespace cubool { MatrixType operator()(const MatrixType& a, const MatrixType& b) { auto aNvals = a.m_vals; auto bNvals = b.m_vals; - // auto worst = std::min(aNvals, bNvals); auto worst = aNvals + bNvals; // Allocate memory for the worst case scenario diff --git a/cubool/tests/test_matrix_ewisemult_inverted.cpp b/cubool/tests/test_matrix_ewisemult_inverted.cpp index 2e32520..250ba55 100644 --- a/cubool/tests/test_matrix_ewisemult_inverted.cpp +++ b/cubool/tests/test_matrix_ewisemult_inverted.cpp @@ -4,31 +4,6 @@ using DataMatrix = std::vector>; -static void printTestingMatrix(const testing::Matrix &matrix, std::string name = "") { - if (name != "") { - std::cout << name << std::endl; - } - - for (int i = 0; i < matrix.nvals; i++) { - printf("(%d, %d)\n", matrix.rowsIndex[i], matrix.colsIndex[i]); - } -} - -static void printCuboolMatrix(cuBool_Matrix matrix, std::string name = "") { - if (name != "") { - std::cout << name << std::endl; - } - - cuBool_Index nvals; - cuBool_Matrix_Nvals(matrix, &nvals); - std::vector rows(nvals), cols(nvals); - cuBool_Matrix_ExtractPairs(matrix, rows.data(), cols.data(), &nvals); - - for (int i = 0; i < nvals; i++) { - printf("(%d, %d)\n", rows[i], cols[i]); - } -} - void testApplyNotMask(const DataMatrix &matrix_data, const DataMatrix &mask_data) { cuBool_Index nrows, ncols; nrows = matrix_data.size(); diff --git a/deps/nsparse-um/CMakeLists.txt b/deps/nsparse-um/CMakeLists.txt index cdc4463..91ed084 100644 --- a/deps/nsparse-um/CMakeLists.txt +++ b/deps/nsparse-um/CMakeLists.txt @@ -3,7 +3,6 @@ project(nsparse_um LANGUAGES CXX CUDA) add_library(nsparse_um INTERFACE) target_include_directories(nsparse_um INTERFACE include/) -# target_link_libraries(nsparse_um INTERFACE cub) target_link_libraries(nsparse_um INTERFACE) target_compile_options(nsparse_um INTERFACE $<$: --expt-relaxed-constexpr --expt-extended-lambda>) diff --git a/deps/nsparse-um/include/nsparse/unified_allocator.h b/deps/nsparse-um/include/nsparse/unified_allocator.h index c15f19e..9c3b2f8 100644 --- a/deps/nsparse-um/include/nsparse/unified_allocator.h +++ b/deps/nsparse-um/include/nsparse/unified_allocator.h @@ -3,7 +3,6 @@ #include #include #include -// #include #include #include diff --git a/deps/nsparse-um/test/CMakeLists.txt b/deps/nsparse-um/test/CMakeLists.txt index 6a29690..6a1aaff 100644 --- a/deps/nsparse-um/test/CMakeLists.txt +++ b/deps/nsparse-um/test/CMakeLists.txt @@ -3,7 +3,6 @@ project(nsparse_um_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub) target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14) diff --git a/deps/nsparse/test/CMakeLists.txt b/deps/nsparse/test/CMakeLists.txt index 9927725..5abd415 100644 --- a/deps/nsparse/test/CMakeLists.txt +++ b/deps/nsparse/test/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.11) project(nsparse_test CXX CUDA) add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp) -# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub) target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)