From ead088f20ef52c77f517a9540b876ec8abc963ca Mon Sep 17 00:00:00 2001
From: Mitya <8863314@gmail.com>
Date: Tue, 22 Oct 2024 12:24:56 +0300
Subject: [PATCH 01/10] Compiled cuBool and run tests with CUDA 12.6

---
 .gitmodules                                         |  3 ---
 CMakeLists.txt                                      | 12 ++++++------
 cubool/sources/cuda/cuda_vector.cu                  |  2 ++
 cubool/sources/cuda/kernels/spewiseadd.cuh          |  2 ++
 cubool/sources/cuda/kernels/spewisemult.cuh         |  2 ++
 cubool/sources/cuda/kernels/spgemv.cuh              |  2 ++
 cubool/sources/cuda/kernels/spgemv_t.cuh            |  2 ++
 cubool/sources/cuda/kernels/spreduce.cuh            |  2 ++
 cubool/sources/cuda/kernels/sptranspose2.cuh        |  2 ++
 cubool/sources/utils/data_utils.hpp                 |  1 +
 deps/cub                                            |  1 -
 deps/nsparse-um/CMakeLists.txt                      |  3 ++-
 deps/nsparse-um/include/nsparse/unified_allocator.h |  2 +-
 deps/nsparse-um/test/CMakeLists.txt                 |  3 ++-
 deps/nsparse/test/CMakeLists.txt                    |  3 ++-
 15 files changed, 28 insertions(+), 14 deletions(-)
 delete mode 160000 deps/cub
diff --git a/.gitmodules b/.gitmodules
index 3adb3ec..f98d82d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
 [submodule "deps/gtest"]
 	path = deps/gtest
 	url = https://github.com/google/googletest.git
-[submodule "deps/cub"]
-	path = deps/cub
-	url = https://github.com/NVIDIA/cub.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb9212a..e3ab3eb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,12 +33,12 @@ endif()
 # Configure cuda dependencies
 if (CUBOOL_WITH_CUDA)
     message(STATUS "Add cub as cuda utility")
-    set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE)
-    set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
-    set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE)
-    add_subdirectory(deps/cub)
-    add_library(cub INTERFACE IMPORTED)
-    target_link_libraries(cub INTERFACE CUB::CUB)
+    # set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE)
+    # set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
+    # set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE)
+    # add_subdirectory(deps/cub)
+    # add_library(cub INTERFACE IMPORTED)
+    # target_link_libraries(cub INTERFACE CUB::CUB)
 
     message(STATUS "Add nsparse library as crs matrix multiplication backend")
     add_subdirectory(deps/nsparse-um)
diff --git a/cubool/sources/cuda/cuda_vector.cu b/cubool/sources/cuda/cuda_vector.cu
index ced56a0..f2fa9b0 100644
--- a/cubool/sources/cuda/cuda_vector.cu
+++ b/cubool/sources/cuda/cuda_vector.cu
@@ -29,6 +29,8 @@
 #include <utils/data_utils.hpp>
 #include <limits>
 
+#include <thrust/sort.h>
+
 namespace cubool {
 
     CudaVector::CudaVector(size_t nrows, CudaInstance &instance)
diff --git a/cubool/sources/cuda/kernels/spewiseadd.cuh b/cubool/sources/cuda/kernels/spewiseadd.cuh
index f6ad892..d1e1e87 100644
--- a/cubool/sources/cuda/kernels/spewiseadd.cuh
+++ b/cubool/sources/cuda/kernels/spewiseadd.cuh
@@ -27,6 +27,8 @@
 
 #include <cuda/details/sp_vector.hpp>
 
+#include <thrust/set_operations.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/cuda/kernels/spewisemult.cuh b/cubool/sources/cuda/kernels/spewisemult.cuh
index a4705de..6d17a46 100644
--- a/cubool/sources/cuda/kernels/spewisemult.cuh
+++ b/cubool/sources/cuda/kernels/spewisemult.cuh
@@ -30,6 +30,8 @@
 #include <nsparse/matrix.h>
 #include <cmath>
 
+#include <thrust/set_operations.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/cuda/kernels/spgemv.cuh b/cubool/sources/cuda/kernels/spgemv.cuh
index 7340174..818ff62 100644
--- a/cubool/sources/cuda/kernels/spgemv.cuh
+++ b/cubool/sources/cuda/kernels/spgemv.cuh
@@ -32,6 +32,8 @@
 #include <nsparse/detail/meta.h>
 #include <limits>
 
+#include <thrust/sort.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/cuda/kernels/spgemv_t.cuh b/cubool/sources/cuda/kernels/spgemv_t.cuh
index 6225d71..2a5db83 100644
--- a/cubool/sources/cuda/kernels/spgemv_t.cuh
+++ b/cubool/sources/cuda/kernels/spgemv_t.cuh
@@ -32,6 +32,8 @@
 #include <nsparse/detail/meta.h>
 #include <limits>
 
+#include <thrust/sort.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/cuda/kernels/spreduce.cuh b/cubool/sources/cuda/kernels/spreduce.cuh
index 3a15bb1..cfcf398 100644
--- a/cubool/sources/cuda/kernels/spreduce.cuh
+++ b/cubool/sources/cuda/kernels/spreduce.cuh
@@ -30,6 +30,8 @@
 #include <thrust/device_vector.h>
 #include <nsparse/matrix.h>
 
+#include <thrust/sort.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/cuda/kernels/sptranspose2.cuh b/cubool/sources/cuda/kernels/sptranspose2.cuh
index 8c50e45..c686818 100644
--- a/cubool/sources/cuda/kernels/sptranspose2.cuh
+++ b/cubool/sources/cuda/kernels/sptranspose2.cuh
@@ -29,6 +29,8 @@
 #include <thrust/device_vector.h>
 #include <nsparse/matrix.h>
 
+#include <thrust/sort.h>
+
 namespace cubool {
     namespace kernels {
 
diff --git a/cubool/sources/utils/data_utils.hpp b/cubool/sources/utils/data_utils.hpp
index 2356382..b7863b3 100644
--- a/cubool/sources/utils/data_utils.hpp
+++ b/cubool/sources/utils/data_utils.hpp
@@ -27,6 +27,7 @@
 
 #include <core/config.hpp>
 #include <vector>
+#include <limits>
 
 namespace cubool {
 
diff --git a/deps/cub b/deps/cub
deleted file mode 160000
index b229817..0000000
--- a/deps/cub
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit b229817e3963fc942c7cc2c61715a6b2b2c49bed
diff --git a/deps/nsparse-um/CMakeLists.txt b/deps/nsparse-um/CMakeLists.txt
index ee5ac0d..cdc4463 100644
--- a/deps/nsparse-um/CMakeLists.txt
+++ b/deps/nsparse-um/CMakeLists.txt
@@ -3,7 +3,8 @@ project(nsparse_um LANGUAGES CXX CUDA)
 
 add_library(nsparse_um INTERFACE)
 target_include_directories(nsparse_um INTERFACE include/)
-target_link_libraries(nsparse_um INTERFACE cub)
+# target_link_libraries(nsparse_um INTERFACE cub)
+target_link_libraries(nsparse_um INTERFACE)
 target_compile_options(nsparse_um INTERFACE $<$<COMPILE_LANGUAGE:CUDA>: --expt-relaxed-constexpr --expt-extended-lambda>)
 
 if (CUBOOL_BUILD_NSPARSE_TESTS)
diff --git a/deps/nsparse-um/include/nsparse/unified_allocator.h b/deps/nsparse-um/include/nsparse/unified_allocator.h
index ea6a98b..c15f19e 100644
--- a/deps/nsparse-um/include/nsparse/unified_allocator.h
+++ b/deps/nsparse-um/include/nsparse/unified_allocator.h
@@ -3,7 +3,7 @@
 #include <thrust/detail/config.h>
 #include <thrust/device_ptr.h>
 #include <thrust/mr/allocator.h>
-#include <thrust/memory/detail/device_system_resource.h>
+// #include <thrust/memory/detail/device_system_resource.h>
 
 #include <limits>
 #include <stdexcept>
diff --git a/deps/nsparse-um/test/CMakeLists.txt b/deps/nsparse-um/test/CMakeLists.txt
index 3bece29..6a29690 100644
--- a/deps/nsparse-um/test/CMakeLists.txt
+++ b/deps/nsparse-um/test/CMakeLists.txt
@@ -3,7 +3,8 @@ project(nsparse_um_test CXX CUDA)
 
 add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp)
 
-target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub)
+# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub)
+target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um)
 target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)
 
 set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17)
diff --git a/deps/nsparse/test/CMakeLists.txt b/deps/nsparse/test/CMakeLists.txt
index 9e71d90..9927725 100644
--- a/deps/nsparse/test/CMakeLists.txt
+++ b/deps/nsparse/test/CMakeLists.txt
@@ -2,7 +2,8 @@ cmake_minimum_required(VERSION 3.11)
 project(nsparse_test CXX CUDA)
 
 add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp)
-target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub)
+# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub)
+target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse)
 target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)
 
 set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17)

From 443d78deae3a4ef525e722afba2a5db3f1c08948 Mon Sep 17 00:00:00 2001
From: Mitya <8863314@gmail.com>
Date: Tue, 22 Oct 2024 12:47:31 +0300
Subject: [PATCH 02/10] Temporary removed python package support for compiling
 rpq algorithm

---
 CMakeLists.txt        |  4 ++--
 cubool/CMakeLists.txt | 22 +++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3ab3eb..da9e6d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,7 +58,7 @@ endif()
 add_subdirectory(cubool)
 
 # Copy scripts into binary directory
-file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/)
+# file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/)
 
 # Copy python related stuff
-file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/)
\ No newline at end of file
+# file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/)
diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index 092504b..88591de 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -246,14 +246,14 @@ if (CUBOOL_BUILD_TESTS)
 endif()
 
 # Copy cubool library after build if allowed
-if (CUBOOL_COPY_TO_PY_PACKAGE)
-    set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME})
-
-    add_custom_command(
-        TARGET cubool POST_BUILD
-        COMMAND "${CMAKE_COMMAND}" -E
-        copy
-        "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}"
-        "${CMAKE_BINARY_DIR}/python/pycubool"
-        COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder")
-endif()
\ No newline at end of file
+# if (CUBOOL_COPY_TO_PY_PACKAGE)
+#     set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME})
+# 
+#     add_custom_command(
+#         TARGET cubool POST_BUILD
+#         COMMAND "${CMAKE_COMMAND}" -E
+#         copy
+#         "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}"
+#         "${CMAKE_BINARY_DIR}/python/pycubool"
+#         COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder")
+# endif()

From f429420f50c6297171f8cd7c485e1a6379fef2d5 Mon Sep 17 00:00:00 2001
From: Mitya <8863314@gmail.com>
Date: Tue, 5 Nov 2024 20:57:12 +0300
Subject: [PATCH 03/10] Fixed cubool tests

---
 cubool/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index 88591de..406510c 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -219,7 +219,7 @@ endforeach()
 if (CUBOOL_WITH_CUDA)
     set_target_properties(cubool PROPERTIES CUDA_STANDARD 14)
     set_target_properties(cubool PROPERTIES CUDA_STANDARD_REQUIRED ON)
-    set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+    # set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
     # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
 

From e434bb145970a835f9470fc4870f1295b669e80b Mon Sep 17 00:00:00 2001
From: Mitya <8863314@gmail.com>
Date: Tue, 5 Nov 2024 22:41:40 +0300
Subject: [PATCH 04/10] Implemented applying inverted mask

---
 cubool/CMakeLists.txt                         |   5 +
 cubool/include/cubool/cubool.h                |  26 ++++
 cubool/sources/backend/matrix_base.hpp        |   3 +-
 cubool/sources/core/matrix.cpp                |  44 +++++-
 cubool/sources/core/matrix.hpp                |   3 +-
 .../cuBool_Matrix_EWiseMultInverted.cpp       |  44 ++++++
 cubool/sources/cuda/cuda_matrix.hpp           |   3 +-
 .../cuda/cuda_matrix_ewisemult_inverted.cu    |  63 ++++++++
 .../cuda/kernels/spewisemultinverted.cuh      | 106 ++++++++++++++
 .../sequential/sq_ewisemultinverted.cpp       | 105 ++++++++++++++
 .../sequential/sq_ewisemultinverted.hpp       |  42 ++++++
 cubool/sources/sequential/sq_matrix.cpp       |  26 ++++
 cubool/sources/sequential/sq_matrix.hpp       |   1 +
 cubool/tests/CMakeLists.txt                   |   3 +
 .../tests/test_matrix_ewisemult_inverted.cpp  | 134 ++++++++++++++++++
 15 files changed, 604 insertions(+), 4 deletions(-)
 create mode 100644 cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp
 create mode 100644 cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu
 create mode 100644 cubool/sources/cuda/kernels/spewisemultinverted.cuh
 create mode 100644 cubool/sources/sequential/sq_ewisemultinverted.cpp
 create mode 100644 cubool/sources/sequential/sq_ewisemultinverted.hpp
 create mode 100644 cubool/tests/test_matrix_ewisemult_inverted.cpp

diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index 406510c..cd80032 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -87,6 +87,7 @@ set(CUBOOL_C_API_SOURCES
     sources/cuBool_Matrix_Reduce2.cpp
     sources/cuBool_Matrix_EWiseAdd.cpp
     sources/cuBool_Matrix_EWiseMult.cpp
+    sources/cuBool_Matrix_EWiseMultInverted.cpp
     sources/cuBool_Vector_New.cpp
     sources/cuBool_Vector_Build.cpp
     sources/cuBool_Vector_SetElement.cpp
@@ -125,6 +126,7 @@ if (CUBOOL_WITH_CUDA)
         sources/cuda/cuda_matrix.cu
         sources/cuda/cuda_matrix_ewiseadd.cu
         sources/cuda/cuda_matrix_ewisemult.cu
+        sources/cuda/cuda_matrix_ewisemult_inverted.cu
         sources/cuda/cuda_matrix_kronecker.cu
         sources/cuda/cuda_matrix_multiply.cu
         sources/cuda/cuda_matrix_transpose.cu
@@ -147,6 +149,7 @@ if (CUBOOL_WITH_CUDA)
         sources/cuda/kernels/spgemv_t.cuh
         sources/cuda/kernels/spewiseadd.cuh
         sources/cuda/kernels/spewisemult.cuh
+        sources/cuda/kernels/spewisemultinverted.cuh
         sources/cuda/kernels/sptranspose.cuh
         sources/cuda/kernels/sptranspose2.cuh
         sources/cuda/kernels/spkron.cuh
@@ -173,6 +176,8 @@ if (CUBOOL_WITH_SEQUENTIAL)
         sources/sequential/sq_ewiseadd.hpp
         sources/sequential/sq_ewisemult.cpp
         sources/sequential/sq_ewisemult.hpp
+        sources/sequential/sq_ewisemultinverted.cpp
+        sources/sequential/sq_ewisemultinverted.hpp
         sources/sequential/sq_spgemm.cpp
         sources/sequential/sq_spgemm.hpp
         sources/sequential/sq_spgemv.cpp
diff --git a/cubool/include/cubool/cubool.h b/cubool/include/cubool/cubool.h
index cd16bb3..ef7d9b5 100644
--- a/cubool/include/cubool/cubool.h
+++ b/cubool/include/cubool/cubool.h
@@ -925,4 +925,30 @@ CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Kronecker(
     cuBool_Hints hints
 );
 
+/**
+ * Performs result = left + ~right, where
+ *     '+' is boolean semiring 'or' operation
+ *     '~' is operation for invert matrix (0 swaps to 1 and 1 to 0)
+ *
+ * @note Matrices must be compatible
+ *          dim(result) = M x N
+ *          dim(left) = M x N
+ *          dim(right) = M x N
+ *
+ * @note Pass `CUBOOL_HINT_TIME_CHECK` hint to measure operation time
+ *
+ * @param result[out] Destination matrix to store result
+ * @param left Source matrix to be added
+ * @param right Source matrix to be inverted and added
+ * @param hints Hints for the operation
+ *
+ * @return Error code on this operation
+ */
+CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted(
+    cuBool_Matrix result,
+    cuBool_Matrix matrix,
+    cuBool_Matrix mask,
+    cuBool_Hints hints
+);
+
 #endif //CUBOOL_CUBOOL_H
diff --git a/cubool/sources/backend/matrix_base.hpp b/cubool/sources/backend/matrix_base.hpp
index 5332031..8842eb9 100644
--- a/cubool/sources/backend/matrix_base.hpp
+++ b/cubool/sources/backend/matrix_base.hpp
@@ -49,6 +49,7 @@ namespace cubool {
         virtual void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0;
         virtual void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0;
         virtual void eWiseMult(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) = 0;
+        virtual void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) = 0;
 
         virtual index getNrows() const = 0;
         virtual index getNcols() const = 0;
@@ -59,4 +60,4 @@ namespace cubool {
 
 }
 
-#endif //CUBOOL_MATRIX_BASE_HPP
\ No newline at end of file
+#endif //CUBOOL_MATRIX_BASE_HPP
diff --git a/cubool/sources/core/matrix.cpp b/cubool/sources/core/matrix.cpp
index 571b010..730fe63 100644
--- a/cubool/sources/core/matrix.cpp
+++ b/cubool/sources/core/matrix.cpp
@@ -349,6 +349,48 @@ namespace cubool {
         mHnd->eWiseMult(*a->mHnd, *b->mHnd, false);
     }
 
+    void Matrix::eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) {
+
+        const auto* mat = dynamic_cast<const Matrix*>(&matrix);
+        const auto* msk = dynamic_cast<const Matrix*>(&mask);
+
+        CHECK_RAISE_ERROR(mat != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class");
+        CHECK_RAISE_ERROR(msk != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class");
+
+        index M = mat->getNrows();
+        index N = msk->getNcols();
+
+        CHECK_RAISE_ERROR(M == msk->getNrows(), InvalidArgument, "Passed matrices have incompatible size");
+        CHECK_RAISE_ERROR(N == msk->getNcols(), InvalidArgument, "Passed matrices have incompatible size");
+
+        CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Matrix has incompatible size for operation result");
+        CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result");
+
+        mat->commitCache();
+        msk->commitCache();
+        this->releaseCache();
+
+        /*
+        if (checkTime) {
+            TIMER_ACTION(timer, mHnd->eWiseMult(*a->mHnd, *b->mHnd, false));
+
+            LogStream stream(*Library::getLogger());
+            stream << Logger::Level::Info
+                   << "Time: " << timer.getElapsedTimeMs() << " ms "
+                   << "Matrix::eWiseMult: "
+                   << this->getDebugMarker() << " = "
+                   << a->getDebugMarker() << " + "
+                   << b->getDebugMarker() << LogStream::cmt;
+
+            return;
+        }
+        */
+
+        mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false);
+    }
+
+
+
     index Matrix::getNrows() const {
         return mHnd->getNrows();
     }
@@ -396,4 +438,4 @@ namespace cubool {
         // Clear arrays
         releaseCache();
     }
-}
\ No newline at end of file
+}
diff --git a/cubool/sources/core/matrix.hpp b/cubool/sources/core/matrix.hpp
index 20cb890..6c4d38b 100644
--- a/cubool/sources/core/matrix.hpp
+++ b/cubool/sources/core/matrix.hpp
@@ -56,6 +56,7 @@ namespace cubool {
         void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override;
         void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override;
         void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
+        void eWiseMultInverted(const MatrixBase &matrix, const MatrixBase &mask, bool checkTime) override;
 
         index getNrows() const override;
         index getNcols() const override;
@@ -77,4 +78,4 @@ namespace cubool {
 
 }
 
-#endif //CUBOOL_MATRIX_HPP
\ No newline at end of file
+#endif //CUBOOL_MATRIX_HPP
diff --git a/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp
new file mode 100644
index 0000000..37fe607
--- /dev/null
+++ b/cubool/sources/cuBool_Matrix_EWiseMultInverted.cpp
@@ -0,0 +1,44 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#include <cuBool_Common.hpp>
+
+CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Matrix_EWiseMulInverted(
+    cuBool_Matrix result,
+    cuBool_Matrix matrix,
+    cuBool_Matrix mask,
+    cuBool_Hints hints
+) {
+    CUBOOL_BEGIN_BODY
+        CUBOOL_VALIDATE_LIBRARY
+        CUBOOL_ARG_NOT_NULL(result)
+        CUBOOL_ARG_NOT_NULL(matrix)
+        CUBOOL_ARG_NOT_NULL(mask)
+        auto resultM = (cubool::Matrix *) result;
+        auto matrixM = (cubool::Matrix *) matrix;
+        auto maskM = (cubool::Matrix *) mask;
+
+        resultM->eWiseMultInverted(*matrixM, *maskM, hints & CUBOOL_HINT_TIME_CHECK);
+    CUBOOL_END_BODY
+}
diff --git a/cubool/sources/cuda/cuda_matrix.hpp b/cubool/sources/cuda/cuda_matrix.hpp
index e19fec1..9f1b7a2 100644
--- a/cubool/sources/cuda/cuda_matrix.hpp
+++ b/cubool/sources/cuda/cuda_matrix.hpp
@@ -56,6 +56,7 @@ namespace cubool {
         void kronecker(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
         void eWiseAdd(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
         void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
+        void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
 
         index getNrows() const override;
         index getNcols() const override;
@@ -79,4 +80,4 @@ namespace cubool {
     };
 };
 
-#endif //CUBOOL_CUDA_MATRIX_HPP
\ No newline at end of file
+#endif //CUBOOL_CUDA_MATRIX_HPP
diff --git a/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu
new file mode 100644
index 0000000..2e0fa87
--- /dev/null
+++ b/cubool/sources/cuda/cuda_matrix_ewisemult_inverted.cu
@@ -0,0 +1,63 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#include <cuda/cuda_matrix.hpp>
+#include <cuda/kernels/spewisemultinverted.cuh>
+
+namespace cubool {
+
+    void CudaMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) {
+        auto a = dynamic_cast<const CudaMatrix*>(&aBase);
+        auto b = dynamic_cast<const CudaMatrix*>(&bBase);
+
+        CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class");
+        CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class");
+
+        index M = this->getNrows();
+        index N = this->getNcols();
+
+
+        assert(a->getNrows() == M);
+        assert(a->getNcols() == N);
+
+        assert(b->getNrows() == M);
+        assert(b->getNcols() == N);
+
+        if (a->isMatrixEmpty() || b->isMatrixEmpty()) {
+            this->clearAndResizeStorageToDim();
+            return;
+        }
+
+        // Ensure csr proper csr format even if empty
+        a->resizeStorageToDim();
+        b->resizeStorageToDim();
+
+        kernels::SpVectorEWiseMultInverted<index, DeviceAlloc<index>> spFunctor;
+        auto result = spFunctor(a->mMatrixImpl, b->mMatrixImpl);
+
+        // Assign the actual impl result to this storage
+        this->mMatrixImpl = std::move(result);
+    }
+
+}
diff --git a/cubool/sources/cuda/kernels/spewisemultinverted.cuh b/cubool/sources/cuda/kernels/spewisemultinverted.cuh
new file mode 100644
index 0000000..353bc97
--- /dev/null
+++ b/cubool/sources/cuda/kernels/spewisemultinverted.cuh
@@ -0,0 +1,106 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#ifndef CUBOOL_SPEWISEMULT_CUH
+#define CUBOOL_SPEWISEMULT_CUH
+
+#include <cuda/details/sp_vector.hpp>
+#include <cuda/kernels/bin_search.cuh>
+#include <nsparse/matrix.h>
+#include <cmath>
+
+#include <thrust/set_operations.h>
+
+namespace cubool {
+    namespace kernels {
+
+        template <typename IndexType, typename AllocType>
+        struct SpVectorEWiseMultInverted {
+            template<typename T>
+            using ContainerType = thrust::device_vector<T, typename AllocType::template rebind<T>::other>;
+            using MatrixType = nsparse::matrix<bool, IndexType, AllocType>;
+            using LargeIndexType = unsigned long;
+
+            static_assert(sizeof(LargeIndexType) > sizeof(IndexType), "Values intersection index must be larger");
+
+            static void fillIndices(const MatrixType& m, ContainerType<LargeIndexType>& out) {
+                thrust::for_each(thrust::counting_iterator<IndexType>(0), thrust::counting_iterator<IndexType>(m.m_vals),
+                        [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(),
+                         outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) {
+                    LargeIndexType row = findNearestRowIdx<index>(valueId, nrows, rowOffset);
+                    LargeIndexType col = colIndex[valueId];
+                    LargeIndexType index = row * ncols + col;
+                    outIndices[valueId] = index;
+                });
+            }
+
+            MatrixType operator()(const MatrixType& a, const MatrixType& b) {
+                auto aNvals = a.m_vals;
+                auto bNvals = b.m_vals;
+                auto worst = aNvals;
+
+                // Allocate memory for the worst case scenario
+                ContainerType<LargeIndexType> inputA(aNvals);
+                ContainerType<LargeIndexType> inputB(bNvals);
+
+                fillIndices(a, inputA);
+                fillIndices(b, inputB);
+
+                ContainerType<LargeIndexType> intersected(worst);
+
+                auto out = thrust::set_difference(inputA.begin(), inputA.end(),
+                                                  inputB.begin(), inputB.end(),
+                                                  intersected.begin());
+
+                // Count result nvals count
+                auto nvals = thrust::distance(intersected.begin(), out);
+
+                ContainerType<index> rowOffsetTmp(a.m_rows + 1);
+                ContainerType<index> colIndex(nvals);
+
+                thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0);
+
+                thrust::for_each(thrust::counting_iterator<IndexType>(0), thrust::counting_iterator<IndexType>(nvals),
+                        [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(),
+                         nrows=a.m_rows, ncols=a.m_cols] __device__ (IndexType valueId) {
+                    LargeIndexType i = intersected[valueId];
+                    LargeIndexType row = i / ncols;
+                    LargeIndexType col = i % ncols;
+                    atomicAdd((rowOffset + row).get(), 1);
+                    colIndex[valueId] = (IndexType) col;
+                });
+
+                ContainerType<index> rowOffset(a.m_rows + 1);
+                thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus<index>());
+
+                assert(nvals == rowOffset.back());
+
+                return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, nvals);
+            }
+        };
+
+    }
+}
+
+#endif //CUBOOL_SPEWISEMULT_CUH
diff --git a/cubool/sources/sequential/sq_ewisemultinverted.cpp b/cubool/sources/sequential/sq_ewisemultinverted.cpp
new file mode 100644
index 0000000..ee810c9
--- /dev/null
+++ b/cubool/sources/sequential/sq_ewisemultinverted.cpp
@@ -0,0 +1,105 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#include <sequential/sq_ewisemult.hpp>
+#include <utils/algo_utils.hpp>
+
+namespace cubool {
+
+    void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out) {
+        out.rowOffsets.resize(a.nrows + 1, 0);
+
+        size_t nvals = 0;
+
+        // Count nnz of the result matrix to allocate memory
+        for (index i = 0; i < a.nrows; i++) {
+            index ak = a.rowOffsets[i];
+            index bk = b.rowOffsets[i];
+            index asize = a.rowOffsets[i + 1] - ak;
+            index bsize = b.rowOffsets[i + 1] - bk;
+
+            const index* ar = &a.colIndices[ak];
+            const index* br = &b.colIndices[bk];
+            const index* arend = ar + asize;
+            const index* brend = br + bsize;
+
+            index nvalsInRow = 0;
+
+            while (ar != arend && br != brend) {
+                if (*ar == *br) {
+                    ar++;
+                    br++;
+                }
+                else if (*ar < *br) {
+                    nvalsInRow++;
+                    ar++;
+                }
+                else {
+                    br++;
+                }
+            }
+            nvalsInRow += arend - ar;
+
+            nvals += nvalsInRow;
+            out.rowOffsets[i] = nvalsInRow;
+        }
+
+        // Eval row offsets
+        exclusive_scan(out.rowOffsets.begin(), out.rowOffsets.end(), 0);
+
+        // Allocate memory for values
+        out.nvals = nvals;
+        out.colIndices.resize(nvals);
+
+        // Fill sorted column indices
+        size_t k = 0;
+        for (index i = 0; i < a.nrows; i++) {
+            const index* ar = &a.colIndices[a.rowOffsets[i]];
+            const index* br = &b.colIndices[b.rowOffsets[i]];
+            const index* arend = &a.colIndices[a.rowOffsets[i + 1]];
+            const index* brend = &b.colIndices[b.rowOffsets[i + 1]];
+
+            while (ar != arend && br != brend) {
+                if (*ar == *br) {
+                    ar++;
+                    br++;
+                }
+                else if (*ar < *br) {
+                    out.colIndices[k] = *ar;
+                    k++;
+                    ar++;
+                }
+                else {
+                    br++;
+                }
+            }
+
+            while (ar != arend) {
+                out.colIndices[k] = *ar;
+                k++;
+                ar++;
+            }
+        }
+    }
+}
diff --git a/cubool/sources/sequential/sq_ewisemultinverted.hpp b/cubool/sources/sequential/sq_ewisemultinverted.hpp
new file mode 100644
index 0000000..99b2615
--- /dev/null
+++ b/cubool/sources/sequential/sq_ewisemultinverted.hpp
@@ -0,0 +1,42 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#ifndef CUBOOL_SQ_EWISEMULTINVERTED_HPP
+#define CUBOOL_SQ_EWISEMULTINVERTED_HPP
+
+#include <sequential/sq_data.hpp>
+
+namespace cubool {
+
+    /**
+     * Element-wise multiplication of the matrices `a` and inverted `b`.
+     *
+     * @param a Input matrix
+     * @param b Input matrix, which will be inverted before multiplication
+     * @param[out] out Where to store the result
+     */
+    void sq_ewisemultinverted(const CsrData& a, const CsrData& b, CsrData& out);
+}
+
+#endif //CUBOOL_SQ_EWISEMULTINVERTED_HPP
diff --git a/cubool/sources/sequential/sq_matrix.cpp b/cubool/sources/sequential/sq_matrix.cpp
index df96e4e..00473f1 100644
--- a/cubool/sources/sequential/sq_matrix.cpp
+++ b/cubool/sources/sequential/sq_matrix.cpp
@@ -28,6 +28,7 @@
 #include <sequential/sq_kronecker.hpp>
 #include <sequential/sq_ewiseadd.hpp>
 #include <sequential/sq_ewisemult.hpp>
+#include <sequential/sq_ewisemultinverted.hpp>
 #include <sequential/sq_spgemm.hpp>
 #include <sequential/sq_reduce.hpp>
 #include <utils/data_utils.hpp>
@@ -235,6 +236,31 @@ namespace cubool {
         this->mData = std::move(out);
     }
 
+    void SqMatrix::eWiseMultInverted(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) {
+        auto a = dynamic_cast<const SqMatrix*>(&aBase);
+        auto b = dynamic_cast<const SqMatrix*>(&bBase);
+
+        CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
+        CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
+
+        assert(a->getNrows() == this->getNrows());
+        assert(a->getNcols() == this->getNcols());
+        assert(a->getNrows() == b->getNrows());
+        assert(a->getNcols() == b->getNcols());
+
+        CsrData out;
+        out.nrows = this->getNrows();
+        out.ncols = this->getNcols();
+
+        a->allocateStorage();
+        b->allocateStorage();
+        sq_ewisemultinverted(a->mData, b->mData, out);
+
+        this->mData = std::move(out);
+    }
+
+
+
     index SqMatrix::getNrows() const {
         return mData.nrows;
     }
diff --git a/cubool/sources/sequential/sq_matrix.hpp b/cubool/sources/sequential/sq_matrix.hpp
index 1a3f539..d348232 100644
--- a/cubool/sources/sequential/sq_matrix.hpp
+++ b/cubool/sources/sequential/sq_matrix.hpp
@@ -51,6 +51,7 @@ namespace cubool {
         void kronecker(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override;
         void eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase, bool checkTime) override;
         void eWiseMult(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
+        void eWiseMultInverted(const MatrixBase &a, const MatrixBase &b, bool checkTime) override;
 
         index getNrows() const override;
         index getNcols() const override;
diff --git a/cubool/tests/CMakeLists.txt b/cubool/tests/CMakeLists.txt
index 0e0f620..84ddc96 100644
--- a/cubool/tests/CMakeLists.txt
+++ b/cubool/tests/CMakeLists.txt
@@ -34,6 +34,9 @@ target_link_libraries(test_matrix_ewiseadd PUBLIC testing)
 add_executable(test_matrix_ewisemult test_matrix_ewisemult.cpp)
 target_link_libraries(test_matrix_ewisemult PUBLIC testing)
 
+add_executable(test_matrix_ewisemult_inverted test_matrix_ewisemult_inverted.cpp)
+target_link_libraries(test_matrix_ewisemult_inverted PUBLIC testing)
+
 add_executable(test_vector_misc test_vector_misc.cpp)
 target_link_libraries(test_vector_misc PUBLIC testing)
 
diff --git a/cubool/tests/test_matrix_ewisemult_inverted.cpp b/cubool/tests/test_matrix_ewisemult_inverted.cpp
new file mode 100644
index 0000000..2e32520
--- /dev/null
+++ b/cubool/tests/test_matrix_ewisemult_inverted.cpp
@@ -0,0 +1,134 @@
+#include "cubool/cubool.h"
+#include <testing/testing.hpp>
+#include <algorithm>
+
+using DataMatrix = std::vector<std::vector<int>>;
+
+static void printTestingMatrix(const testing::Matrix &matrix, std::string name = "") {
+    if (name != "") {
+        std::cout << name << std::endl;
+    }
+
+    for (int i = 0; i < matrix.nvals; i++) {
+        printf("(%d, %d)\n", matrix.rowsIndex[i], matrix.colsIndex[i]);
+    }
+}
+
+static void printCuboolMatrix(cuBool_Matrix matrix, std::string name = "") {
+    if (name != "") {
+        std::cout << name << std::endl;
+    }
+
+    cuBool_Index nvals;
+    cuBool_Matrix_Nvals(matrix, &nvals);
+    std::vector<cuBool_Index> rows(nvals), cols(nvals);
+    cuBool_Matrix_ExtractPairs(matrix, rows.data(), cols.data(), &nvals);
+
+    for (int i = 0; i < nvals; i++) {
+        printf("(%d, %d)\n", rows[i], cols[i]);
+    }
+}
+
+void testApplyNotMask(const DataMatrix &matrix_data, const DataMatrix &mask_data) {
+    cuBool_Index nrows, ncols;
+    nrows = matrix_data.size();
+    ncols = matrix_data[0].size();
+    testing::Matrix test_matrix = testing::Matrix::generatet(nrows, ncols,
+        [&matrix_data](cuBool_Index i, cuBool_Index j) { return matrix_data[i][j]; });
+    nrows = mask_data.size();
+    ncols = mask_data[0].size();
+    testing::Matrix test_mask = testing::Matrix::generatet(nrows, ncols,
+        [&mask_data](cuBool_Index i, cuBool_Index j) { return mask_data[i][j]; });
+
+    cuBool_Matrix matrix, mask, result;
+    ASSERT_EQ(cuBool_Matrix_New(&matrix, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS);
+    ASSERT_EQ(cuBool_Matrix_New(&mask, test_mask.nrows, test_mask.ncols), CUBOOL_STATUS_SUCCESS);
+    ASSERT_EQ(cuBool_Matrix_New(&result, test_matrix.nrows, test_matrix.ncols), CUBOOL_STATUS_SUCCESS);
+
+    ASSERT_EQ(cuBool_Matrix_Build(matrix, test_matrix.rowsIndex.data(), test_matrix.colsIndex.data(), test_matrix.nvals,
+        CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS);
+    ASSERT_EQ(cuBool_Matrix_Build(mask, test_mask.rowsIndex.data(), test_mask.colsIndex.data(), test_mask.nvals,
+        CUBOOL_HINT_VALUES_SORTED & CUBOOL_HINT_NO_DUPLICATES), CUBOOL_STATUS_SUCCESS);
+
+    cuBool_Matrix_EWiseMulInverted(result, matrix, mask, CUBOOL_HINT_NO);
+
+    // validate value of algorithm
+    cuBool_Index nvals;
+    cuBool_Matrix_Nvals(result, &nvals);
+    std::vector<cuBool_Index> rows(nvals), cols(nvals);
+    cuBool_Matrix_ExtractPairs(result, rows.data(), cols.data(), &nvals);
+
+    cuBool_Matrix_Free(matrix);
+    cuBool_Matrix_Free(mask);
+    cuBool_Matrix_Free(result);
+
+    auto mask_data_inverted = mask_data;
+    for (auto &row : mask_data_inverted) {
+        for (int &value : row) {
+            value = !value;
+        }
+    }
+
+    std::vector result_data(matrix_data.size(), std::vector(matrix_data[0].size(), 0));
+    for (int i = 0; i < nvals; i++) {
+        result_data[rows[i]][cols[i]] = 1;
+    }
+
+    for (int i = 0; i < matrix_data.size(); i++) {
+        for (int j = 0; j < matrix_data[0].size(); j++) {
+            ASSERT_EQ(matrix_data[i][j] * mask_data_inverted[i][j], result_data[i][j]);
+        }
+    }
+
+}
+
+TEST(cuBool_Matrix, ApplyMatrix) {
+    ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS);
+
+    DataMatrix matrix {
+        {1, 0, 0},
+        {0, 0, 0},
+        {0, 1, 0},
+    };
+
+    DataMatrix mask {
+        {0, 1, 1},
+        {1, 0, 1},
+        {0, 1, 1},
+    };
+    // iverted is
+    // 1 0 0
+    // 0 1 0
+    // 1 0 0
+    // matrix & ~mask must have (0, 0)
+
+    testApplyNotMask(matrix, mask);
+
+    ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS);
+}
+
+TEST(cuBool_Matrix, ApplyMatrixRandom) {
+    ASSERT_EQ(cuBool_Initialize(CUBOOL_HINT_NO), CUBOOL_STATUS_SUCCESS);
+
+    for (int i = 0; i < 102; i++) {
+        int n = rand() % 10 + 1;
+        int m = rand() % 10 + 1;
+
+        DataMatrix matrix(n, std::vector(m, 0));
+        DataMatrix mask(n, std::vector(m, 0));
+
+        for (int i = 0; i < n; i++) {
+            for (int j = 0; j < m; j++) {
+                matrix[i][j] = rand() & 1;
+                mask[i][j] = rand() & 1;
+            }
+        }
+
+        testApplyNotMask(matrix, mask);
+    }
+
+    ASSERT_EQ(cuBool_Finalize(), CUBOOL_STATUS_SUCCESS);
+}
+
+
+CUBOOL_GTEST_MAIN

From 2f5daace52f8ff9aab55924e2612fa62aedd1c76 Mon Sep 17 00:00:00 2001
From: Mitya <8863314@gmail.com>
Date: Sun, 24 Nov 2024 23:49:19 +0300
Subject: [PATCH 05/10] Fixed mistake in matrix ewise add

---
 cubool/sources/cuda/kernels/spmerge.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh
index 8a14f8d..7a5b62d 100644
--- a/cubool/sources/cuda/kernels/spmerge.cuh
+++ b/cubool/sources/cuda/kernels/spmerge.cuh
@@ -53,7 +53,7 @@ namespace cubool {
                 assert(a.m_cols == b.m_cols);
 
                 IndexType rows = a.m_rows;
-                IndexType cols = a.m_rows;
+                IndexType cols = a.m_cols;
 
                 constexpr auto config_merge =
                     make_bin_seq<

From 99f1d8ef722a7b88be8fd61b15456f5658799d5e Mon Sep 17 00:00:00 2001
From: Dmitriy Kozenko <8863314@gmail.com>
Date: Sun, 16 Feb 2025 23:43:32 +0300
Subject: [PATCH 06/10] Fixed errors in cmake: incorrect compile options for
 NVCC

---
 cubool/CMakeLists.txt | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index cd80032..451efd0 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -206,11 +206,7 @@ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MAJOR=${CUBOOL_VERSION_
 target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MINOR=${CUBOOL_VERSION_MINOR})
 target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_SUB=${CUBOOL_VERSION_SUB})
 
-target_compile_features(cubool PUBLIC cxx_std_14)
-
-target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -Wall>)
-target_compile_options(cubool PRIVATE $<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:CXX>>: -O2>)
-target_compile_options(cubool PRIVATE $<$<AND:$<CONFIG:Release>,$<COMPILE_LANGUAGE:CXX>>: -O0>)
+target_compile_features(cubool PUBLIC cxx_std_17)
 
 set_target_properties(cubool PROPERTIES CXX_STANDARD 17)
 set_target_properties(cubool PROPERTIES CXX_STANDARD_REQUIRED ON)

From b40486326dbd9bacb763afbf4956e23ecc61f9d3 Mon Sep 17 00:00:00 2001
From: Dmitriy Kozenko <8863314@gmail.com>
Date: Wed, 19 Feb 2025 00:01:52 +0300
Subject: [PATCH 07/10] Added option for using matrix addition optimization
 from nsparse

---
 CMakeLists.txt                          |  2 +
 cubool/CMakeLists.txt                   |  2 +
 cubool/sources/cuda/kernels/spmerge.cuh | 71 +++++++++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index da9e6d9..a432980 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,8 @@ option(CUBOOL_WITH_NAIVE         "Build library with naive and naive-shared dens
 option(CUBOOL_BUILD_TESTS        "Build project unit-tests with gtest" ON)
 option(CUBOOL_COPY_TO_PY_PACKAGE "Copy compiled shared library into python package folder (for package use purposes)" ON)
 
+option(CUBOOL_USE_NSPARSE_MERGE_FUNCTOR "Use nsparse optimiztion for matrix addition function" ON)
+
 set(CUBOOL_VERSION_MAJOR 1)
 set(CUBOOL_VERSION_MINOR 0)
 set(CUBOOL_VERSION_SUB 0)
diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index 451efd0..5434c02 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -206,6 +206,8 @@ target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MAJOR=${CUBOOL_VERSION_
 target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_MINOR=${CUBOOL_VERSION_MINOR})
 target_compile_definitions(cubool PRIVATE CUBOOL_VERSION_SUB=${CUBOOL_VERSION_SUB})
 
+target_compile_definitions(cubool PRIVATE CUBOOL_USE_NSPARSE_MERGE_FUNCTOR=$<BOOL:${CUBOOL_USE_NSPARSE_MERGE_FUNCTOR}>)
+
 target_compile_features(cubool PUBLIC cxx_std_17)
 
 set_target_properties(cubool PROPERTIES CXX_STANDARD 17)
diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh
index 7a5b62d..96805cb 100644
--- a/cubool/sources/cuda/kernels/spmerge.cuh
+++ b/cubool/sources/cuda/kernels/spmerge.cuh
@@ -29,9 +29,13 @@
 #include <nsparse/matrix.h>
 #include <nsparse/detail/merge.h>
 
+#include <cuda/kernels/bin_search.cuh>
+#include <thrust/set_operations.h>
+
 namespace cubool {
     namespace kernels {
 
+#if CUBOOL_USE_NSPARSE_MERGE_FUNCTOR
         template <typename IndexType, typename AllocType>
         class SpMergeFunctor {
         public:
@@ -77,6 +81,73 @@ namespace cubool {
         private:
             nsparse::unique_merge_functor_t<IndexType, AllocType> uniqueMergeFunctor;
         };
+#else
+        template <typename IndexType, typename AllocType>
+        class SpMergeFunctor {
+        public:
+            template<typename T>
+            using ContainerType = thrust::device_vector<T, typename AllocType::template rebind<T>::other>;
+            using MatrixType = nsparse::matrix<bool, IndexType, AllocType>;
+            using LargeIndexType = unsigned long;
+
+            static void fillIndices(const MatrixType& m, ContainerType<LargeIndexType>& out) {
+                thrust::for_each(thrust::counting_iterator<IndexType>(0), thrust::counting_iterator<IndexType>(m.m_vals),
+                        [rowOffset=m.m_row_index.data(), colIndex=m.m_col_index.data(),
+                         outIndices=out.data(), nrows=m.m_rows, ncols=m.m_cols] __device__ (IndexType valueId) {
+                    LargeIndexType row = findNearestRowIdx<index>(valueId, nrows, rowOffset);
+                    LargeIndexType col = colIndex[valueId];
+                    LargeIndexType index = row * ncols + col;
+                    outIndices[valueId] = index;
+                });
+            }
+
+            MatrixType operator()(const MatrixType& a, const MatrixType& b) {
+                auto aNvals = a.m_vals;
+                auto bNvals = b.m_vals;
+                // auto worst = std::min(aNvals, bNvals);
+                auto worst = aNvals + bNvals;
+
+                // Allocate memory for the worst case scenario
+                ContainerType<LargeIndexType> inputA(aNvals);
+                ContainerType<LargeIndexType> inputB(bNvals);
+
+                fillIndices(a, inputA);
+                fillIndices(b, inputB);
+
+                ContainerType<LargeIndexType> intersected(worst);
+
+                auto out = thrust::set_union(inputA.begin(), inputA.end(),
+                                             inputB.begin(), inputB.end(),
+                                             intersected.begin());
+
+                // Count result nvals count
+                auto nvals = thrust::distance(intersected.begin(), out);
+
+                ContainerType<index> rowOffsetTmp(a.m_rows + 1);
+                ContainerType<index> colIndex(nvals);
+
+                thrust::fill(rowOffsetTmp.begin(), rowOffsetTmp.end(), 0);
+
+                thrust::for_each(thrust::counting_iterator<IndexType>(0), thrust::counting_iterator<IndexType>(nvals),
+                        [rowOffset=rowOffsetTmp.data(), colIndex=colIndex.data(), intersected=intersected.data(),
+                         nrows=a.m_rows, ncols=a.m_cols] __device__ (IndexType valueId) {
+                    LargeIndexType i = intersected[valueId];
+                    LargeIndexType row = i / ncols;
+                    LargeIndexType col = i % ncols;
+                    atomicAdd((rowOffset + row).get(), 1);
+                    colIndex[valueId] = (IndexType) col;
+                });
+
+                ContainerType<index> rowOffset(a.m_rows + 1);
+                thrust::exclusive_scan(rowOffsetTmp.begin(), rowOffsetTmp.end(), rowOffset.begin(), 0, thrust::plus<index>());
+
+                assert(nvals == rowOffset.back());
+
+                return MatrixType(std::move(colIndex), std::move(rowOffset), a.m_rows, a.m_cols, nvals);
+            }
+        };
+
+#endif
 
     }
 }

From f81ccea03657b19641776d4bdbe215c653c36cd4 Mon Sep 17 00:00:00 2001
From: Dmitriy Kozenko <8863314@gmail.com>
Date: Sat, 19 Apr 2025 22:48:49 +0300
Subject: [PATCH 08/10] Updated gtest version to last release for compability
 with last cmake version

---
 deps/gtest | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/gtest b/deps/gtest
index d9c309f..6910c9d 160000
--- a/deps/gtest
+++ b/deps/gtest
@@ -1 +1 @@
-Subproject commit d9c309fdab807b716c2cf4d4a42989b8c34f712a
+Subproject commit 6910c9d9165801d8827d628cb72eb7ea9dd538c5

From da6be345f8a3a0962887e7f1f934adc9b07058bb Mon Sep 17 00:00:00 2001
From: Dmitriy Kozenko <8863314@gmail.com>
Date: Sat, 3 May 2025 15:49:38 +0300
Subject: [PATCH 09/10] Restored python package build

---
 CMakeLists.txt        | 12 ++----------
 cubool/CMakeLists.txt | 23 +++++++++++------------
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a432980..a6404f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,14 +34,6 @@ endif()
 
 # Configure cuda dependencies
 if (CUBOOL_WITH_CUDA)
-    message(STATUS "Add cub as cuda utility")
-    # set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE)
-    # set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
-    # set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE)
-    # add_subdirectory(deps/cub)
-    # add_library(cub INTERFACE IMPORTED)
-    # target_link_libraries(cub INTERFACE CUB::CUB)
-
     message(STATUS "Add nsparse library as crs matrix multiplication backend")
     add_subdirectory(deps/nsparse-um)
 endif()
@@ -60,7 +52,7 @@ endif()
 add_subdirectory(cubool)
 
 # Copy scripts into binary directory
-# file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/)
+file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/)
 
 # Copy python related stuff
-# file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/)
+file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/)
diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
index 5434c02..ed3cb41 100644
--- a/cubool/CMakeLists.txt
+++ b/cubool/CMakeLists.txt
@@ -222,7 +222,6 @@ endforeach()
 if (CUBOOL_WITH_CUDA)
     set_target_properties(cubool PROPERTIES CUDA_STANDARD 14)
     set_target_properties(cubool PROPERTIES CUDA_STANDARD_REQUIRED ON)
-    # set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
     # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
 
@@ -249,14 +248,14 @@ if (CUBOOL_BUILD_TESTS)
 endif()
 
 # Copy cubool library after build if allowed
-# if (CUBOOL_COPY_TO_PY_PACKAGE)
-#     set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME})
-# 
-#     add_custom_command(
-#         TARGET cubool POST_BUILD
-#         COMMAND "${CMAKE_COMMAND}" -E
-#         copy
-#         "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}"
-#         "${CMAKE_BINARY_DIR}/python/pycubool"
-#         COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder")
-# endif()
+if (CUBOOL_COPY_TO_PY_PACKAGE)
+    set(LIBRARY_FILE_NAME ${TARGET_FILE_NAME})
+
+    add_custom_command(
+        TARGET cubool POST_BUILD
+        COMMAND "${CMAKE_COMMAND}" -E
+        copy
+        "${CMAKE_BINARY_DIR}/cubool/${LIBRARY_FILE_NAME}"
+        "${CMAKE_BINARY_DIR}/python/pycubool"
+        COMMENT "Copy ${LIBRARY_FILE_NAME} compiled lib into python folder")
+endif()

From 69d38a67d74cc032ab9c313721f40cba2f79125c Mon Sep 17 00:00:00 2001
From: Dmitriy Kozenko <8863314@gmail.com>
Date: Sat, 3 May 2025 17:07:42 +0300
Subject: [PATCH 10/10] Removed temporary solutions for preparing PR

---
 cubool/include/cubool/cubool.h                | 14 +++++------
 cubool/sources/core/matrix.cpp                | 10 +++-----
 cubool/sources/cuda/kernels/spmerge.cuh       |  1 -
 .../tests/test_matrix_ewisemult_inverted.cpp  | 25 -------------------
 deps/nsparse-um/CMakeLists.txt                |  1 -
 .../include/nsparse/unified_allocator.h       |  1 -
 deps/nsparse-um/test/CMakeLists.txt           |  1 -
 deps/nsparse/test/CMakeLists.txt              |  1 -
 8 files changed, 11 insertions(+), 43 deletions(-)

diff --git a/cubool/include/cubool/cubool.h b/cubool/include/cubool/cubool.h
index ef7d9b5..2859542 100644
--- a/cubool/include/cubool/cubool.h
+++ b/cubool/include/cubool/cubool.h
@@ -926,20 +926,20 @@ CUBOOL_EXPORT CUBOOL_API cuBool_Status cuBool_Kronecker(
 );
 
 /**
- * Performs result = left + ~right, where
- *     '+' is boolean semiring 'or' operation
+ * Performs result = left * ~right, where
+ *     '*' is boolean semiring 'and' operation
  *     '~' is operation for invert matrix (0 swaps to 1 and 1 to 0)
  *
- * @note Matrices must be compatible
+ * @note To perform this operation matrices must be compatible
+ *          dim(left) = M x T
+ *          dim(right) = T x N
  *          dim(result) = M x N
- *          dim(left) = M x N
- *          dim(right) = M x N
  *
  * @note Pass `CUBOOL_HINT_TIME_CHECK` hint to measure operation time
  *
  * @param result[out] Destination matrix to store result
- * @param left Source matrix to be added
- * @param right Source matrix to be inverted and added
+ * @param left Source matrix to be multiplied
+ * @param right Source matrix to be inverted and multiplied
  * @param hints Hints for the operation
  *
  * @return Error code on this operation
diff --git a/cubool/sources/core/matrix.cpp b/cubool/sources/core/matrix.cpp
index 730fe63..49e35c3 100644
--- a/cubool/sources/core/matrix.cpp
+++ b/cubool/sources/core/matrix.cpp
@@ -370,21 +370,19 @@ namespace cubool {
         msk->commitCache();
         this->releaseCache();
 
-        /*
         if (checkTime) {
-            TIMER_ACTION(timer, mHnd->eWiseMult(*a->mHnd, *b->mHnd, false));
+            TIMER_ACTION(timer, mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false));
 
             LogStream stream(*Library::getLogger());
             stream << Logger::Level::Info
                    << "Time: " << timer.getElapsedTimeMs() << " ms "
-                   << "Matrix::eWiseMult: "
+                   << "Matrix::eWiseMultInverted: "
                    << this->getDebugMarker() << " = "
-                   << a->getDebugMarker() << " + "
-                   << b->getDebugMarker() << LogStream::cmt;
+                   << mat->getDebugMarker() << " + "
+                   << msk->getDebugMarker() << LogStream::cmt;
 
             return;
         }
-        */
 
         mHnd->eWiseMultInverted(*mat->mHnd, *msk->mHnd, false);
     }
diff --git a/cubool/sources/cuda/kernels/spmerge.cuh b/cubool/sources/cuda/kernels/spmerge.cuh
index 96805cb..4734e35 100644
--- a/cubool/sources/cuda/kernels/spmerge.cuh
+++ b/cubool/sources/cuda/kernels/spmerge.cuh
@@ -104,7 +104,6 @@ namespace cubool {
             MatrixType operator()(const MatrixType& a, const MatrixType& b) {
                 auto aNvals = a.m_vals;
                 auto bNvals = b.m_vals;
-                // auto worst = std::min(aNvals, bNvals);
                 auto worst = aNvals + bNvals;
 
                 // Allocate memory for the worst case scenario
diff --git a/cubool/tests/test_matrix_ewisemult_inverted.cpp b/cubool/tests/test_matrix_ewisemult_inverted.cpp
index 2e32520..250ba55 100644
--- a/cubool/tests/test_matrix_ewisemult_inverted.cpp
+++ b/cubool/tests/test_matrix_ewisemult_inverted.cpp
@@ -4,31 +4,6 @@
 
 using DataMatrix = std::vector<std::vector<int>>;
 
-static void printTestingMatrix(const testing::Matrix &matrix, std::string name = "") {
-    if (name != "") {
-        std::cout << name << std::endl;
-    }
-
-    for (int i = 0; i < matrix.nvals; i++) {
-        printf("(%d, %d)\n", matrix.rowsIndex[i], matrix.colsIndex[i]);
-    }
-}
-
-static void printCuboolMatrix(cuBool_Matrix matrix, std::string name = "") {
-    if (name != "") {
-        std::cout << name << std::endl;
-    }
-
-    cuBool_Index nvals;
-    cuBool_Matrix_Nvals(matrix, &nvals);
-    std::vector<cuBool_Index> rows(nvals), cols(nvals);
-    cuBool_Matrix_ExtractPairs(matrix, rows.data(), cols.data(), &nvals);
-
-    for (int i = 0; i < nvals; i++) {
-        printf("(%d, %d)\n", rows[i], cols[i]);
-    }
-}
-
 void testApplyNotMask(const DataMatrix &matrix_data, const DataMatrix &mask_data) {
     cuBool_Index nrows, ncols;
     nrows = matrix_data.size();
diff --git a/deps/nsparse-um/CMakeLists.txt b/deps/nsparse-um/CMakeLists.txt
index cdc4463..91ed084 100644
--- a/deps/nsparse-um/CMakeLists.txt
+++ b/deps/nsparse-um/CMakeLists.txt
@@ -3,7 +3,6 @@ project(nsparse_um LANGUAGES CXX CUDA)
 
 add_library(nsparse_um INTERFACE)
 target_include_directories(nsparse_um INTERFACE include/)
-# target_link_libraries(nsparse_um INTERFACE cub)
 target_link_libraries(nsparse_um INTERFACE)
 target_compile_options(nsparse_um INTERFACE $<$<COMPILE_LANGUAGE:CUDA>: --expt-relaxed-constexpr --expt-extended-lambda>)
 
diff --git a/deps/nsparse-um/include/nsparse/unified_allocator.h b/deps/nsparse-um/include/nsparse/unified_allocator.h
index c15f19e..9c3b2f8 100644
--- a/deps/nsparse-um/include/nsparse/unified_allocator.h
+++ b/deps/nsparse-um/include/nsparse/unified_allocator.h
@@ -3,7 +3,6 @@
 #include <thrust/detail/config.h>
 #include <thrust/device_ptr.h>
 #include <thrust/mr/allocator.h>
-// #include <thrust/memory/detail/device_system_resource.h>
 
 #include <limits>
 #include <stdexcept>
diff --git a/deps/nsparse-um/test/CMakeLists.txt b/deps/nsparse-um/test/CMakeLists.txt
index 6a29690..6a1aaff 100644
--- a/deps/nsparse-um/test/CMakeLists.txt
+++ b/deps/nsparse-um/test/CMakeLists.txt
@@ -3,7 +3,6 @@ project(nsparse_um_test CXX CUDA)
 
 add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp)
 
-# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um cub)
 target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse_um)
 target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)
 
diff --git a/deps/nsparse/test/CMakeLists.txt b/deps/nsparse/test/CMakeLists.txt
index 9927725..5abd415 100644
--- a/deps/nsparse/test/CMakeLists.txt
+++ b/deps/nsparse/test/CMakeLists.txt
@@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.11)
 project(nsparse_test CXX CUDA)
 
 add_executable(${PROJECT_NAME} src/nsparse_test.cu src/utils.cpp)
-# target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse cub)
 target_link_libraries(${PROJECT_NAME} PRIVATE gtest_main nsparse)
 target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)