Malkovsky · RSMT98 · Dec 10, 2025 · Dec 19, 2025 · Dec 21, 2025 · Dec 22, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -22,6 +22,83 @@ if(ENABLE_ADDRESS_SANITIZER)
     message(STATUS "AddressSanitizer is ON")
 endif()
 
+set(SDSL_ROOT "" CACHE PATH "Installation prefix of sdsl-lite (contains include/ and lib/)")
+
+find_path(
+    SDSL_INCLUDE_DIR
+    NAMES sdsl/config.hpp
+    HINTS
+        "${SDSL_ROOT}"
+    PATH_SUFFIXES
+        include
+    PATHS
+        "$ENV{HOME}"
+        /usr
+        /usr/local
+)
+
+if(NOT SDSL_INCLUDE_DIR)
+    message(FATAL_ERROR "Could not find sdsl/config.hpp. "
+                        "Set SDSL_ROOT or SDSL_INCLUDE_DIR manually.")
+endif()
+
+find_library(
+    SDSL_LIBRARY
+    NAMES sdsl
+    HINTS
+        "${SDSL_ROOT}"
+    PATH_SUFFIXES
+        lib
+        lib64
+    PATHS
+        "$ENV{HOME}"
+        /usr
+        /usr/local
+)
+
+find_library(
+    DIVSUFSORT_LIBRARY
+    NAMES divsufsort
+    HINTS
+        "${SDSL_ROOT}"
+    PATH_SUFFIXES
+        lib
+        lib64
+    PATHS
+        "$ENV{HOME}"
+        /usr
+        /usr/local
+)
+
+find_library(
+    DIVSUFSORT64_LIBRARY
+    NAMES divsufsort64
+    HINTS
+        "${SDSL_ROOT}"
+    PATH_SUFFIXES
+        lib
+        lib64
+    PATHS
+        "$ENV{HOME}"
+        /usr
+        /usr/local
+)
+
+if(NOT SDSL_LIBRARY OR NOT DIVSUFSORT_LIBRARY OR NOT DIVSUFSORT64_LIBRARY)
+    message(FATAL_ERROR "Could not find sdsl-lite libraries (sdsl, divsufsort, divsufsort64). "
+                        "Set SDSL_ROOT or SDSL_LIBRARY*/DIVSUFSORT* manually.")
+endif()
+
+add_library(sdsl_lite INTERFACE)
+target_include_directories(sdsl_lite INTERFACE "${SDSL_INCLUDE_DIR}")
+target_link_libraries(sdsl_lite INTERFACE
+    "${SDSL_LIBRARY}"
+    "${DIVSUFSORT_LIBRARY}"
+    "${DIVSUFSORT64_LIBRARY}"
+)
+message(STATUS "Found sdsl-lite includes in: ${SDSL_INCLUDE_DIR}")
+message(STATUS "Found sdsl-lite libs: ${SDSL_LIBRARY}; ${DIVSUFSORT_LIBRARY}; ${DIVSUFSORT64_LIBRARY}")
+
 include(FetchContent)
 FetchContent_Declare(
     googletest
@@ -103,6 +180,17 @@ target_link_libraries(test_rmm
     gtest
     gtest_main)
 
+add_executable(bench_rmm_sdsl
+    src/bench_rmm_sdsl.cpp
+)
+target_include_directories(bench_rmm_sdsl
+    PUBLIC include
+)
+target_link_libraries(bench_rmm_sdsl
+    PRIVATE
+        benchmark
+        sdsl_lite
+)
 
 FetchContent_Declare(
     doxygen-awesome-css

diff --git a/README.md b/README.md
@@ -70,7 +70,14 @@ Benchmarks are random 50/50 0-1 bitvectors up to $2^{34}$ bits.
 ./bench_rmm
 ```
 
-For visualization, write the JSON output to a file using `--benchmark_out=<file>` (e.g. `./bench_rmm --benchmark_out=rmm_bench.json`) and plot it with `misc/plot_rmm.py`.
+For comparison with range min-max tree implementation from [sdsl-lite](https://github.com/simongog/sdsl-lite) (Release build required: `cmake .. -DCMAKE_BUILD_TYPE=Release`):
+
+```bash
+sudo cpupower frequency-set --governor performance
+./bench_rmm_sdsl --benchmark_out=rmm_bench_sdsl.json
+```
+
+For visualization, write the JSON output to a file using `--benchmark_out=<file>` (e.g. `./bench_rmm --benchmark_out=rmm_bench.json`) and plot it with `misc/plot_rmm.py` (add `--sdsl-json rmm_bench_sdsl.json` for comparison).
 
 ---
 

diff --git a/REPORT.md b/REPORT.md
diff --git a/include/bits.h b/include/bits.h
@@ -4,7 +4,9 @@
 
 #include <array>
 #include <bit>
+#include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <numeric>
 
 #if defined(__AVX512VPOPCNTDQ__) && defined(__AVX512F__) && \
@@ -17,7 +19,7 @@
 // Lookup table for 4-bit popcount
 // This table maps each 4-bit value (0-15) to its population count
 // clang-format off
-const __m256i lookup_popcount_4 = _mm256_setr_epi8(
+static inline const __m256i lookup_popcount_4 = _mm256_setr_epi8(
     0, 1, 1, 2,  // 0000, 0001, 0010, 0011
     1, 2, 2, 3,  // 0100, 0101, 0110, 0111
     1, 2, 2, 3,  // 1000, 1001, 1010, 1011
@@ -30,7 +32,7 @@ const __m256i lookup_popcount_4 = _mm256_setr_epi8(
     2, 3, 3, 4   // 1100, 1101, 1110, 1111
 );
 
-const __m256i mask_first_half = _mm256_setr_epi8(
+static inline const __m256i mask_first_half = _mm256_setr_epi8(
     0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF,
@@ -405,108 +407,3 @@ void rank_32x8(const uint8_t* x, uint8_t* result) {
   }
 #endif
 }
-
-/**
- * @brief Efficiently searches for the first occurrence of a 16-bit value in
- * the range [@p begin, @p end_excl) using AVX2 when available.
- * @details Loads 16 consecutive int16_t elements (256 bits) per iteration.
- * Compares them against the @p target value using vectorized equality.
- * If any match is found, extracts the index of the first matching lane from
- * the comparison mask. Falls back to a scalar tail loop for leftover
- * elements, or to a fully scalar search if AVX2 is not supported.
- * @returns The index of the first match, or @p npos if the value is not found.
- */
-static inline size_t find_forward_equal_i16_avx2(const int16_t* arr,
-                                                 const size_t& begin,
-                                                 const size_t& end_excl,
-                                                 const int16_t& target,
-                                                 const size_t& npos) noexcept {
-#ifdef PIXIE_AVX2_SUPPORT
-  static constexpr size_t STEP = 16;
-  __m256i vtarget = _mm256_set1_epi16(target);
-  size_t i = begin;
-  size_t n = end_excl;
-  for (; i + STEP <= n; i += STEP) {
-    unsigned mask = _mm256_movemask_epi8(_mm256_cmpeq_epi16(
-        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(arr + i)),
-        vtarget));
-    if (mask) {
-      return i + (std::countr_zero(mask) >> 1);
-    }
-  }
-  for (; i < n; ++i) {
-    if (arr[i] == target) {
-      return i;
-    }
-  }
-#else
-  for (size_t i = begin; i < end_excl; ++i) {
-    if (arr[i] == target) {
-      return i;
-    }
-  }
-#endif
-  return npos;
-}
-
-/**
- * @brief Performs a backward search for a 16-bit value in a given range.
- * @details Scans the array segment [@p begin .. @p end_incl] from right to
- * left.
- * If AVX2 is available, processes data in 256-bit blocks (16 × int16_t) using
- * vectorized equality comparison for higher throughput. Falls back to a
- * scalar backward scan when AVX2 is not supported. Returns the index of the
- * rightmost occurrence of @p target, or @p npos if no match is found.
- */
-static inline size_t find_backward_equal_i16_avx2(const int16_t* arr,
-                                                  const size_t& begin,
-                                                  const size_t& end_incl,
-                                                  const int16_t& target,
-                                                  const size_t& npos) noexcept {
-  if (begin > end_incl) {
-    return npos;
-  }
-#ifdef PIXIE_AVX2_SUPPORT
-  static constexpr size_t STEP = 16;
-  size_t len = end_incl + 1 - begin;
-  size_t nblocks = len / STEP;
-  __m256i vtarget = _mm256_set1_epi16(target);
-  if (nblocks > 0) {
-    size_t first_block = begin + (len % STEP);
-    for (size_t p = first_block + (nblocks - 1) * STEP;;) {
-      unsigned mask = _mm256_movemask_epi8(_mm256_cmpeq_epi16(
-          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(arr + p)),
-          vtarget));
-      if (mask) {
-        return p + ((31u - std::countl_zero(mask)) >> 1);
-      }
-      if (p == first_block) {
-        break;
-      }
-      p -= STEP;
-    }
-
-    for (size_t i = first_block; i > begin;) {
-      --i;
-      if (arr[i] == target) {
-        return i;
-      }
-    }
-  } else {
-    for (size_t i = end_incl + 1; i > begin;) {
-      --i;
-      if (arr[i] == target) {
-        return i;
-      }
-    }
-  }
-#else
-  for (size_t i = end_incl + 1; i > begin;) {
-    --i;
-    if (arr[i] == target) {
-      return i;
-    }
-  }
-#endif
-  return npos;
-}