 #if defined(__i386__) || defined(__x86_64__)
 
 #include <array>
+#include <cstring>
 #include <limits>
 #include <type_traits>
 
@@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> {
     // from float
     static __m256 load (const float* ptr) { return _mm256_loadu_ps (ptr); }
     static __m256 load (mask_t m, const float* ptr) {
-        // AVX2 doesn't have native masked load, so we load and then blend
-        auto data = _mm256_loadu_ps (ptr);
-        auto zero = _mm256_setzero_ps ();
-        auto mask_vec = create_blend_mask_avx2 (m);
-        return _mm256_blendv_ps (zero, data, mask_vec);
+        // A full-width load followed by a blend can read past the end of the buffer (SEGV),
+        // so use _mm256_maskload_ps, which performs the masked load safely.
+        auto mask_vec = _mm256_castps_si256 (create_blend_mask_avx2 (m));
+        return _mm256_maskload_ps (ptr, mask_vec);
     }
 
     // from float16
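
Note on the float path: _mm256_maskload_ps only touches the memory of lanes whose 32-bit mask element has its most significant bit set, so the same all-ones/all-zeros lane mask that drove _mm256_blendv_ps also works here once it is reinterpreted as __m256i. create_blend_mask_avx2 itself is not shown in this diff; the following is only a hypothetical sketch of a helper with the required behaviour, assuming mask_t packs one bit per float lane:

#include <immintrin.h>
#include <cstdint>

using mask_t = uint8_t; // assumption: bit i of m corresponds to float lane i

static inline __m256 create_blend_mask_avx2_sketch (mask_t m) {
    // Lane i holds the value 1 << i (_mm256_set_epi32 takes elements e7..e0).
    const __m256i lane_bits = _mm256_set_epi32 (128, 64, 32, 16, 8, 4, 2, 1);
    const __m256i bcast     = _mm256_set1_epi32 (m);
    // A lane becomes all-ones (MSB set) exactly when its bit is present in m.
    const __m256i selected  = _mm256_cmpeq_epi32 (_mm256_and_si256 (bcast, lane_bits), lane_bits);
    return _mm256_castsi256_ps (selected);
}
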
@@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> {
     }
 
     static __m256 load (mask_t m, const Float16* ptr) {
-        auto data = _mm256_cvtph_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*>(ptr)));
-        auto zero = _mm256_setzero_ps ();
-        auto mask_vec = create_blend_mask_avx2 (m);
-        return _mm256_blendv_ps (zero, data, mask_vec);
+        // Safe masked load using a temporary buffer to avoid SEGV
+        __m128i buffer = _mm_setzero_si128 ();
+        std::memcpy (&buffer, ptr, __builtin_popcount (m) * sizeof (Float16));
+        return _mm256_cvtph_ps (buffer);
     }
 
     // from uint8
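
The __builtin_popcount (m) byte count only covers the right elements if the set bits of m form a contiguous run starting at bit 0, i.e. the mask describes a loop tail rather than an arbitrary lane selection. Under that assumption the pattern reduces to "copy the first n elements into a zeroed register", sketched here with uint16_t standing in for Float16 (hypothetical names, not from the file):

#include <immintrin.h>
#include <cstdint>
#include <cstring>

// Load the first n (1..8) half-precision values, zero-filling the remaining lanes.
static inline __m256 load_f16_tail_sketch (const uint16_t* ptr, int n) {
    __m128i buffer = _mm_setzero_si128 ();              // inactive lanes stay 0
    std::memcpy (&buffer, ptr, n * sizeof (uint16_t));  // copy only the valid prefix
    return _mm256_cvtph_ps (buffer);                    // F16C: 8 halves -> 8 floats
}
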
@@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> {
     }
 
     static __m256 load (mask_t m, const uint8_t* ptr) {
-        auto data = _mm256_cvtepi32_ps (_mm256_cvtepu8_epi32 (
-            _mm_cvtsi64_si128 (*(reinterpret_cast<const int64_t*>(ptr)))
-        ));
-        auto zero = _mm256_setzero_ps ();
-        auto mask_vec = create_blend_mask_avx2 (m);
-        return _mm256_blendv_ps (zero, data, mask_vec);
+        // Safe masked load using a temporary buffer to avoid SEGV
+        int64_t buffer = 0;
+        std::memcpy (&buffer, ptr, __builtin_popcount (m) * sizeof (uint8_t));
+        return _mm256_cvtepi32_ps (_mm256_cvtepu8_epi32 (_mm_cvtsi64_si128 (buffer)));
     }
 
     // from int8
@@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> {
     }
 
     static __m256 load (mask_t m, const int8_t* ptr) {
-        auto data = _mm256_cvtepi32_ps (_mm256_cvtepi8_epi32 (
-            _mm_cvtsi64_si128 (*(reinterpret_cast<const int64_t*>(ptr)))
-        ));
-        auto zero = _mm256_setzero_ps ();
-        auto mask_vec = create_blend_mask_avx2 (m);
-        return _mm256_blendv_ps (zero, data, mask_vec);
+        // Safe masked load using a temporary buffer to avoid SEGV
+        int64_t buffer = 0;
+        std::memcpy (&buffer, ptr, __builtin_popcount (m) * sizeof (int8_t));
+        return _mm256_cvtepi32_ps (_mm256_cvtepi8_epi32 (_mm_cvtsi64_si128 (buffer)));
     }
 
     // We do not need to treat the left or right-hand differently.
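
The uint8 and int8 overloads share the same buffer trick and differ only in the widening step: _mm256_cvtepu8_epi32 zero-extends while _mm256_cvtepi8_epi32 sign-extends, so a 0xFF byte becomes 255.0f in the unsigned path and -1.0f in the signed one. A hypothetical standalone sketch of the signed path (the names and the n parameter are illustrative, not from the file):

#include <immintrin.h>
#include <cstdint>
#include <cstring>

// Convert the first n (1..8) int8 values to floats, zero-filling the remaining lanes.
static inline __m256 load_i8_tail_sketch (const int8_t* ptr, int n) {
    int64_t buffer = 0;
    std::memcpy (&buffer, ptr, n);                         // at most 8 valid bytes
    const __m128i bytes   = _mm_cvtsi64_si128 (buffer);    // low 8 bytes hold the data
    const __m256i widened = _mm256_cvtepi8_epi32 (bytes);  // sign-extend to 32-bit ints
    return _mm256_cvtepi32_ps (widened);                   // ints -> floats
}
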