From 3e8820761ebfada8e64ffedfcb45f0866eeff4ec Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 12 Dec 2025 04:40:21 -0800 Subject: [PATCH 1/3] add more pre-commit hooks --- .pre-commit-config.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7771f5cd..a63256b4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,3 +27,13 @@ repos: hooks: - id: check-yaml files: \.ya?ml$ + - id: check-added-large-files + - id: check-illegal-windows-names + - id: check-json + files: \.json$ + - id: check-merge-conflict + - id: check-toml + files: \.toml$ + - id: end-of-file-fixer + - id: trailing-whitespace + - id: mixed-line-ending From 6f5759d33ab94301b80d4d4aa9c368febbe44a9b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 12 Dec 2025 04:41:58 -0800 Subject: [PATCH 2/3] whitespace, end-of-file, end-of-line linter fixes --- .github/CODEOWNERS | 12 +++++------ CMakeLists.txt | 1 - HISTORY.md | 1 - LICENSE | 7 +++---- SECURITY.md | 1 - bindings/cpp/src/api_defs.cpp | 2 +- bindings/python/.gitignore | 1 - bindings/python/CMakeLists.txt | 1 - bindings/python/src/svs/__init__.py | 1 - bindings/python/src/svs/upgrader.py | 1 - bindings/python/tests/__init__.py | 1 - bindings/python/tests/dynamic.py | 1 - bindings/python/tests/test_dynamic_flat.py | 2 +- bindings/python/tests/test_dynamic_vamana.py | 1 - bindings/python/tests/test_reconstruction.py | 1 - bindings/python/tests/test_vamana_common.py | 1 - cmake/FindNuma.cmake | 1 - cmake/openmp.cmake | 1 - cmake/options.cmake | 1 - cmake/patches/tomlplusplus_v330.patch | 4 ++-- data/serialization.toml | 1 - docker/x86_64/manylinux2014/Dockerfile | 1 - .../python/vamana_with_compression_lvq.py | 20 +++++++++---------- include/svs/index/flat/dynamic_flat.h | 2 +- include/svs/index/vamana/build_params.h | 2 +- tests/svs/index/inverted/memory_based.cpp | 2 +- tools/clang-format.sh | 1 - 27 files changed, 26 insertions(+), 45 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8a0f4191..fd2ef793 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,10 +1,10 @@ # C/C++ library code /include/ @ibhati @ahuber21 -/cmake/ @mihaic @ibhati @ahuber21 -/tests/ @mihaic @ibhati @ahuber21 @yuejiaointel -/benchmark/ @mihaic @ibhati @ahuber21 -/tools/ @mihaic @ibhati @ahuber21 -/utils/ @mihaic @ibhati @ahuber21 +/cmake/ @mihaic @ibhati @ahuber21 +/tests/ @mihaic @ibhati @ahuber21 @yuejiaointel +/benchmark/ @mihaic @ibhati @ahuber21 +/tools/ @mihaic @ibhati @ahuber21 +/utils/ @mihaic @ibhati @ahuber21 # Python and other language bindings /bindings/ @ethanglaser @ibhati @ahuber21 @@ -14,7 +14,7 @@ /data/ @ibhati @ahuber21 # Docker and build environment -/docker/ @mihaic @ahuber21 +/docker/ @mihaic @ahuber21 # CI and infra /.github @mihaic @homksei @yuejiaointel @ethanglaser diff --git a/CMakeLists.txt b/CMakeLists.txt index ad9ef0e5..a7aea24c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,4 +177,3 @@ install(FILES ${SVS_CMAKE_FIND_FILES} DESTINATION "${LIB_CONFIG_INSTALL_DIR}" ) - diff --git a/HISTORY.md b/HISTORY.md index 7ca57170..315672c2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -707,4 +707,3 @@ affect performance or users relying on internal APIs. ## Third Party * Bump [fmtlib](https://github.com/fmtlib/fmt) from 9.1.0 to 10.1.1. - diff --git a/LICENSE b/LICENSE index 32c3d339..95fe50cc 100644 --- a/LICENSE +++ b/LICENSE @@ -202,10 +202,10 @@ limitations under the License. ============================================================================ Intel® oneAPI Math Kernel Library (Intel® oneMKL): Copyright 2022 Intel Corporation - + Intel® Scalable Vector Search: Copyright (C) Intel Corporation provides shared library in binary form with optional algorithms implementations that can be used with Scalable Vector Search library - - ------------------------------ + + ------------------------------ Intel Simplified Software License (Version October 2022) Use and Redistribution. You may use and redistribute the software, which is @@ -277,4 +277,3 @@ objections. THE UNITED NATIONS CONVENTION ON CONTRACTS FOR THE INTERNATIONAL SALE OF GOODS (1980) IS SPECIFICALLY EXCLUDED AND WILL NOT APPLY TO THE SOFTWARE. - diff --git a/SECURITY.md b/SECURITY.md index de80538a..8e862752 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -19,4 +19,3 @@ Intel is committed to rapidly addressing security vulnerabilities affecting our ## Reporting a Vulnerability Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). - diff --git a/bindings/cpp/src/api_defs.cpp b/bindings/cpp/src/api_defs.cpp index a845ff80..f76ce0ce 100644 --- a/bindings/cpp/src/api_defs.cpp +++ b/bindings/cpp/src/api_defs.cpp @@ -43,4 +43,4 @@ void Status::destroy_message() noexcept { message_storage_ = nullptr; } } // namespace runtime -} // namespace svs \ No newline at end of file +} // namespace svs diff --git a/bindings/python/.gitignore b/bindings/python/.gitignore index c5749e03..37befcfb 100644 --- a/bindings/python/.gitignore +++ b/bindings/python/.gitignore @@ -2,4 +2,3 @@ _skbuild/ build/ *.egg-info - diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 41aa381b..2733d32a 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -90,4 +90,3 @@ if(DEFINED SKBUILD) # wheel. install(FILES "${CMAKE_CURRENT_LIST_DIR}/../../data/serialization.toml" DESTINATION .) endif() - diff --git a/bindings/python/src/svs/__init__.py b/bindings/python/src/svs/__init__.py index 947c67dc..8a11b26d 100644 --- a/bindings/python/src/svs/__init__.py +++ b/bindings/python/src/svs/__init__.py @@ -30,4 +30,3 @@ # Make the upgrader available without explicit import. from . import upgrader - diff --git a/bindings/python/src/svs/upgrader.py b/bindings/python/src/svs/upgrader.py index 908eefef..4d731124 100644 --- a/bindings/python/src/svs/upgrader.py +++ b/bindings/python/src/svs/upgrader.py @@ -297,4 +297,3 @@ def upgrade( ) print(f"File {path} is up to date!") - diff --git a/bindings/python/tests/__init__.py b/bindings/python/tests/__init__.py index f8db5064..f7059a1a 100644 --- a/bindings/python/tests/__init__.py +++ b/bindings/python/tests/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/bindings/python/tests/dynamic.py b/bindings/python/tests/dynamic.py index 3d79dc95..6ce957ca 100644 --- a/bindings/python/tests/dynamic.py +++ b/bindings/python/tests/dynamic.py @@ -81,4 +81,3 @@ def ground_truth(self, num_neighbors: int): index = svs.Flat(sub_dataset, svs.DistanceType.L2, self.num_threads) I, D = index.search(self.queries, num_neighbors) return ids_np[I] - diff --git a/bindings/python/tests/test_dynamic_flat.py b/bindings/python/tests/test_dynamic_flat.py index 57a4e8f5..ff830421 100644 --- a/bindings/python/tests/test_dynamic_flat.py +++ b/bindings/python/tests/test_dynamic_flat.py @@ -72,7 +72,7 @@ def recall_check( print(f"Reloaded index has {len(reloaded.all_ids())} IDs") print(f"Original IDs sample: {sorted(list(index.all_ids()))[:10]}") print(f"Reloaded IDs sample: {sorted(list(reloaded.all_ids()))[:10]}") - + I, D = reloaded.search(reference.queries, num_neighbors) reloaded_recall = svs.k_recall_at(gt, I, num_neighbors, num_neighbors) diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 5580e045..0c286bcc 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -201,4 +201,3 @@ def test_build_from_loader(self): recall = svs.k_recall_at(groundtruth, I, k, k) # Recall in plausible range self.assertTrue(0.5 < recall <= 1.0) - diff --git a/bindings/python/tests/test_reconstruction.py b/bindings/python/tests/test_reconstruction.py index c8e0e08b..e5744307 100644 --- a/bindings/python/tests/test_reconstruction.py +++ b/bindings/python/tests/test_reconstruction.py @@ -90,4 +90,3 @@ def test_reconstruction(self): self.assertTrue(np.array_equal(shuffled_data, r)) else: raise Exception(f"Unhandled loader kind: {loader}") - diff --git a/bindings/python/tests/test_vamana_common.py b/bindings/python/tests/test_vamana_common.py index 6e61e975..a9194505 100644 --- a/bindings/python/tests/test_vamana_common.py +++ b/bindings/python/tests/test_vamana_common.py @@ -64,4 +64,3 @@ def test_vamana_search_parameters(self): x = svs.VamanaSearchParameters(svs.SearchBufferConfig(10, 20), True) self.assertEqual(x.buffer_config, svs.SearchBufferConfig(10, 20)) self.assertEqual(x.search_buffer_visited_set, True) - diff --git a/cmake/FindNuma.cmake b/cmake/FindNuma.cmake index a187c69b..ea78df43 100644 --- a/cmake/FindNuma.cmake +++ b/cmake/FindNuma.cmake @@ -49,4 +49,3 @@ endif() mark_as_advanced(NUMA_INCLUDE_DIRS NUMA_LIBRARY_DIR NUMA_LIBRARY) find_package_handle_standard_args(Numa REQUIRED_VARS NUMA_INCLUDE_DIRS NUMA_LIBRARY) - diff --git a/cmake/openmp.cmake b/cmake/openmp.cmake index 61bcc4c2..bad38005 100644 --- a/cmake/openmp.cmake +++ b/cmake/openmp.cmake @@ -18,4 +18,3 @@ if (OPENMP_CXX_FOUND) else() message(FATAL_ERROR "no OpenMP support") endif() - diff --git a/cmake/options.cmake b/cmake/options.cmake index 2f9dd416..15d6c22b 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -208,4 +208,3 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(svs_compile_options INTERFACE -Wno-uninitialized) endif() endif() - diff --git a/cmake/patches/tomlplusplus_v330.patch b/cmake/patches/tomlplusplus_v330.patch index 0b277951..14514fbb 100644 --- a/cmake/patches/tomlplusplus_v330.patch +++ b/cmake/patches/tomlplusplus_v330.patch @@ -4,7 +4,7 @@ index 1179d37..59cf878 100644 +++ b/CMakeLists.txt @@ -31,7 +31,8 @@ target_include_directories( target_compile_features(tomlplusplus_tomlplusplus INTERFACE cxx_std_17) - + # ---- Install rules and examples ---- -if(PROJECT_IS_TOP_LEVEL) +option(TOMLPLUSPLUS_INSTALL "Enable cmake installation" OFF) @@ -19,7 +19,7 @@ index 4b754b7..5ebc665 100644 @@ -134,6 +134,18 @@ TOML_IMPL_NAMESPACE_START return node_ptr{ make_node_impl(static_cast(val), flags) }; } - + + TOML_NODISCARD + inline node_ptr make_node(node_ptr&& val, value_flags flags = preserve_source_value_flags) + { diff --git a/data/serialization.toml b/data/serialization.toml index 54655cd5..24554ef0 100644 --- a/data/serialization.toml +++ b/data/serialization.toml @@ -321,4 +321,3 @@ schema_version = "v0.0.0" graph = "str" queries_f32 = "str" queries_in_training_set = "int" - diff --git a/docker/x86_64/manylinux2014/Dockerfile b/docker/x86_64/manylinux2014/Dockerfile index 78726782..1935f6ae 100644 --- a/docker/x86_64/manylinux2014/Dockerfile +++ b/docker/x86_64/manylinux2014/Dockerfile @@ -26,4 +26,3 @@ RUN yum --disablerepo=epel install -y \ # Enable CMake to find the config files for Intel(R) MKL. COPY ./entry.sh . ENTRYPOINT ["./entry.sh"] - diff --git a/examples/python/vamana_with_compression_lvq.py b/examples/python/vamana_with_compression_lvq.py index 47fc47b4..80530ca8 100644 --- a/examples/python/vamana_with_compression_lvq.py +++ b/examples/python/vamana_with_compression_lvq.py @@ -60,7 +60,7 @@ def run(): svs.DataType.float32 ) - # Next - we construct a LVQLoader which is configured to use LVQ compression with 4 + # Next - we construct a LVQLoader which is configured to use LVQ compression with 4 # bits for the primary and 8 bits for the residual quantization. B1 = 4 # Number of bits for the first level LVQ quantization B2 = 8 # Number of bits for the residuals quantization @@ -69,9 +69,9 @@ def run(): residual=B2, ) # [create-loader] - + # An index can be constructed using a LeanVec dataset. - # [build-parameters] + # [build-parameters] parameters = svs.VamanaBuildParameters( graph_max_degree = 64, window_size = 128, @@ -89,19 +89,19 @@ def run(): # Set the search window size of the index and perform queries and load the queries. # [perform-queries] - n_neighbors = 10 + n_neighbors = 10 index.search_window_size = 20 - index.num_threads = 4 + index.num_threads = 4 - queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) I, D = index.search(queries, n_neighbors) # [perform-queries] # Compare with the groundtruth. # [recall] - groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) recall = svs.k_recall_at(groundtruth, I, n_neighbors, n_neighbors) - print(f"Recall = {recall}") + print(f"Recall = {recall}") # [recall] assert_equal(recall, 0.953) @@ -112,10 +112,10 @@ def run(): os.path.join(test_data_dir, "example_graph"), os.path.join(test_data_dir, "example_data"), ) - + index = svs.Vamana( os.path.join(test_data_dir, "example_config"), - os.path.join(test_data_dir, "example_graph"), + os.path.join(test_data_dir, "example_graph"), os.path.join(test_data_dir, "example_data"), svs.DistanceType.L2, num_threads = 4, diff --git a/include/svs/index/flat/dynamic_flat.h b/include/svs/index/flat/dynamic_flat.h index 5a83fe22..868054ba 100644 --- a/include/svs/index/flat/dynamic_flat.h +++ b/include/svs/index/flat/dynamic_flat.h @@ -767,4 +767,4 @@ auto auto_dynamic_assemble( ); } -} // namespace svs::index::flat \ No newline at end of file +} // namespace svs::index::flat diff --git a/include/svs/index/vamana/build_params.h b/include/svs/index/vamana/build_params.h index 65b5039c..80c1df23 100644 --- a/include/svs/index/vamana/build_params.h +++ b/include/svs/index/vamana/build_params.h @@ -130,4 +130,4 @@ struct VamanaBuildParameters { ); } }; -} // namespace svs::index::vamana \ No newline at end of file +} // namespace svs::index::vamana diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index ad7d01b4..d418dd83 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -72,4 +72,4 @@ CATCH_TEST_CASE("InvertedIndex Logging Test", "[long][logging]") { CATCH_REQUIRE(global_captured_logs.empty()); CATCH_REQUIRE(captured_logs[0].find("Vamana Build Parameters:") != std::string::npos); CATCH_REQUIRE(captured_logs[1].find("Number of syncs") != std::string::npos); -} \ No newline at end of file +} diff --git a/tools/clang-format.sh b/tools/clang-format.sh index 7646ae5a..f87a7cdd 100755 --- a/tools/clang-format.sh +++ b/tools/clang-format.sh @@ -22,4 +22,3 @@ for i in "${DIRECTORIES[@]}" do find "./$i" \( -iname "*.h" -o -iname "*.cpp" \) ! -iname "*toml_impl.h" | xargs "$CLANGFORMAT" -i done - From 5b4d925326ccd3296eac4499280bf1b70be63f06 Mon Sep 17 00:00:00 2001 From: Andreas Huber <9201869+ahuber21@users.noreply.github.com> Date: Mon, 15 Dec 2025 17:57:46 +0100 Subject: [PATCH 3/3] Update .pre-commit-config.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mihai Capotă --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a63256b4..24447613 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,4 +36,5 @@ repos: files: \.toml$ - id: end-of-file-fixer - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] - id: mixed-line-ending