
Commit fdba8a8

Merge pull request #22 from lightonai/accelerate-search-and-reduce-memory-usage
Reduce memory usage of Fast-Plaid
2 parents 4348cdd + 455f87d commit fdba8a8

23 files changed: +1331 -503 lines changed
.github/workflows/publish-280.yaml

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ on:
 
 jobs:
   build_wheels:
-    name: Build wheels for Python ${{ matrix.python-version }} on ${{ matrix.os }} - 271
+    name: Build wheels for Python ${{ matrix.python-version }} on ${{ matrix.os }} - 280
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false

.github/workflows/publish-290.yaml

Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
+on:
+  workflow_dispatch:
+
+jobs:
+  build_wheels:
+    name: Build wheels for Python ${{ matrix.python-version }} on ${{ matrix.os }} - 290
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Prepare pyproject.toml
+        shell: bash
+        run: |
+          echo "Deleting existing pyproject.toml (if any)..."
+          rm -f pyproject.toml
+          echo "Using ci-290.toml as pyproject.toml..."
+          cp ci-290.toml pyproject.toml
+          echo "File preparation complete."
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Prepare Python Version for CIBW_BUILD
+        id: prepare_python_version
+        shell: bash
+        run: |
+          PYTHON_VERSION_NO_DOT=$(echo "${{ matrix.python-version }}" | tr -d '.')
+          echo "PYTHON_VERSION_NO_DOT=$PYTHON_VERSION_NO_DOT" >> $GITHUB_OUTPUT
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.23.3
+        env:
+          CIBW_BUILD: "cp${{ steps.prepare_python_version.outputs.PYTHON_VERSION_NO_DOT }}-*"
+          CIBW_SKIP: "*-manylinux_i686 *-musllinux_* *-win32"
+          CIBW_BUILD_VERBOSITY: 1
+          LIBTORCH_BYPASS_VERSION_CHECK: 1
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
+          CIBW_MANYLINUX_ARM64_IMAGE: manylinux_2_28
+          CIBW_ENVIRONMENT: |
+            PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu/torch_stable.html"
+          CIBW_PIP_ARGS: --no-cache-dir
+          CIBW_BEFORE_BUILD: "pip install torch==2.9.0 numpy maturin delvewheel"
+          CIBW_REPAIR_WHEEL_COMMAND_LINUX: >
+            LD_LIBRARY_PATH=$(python -c 'import torch, os; print(os.path.join(os.path.dirname(torch.__file__), "lib"))'):$LD_LIBRARY_PATH auditwheel repair -w {dest_dir} {wheel} --exclude libtorch.so --exclude libtorch_cpu.so --exclude libtorch_python.so
+          CIBW_REPAIR_WHEEL_COMMAND_MACOS: >
+            DYLD_LIBRARY_PATH=$(python -c 'import torch, os; print(os.path.join(os.path.dirname(torch.__file__), "lib"))') delocate-wheel -w {dest_dir} -v {wheel} --exclude libtorch.dylib --exclude libtorch_cpu.dylib --exclude libtorch_python.dylib
+          CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: >-
+            FOR /F "usebackq tokens=*" %i IN (`python -c "import torch, os; print(os.path.join(os.path.dirname(torch.__file__), 'lib'))"`) DO (set "PATH=%i;%PATH%" && delvewheel repair -w {dest_dir} {wheel} --no-dll torch.dll --no-dll torch_cpu.dll --no-dll torch_python.dll)
+
+      - name: Upload wheels to artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-${{ matrix.os }}-py${{ matrix.python-version }}
+          path: ./wheelhouse/*.whl
+
+  publish:
+    name: Publish 290 to PyPI
+    needs: build_wheels
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Download all wheels
+        uses: actions/download-artifact@v4
+        with:
+          pattern: wheels-*-py*
+          path: dist
+          merge-multiple: true
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@v1.12.4
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          skip-existing: true

.gitignore

Lines changed: 2 additions & 1 deletion

@@ -156,4 +156,5 @@ quora/
 nq/
 dbpedia-entity/
 hotpotqa/
-msmarco/
+msmarco/
+index/

Cargo.toml

Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 [package]
 name = "fast_plaid_rust"
-version = "1.2.4"
+version = "1.2.5"
 edition = "2021"
 build = "build.rs"
 
@@ -18,7 +18,7 @@ serde_json = "1.0.140"
 libc = "0.2.172"
 parking_lot = "0.12.3"
 once_cell = "1.21.3"
-indicatif = "0.17.11"
+indicatif = "0.18.2"
 pyo3 = { version = "0.24.2", features = ["extension-module"] }
 pyo3-tch = "0.20.0"
 rand = "0.9.1"

Makefile

Lines changed: 3 additions & 3 deletions

@@ -1,16 +1,16 @@
 lint:
 	cargo clean
-	uv pip install torch==2.8.0
+	uv pip install torch==2.9.0
 	uv run --extra dev pre-commit run --files python/**/**/**.py
 
 install:
 	cargo clean
-	uv pip install torch==2.8.0
+	uv pip install torch==2.9.0
 	uv pip install -e ".[dev]"
 
 test:
 	cargo clean
 	uv run tests/test.py
 
 evaluate:
-	uv run test.py
+	uv run benchmark/benchmark.py

README.md

Lines changed: 24 additions & 9 deletions

@@ -44,10 +44,11 @@ FastPlaid is available in multiple versions to support different PyTorch versions.
 
 | FastPlaid Version | PyTorch Version | Installation Command                |
 | ----------------- | --------------- | ----------------------------------- |
-| 1.2.4.280         | 2.8.0           | `pip install fast-plaid==1.2.4.280` |
-| 1.2.4.271         | 2.7.1           | `pip install fast-plaid==1.2.4.271` |
-| 1.2.4.270         | 2.7.0           | `pip install fast-plaid==1.2.4.270` |
-| 1.2.4.260         | 2.6.0           | `pip install fast-plaid==1.2.4.260` |
+| 1.2.5.290         | 2.9.0           | `pip install fast-plaid==1.2.5.290` |
+| 1.2.5.280         | 2.8.0           | `pip install fast-plaid==1.2.5.280` |
+| 1.2.5.271         | 2.7.1           | `pip install fast-plaid==1.2.5.271` |
+| 1.2.5.270         | 2.7.0           | `pip install fast-plaid==1.2.5.270` |
+| 1.2.5.260         | 2.6.0           | `pip install fast-plaid==1.2.5.260` |
 
 ### Adding FastPlaid as a Dependency
 
@@ -56,23 +57,23 @@ You can add FastPlaid to your project dependencies with version ranges to ensure
 **For requirements.txt:**
 
 ```
-fast-plaid>=1.2.4.260,<=1.2.4.280
+fast-plaid>=1.2.5.260,<=1.2.5.290
 ```
 
 **For pyproject.toml:**
 
 ```toml
 [project]
 dependencies = [
-    "fast-plaid>=1.2.4.260,<=1.2.4.280"
+    "fast-plaid>=1.2.5.260,<=1.2.5.290"
 ]
 ```
 
 **For setup.py:**
 
 ```python
 install_requires=[
-    "fast-plaid>=1.2.4.260,<=1.2.4.280"
+    "fast-plaid>=1.2.5.260,<=1.2.5.290"
 ]
 ```
 
@@ -316,6 +317,7 @@ class FastPlaid:
     self,
     index: str,
     device: str | list[str] | None = None,
+    preload_index: bool = True,
 ) -> None:
 ```
 
@@ -331,6 +333,11 @@ device: str | list[str] | None = None
 - Can be a list of device strings (e.g., ["cuda:0", "cuda:1"]).
 - If multiple GPUs are specified and available, multiprocessing is automatically set up for parallel execution.
   Remember to include your code within an `if __name__ == "__main__":` block for proper multiprocessing behavior.
+
+preload_index: bool = True (optional)
+    If `True`, the index is loaded into memory upon initialization, which can
+    speed up the first search by "warming up" the index. If `False`, the index
+    is loaded at search time and unloaded afterward.
 ```
 
 ### Creating an Index
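
A minimal usage sketch for the new `preload_index` flag, assuming an index has already been built at `index/` (the path and tensor shapes here are illustrative, not from this commit):

```python
import torch

from fast_plaid import search

# Default: keep the index resident in memory for the fastest first search.
fast_plaid = search.FastPlaid(index="index", preload_index=True)

# Lower memory footprint: load the index per search and unload it afterward.
fast_plaid_low_mem = search.FastPlaid(index="index", preload_index=False)

# Illustrative queries: 8 queries, 32 tokens each, 128-dim embeddings.
queries_embeddings = torch.randn(8, 32, 128)
results = fast_plaid_low_mem.search(queries_embeddings=queries_embeddings, top_k=10)
```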
@@ -345,6 +352,7 @@ The **`create` method** builds the multi-vector index from your document embeddings.
     max_points_per_centroid: int = 256,
     nbits: int = 4,
     n_samples_kmeans: int | None = None,
+    batch_size: int = 25_000,
     seed: int = 42,
     use_triton_kmeans: bool | None = None,
     metadata: list[dict[str, Any]] | None = None,
@@ -376,6 +384,9 @@ n_samples_kmeans: int | None = None (optional)
     clustering quality. If you have a large dataset, you might want to set this to a
     smaller value to speed up the indexing process and save some memory.
 
+batch_size: int = 25_000 (optional)
+    Batch size for processing embeddings during index creation.
+
 seed: int = 42 (optional)
     Seed for the random number generator used in index creation.
     Setting this ensures reproducible results across multiple runs.
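
A hedged sketch of index creation with the new `batch_size` parameter; the corpus shapes are illustrative, and the `documents_embeddings` keyword is assumed to mirror the `update` signature shown below:

```python
import torch

from fast_plaid import search

fast_plaid = search.FastPlaid(index="index")

# Illustrative corpus: 100 documents, 300 tokens each, 128-dim embeddings.
documents_embeddings = [torch.randn(300, 128) for _ in range(100)]

# batch_size bounds how many embeddings are processed at once, trading
# indexing speed against peak memory.
fast_plaid.create(
    documents_embeddings=documents_embeddings,
    batch_size=25_000,
)
```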
@@ -402,6 +413,7 @@ The **`update` method** provides an efficient way to add new documents to an existing index.
     self,
     documents_embeddings: list[torch.Tensor] | torch.Tensor,
     metadata: list[dict[str, Any]] | None = None,
+    batch_size: int = 25_000,
 ) -> "FastPlaid":
 ```
 
@@ -416,6 +428,9 @@ metadata: list[dict[str, Any]] | None = None
     Each dictionary can contain arbitrary key-value pairs that you want to associate with the document.
     If provided, the length of this list must match the number of new documents being added.
     The metadata will be stored in a SQLite database within the index directory for filtering during searches.
+
+batch_size: int = 25_000 (optional)
+    Batch size for processing embeddings during the update.
 ```
 
 ### Searching the Index
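
A matching sketch for `update`, again with illustrative shapes and hypothetical metadata values:

```python
import torch

from fast_plaid import search

fast_plaid = search.FastPlaid(index="index")

# Ten new documents with illustrative shapes and arbitrary metadata.
new_embeddings = [torch.randn(300, 128) for _ in range(10)]
metadata = [{"source": "update-batch"} for _ in new_embeddings]

# New documents are appended to the existing index, with embeddings
# processed in batches of up to batch_size.
fast_plaid.update(
    documents_embeddings=new_embeddings,
    metadata=metadata,
    batch_size=25_000,
)
```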
@@ -427,7 +442,7 @@ The **`search` method** lets you query the created index with your query embeddings.
     self,
     queries_embeddings: torch.Tensor | list[torch.Tensor],
     top_k: int = 10,
-    batch_size: int = 1 << 18,
+    batch_size: int = 25_000,
     n_full_scores: int = 4096,
     n_ivf_probe: int = 8,
     show_progress: bool = True,
@@ -444,7 +459,7 @@ queries_embeddings: torch.Tensor | list[torch.Tensor]
 top_k: int = 10 (optional)
     The number of top-scoring documents to retrieve for each query.
 
-batch_size: int = 1 << 18 (optional)
+batch_size: int = 25_000 (optional)
     The internal batch size used for processing queries.
     A larger batch size might improve throughput on powerful GPUs but can consume more memory.
 

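A sketch of a search call spelling out the new default `batch_size` (25_000, down from 1 << 18 = 262_144) alongside the other tuning knobs; shapes are illustrative:

```python
import torch

from fast_plaid import search

fast_plaid = search.FastPlaid(index="index")

queries_embeddings = torch.randn(1_000, 32, 128)  # illustrative query tensor

# The lower default batch_size caps memory use; raise it on large-memory
# GPUs for throughput, or lower it further if you hit out-of-memory errors.
scores = fast_plaid.search(
    queries_embeddings=queries_embeddings,
    top_k=10,
    batch_size=25_000,
    n_full_scores=4096,
    n_ivf_probe=8,
    show_progress=True,
)
```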
benchmark/benchmark.py

Lines changed: 5 additions & 70 deletions

@@ -9,6 +9,8 @@
 from fast_plaid import evaluation, search
 from pylate import models
 
+print("Torch version:", torch.__version__)
+
 parser = argparse.ArgumentParser(
     description="Run Fast-PLAiD evaluation on a BEIR dataset."
 )
@@ -88,11 +90,11 @@
 
 large_queries_embeddings = torch.cat(
     ([queries_embeddings] * ((1000 // queries_embeddings.shape[0]) + 1))[:1000]
-)
+).to("cpu")
 
 print(f"🔍 50_000 queries on {dataset_name}...")
 start_search = time.time()
-_ = index.search(queries_embeddings=large_queries_embeddings)
+_ = index.search(queries_embeddings=large_queries_embeddings, top_k=10, n_full_scores=4096, n_ivf_probe=8)
 end_search = time.time()
 heavy_search_time = end_search - start_search
 queries_per_second = large_queries_embeddings.shape[0] / heavy_search_time
@@ -139,71 +141,4 @@
 with open(output_filepath, "w") as f:
     json.dump(output_data, f, indent=4)
 
-print(f"🎉 Finished evaluation for dataset: {dataset_name}\n")
-
-# Pylate
-
-from pylate import evaluation, indexes, retrieve
-
-index = indexes.PLAID(
-    override=True,
-    index_name=f"{dataset_name}_pylate",
-    embedding_size=96,
-    nbits=4,
-)
-
-retriever = retrieve.ColBERT(index=index)
-
-start = time.time()
-index.add_documents(
-    documents_ids=[document["id"] for document in documents],
-    documents_embeddings=documents_embeddings,
-)
-end = time.time()
-indexing_time = end - start
-print(f"🏗️ Pylate index on {dataset_name}: {end - start:.2f} seconds")
-
-start = time.time()
-scores = retriever.retrieve(queries_embeddings=queries_embeddings, k=20)
-end = time.time()
-search_time = end - start
-print(f"🔍 Pylate search on {dataset_name}: {search_time:.2f} seconds")
-
-
-start = time.time()
-_ = retriever.retrieve(queries_embeddings=large_queries_embeddings, k=20)
-end = time.time()
-heavy_search_time = end - start
-queries_per_second = large_queries_embeddings.shape[0] / heavy_search_time
-
-for (query_id, query), query_scores in zip(queries.items(), scores):
-    for score in query_scores:
-        if score["id"] == query_id:
-            # Remove the query_id from the score
-            query_scores.remove(score)
-
-evaluation_scores = evaluation.evaluate(
-    scores=scores,
-    qrels=qrels,
-    queries=list(queries.values()),
-    metrics=["map", "ndcg@10", "ndcg@100", "recall@10", "recall@100"],
-)
-
-print(f"\n--- 📈 Final Scores for {dataset_name} (Pylate) ---")
-print(evaluation_scores)
-
-output_data = {
-    "dataset": dataset_name,
-    "indexing": round(indexing_time, 3),
-    "search": round(search_time, 3),
-    "qps": round(queries_per_second, 2),
-    "size": len(documents),
-    "queries": num_queries,
-    "scores": evaluation_scores,
-}
-
-output_filepath = os.path.join(output_dir, f"{dataset_name}_pylate.json")
-with open(output_filepath, "w") as f:
-    json.dump(output_data, f, indent=4)
-
-print(f"💾 Exporting Pylate results to {output_filepath}")
+print(f"🎉 Finished evaluation for dataset: {dataset_name}\n")
