From d8a31cd676300d3956716d942bd90b9517d291ba Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Thu, 25 Sep 2025 11:41:02 -0700 Subject: [PATCH 01/21] introduce vector provider call to get scorer --- .../apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java | 9 +++++++++ .../lucene104/Lucene104ScalarQuantizedVectorsFormat.java | 4 ++-- .../lucene104/Lucene104ScalarQuantizedVectorsReader.java | 5 +++-- .../lucene104/Lucene104ScalarQuantizedVectorsWriter.java | 5 +++-- .../vectorization/DefaultVectorizationProvider.java | 6 ++++++ .../internal/vectorization/VectorizationProvider.java | 3 +++ .../vectorization/PanamaVectorizationProvider.java | 8 ++++++++ 7 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java index 123c18e00c08..2faa1b0a92fd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java @@ -41,4 +41,13 @@ public static FlatVectorsScorer getLucene99FlatVectorsScorer() { public static FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { return IMPL.getLucene99ScalarQuantizedVectorsScorer(); } + + /** + * Returns a FlatVectorsScorer that supports the Lucene104 scalar quantized format. Scorers + * retrieved through this method may be optimized on certain platforms. Otherwise, a + * DefaultFlatVectorScorer is returned. 
+ */ + public static FlatVectorsScorer getLucene104ScalarQuantizedFlatVectorsScorer() { + return IMPL.getLucene104ScalarQuantizedVectorsScorer(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java index 44e1ef092c12..8198f4ef24f3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; @@ -105,8 +106,7 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); - private static final Lucene104ScalarQuantizedVectorScorer scorer = - new Lucene104ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); + private static final FlatVectorsScorer scorer = FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); private final ScalarEncoding encoding; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java index 8012d6095c27..5d3a4b22ace9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java @@ -27,6 +27,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; import org.apache.lucene.index.ByteVectorValues; @@ -66,12 +67,12 @@ class Lucene104ScalarQuantizedVectorsReader extends FlatVectorsReader private final Map fields = new HashMap<>(); private final IndexInput quantizedVectorData; private final FlatVectorsReader rawVectorsReader; - private final Lucene104ScalarQuantizedVectorScorer vectorScorer; + private final FlatVectorsScorer vectorScorer; Lucene104ScalarQuantizedVectorsReader( SegmentReadState state, FlatVectorsReader rawVectorsReader, - Lucene104ScalarQuantizedVectorScorer vectorsScorer) + FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.vectorScorer = vectorsScorer; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 4238aed03600..84481c0a8e9a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; 
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; @@ -67,7 +68,7 @@ public class Lucene104ScalarQuantizedVectorsWriter extends FlatVectorsWriter { private final IndexOutput meta, vectorData; private final ScalarEncoding encoding; private final FlatVectorsWriter rawVectorDelegate; - private final Lucene104ScalarQuantizedVectorScorer vectorsScorer; + private final FlatVectorsScorer vectorsScorer; private boolean finished; /** @@ -79,7 +80,7 @@ protected Lucene104ScalarQuantizedVectorsWriter( SegmentWriteState state, ScalarEncoding encoding, FlatVectorsWriter rawVectorDelegate, - Lucene104ScalarQuantizedVectorScorer vectorsScorer) + FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.encoding = encoding; diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java index 21977fa3dc77..5ebadda480fa 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java @@ -19,6 +19,7 @@ import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; @@ -46,6 +47,11 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { return new Lucene99ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); } + @Override + public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + } + @Override public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) { return new 
PostingDecodingUtil(input); diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java index cf9c56c59774..44109191250b 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java @@ -112,6 +112,9 @@ public static VectorizationProvider getInstance() { /** Returns a FlatVectorsScorer that supports the Lucene99 format. */ public abstract FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer(); + /** Returns a FlatVectorsScorer that supports the Lucene104 quantized format. */ + public abstract FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer(); + /** Create a new {@link PostingDecodingUtil} for the given {@link IndexInput}. */ public abstract PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException; diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index cf3ab94f417c..79a16dd0bc1f 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,7 +21,9 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -83,6 +85,12 @@ public FlatVectorsScorer 
getLucene99ScalarQuantizedVectorsScorer() { return Lucene99MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } + @Override + public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { + // XXX DO NOT MERGE + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + } + @Override public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException { if (input instanceof MemorySegmentAccessInput msai) { From 475416601e5696c185c688858e1a6542482d8461 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 26 Sep 2025 14:44:06 -0700 Subject: [PATCH 02/21] factor out the parts of score computation that are not amenable to vectorization --- .../Lucene104ScalarQuantizedVectorScorer.java | 102 ++++++-------- ...Lucene104ScalarQuantizedVectorsFormat.java | 3 +- ...Lucene104ScalarQuantizedVectorsReader.java | 4 +- ...imizedScalarQuantizedVectorSimilarity.java | 129 ++++++++++++++++++ 4 files changed, 170 insertions(+), 68 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java index a2072a297887..add7355de621 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java @@ -17,8 +17,6 @@ package org.apache.lucene.codecs.lucene104; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; -import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; -import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; import java.io.IOException; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; @@ -29,6 +27,7 @@ import 
org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** Vector scorer over OptimizedScalarQuantized vectors */ @@ -78,10 +77,19 @@ public RandomVectorScorer getRandomVectorScorer( quantizer.scalarQuantize( target, targetQuantized, qv.getScalarEncoding().getBits(), qv.getCentroid()); return new RandomVectorScorer.AbstractRandomVectorScorer(qv) { + private final OptimizedScalarQuantizedVectorSimilarity similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + qv.dimension(), + qv.getCentroidDP(), + qv.getScalarEncoding().getBits()); + @Override public float score(int node) throws IOException { - return quantizedScore( - targetQuantized, targetCorrectiveTerms, qv, node, similarityFunction); + return similarity.score( + dotProduct(targetQuantized, qv, node), + targetCorrectiveTerms, + qv.getCorrectiveTerms(node)); } }; } @@ -107,12 +115,25 @@ private static final class ScalarQuantizedVectorScorerSupplier implements RandomVectorScorerSupplier { private final QuantizedByteVectorValues targetValues; private final QuantizedByteVectorValues values; - private final VectorSimilarityFunction similarity; + private final OptimizedScalarQuantizedVectorSimilarity similarity; public ScalarQuantizedVectorScorerSupplier( QuantizedByteVectorValues values, VectorSimilarityFunction similarity) throws IOException { this.targetValues = values.copy(); this.values = values; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarity, + values.dimension(), + values.getCentroidDP(), + values.getScalarEncoding().getBits()); + } + + private ScalarQuantizedVectorScorerSupplier( + QuantizedByteVectorValues values, OptimizedScalarQuantizedVectorSimilarity similarity) + throws 
IOException { + this.targetValues = values.copy(); + this.values = values; this.similarity = similarity; } @@ -124,7 +145,10 @@ public UpdateableRandomVectorScorer scorer() throws IOException { @Override public float score(int node) throws IOException { - return quantizedScore(targetVector, targetCorrectiveTerms, values, node, similarity); + return similarity.score( + dotProduct(targetVector, values, node), + targetCorrectiveTerms, + values.getCorrectiveTerms(node)); } @Override @@ -151,64 +175,14 @@ public RandomVectorScorerSupplier copy() throws IOException { } } - private static final float[] SCALE_LUT = - new float[] { - 1f, - 1f / ((1 << 2) - 1), - 1f / ((1 << 3) - 1), - 1f / ((1 << 4) - 1), - 1f / ((1 << 5) - 1), - 1f / ((1 << 6) - 1), - 1f / ((1 << 7) - 1), - 1f / ((1 << 8) - 1), - }; - - private static float quantizedScore( - byte[] quantizedQuery, - OptimizedScalarQuantizer.QuantizationResult queryCorrections, - QuantizedByteVectorValues targetVectors, - int targetOrd, - VectorSimilarityFunction similarityFunction) - throws IOException { + private static float dotProduct( + byte[] query, QuantizedByteVectorValues targetVectors, int targetOrd) throws IOException { var scalarEncoding = targetVectors.getScalarEncoding(); - byte[] quantizedDoc = targetVectors.vectorValue(targetOrd); - float qcDist = - switch (scalarEncoding) { - case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(quantizedQuery, quantizedDoc); - case SEVEN_BIT -> VectorUtil.dotProduct(quantizedQuery, quantizedDoc); - case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(quantizedQuery, quantizedDoc); - }; - OptimizedScalarQuantizer.QuantizationResult indexCorrections = - targetVectors.getCorrectiveTerms(targetOrd); - float scale = SCALE_LUT[scalarEncoding.getBits() - 1]; - float x1 = indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we must scale according to the bits - float lx = (indexCorrections.upperInterval() - ax) * scale; - float ay = 
queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * scale; - float y1 = queryCorrections.quantizedComponentSum(); - float score = - ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist; - // For euclidean, we need to invert the score and apply the additional correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - targetVectors.getCentroidDP(); - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); - } + byte[] doc = targetVectors.vectorValue(targetOrd); + return switch (scalarEncoding) { + case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(query, doc); + case SEVEN_BIT -> VectorUtil.dotProduct(query, doc); + case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(query, doc); + }; } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java index 8198f4ef24f3..aa076a184a3c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java @@ -106,7 +106,8 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = 
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); - private static final FlatVectorsScorer scorer = FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); + private static final FlatVectorsScorer scorer = + FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); private final ScalarEncoding encoding; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java index 5d3a4b22ace9..6e054206c481 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java @@ -70,9 +70,7 @@ class Lucene104ScalarQuantizedVectorsReader extends FlatVectorsReader private final FlatVectorsScorer vectorScorer; Lucene104ScalarQuantizedVectorsReader( - SegmentReadState state, - FlatVectorsReader rawVectorsReader, - FlatVectorsScorer vectorsScorer) + SegmentReadState state, FlatVectorsReader rawVectorsReader, FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.vectorScorer = vectorsScorer; diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java new file mode 100644 index 000000000000..8b6795bd11bf --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.quantization; + +import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; +import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; + +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.util.VectorUtil; + +/** + * Common utility functions for computing similarity scores between two optimized scalar quantized + * vectors. + */ +public class OptimizedScalarQuantizedVectorSimilarity { + // Precomputed scale factors for each quantization bit count (1 to 8 bits). + private static final float[] SCALE_LUT = + new float[] { + 1f, + 1f / ((1 << 2) - 1), + 1f / ((1 << 3) - 1), + 1f / ((1 << 4) - 1), + 1f / ((1 << 5) - 1), + 1f / ((1 << 6) - 1), + 1f / ((1 << 7) - 1), + 1f / ((1 << 8) - 1), + }; + + private final VectorSimilarityFunction similarityFunction; + private final int dimensions; + private final float centroidDotProduct; + private final float queryScale; + private final float indexScale; + + /** + * Create a new vector similarity computer for optimized scalar quantized vectors. + * + * @param similarityFunction - the similarity function to use. + * @param dimensions - the number of dimensions in each vector. + * @param centroidDotProduct - the dot product of the segment centroid with itself. + * @param bits - the number of bits used for each dimension in [1,8]. 
+ */ + public OptimizedScalarQuantizedVectorSimilarity( + VectorSimilarityFunction similarityFunction, + int dimensions, + float centroidDotProduct, + int bits) { + this(similarityFunction, dimensions, centroidDotProduct, bits, bits); + } + + /** + * Create a new vector similarity computer for optimized scalar quantized vectors. + * + * @param similarityFunction - the similarity function to use. + * @param dimensions - the number of dimensions in each vector. + * @param centroidDotProduct - the dot product of the segment centroid with itself. + * @param queryBits - the number of bits used in the query vector for each dimension in [1,8]. + * @param indexBits - the number of bits used in the index vector for each dimension in [1,8]. + */ + public OptimizedScalarQuantizedVectorSimilarity( + VectorSimilarityFunction similarityFunction, + int dimensions, + float centroidDotProduct, + int queryBits, + int indexBits) { + this.similarityFunction = similarityFunction; + this.dimensions = dimensions; + this.centroidDotProduct = centroidDotProduct; + this.queryScale = SCALE_LUT[queryBits - 1]; + this.indexScale = SCALE_LUT[indexBits - 1]; + } + + /** + * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot + * product of the two vectors and their corrective factors. + * + * @param dotProduct - dot product of the two quantized vectors. + * @param queryCorrections - corrective factors for vector 'y'. + * @param indexCorrections - corrective factors for vector 'x'. + * @return - a similarity score value between 0 and 1; higher values are better. 
+ */ + public float score( + float dotProduct, + OptimizedScalarQuantizer.QuantizationResult queryCorrections, + OptimizedScalarQuantizer.QuantizationResult indexCorrections) { + float x1 = indexCorrections.quantizedComponentSum(); + float ax = indexCorrections.lowerInterval(); + // Here we must scale according to the bits + float lx = (indexCorrections.upperInterval() - ax) * indexScale; + float ay = queryCorrections.lowerInterval(); + float ly = (queryCorrections.upperInterval() - ay) * queryScale; + float y1 = queryCorrections.quantizedComponentSum(); + float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; + // For euclidean, we need to invert the score and apply the additional correction, which is + // assumed to be the squared l2norm of the centroid centered vectors. + if (similarityFunction == EUCLIDEAN) { + score = + queryCorrections.additionalCorrection() + + indexCorrections.additionalCorrection() + - 2 * score; + return Math.max(1 / (1f + score), 0); + } else { + // For cosine and max inner product, we need to apply the additional correction, which is + // assumed to be the non-centered dot-product between the vector and the centroid + score += + queryCorrections.additionalCorrection() + + indexCorrections.additionalCorrection() + - centroidDotProduct; + if (similarityFunction == MAXIMUM_INNER_PRODUCT) { + return VectorUtil.scaleMaxInnerProductScore(score); + } + return Math.max((1f + score) / 2f, 0); + } + } +} From 767a52e38371d6c4ebf50b65f0efc70cfdbf9ca5 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 26 Sep 2025 15:00:31 -0700 Subject: [PATCH 03/21] share with 102 binarized vectors --- .../Lucene102BinaryFlatVectorsScorer.java | 95 ++++++++----------- 1 file changed, 42 insertions(+), 53 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java index 
02e37a9e89d8..65d3c768386f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java @@ -16,10 +16,9 @@ */ package org.apache.lucene.codecs.lucene102; +import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS; import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; -import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; -import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.transposeHalfByte; import java.io.IOException; @@ -31,13 +30,13 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer.QuantizationResult; /** Vector scorer over binarized vector values */ public class Lucene102BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1); public Lucene102BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; @@ -73,10 +72,20 @@ public RandomVectorScorer getRandomVectorScorer( quantizer.scalarQuantize(target, initial, (byte) 4, centroid); transposeHalfByte(initial, quantized); return new RandomVectorScorer.AbstractRandomVectorScorer(binarizedVectors) { + private final OptimizedScalarQuantizedVectorSimilarity similarity = + 
new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + binarizedVectors.dimension(), + binarizedVectors.getCentroidDP(), + QUERY_BITS, + INDEX_BITS); + @Override public float score(int node) throws IOException { - return quantizedScore( - quantized, queryCorrections, binarizedVectors, node, similarityFunction); + var indexVector = binarizedVectors.vectorValue(node); + var indexCorrections = binarizedVectors.getCorrectiveTerms(node); + float dotProduct = VectorUtil.int4BitDotProduct(quantized, indexVector); + return similarity.score(dotProduct, queryCorrections, indexCorrections); } }; } @@ -93,7 +102,8 @@ public RandomVectorScorer getRandomVectorScorer( RandomVectorScorerSupplier getRandomVectorScorerSupplier( VectorSimilarityFunction similarityFunction, Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues scoringVectors, - BinarizedByteVectorValues targetVectors) { + BinarizedByteVectorValues targetVectors) + throws IOException { return new BinarizedRandomVectorScorerSupplier( scoringVectors, targetVectors, similarityFunction); } @@ -108,15 +118,31 @@ static class BinarizedRandomVectorScorerSupplier implements RandomVectorScorerSu private final Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors; private final BinarizedByteVectorValues targetVectors; - private final VectorSimilarityFunction similarityFunction; + private final OptimizedScalarQuantizedVectorSimilarity similarity; + + BinarizedRandomVectorScorerSupplier( + Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors, + BinarizedByteVectorValues targetVectors, + VectorSimilarityFunction similarityFunction) + throws IOException { + this.queryVectors = queryVectors; + this.targetVectors = targetVectors; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + targetVectors.dimension(), + targetVectors.getCentroidDP(), + QUERY_BITS, + INDEX_BITS); + } 
BinarizedRandomVectorScorerSupplier( Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors, BinarizedByteVectorValues targetVectors, - VectorSimilarityFunction similarityFunction) { + OptimizedScalarQuantizedVectorSimilarity similarity) { this.queryVectors = queryVectors; this.targetVectors = targetVectors; - this.similarityFunction = similarityFunction; + this.similarity = similarity; } @Override @@ -139,7 +165,12 @@ public float score(int node) throws IOException { if (vector == null || queryCorrections == null) { throw new IllegalStateException("setScoringOrdinal was not called"); } - return quantizedScore(vector, queryCorrections, targetVectors, node, similarityFunction); + var indexVector = targetVectors.vectorValue(node); + var indexCorrections = targetVectors.getCorrectiveTerms(node); + return similarity.score( + VectorUtil.int4BitDotProduct(vector, indexVector), + queryCorrections, + indexCorrections); } }; } @@ -147,49 +178,7 @@ public float score(int node) throws IOException { @Override public RandomVectorScorerSupplier copy() throws IOException { return new BinarizedRandomVectorScorerSupplier( - queryVectors.copy(), targetVectors.copy(), similarityFunction); - } - } - - static float quantizedScore( - byte[] quantizedQuery, - OptimizedScalarQuantizer.QuantizationResult queryCorrections, - BinarizedByteVectorValues targetVectors, - int targetOrd, - VectorSimilarityFunction similarityFunction) - throws IOException { - byte[] binaryCode = targetVectors.vectorValue(targetOrd); - float qcDist = VectorUtil.int4BitDotProduct(quantizedQuery, binaryCode); - OptimizedScalarQuantizer.QuantizationResult indexCorrections = - targetVectors.getCorrectiveTerms(targetOrd); - float x1 = indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we assume `lx` is simply bit vectors, so the scaling isn't necessary - float lx = indexCorrections.upperInterval() - ax; - float ay = 
queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * FOUR_BIT_SCALE; - float y1 = queryCorrections.quantizedComponentSum(); - float score = - ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist; - // For euclidean, we need to invert the score and apply the additional correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - targetVectors.getCentroidDP(); - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); + queryVectors.copy(), targetVectors.copy(), similarity); } } } From 7511cab23423c6d6f14c220b4ed1af2a45ba4e74 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 08:59:28 -0700 Subject: [PATCH 04/21] make qbvv public so I can use it in the accelerated code path --- .../lucene/codecs/lucene104/QuantizedByteVectorValues.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java index 48d0c4e665f1..a49ad5068373 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java @@ -23,7 +23,7 @@ import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** 
Scalar quantized byte vector values */ -abstract class QuantizedByteVectorValues extends ByteVectorValues { +public abstract class QuantizedByteVectorValues extends ByteVectorValues { /** * Retrieve the corrective terms for the given vector ordinal. For the dot-product family of From 4ca050a71891cfa25ae064dbeb5d7a4e666686e4 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:33:31 -0700 Subject: [PATCH 05/21] feature complete --- ...Lucene104ScalarQuantizedVectorsWriter.java | 8 +- .../OffHeapScalarQuantizedVectorValues.java | 5 + .../lucene104/QuantizedByteVectorValues.java | 3 +- ...orySegmentScalarQuantizedVectorScorer.java | 271 ++++++++++++++++++ .../PanamaVectorizationProvider.java | 9 +- 5 files changed, 290 insertions(+), 6 deletions(-) create mode 100644 lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 84481c0a8e9a..8edb412e44e9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -137,7 +137,8 @@ public FlatFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOExceptio public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { rawVectorDelegate.flush(maxDoc, sortMap); for (FieldWriter field : fields) { - // after raw vectors are written, normalize vectors for clustering and quantization + // after raw vectors are written, normalize vectors for clustering and + // quantization if (VectorSimilarityFunction.COSINE == field.fieldInfo.getVectorSimilarityFunction()) { field.normalizeVectors(); } @@ -728,6 +729,11 @@ public int dimension() { return values.dimension(); } + 
@Override + public IndexInput getSlice() { + return null; + } + @Override public OptimizedScalarQuantizer getQuantizer() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java index d2c678d8f8ba..8bd050827d54 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java @@ -141,6 +141,11 @@ public int getVectorByteLength() { return dimension; } + @Override + public IndexInput getSlice() { + return slice; + } + static void packNibbles(byte[] unpacked, byte[] packed) { assert unpacked.length == packed.length * 2; for (int i = 0; i < packed.length; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java index a49ad5068373..91637b15fe34 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java @@ -18,12 +18,13 @@ import java.io.IOException; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; +import org.apache.lucene.codecs.lucene95.HasIndexSlice; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** Scalar quantized byte vector values */ -public abstract class QuantizedByteVectorValues extends ByteVectorValues { +public abstract class QuantizedByteVectorValues extends ByteVectorValues implements HasIndexSlice { /** * Retrieve the corrective terms for the given vector ordinal. 
For the dot-product family of diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java new file mode 100644 index 000000000000..66f7ed71b06a --- /dev/null +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.internal.vectorization; + +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.MemorySegmentAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.VectorUtil; +import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; + +class Lucene104MemorySegmentScalarQuantizedVectorScorer implements FlatVectorsScorer { + static final Lucene104MemorySegmentScalarQuantizedVectorScorer INSTANCE = + new Lucene104MemorySegmentScalarQuantizedVectorScorer(); + + private static final FlatVectorsScorer DELEGATE = + new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + + private static final int CORRECTIVE_TERMS_SIZE = Float.BYTES * 3 + Integer.BYTES; + + private Lucene104MemorySegmentScalarQuantizedVectorScorer() {} + + @Override + public RandomVectorScorerSupplier getRandomVectorScorerSupplier( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues) + throws IOException { + if (vectorValues instanceof 
QuantizedByteVectorValues quantized + && quantized.getSlice() instanceof MemorySegmentAccessInput input) { + return new RandomVectorScorerSupplierImpl(similarityFunction, quantized, input); + } + return DELEGATE.getRandomVectorScorerSupplier(similarityFunction, vectorValues); + } + + @Override + public RandomVectorScorer getRandomVectorScorer( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target) + throws IOException { + if (vectorValues instanceof QuantizedByteVectorValues quantized + && quantized.getSlice() instanceof MemorySegmentAccessInput input) { + return new RandomVectorScorerImpl(similarityFunction, quantized, input, target); + } + return DELEGATE.getRandomVectorScorer(similarityFunction, vectorValues, target); + } + + @Override + public RandomVectorScorer getRandomVectorScorer( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target) + throws IOException { + return DELEGATE.getRandomVectorScorer(similarityFunction, vectorValues, target); + } + + @Override + public String toString() { + return "Lucene104MemorySegmentScalarQuantizedVectorScorer()"; + } + + private abstract static class RandomVectorScorerBase + extends RandomVectorScorer.AbstractRandomVectorScorer { + + private final QuantizedByteVectorValues values; + private final MemorySegmentAccessInput input; + private final int vectorByteSize; + private final int nodeSize; + private final OptimizedScalarQuantizedVectorSimilarity similarity; + private byte[] scratch = null; + + RandomVectorScorerBase( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + throws IOException { + super(values); + + this.values = values; + this.input = input; + this.vectorByteSize = values.getVectorByteLength(); + this.nodeSize = this.vectorByteSize + CORRECTIVE_TERMS_SIZE; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + values.dimension(), + 
values.getCentroidDP(), + values.getScalarEncoding().getBits()); + checkInvariants(); + } + + final void checkInvariants() { + if (input.length() < (long) nodeSize * maxOrd()) { + throw new IllegalArgumentException("input length is less than expected vector data"); + } + } + + final void checkOrdinal(int ord) { + if (ord < 0 || ord >= maxOrd()) { + throw new IllegalArgumentException("illegal ordinal: " + ord); + } + } + + private static final ValueLayout.OfInt INT_UNALIGNED_LE = + JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + @SuppressWarnings("restricted") + MemorySegment getVector(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + if (vector == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + vector = MemorySegment.ofArray(scratch).reinterpret(vectorByteSize); + } + return vector; + } + + @SuppressWarnings("restricted") + OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize + vectorByteSize; + MemorySegment node = input.segmentSliceOrNull(byteOffset, CORRECTIVE_TERMS_SIZE); + if (node == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, CORRECTIVE_TERMS_SIZE); + node = MemorySegment.ofArray(scratch).reinterpret(CORRECTIVE_TERMS_SIZE); + } + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, 0)), + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES)), + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), + node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); + } + + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { + return similarity; + } + + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding 
getScalarEncoding() { + return values.getScalarEncoding(); + } + } + + private static class RandomVectorScorerImpl extends RandomVectorScorerBase { + private final byte[] query; + private final OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + + RandomVectorScorerImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input, + float[] target) + throws IOException { + super(similarityFunction, values, input); + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = + values.getScalarEncoding(); + OptimizedScalarQuantizer quantizer = values.getQuantizer(); + query = + new byte + [OptimizedScalarQuantizer.discretize( + target.length, scalarEncoding.getDimensionsPerByte())]; + // We make a copy as the quantization process mutates the input + float[] copy = ArrayUtil.copyOfSubArray(target, 0, target.length); + if (similarityFunction == COSINE) { + VectorUtil.l2normalize(copy); + } + target = copy; + queryCorrectiveTerms = + quantizer.scalarQuantize(target, query, scalarEncoding.getBits(), values.getCentroid()); + } + + @Override + public float score(int node) throws IOException { + MemorySegment doc = getVector(node); + float dotProduct = + switch (getScalarEncoding()) { + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc); + }; + // Call getCorrectiveTerms() after computing dot product since corrective terms + // bytes appear + // after the vector bytes, so this sequence of calls is more cache friendly. 
+ return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + } + } + + private record RandomVectorScorerSupplierImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + implements RandomVectorScorerSupplier { + + @Override + public UpdateableRandomVectorScorer scorer() throws IOException { + return new UpdateableRandomVectorScorerImpl(similarityFunction, values, input); + } + + @Override + public RandomVectorScorerSupplier copy() { + return new RandomVectorScorerSupplierImpl(similarityFunction, values, input); + } + } + + private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorerBase + implements UpdateableRandomVectorScorer { + private MemorySegment query; + private OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + + UpdateableRandomVectorScorerImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + throws IOException { + super(similarityFunction, values, input); + } + + @Override + public void setScoringOrdinal(int ord) throws IOException { + checkOrdinal(ord); + query = getVector(ord); + queryCorrectiveTerms = getCorrectiveTerms(ord); + } + + @Override + public float score(int node) throws IOException { + MemorySegment doc = getVector(node); + float dotProduct = + switch (getScalarEncoding()) { + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc); + }; + // Call getCorrectiveTerms() after computing dot product since corrective terms + // bytes appear + // after the vector bytes, so this sequence of calls is more cache friendly. 
+ return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + } + } +} diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index 79a16dd0bc1f..cb177dd93e6a 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -32,8 +32,10 @@ /** A vectorization provider that leverages the Panama Vector API. */ final class PanamaVectorizationProvider extends VectorizationProvider { - // NOTE: Avoid static fields or initializers which rely on the vector API, as these initializers - // would get called before we have a chance to perform sanity checks around the vector API in the + // NOTE: Avoid static fields or initializers which rely on the vector API, as + // these initializers + // would get called before we have a chance to perform sanity checks around the + // vector API in the // constructor of this class. Put them in PanamaVectorConstants instead. 
private final VectorUtilSupport vectorUtilSupport; @@ -87,8 +89,7 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - // XXX DO NOT MERGE - return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } @Override From 99918a0b64f70cca4178b29365b811245f08574a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:42:25 -0700 Subject: [PATCH 06/21] fix tidy --- .../internal/vectorization/PanamaVectorizationProvider.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index cb177dd93e6a..9eca005367ca 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,9 +21,7 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; -import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; From 2894e4415f5976db935fe01eec753951456dc01f Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:48:01 -0700 Subject: [PATCH 07/21] explicitly cast long -> float --- .../codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java index 65d3c768386f..6ffef975c9ce 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java @@ -168,7 +168,7 @@ public float score(int node) throws IOException { var indexVector = targetVectors.vectorValue(node); var indexCorrections = targetVectors.getCorrectiveTerms(node); return similarity.score( - VectorUtil.int4BitDotProduct(vector, indexVector), + (float) VectorUtil.int4BitDotProduct(vector, indexVector), queryCorrections, indexCorrections); } From fd0d85e4a2c9d8cda3b353ed3b1045c8af89464d Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 12:54:31 -0700 Subject: [PATCH 08/21] fix toString tests --- ...ucene104HnswScalarQuantizedVectorsFormat.java | 16 +++++++++++----- ...estLucene104ScalarQuantizedVectorsFormat.java | 10 +++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index 6164b0062f02..91687e200182 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -70,16 +70,20 @@ public KnnVectorsFormat knnVectorsFormat() { "Lucene104HnswScalarQuantizedVectorsFormat(name=Lucene104HnswScalarQuantizedVectorsFormat, maxConn=10, beamWidth=20," + " flatVectorFormat=Lucene104ScalarQuantizedVectorsFormat(name=Lucene104ScalarQuantizedVectorsFormat," + " encoding=UNSIGNED_BYTE," - + " flatVectorScorer=Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=%s())," + + " flatVectorScorer=%s," + " 
rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s())))"; var defaultScorer = - format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer", "DefaultFlatVectorScorer"); + format( + Locale.ROOT, + expectedPattern, + "Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", + "DefaultFlatVectorScorer"); var memSegScorer = format( Locale.ROOT, expectedPattern, - "Lucene99MemorySegmentFlatVectorsScorer", + "Lucene104MemorySegmentScalarQuantizedVectorScorer()", "Lucene99MemorySegmentFlatVectorsScorer"); assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer))); } @@ -112,7 +116,8 @@ public void testSingleVectorCase() throws Exception { Integer.MAX_VALUE); assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); - // When it's the only vector in a segment, the score should be very close to the true + // When it's the only vector in a segment, the score should be very close to the + // true // score assertEquals(trueScore, td.scoreDocs[0].score, 0.01f); } @@ -144,7 +149,8 @@ public void testLimits() { ScalarEncoding.UNSIGNED_BYTE, 20, 100, 1, new SameThreadExecutorService())); } - // Ensures that all expected vector similarity functions are translatable in the format. + // Ensures that all expected vector similarity functions are translatable in the + // format. public void testVectorSimilarityFuncs() { // This does not necessarily have to be all similarity functions, but // differences should be considered carefully. 
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java index 29041b5b07f0..925995e7327c 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java @@ -110,15 +110,19 @@ public KnnVectorsFormat knnVectorsFormat() { "Lucene104ScalarQuantizedVectorsFormat(" + "name=Lucene104ScalarQuantizedVectorsFormat, " + "encoding=UNSIGNED_BYTE, " - + "flatVectorScorer=Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=%s()), " + + "flatVectorScorer=%s, " + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s()))"; var defaultScorer = - format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer", "DefaultFlatVectorScorer"); + format( + Locale.ROOT, + expectedPattern, + "Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", + "DefaultFlatVectorScorer"); var memSegScorer = format( Locale.ROOT, expectedPattern, - "Lucene99MemorySegmentFlatVectorsScorer", + "Lucene104MemorySegmentScalarQuantizedVectorScorer()", "Lucene99MemorySegmentFlatVectorsScorer"); assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer))); } From cc328d59b527e87eb4f30e8debae0f71734985d4 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 14:13:21 -0700 Subject: [PATCH 09/21] fix bug in off heap sqvv that affects invariant checks in memory segment codec --- .../codecs/lucene104/OffHeapScalarQuantizedVectorValues.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java index 
8bd050827d54..71b6a2fbe8f9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java @@ -138,7 +138,7 @@ public float[] getCentroid() { @Override public int getVectorByteLength() { - return dimension; + return this.encoding.getPackedLength(dimension); } @Override From 9261f41790b6a29c52d9d547a7307c6ac11cc06c Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 16:03:22 -0700 Subject: [PATCH 10/21] try to create fewer memory segments --- ...orySegmentScalarQuantizedVectorScorer.java | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 66f7ed71b06a..a29fdafe8b50 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -129,6 +129,10 @@ final void checkOrdinal(int ord) { private static final ValueLayout.OfInt INT_UNALIGNED_LE = JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + // XXX I need to return something wraps the MemorySegment and can produce the + // corrective terms + // on demand. rep is probably (MemorySegment, MemorySegment) with a slice for + // the corrective terms. 
@SuppressWarnings("restricted") MemorySegment getVector(int ord) throws IOException { checkOrdinal(ord); @@ -163,6 +167,32 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); } + record Node(MemorySegment vector, MemorySegment correctiveTerms) { + OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, 0)), + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES)), + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), + correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); + } + } + + @SuppressWarnings("restricted") + Node getNode(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + if (vector == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + vector = MemorySegment.ofArray(scratch); + } + MemorySegment correctiveTerms = vector.asSlice(vectorByteSize, CORRECTIVE_TERMS_SIZE); + return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); + } + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { return similarity; } @@ -202,17 +232,18 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) throws IOException { - MemorySegment doc = getVector(node); + Node doc = getNode(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, 
doc.vector); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); + case PACKED_NIBBLE -> + PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc.vector); }; // Call getCorrectiveTerms() after computing dot product since corrective terms - // bytes appear - // after the vector bytes, so this sequence of calls is more cache friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + // bytes appear after the vector bytes, so this sequence of calls is more cache + // friendly. + return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.getQuantizationResult()); } } From d012ebf6fd642b00368beee6dd9a7062f7f4817f Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 16:05:09 -0700 Subject: [PATCH 11/21] nodeSize --- .../Lucene104MemorySegmentScalarQuantizedVectorScorer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index a29fdafe8b50..da79e85bcc76 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -181,7 +181,7 @@ OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { Node getNode(int ord) throws IOException { checkOrdinal(ord); long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); if (vector == null) { if (scratch == null) { scratch = new byte[nodeSize]; From 78388d39713d2fcc4d21afedb68791773563e84d Mon Sep 17 00:00:00 2001 From: Trevor McCulloch 
Date: Tue, 30 Sep 2025 09:05:26 -0700 Subject: [PATCH 12/21] try to avoid allocating multiple memory segments --- ...orySegmentScalarQuantizedVectorScorer.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index da79e85bcc76..b429c9be3997 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -167,15 +167,8 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); } - record Node(MemorySegment vector, MemorySegment correctiveTerms) { - OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { - return new OptimizedScalarQuantizer.QuantizationResult( - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, 0)), - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES)), - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), - correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); - } - } + record Node( + MemorySegment vector, OptimizedScalarQuantizer.QuantizationResult correctiveTerms) {} @SuppressWarnings("restricted") Node getNode(int ord) throws IOException { @@ -189,7 +182,13 @@ Node getNode(int ord) throws IOException { input.readBytes(byteOffset, scratch, 0, nodeSize); vector = MemorySegment.ofArray(scratch); } - MemorySegment correctiveTerms = vector.asSlice(vectorByteSize, CORRECTIVE_TERMS_SIZE); + var correctiveTerms = + new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), + 
Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), + Float.intBitsToFloat( + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); } @@ -243,7 +242,7 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.getQuantizationResult()); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.correctiveTerms); } } From 5e18d3a6c758825e87d292d03c25e1e2460dab0b Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 11:12:12 -0700 Subject: [PATCH 13/21] try flattening the corrective terms into the node --- ...imizedScalarQuantizedVectorSimilarity.java | 57 ++++++++++++++++++- ...orySegmentScalarQuantizedVectorScorer.java | 34 +++++++---- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java index 8b6795bd11bf..c0e391851f14 100644 --- a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -105,7 +105,8 @@ public float score( float ly = (queryCorrections.upperInterval() - ay) * queryScale; float y1 = queryCorrections.quantizedComponentSum(); float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional correction, which is + // For euclidean, we need to invert 
the score and apply the additional + // correction, which is // assumed to be the squared l2norm of the centroid centered vectors. if (similarityFunction == EUCLIDEAN) { score = @@ -114,8 +115,10 @@ public float score( - 2 * score; return Math.max(1 / (1f + score), 0); } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid + // For cosine and max inner product, we need to apply the additional correction, + // which is + // assumed to be the non-centered dot-product between the vector and the + // centroid score += queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() @@ -126,4 +129,52 @@ public float score( return Math.max((1f + score) / 2f, 0); } } + + // XXX DO NOT MERGE duplication with above. + /** + * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot + * product of the two vectors and their corrective factors. + * + * @param dotProduct - dot product of the two quantized vectors. + * @param queryCorrections - corrective factors for vector 'y'. + * @param indexLowerInterval - corrective factors for vector 'x'. + * @param indexUpperInterval - corrective factors for vector 'x'. + * @param indexAdditionalCorrection - corrective factors for vector 'x'. + * @param indexQuantizedComponentSum - corrective factors for vector 'x'. + * @return - a similarity score value between 0 and 1; higher values are better. 
+ */ + public float score( + float dotProduct, + OptimizedScalarQuantizer.QuantizationResult queryCorrections, + float indexLowerInterval, + float indexUpperInterval, + float indexAdditionalCorrection, + int indexQuantizedComponentSum) { + float x1 = indexQuantizedComponentSum; + float ax = indexLowerInterval; + // Here we must scale according to the bits + float lx = (indexUpperInterval - ax) * indexScale; + float ay = queryCorrections.lowerInterval(); + float ly = (queryCorrections.upperInterval() - ay) * queryScale; + float y1 = queryCorrections.quantizedComponentSum(); + float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; + // For euclidean, we need to invert the score and apply the additional + // correction, which is + // assumed to be the squared l2norm of the centroid centered vectors. + if (similarityFunction == EUCLIDEAN) { + score = queryCorrections.additionalCorrection() + indexAdditionalCorrection - 2 * score; + return Math.max(1 / (1f + score), 0); + } else { + // For cosine and max inner product, we need to apply the additional correction, + // which is + // assumed to be the non-centered dot-product between the vector and the + // centroid + score += + queryCorrections.additionalCorrection() + indexAdditionalCorrection - centroidDotProduct; + if (similarityFunction == MAXIMUM_INNER_PRODUCT) { + return VectorUtil.scaleMaxInnerProductScore(score); + } + return Math.max((1f + score) / 2f, 0); + } + } } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index b429c9be3997..70015719518e 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ 
b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -168,7 +168,11 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I } record Node( - MemorySegment vector, OptimizedScalarQuantizer.QuantizationResult correctiveTerms) {} + MemorySegment vector, + float lowerInterval, + float upperInterval, + float additionalCorrection, + int componentSum) {} @SuppressWarnings("restricted") Node getNode(int ord) throws IOException { @@ -182,14 +186,17 @@ Node getNode(int ord) throws IOException { input.readBytes(byteOffset, scratch, 0, nodeSize); vector = MemorySegment.ofArray(scratch); } - var correctiveTerms = - new OptimizedScalarQuantizer.QuantizationResult( - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), - Float.intBitsToFloat( - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); - return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); + // XXX investigate reordering the vector so that corrective terms appear first. + // we're forced to read them immediately to avoid creating a second memory + // segment which is + // not cheap, so they might as well be read first to avoid additional memory + // latency. 
+ return new Node( + vector.reinterpret(vectorByteSize), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); } OptimizedScalarQuantizedVectorSimilarity getSimilarity() { @@ -242,7 +249,14 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.correctiveTerms); + return getSimilarity() + .score( + dotProduct, + queryCorrectiveTerms, + doc.lowerInterval, + doc.upperInterval, + doc.additionalCorrection, + doc.componentSum); } } From 955d083a724420b17c0167bd8d65384337510b5a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 11:32:59 -0700 Subject: [PATCH 14/21] cleanup --- ...imizedScalarQuantizedVectorSimilarity.java | 39 ++-------- ...orySegmentScalarQuantizedVectorScorer.java | 77 +++++++------------ 2 files changed, 33 insertions(+), 83 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java index c0e391851f14..0d9ff2d5432c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -97,40 +97,15 @@ public float score( float dotProduct, OptimizedScalarQuantizer.QuantizationResult queryCorrections, OptimizedScalarQuantizer.QuantizationResult indexCorrections) { - float x1 = 
indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we must scale according to the bits - float lx = (indexCorrections.upperInterval() - ax) * indexScale; - float ay = queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * queryScale; - float y1 = queryCorrections.quantizedComponentSum(); - float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional - // correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, - // which is - // assumed to be the non-centered dot-product between the vector and the - // centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - centroidDotProduct; - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); - } + return score( + dotProduct, + queryCorrections, + indexCorrections.lowerInterval(), + indexCorrections.upperInterval(), + indexCorrections.additionalCorrection(), + indexCorrections.quantizedComponentSum()); } - // XXX DO NOT MERGE duplication with above. /** * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot * product of the two vectors and their corrective factors. 
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 70015719518e..9715313bb738 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -129,44 +129,6 @@ final void checkOrdinal(int ord) { private static final ValueLayout.OfInt INT_UNALIGNED_LE = JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - // XXX I need to return something wraps the MemorySegment and can produce the - // corrective terms - // on demand. rep is probably (MemorySegment, MemorySegment) with a slice for - // the corrective terms. - @SuppressWarnings("restricted") - MemorySegment getVector(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); - if (vector == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, nodeSize); - vector = MemorySegment.ofArray(scratch).reinterpret(vectorByteSize); - } - return vector; - } - - @SuppressWarnings("restricted") - OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize + vectorByteSize; - MemorySegment node = input.segmentSliceOrNull(byteOffset, CORRECTIVE_TERMS_SIZE); - if (node == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, CORRECTIVE_TERMS_SIZE); - node = MemorySegment.ofArray(scratch).reinterpret(CORRECTIVE_TERMS_SIZE); - } - return new OptimizedScalarQuantizer.QuantizationResult( - 
Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, 0)), - Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES)), - Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), - node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); - } - record Node( MemorySegment vector, float lowerInterval, @@ -188,9 +150,8 @@ Node getNode(int ord) throws IOException { } // XXX investigate reordering the vector so that corrective terms appear first. // we're forced to read them immediately to avoid creating a second memory - // segment which is - // not cheap, so they might as well be read first to avoid additional memory - // latency. + // segment which is not cheap, so they might as well be read first to avoid + // additional memory latency. return new Node( vector.reinterpret(vectorByteSize), Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), @@ -260,7 +221,7 @@ public float score(int node) throws IOException { } } - private record RandomVectorScorerSupplierImpl( + record RandomVectorScorerSupplierImpl( VectorSimilarityFunction similarityFunction, QuantizedByteVectorValues values, MemorySegmentAccessInput input) @@ -293,23 +254,37 @@ private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorer @Override public void setScoringOrdinal(int ord) throws IOException { checkOrdinal(ord); - query = getVector(ord); - queryCorrectiveTerms = getCorrectiveTerms(ord); + Node node = getNode(ord); + query = node.vector(); + queryCorrectiveTerms = + new OptimizedScalarQuantizer.QuantizationResult( + node.lowerInterval(), + node.upperInterval(), + node.additionalCorrection(), + node.componentSum()); } @Override public float score(int node) throws IOException { - MemorySegment doc = getVector(node); + Node doc = getNode(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case PACKED_NIBBLE -> 
PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); + case PACKED_NIBBLE -> + PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc.vector()); }; // Call getCorrectiveTerms() after computing dot product since corrective terms - // bytes appear - // after the vector bytes, so this sequence of calls is more cache friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + // bytes appear after the vector bytes, so this sequence of calls is more cache + // friendly. + return getSimilarity() + .score( + dotProduct, + queryCorrectiveTerms, + doc.lowerInterval(), + doc.upperInterval(), + doc.additionalCorrection(), + doc.componentSum()); } } } From e7178bc28598f09944a2a4d162b16daa90cb239a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 12:39:54 -0700 Subject: [PATCH 15/21] try another formulation of vector handling --- ...orySegmentScalarQuantizedVectorScorer.java | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 9715313bb738..3604eec6e5d3 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -160,6 +160,43 @@ Node getNode(int ord) throws IOException { vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); } + MemorySegment getRawVector(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + 
MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); + if (vector != null) { + return vector; + } + + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + return MemorySegment.ofArray(scratch); + } + + @SuppressWarnings("restricted") + MemorySegment getVector(MemorySegment rawVector) { + return rawVector.reinterpret(vectorByteSize); + } + + float getLowerInterval(MemorySegment rawVector) { + return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize)); + } + + float getUpperInterval(MemorySegment rawVector) { + return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)); + } + + float getAdditionalCorrection(MemorySegment rawVector) { + return Float.intBitsToFloat( + rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)); + } + + int getComponentSum(MemorySegment rawVector) { + return rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3); + } + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { return similarity; } @@ -199,13 +236,14 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) throws IOException { - Node doc = getNode(node); + MemorySegment rawDoc = getRawVector(node); + MemorySegment docVector = getVector(rawDoc); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); case PACKED_NIBBLE -> - PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc.vector); + PanamaVectorUtilSupport.int4DotProductSinglePacked(query, docVector); }; // Call getCorrectiveTerms() after computing dot product since corrective terms 
// bytes appear after the vector bytes, so this sequence of calls is more cache @@ -214,10 +252,10 @@ public float score(int node) throws IOException { .score( dotProduct, queryCorrectiveTerms, - doc.lowerInterval, - doc.upperInterval, - doc.additionalCorrection, - doc.componentSum); + getLowerInterval(rawDoc), + getUpperInterval(rawDoc), + getAdditionalCorrection(rawDoc), + getComponentSum(rawDoc)); } } From 151d2395e2c262a8e5a09743792fe57b5113ed05 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Thu, 2 Oct 2025 15:24:33 -0700 Subject: [PATCH 16/21] settle on a single path --- ...orySegmentScalarQuantizedVectorScorer.java | 114 ++++-------------- .../PanamaVectorUtilSupport.java | 58 ++++++--- 2 files changed, 61 insertions(+), 111 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 3604eec6e5d3..0c7735819c8c 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -16,13 +16,10 @@ */ package org.apache.lucene.internal.vectorization; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; import java.io.IOException; import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.nio.ByteOrder; import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; @@ -126,75 +123,30 @@ final void checkOrdinal(int ord) { } } - private static final ValueLayout.OfInt INT_UNALIGNED_LE = - 
JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - - record Node( - MemorySegment vector, - float lowerInterval, - float upperInterval, - float additionalCorrection, - int componentSum) {} - @SuppressWarnings("restricted") - Node getNode(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); - if (vector == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, nodeSize); - vector = MemorySegment.ofArray(scratch); - } - // XXX investigate reordering the vector so that corrective terms appear first. - // we're forced to read them immediately to avoid creating a second memory - // segment which is not cheap, so they might as well be read first to avoid - // additional memory latency. - return new Node( - vector.reinterpret(vectorByteSize), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); - } - - MemorySegment getRawVector(int ord) throws IOException { + MemorySegment getVector(int ord) throws IOException { checkOrdinal(ord); long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); if (vector != null) { return vector; } if (scratch == null) { - scratch = new byte[nodeSize]; + scratch = new byte[vectorByteSize]; } - input.readBytes(byteOffset, scratch, 0, nodeSize); + input.readBytes(byteOffset, scratch, 0, vectorByteSize); return MemorySegment.ofArray(scratch); } - @SuppressWarnings("restricted") - MemorySegment getVector(MemorySegment rawVector) { - return rawVector.reinterpret(vectorByteSize); - } - - 
float getLowerInterval(MemorySegment rawVector) { - return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize)); - } - - float getUpperInterval(MemorySegment rawVector) { - return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)); - } - - float getAdditionalCorrection(MemorySegment rawVector) { - return Float.intBitsToFloat( - rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)); - } - - int getComponentSum(MemorySegment rawVector) { - return rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3); + OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize + vectorByteSize; + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(input.readInt(byteOffset)), + Float.intBitsToFloat(input.readInt(byteOffset + Integer.BYTES)), + Float.intBitsToFloat(input.readInt(byteOffset + Integer.BYTES * 2)), + input.readInt(byteOffset + Integer.BYTES * 3)); } OptimizedScalarQuantizedVectorSimilarity getSimilarity() { @@ -209,6 +161,7 @@ Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() { private static class RandomVectorScorerImpl extends RandomVectorScorerBase { private final byte[] query; private final OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + private final byte[] scratch; RandomVectorScorerImpl( VectorSimilarityFunction similarityFunction, @@ -220,6 +173,7 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = values.getScalarEncoding(); OptimizedScalarQuantizer quantizer = values.getQuantizer(); + scratch = new byte[values.getVectorByteLength()]; query = new byte [OptimizedScalarQuantizer.discretize( @@ -236,8 +190,7 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) 
throws IOException { - MemorySegment rawDoc = getRawVector(node); - MemorySegment docVector = getVector(rawDoc); + MemorySegment docVector = getVector(node); float dotProduct = switch (getScalarEncoding()) { case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); @@ -248,14 +201,7 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity() - .score( - dotProduct, - queryCorrectiveTerms, - getLowerInterval(rawDoc), - getUpperInterval(rawDoc), - getAdditionalCorrection(rawDoc), - getComponentSum(rawDoc)); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); } } @@ -292,37 +238,23 @@ private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorer @Override public void setScoringOrdinal(int ord) throws IOException { checkOrdinal(ord); - Node node = getNode(ord); - query = node.vector(); - queryCorrectiveTerms = - new OptimizedScalarQuantizer.QuantizationResult( - node.lowerInterval(), - node.upperInterval(), - node.additionalCorrection(), - node.componentSum()); + query = getVector(ord); + queryCorrectiveTerms = getCorrectiveTerms(ord); } @Override public float score(int node) throws IOException { - Node doc = getNode(node); + MemorySegment doc = getVector(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); - case PACKED_NIBBLE -> - PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc.vector()); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductBothPacked(query, 
doc); }; // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity() - .score( - dotProduct, - queryCorrectiveTerms, - doc.lowerInterval(), - doc.upperInterval(), - doc.additionalCorrection(), - doc.componentSum()); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); } } } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index d2e104f92f70..cd870c93813a 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -60,7 +60,8 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport { private static final VectorSpecies FLOAT_SPECIES; private static final VectorSpecies DOUBLE_SPECIES = PanamaVectorConstants.PREFERRED_DOUBLE_SPECIES; - // This create a vector species which we make sure have exact half bits of DOUBLE_SPECIES + // This create a vector species which we make sure have exact half bits of + // DOUBLE_SPECIES private static final VectorSpecies INT_FOR_DOUBLE_SPECIES = VectorSpecies.of(int.class, VectorShape.forBitSize(DOUBLE_SPECIES.vectorBitSize() / 2)); private static final VectorSpecies INT_SPECIES = @@ -110,7 +111,8 @@ public float dotProduct(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += dotProductBody(a, b, i); @@ -154,7 +156,8 @@ private float dotProductBody(float[] a, float[] b, int 
limit) { FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length()); acc4 = fma(vg, vh, acc4); } - // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -173,7 +176,8 @@ public float cosine(float[] a, float[] b) { float norm1 = 0; float norm2 = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); float[] ret = cosineBody(a, b, i); @@ -218,7 +222,8 @@ private float[] cosineBody(float[] a, float[] b, int limit) { norm1_2 = fma(vc, vc, norm1_2); norm2_2 = fma(vd, vd, norm2_2); } - // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -238,7 +243,8 @@ public float squareDistance(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += squareDistanceBody(a, b, i); @@ -287,7 +293,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { FloatVector diff4 = vg.sub(vh); acc4 = fma(diff4, diff4, acc4); } - // vector tail: 
less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -302,8 +309,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { // Binary functions, these all follow a general pattern like this: // - // short intermediate = a * b; - // int accumulator = (int)accumulator + (int)intermediate; + // short intermediate = a * b; + // int accumulator = (int)accumulator + (int)intermediate; // // 256 or 512 bit vectors can process 64 or 128 bits at a time, respectively // intermediate results use 128 or 256 bit vectors, respectively @@ -877,7 +884,8 @@ private static int squareDistanceBody256( ByteVector vb8 = b.load(BYTE_SPECIES, i); // 32-bit sub, multiply, and add into accumulators - // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors on AVX-512? + // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors + // on AVX-512? 
Vector va32 = va8.convertShape(conversion, INT_SPECIES, 0); Vector vb32 = vb8.convertShape(conversion, INT_SPECIES, 0); Vector diff32 = va32.sub(vb32); @@ -890,8 +898,10 @@ private static int squareDistanceBody256( /** vectorized square distance body (128 bit vectors) */ private static int squareDistanceBody128( ByteVectorLoader a, ByteVectorLoader b, int limit, boolean signed) { - // 128-bit implementation, which must "split up" vectors due to widening conversions - // it doesn't help to do the overlapping read trick, due to 32-bit multiply in the formula + // 128-bit implementation, which must "split up" vectors due to widening + // conversions + // it doesn't help to do the overlapping read trick, due to 32-bit multiply in + // the formula IntVector acc1 = IntVector.zero(IntVector.SPECIES_128); IntVector acc2 = IntVector.zero(IntVector.SPECIES_128); var conversion_short = signed ? B2S : ZERO_EXTEND_B2S; @@ -1091,7 +1101,8 @@ private static int int4SquareDistanceBothPackedBody( return sum; } - // Experiments suggest that we need at least 8 lanes so that the overhead of going with the vector + // Experiments suggest that we need at least 8 lanes so that the overhead of + // going with the vector // approach and counting trues on vector masks pays off. private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8; @@ -1099,7 +1110,8 @@ private static int int4SquareDistanceBothPackedBody( public int findNextGEQ(int[] buffer, int target, int from, int to) { if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) { // This effectively implements the V1 intersection algorithm from - // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of Sorted Integers + // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of + // Sorted Integers // with T = INT_SPECIES.length(), ie. 
T=8 with AVX2 and T=16 with AVX-512 // https://arxiv.org/pdf/1401.6399 for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) { @@ -1246,24 +1258,29 @@ public float minMaxScalarQuantize( FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i); // Make sure the value is within the quantile range, cutting off the tails - // see first parenthesis in equation: byte = (float - minQuantile) * 127/(maxQuantile - + // see first parenthesis in equation: byte = (float - minQuantile) * + // 127/(maxQuantile - // minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) - // Math.round rounds to positive infinity, so do the same by +0.5 then truncating to int + // Math.round rounds to positive infinity, so do the same by +0.5 then + // truncating to int Vector roundedDxs = fma(dxc, dxc.broadcast(scale), dxc.broadcast(0.5f)).convert(VectorOperators.F2I, 0); // output this to the array ((ByteVector) roundedDxs.castShape(BYTE_SPECIES, 0)).intoArray(dest, i); - // We multiply by `alpha` here to get the quantized value back into the original range + // We multiply by `alpha` here to get the quantized value back into the original + // range // to aid in calculating the corrective offset FloatVector dxq = ((FloatVector) roundedDxs.castShape(FLOAT_SPECIES, 0)).mul(alpha); // Calculate the corrective offset that needs to be applied to the score // in addition to the `byte * minQuantile * alpha` term in the equation - // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized value + // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized + // value // will be rounded to the nearest whole number and lose some accuracy - // Additionally, we account for the global correction of `minQuantile^2` in the equation + // Additionally, we account for the global correction of 
`minQuantile^2` in the + // equation sum = fma( v.sub(minQuantile / 2f), @@ -1375,7 +1392,8 @@ public float[] l2normalize(float[] v, boolean throwOnZero) { float invNorm = 1.0f / (float) Math.sqrt(l1norm); int i = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (v.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(v.length); l2normalizeBody(v, invNorm, i); From 4d262f1d846cd46f4af4b371ba9d1703f7e81d39 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 10:55:37 -0700 Subject: [PATCH 17/21] fix license --- ...ymmetricScalarQuantizeFlatVectorsScorer.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index c77b7b4bbe82..a5af7ad211b3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.lucene.codecs.lucene104; import java.io.IOException; From 2c98eadd5d045870442434293466cb46429da2c5 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 11:00:38 -0700 Subject: [PATCH 18/21] fix handling of asymmetric in memory seg impl --- .../Lucene104MemorySegmentScalarQuantizedVectorScorer.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 1f9d3de2d707..296259210903 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; -import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; @@ -42,7 +41,7 @@ class Lucene104MemorySegmentScalarQuantizedVectorScorer static final Lucene104MemorySegmentScalarQuantizedVectorScorer INSTANCE = new 
Lucene104MemorySegmentScalarQuantizedVectorScorer(); - private static final FlatVectorsScorer DELEGATE = + private static final Lucene104ScalarQuantizedVectorScorer DELEGATE = new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); private static final int CORRECTIVE_TERMS_SIZE = Float.BYTES * 3 + Integer.BYTES; @@ -67,7 +66,9 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier( QuantizedByteVectorValues scoringVectors, QuantizedByteVectorValues targetVectors) throws IOException { - throw new UnsupportedOperationException("no asymmetric encodings are supported yet"); + // We do not yet support acceleration for any asymmetric formats. + return DELEGATE.getRandomVectorScorerSupplier( + similarityFunction, scoringVectors, targetVectors); } @Override From 16525957d37ae3229b3f75a9247615a552963955 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 11:30:51 -0700 Subject: [PATCH 19/21] javadoc --- .../lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index a5af7ad211b3..b84726052957 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -22,6 +22,10 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +/** + * Extension of {@link FlatVectorsScorer} that allows using two different vector codings for the + * "scoring" or "query" vectors and the "target" or "doc" vectors. 
+ */ public interface AsymmetricScalarQuantizeFlatVectorsScorer extends FlatVectorsScorer { /** * Returns a {@link RandomVectorScorerSupplier} that can be used to score asymmetric vector From 8fca8a33bb8e15a341fc8e9f2cb9af7d6e5adfbb Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 15:05:23 -0700 Subject: [PATCH 20/21] fix a bug in random vector scoring --- ...metricScalarQuantizeFlatVectorsScorer.java | 2 + .../Lucene104ScalarQuantizedVectorScorer.java | 15 ++--- ...Lucene104ScalarQuantizedVectorsWriter.java | 3 +- .../PanamaVectorUtilSupport.java | 55 +++++++------------ .../PanamaVectorizationProvider.java | 11 ++-- ...ne104HnswScalarQuantizedVectorsFormat.java | 7 +-- 6 files changed, 36 insertions(+), 57 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index b84726052957..a18f8e411f26 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -15,6 +15,8 @@ * limitations under the License. 
*/ +// XXX DO NOT MERGE FIX NAME + package org.apache.lucene.codecs.lucene104; import java.io.IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java index a5f37896eb82..1e1baa0593d6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java @@ -84,8 +84,7 @@ public RandomVectorScorer getRandomVectorScorer( var targetCorrectiveTerms = quantizer.scalarQuantize( target, scratch, scalarEncoding.getQueryBits(), qv.getCentroid()); - // for single bit query nibble, we need to transpose the nibbles for fast - // scoring comparisons + // for single bit query nibble, we need to transpose the nibbles for fast scoring comparisons if (scalarEncoding == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT_QUERY_NIBBLE) { OptimizedScalarQuantizer.transposeHalfByte(scratch, targetQuantized); @@ -194,8 +193,7 @@ public UpdateableRandomVectorScorer scorer() throws IOException { @Override public float score(int node) throws IOException { - return quantizedScore( - targetVector, targetCorrectiveTerms, targetValues, node, similarity); + return quantizedScore(targetVector, targetCorrectiveTerms, values, node, similarity); } @Override @@ -291,8 +289,7 @@ public static float quantizedScore( float y1 = queryCorrections.quantizedComponentSum(); float score = ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional - // correction, which is + // For euclidean, we need to invert the score and apply the additional correction, which is // assumed to be the squared l2norm of the centroid centered vectors. 
if (similarityFunction == EUCLIDEAN) { score = @@ -301,10 +298,8 @@ public static float quantizedScore( - 2 * score; return Math.max(1 / (1f + score), 0); } else { - // For cosine and max inner product, we need to apply the additional correction, - // which is - // assumed to be the non-centered dot-product between the vector and the - // centroid + // For cosine and max inner product, we need to apply the additional correction, which is + // assumed to be the non-centered dot-product between the vector and the centroid score += queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 9e62969060dd..10a28a7f0022 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -138,8 +138,7 @@ public FlatFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOExceptio public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { rawVectorDelegate.flush(maxDoc, sortMap); for (FieldWriter field : fields) { - // after raw vectors are written, normalize vectors for clustering and - // quantization + // after raw vectors are written, normalize vectors for clustering and quantization if (VectorSimilarityFunction.COSINE == field.fieldInfo.getVectorSimilarityFunction()) { field.normalizeVectors(); } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index cd870c93813a..40e79f30cdb8 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ 
b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -60,8 +60,7 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport { private static final VectorSpecies FLOAT_SPECIES; private static final VectorSpecies DOUBLE_SPECIES = PanamaVectorConstants.PREFERRED_DOUBLE_SPECIES; - // This create a vector species which we make sure have exact half bits of - // DOUBLE_SPECIES + // This create a vector species which we make sure have exact half bits of DOUBLE_SPECIES private static final VectorSpecies INT_FOR_DOUBLE_SPECIES = VectorSpecies.of(int.class, VectorShape.forBitSize(DOUBLE_SPECIES.vectorBitSize() / 2)); private static final VectorSpecies INT_SPECIES = @@ -111,8 +110,7 @@ public float dotProduct(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += dotProductBody(a, b, i); @@ -176,8 +174,7 @@ public float cosine(float[] a, float[] b) { float norm1 = 0; float norm2 = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); float[] ret = cosineBody(a, b, i); @@ -222,8 +219,7 @@ private float[] cosineBody(float[] a, float[] b, int limit) { norm1_2 = fma(vc, vc, norm1_2); norm2_2 = fma(vd, vd, norm2_2); } - // vector tail: less scalar computations for unaligned sizes, esp with big - // vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); 
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -243,8 +239,7 @@ public float squareDistance(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += squareDistanceBody(a, b, i); @@ -293,8 +288,7 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { FloatVector diff4 = vg.sub(vh); acc4 = fma(diff4, diff4, acc4); } - // vector tail: less scalar computations for unaligned sizes, esp with big - // vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -309,8 +303,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { // Binary functions, these all follow a general pattern like this: // - // short intermediate = a * b; - // int accumulator = (int)accumulator + (int)intermediate; + // short intermediate = a * b; + // int accumulator = (int)accumulator + (int)intermediate; // // 256 or 512 bit vectors can process 64 or 128 bits at a time, respectively // intermediate results use 128 or 256 bit vectors, respectively @@ -884,8 +878,7 @@ private static int squareDistanceBody256( ByteVector vb8 = b.load(BYTE_SPECIES, i); // 32-bit sub, multiply, and add into accumulators - // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors - // on AVX-512? + // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors on AVX-512? 
Vector va32 = va8.convertShape(conversion, INT_SPECIES, 0); Vector vb32 = vb8.convertShape(conversion, INT_SPECIES, 0); Vector diff32 = va32.sub(vb32); @@ -898,10 +891,8 @@ private static int squareDistanceBody256( /** vectorized square distance body (128 bit vectors) */ private static int squareDistanceBody128( ByteVectorLoader a, ByteVectorLoader b, int limit, boolean signed) { - // 128-bit implementation, which must "split up" vectors due to widening - // conversions - // it doesn't help to do the overlapping read trick, due to 32-bit multiply in - // the formula + // 128-bit implementation, which must "split up" vectors due to widening conversions + // it doesn't help to do the overlapping read trick, due to 32-bit multiply in the formula IntVector acc1 = IntVector.zero(IntVector.SPECIES_128); IntVector acc2 = IntVector.zero(IntVector.SPECIES_128); var conversion_short = signed ? B2S : ZERO_EXTEND_B2S; @@ -1101,8 +1092,7 @@ private static int int4SquareDistanceBothPackedBody( return sum; } - // Experiments suggest that we need at least 8 lanes so that the overhead of - // going with the vector + // Experiments suggest that we need at least 8 lanes so that the overhead of going with the vector // approach and counting trues on vector masks pays off. private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8; @@ -1110,8 +1100,7 @@ private static int int4SquareDistanceBothPackedBody( public int findNextGEQ(int[] buffer, int target, int from, int to) { if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) { // This effectively implements the V1 intersection algorithm from - // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of - // Sorted Integers + // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of Sorted Integers // with T = INT_SPECIES.length(), ie. 
T=8 with AVX2 and T=16 with AVX-512 // https://arxiv.org/pdf/1401.6399 for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) { @@ -1259,28 +1248,23 @@ public float minMaxScalarQuantize( // Make sure the value is within the quantile range, cutting off the tails // see first parenthesis in equation: byte = (float - minQuantile) * - // 127/(maxQuantile - - // minQuantile) + // 127/(maxQuantile - minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) - // Math.round rounds to positive infinity, so do the same by +0.5 then - // truncating to int + // Math.round rounds to positive infinity, so do the same by +0.5 then truncating to int Vector roundedDxs = fma(dxc, dxc.broadcast(scale), dxc.broadcast(0.5f)).convert(VectorOperators.F2I, 0); // output this to the array ((ByteVector) roundedDxs.castShape(BYTE_SPECIES, 0)).intoArray(dest, i); - // We multiply by `alpha` here to get the quantized value back into the original - // range + // We multiply by `alpha` here to get the quantized value back into the original range // to aid in calculating the corrective offset FloatVector dxq = ((FloatVector) roundedDxs.castShape(FLOAT_SPECIES, 0)).mul(alpha); // Calculate the corrective offset that needs to be applied to the score // in addition to the `byte * minQuantile * alpha` term in the equation - // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized - // value + // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized value // will be rounded to the nearest whole number and lose some accuracy - // Additionally, we account for the global correction of `minQuantile^2` in the - // equation + // Additionally, we account for the global correction of `minQuantile^2` in the equation sum = fma( v.sub(minQuantile / 2f), @@ -1392,8 +1376,7 @@ public float[] 
l2normalize(float[] v, boolean throwOnZero) { float invNorm = 1.0f / (float) Math.sqrt(l1norm); int i = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (v.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(v.length); l2normalizeBody(v, invNorm, i); diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index 1a94ce7dcf2c..bbe3defa62ee 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,8 +21,10 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -31,10 +33,8 @@ /** A vectorization provider that leverages the Panama Vector API. 
*/ final class PanamaVectorizationProvider extends VectorizationProvider { - // NOTE: Avoid static fields or initializers which rely on the vector API, as - // these initializers - // would get called before we have a chance to perform sanity checks around the - // vector API in the + // NOTE: Avoid static fields or initializers which rely on the vector API, as these initializers + // would get called before we have a chance to perform sanity checks around the vector API in the // constructor of this class. Put them in PanamaVectorConstants instead. private final VectorUtilSupport vectorUtilSupport; @@ -88,7 +88,8 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public AsymmetricScalarQuantizeFlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; + // return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index bfd04a4411aa..6cd536723b61 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -60,6 +60,7 @@ public class TestLucene104HnswScalarQuantizedVectorsFormat extends BaseKnnVector public void setUp() throws Exception { var encodingValues = ScalarEncoding.values(); encoding = encodingValues[random().nextInt(encodingValues.length)]; + System.out.println("ENCODING: " + encoding); format = new Lucene104HnswScalarQuantizedVectorsFormat( encoding, @@ -134,8 +135,7 @@ public void testSingleVectorCase() throws Exception { Integer.MAX_VALUE); 
assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); - // When it's the only vector in a segment, the score should be very close to the - // true + // When it's the only vector in a segment, the score should be very close to the true // score assertEquals(trueScore, td.scoreDocs[0].score, 0.01f); } @@ -167,8 +167,7 @@ public void testLimits() { ScalarEncoding.UNSIGNED_BYTE, 20, 100, 1, new SameThreadExecutorService())); } - // Ensures that all expected vector similarity functions are translatable in the - // format. + // Ensures that all expected vector similarity functions are translatable in the format. public void testVectorSimilarityFuncs() { // This does not necessarily have to be all similarity functions, but // differences should be considered carefully. From 25005563b102ac6db2b57ef2eedb7a581100b8a0 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 15:07:21 -0700 Subject: [PATCH 21/21] restore memseg scorer; fix some errant formatting issues --- .../internal/vectorization/PanamaVectorUtilSupport.java | 4 ++-- .../internal/vectorization/PanamaVectorizationProvider.java | 5 +---- .../TestLucene104HnswScalarQuantizedVectorsFormat.java | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index 40e79f30cdb8..425b0e6630be 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -1247,8 +1247,8 @@ public float minMaxScalarQuantize( FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i); // Make sure the value is within the quantile range, cutting off the tails - // see first parenthesis in equation: byte = (float - minQuantile) * - // 127/(maxQuantile - 
minQuantile) + // see first parenthesis in equation: byte = (float - minQuantile) * 127/(maxQuantile - + // minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index bbe3defa62ee..0ec71b6b7f2f 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,10 +21,8 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; -import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -88,8 +86,7 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public AsymmetricScalarQuantizeFlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - // return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; - return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index 6cd536723b61..0e8084391ab5 
100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -60,7 +60,6 @@ public class TestLucene104HnswScalarQuantizedVectorsFormat extends BaseKnnVector public void setUp() throws Exception { var encodingValues = ScalarEncoding.values(); encoding = encodingValues[random().nextInt(encodingValues.length)]; - System.out.println("ENCODING: " + encoding); format = new Lucene104HnswScalarQuantizedVectorsFormat( encoding,