From d8a31cd676300d3956716d942bd90b9517d291ba Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Thu, 25 Sep 2025 11:41:02 -0700 Subject: [PATCH 01/21] introduce vector provider call to get scorer --- .../apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java | 9 +++++++++ .../lucene104/Lucene104ScalarQuantizedVectorsFormat.java | 4 ++-- .../lucene104/Lucene104ScalarQuantizedVectorsReader.java | 5 +++-- .../lucene104/Lucene104ScalarQuantizedVectorsWriter.java | 5 +++-- .../vectorization/DefaultVectorizationProvider.java | 6 ++++++ .../internal/vectorization/VectorizationProvider.java | 3 +++ .../vectorization/PanamaVectorizationProvider.java | 8 ++++++++ 7 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java index 123c18e00c08..2faa1b0a92fd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorScorerUtil.java @@ -41,4 +41,13 @@ public static FlatVectorsScorer getLucene99FlatVectorsScorer() { public static FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { return IMPL.getLucene99ScalarQuantizedVectorsScorer(); } + + /** + * Returns a FlatVectorsScorer that supports the Lucene104 scalar quantized format. Scorers + * retrieved through this method may be optimized on certain platforms. Otherwise, a + * DefaultFlatVectorScorer is returned. 
+ */ + public static FlatVectorsScorer getLucene104ScalarQuantizedFlatVectorsScorer() { + return IMPL.getLucene104ScalarQuantizedVectorsScorer(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java index 44e1ef092c12..8198f4ef24f3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; @@ -105,8 +106,7 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); - private static final Lucene104ScalarQuantizedVectorScorer scorer = - new Lucene104ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); + private static final FlatVectorsScorer scorer = FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); private final ScalarEncoding encoding; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java index 8012d6095c27..5d3a4b22ace9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java @@ -27,6 +27,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; import org.apache.lucene.index.ByteVectorValues; @@ -66,12 +67,12 @@ class Lucene104ScalarQuantizedVectorsReader extends FlatVectorsReader private final Map fields = new HashMap<>(); private final IndexInput quantizedVectorData; private final FlatVectorsReader rawVectorsReader; - private final Lucene104ScalarQuantizedVectorScorer vectorScorer; + private final FlatVectorsScorer vectorScorer; Lucene104ScalarQuantizedVectorsReader( SegmentReadState state, FlatVectorsReader rawVectorsReader, - Lucene104ScalarQuantizedVectorScorer vectorsScorer) + FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.vectorScorer = vectorsScorer; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 4238aed03600..84481c0a8e9a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; 
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; @@ -67,7 +68,7 @@ public class Lucene104ScalarQuantizedVectorsWriter extends FlatVectorsWriter { private final IndexOutput meta, vectorData; private final ScalarEncoding encoding; private final FlatVectorsWriter rawVectorDelegate; - private final Lucene104ScalarQuantizedVectorScorer vectorsScorer; + private final FlatVectorsScorer vectorsScorer; private boolean finished; /** @@ -79,7 +80,7 @@ protected Lucene104ScalarQuantizedVectorsWriter( SegmentWriteState state, ScalarEncoding encoding, FlatVectorsWriter rawVectorDelegate, - Lucene104ScalarQuantizedVectorScorer vectorsScorer) + FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.encoding = encoding; diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java index 21977fa3dc77..5ebadda480fa 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java @@ -19,6 +19,7 @@ import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; @@ -46,6 +47,11 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { return new Lucene99ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); } + @Override + public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + } + @Override public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) { return new 
PostingDecodingUtil(input); diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java index cf9c56c59774..44109191250b 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java @@ -112,6 +112,9 @@ public static VectorizationProvider getInstance() { /** Returns a FlatVectorsScorer that supports the Lucene99 format. */ public abstract FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer(); + /** Returns a FlatVectorsScorer that supports the Lucene104 quantized format. */ + public abstract FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer(); + /** Create a new {@link PostingDecodingUtil} for the given {@link IndexInput}. */ public abstract PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException; diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index cf3ab94f417c..79a16dd0bc1f 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,7 +21,9 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -83,6 +85,12 @@ public FlatVectorsScorer 
getLucene99ScalarQuantizedVectorsScorer() { return Lucene99MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } + @Override + public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { + // XXX DO NOT MERGE + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + } + @Override public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException { if (input instanceof MemorySegmentAccessInput msai) { From 475416601e5696c185c688858e1a6542482d8461 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 26 Sep 2025 14:44:06 -0700 Subject: [PATCH 02/21] factor out the parts of score computation that are not amenable to vectorization --- .../Lucene104ScalarQuantizedVectorScorer.java | 102 ++++++-------- ...Lucene104ScalarQuantizedVectorsFormat.java | 3 +- ...Lucene104ScalarQuantizedVectorsReader.java | 4 +- ...imizedScalarQuantizedVectorSimilarity.java | 129 ++++++++++++++++++ 4 files changed, 170 insertions(+), 68 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java index a2072a297887..add7355de621 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java @@ -17,8 +17,6 @@ package org.apache.lucene.codecs.lucene104; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; -import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; -import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; import java.io.IOException; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; @@ -29,6 +27,7 @@ import 
org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** Vector scorer over OptimizedScalarQuantized vectors */ @@ -78,10 +77,19 @@ public RandomVectorScorer getRandomVectorScorer( quantizer.scalarQuantize( target, targetQuantized, qv.getScalarEncoding().getBits(), qv.getCentroid()); return new RandomVectorScorer.AbstractRandomVectorScorer(qv) { + private final OptimizedScalarQuantizedVectorSimilarity similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + qv.dimension(), + qv.getCentroidDP(), + qv.getScalarEncoding().getBits()); + @Override public float score(int node) throws IOException { - return quantizedScore( - targetQuantized, targetCorrectiveTerms, qv, node, similarityFunction); + return similarity.score( + dotProduct(targetQuantized, qv, node), + targetCorrectiveTerms, + qv.getCorrectiveTerms(node)); } }; } @@ -107,12 +115,25 @@ private static final class ScalarQuantizedVectorScorerSupplier implements RandomVectorScorerSupplier { private final QuantizedByteVectorValues targetValues; private final QuantizedByteVectorValues values; - private final VectorSimilarityFunction similarity; + private final OptimizedScalarQuantizedVectorSimilarity similarity; public ScalarQuantizedVectorScorerSupplier( QuantizedByteVectorValues values, VectorSimilarityFunction similarity) throws IOException { this.targetValues = values.copy(); this.values = values; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarity, + values.dimension(), + values.getCentroidDP(), + values.getScalarEncoding().getBits()); + } + + private ScalarQuantizedVectorScorerSupplier( + QuantizedByteVectorValues values, OptimizedScalarQuantizedVectorSimilarity similarity) + throws 
IOException { + this.targetValues = values.copy(); + this.values = values; this.similarity = similarity; } @@ -124,7 +145,10 @@ public UpdateableRandomVectorScorer scorer() throws IOException { @Override public float score(int node) throws IOException { - return quantizedScore(targetVector, targetCorrectiveTerms, values, node, similarity); + return similarity.score( + dotProduct(targetVector, values, node), + targetCorrectiveTerms, + values.getCorrectiveTerms(node)); } @Override @@ -151,64 +175,14 @@ public RandomVectorScorerSupplier copy() throws IOException { } } - private static final float[] SCALE_LUT = - new float[] { - 1f, - 1f / ((1 << 2) - 1), - 1f / ((1 << 3) - 1), - 1f / ((1 << 4) - 1), - 1f / ((1 << 5) - 1), - 1f / ((1 << 6) - 1), - 1f / ((1 << 7) - 1), - 1f / ((1 << 8) - 1), - }; - - private static float quantizedScore( - byte[] quantizedQuery, - OptimizedScalarQuantizer.QuantizationResult queryCorrections, - QuantizedByteVectorValues targetVectors, - int targetOrd, - VectorSimilarityFunction similarityFunction) - throws IOException { + private static float dotProduct( + byte[] query, QuantizedByteVectorValues targetVectors, int targetOrd) throws IOException { var scalarEncoding = targetVectors.getScalarEncoding(); - byte[] quantizedDoc = targetVectors.vectorValue(targetOrd); - float qcDist = - switch (scalarEncoding) { - case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(quantizedQuery, quantizedDoc); - case SEVEN_BIT -> VectorUtil.dotProduct(quantizedQuery, quantizedDoc); - case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(quantizedQuery, quantizedDoc); - }; - OptimizedScalarQuantizer.QuantizationResult indexCorrections = - targetVectors.getCorrectiveTerms(targetOrd); - float scale = SCALE_LUT[scalarEncoding.getBits() - 1]; - float x1 = indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we must scale according to the bits - float lx = (indexCorrections.upperInterval() - ax) * scale; - float ay = 
queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * scale; - float y1 = queryCorrections.quantizedComponentSum(); - float score = - ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist; - // For euclidean, we need to invert the score and apply the additional correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - targetVectors.getCentroidDP(); - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); - } + byte[] doc = targetVectors.vectorValue(targetOrd); + return switch (scalarEncoding) { + case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(query, doc); + case SEVEN_BIT -> VectorUtil.dotProduct(query, doc); + case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(query, doc); + }; } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java index 8198f4ef24f3..aa076a184a3c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java @@ -106,7 +106,8 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = 
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); - private static final FlatVectorsScorer scorer = FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); + private static final FlatVectorsScorer scorer = + FlatVectorScorerUtil.getLucene104ScalarQuantizedFlatVectorsScorer(); private final ScalarEncoding encoding; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java index 5d3a4b22ace9..6e054206c481 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java @@ -70,9 +70,7 @@ class Lucene104ScalarQuantizedVectorsReader extends FlatVectorsReader private final FlatVectorsScorer vectorScorer; Lucene104ScalarQuantizedVectorsReader( - SegmentReadState state, - FlatVectorsReader rawVectorsReader, - FlatVectorsScorer vectorsScorer) + SegmentReadState state, FlatVectorsReader rawVectorsReader, FlatVectorsScorer vectorsScorer) throws IOException { super(vectorsScorer); this.vectorScorer = vectorsScorer; diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java new file mode 100644 index 000000000000..8b6795bd11bf --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.quantization; + +import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; +import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; + +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.util.VectorUtil; + +/** + * Common utility functions for computing similarity scores between two optimized scalar quantized + * vectors. + */ +public class OptimizedScalarQuantizedVectorSimilarity { + // Precomputed scale factors for each quantization bit count (1 to 8 bits). + private static final float[] SCALE_LUT = + new float[] { + 1f, + 1f / ((1 << 2) - 1), + 1f / ((1 << 3) - 1), + 1f / ((1 << 4) - 1), + 1f / ((1 << 5) - 1), + 1f / ((1 << 6) - 1), + 1f / ((1 << 7) - 1), + 1f / ((1 << 8) - 1), + }; + + private final VectorSimilarityFunction similarityFunction; + private final int dimensions; + private final float centroidDotProduct; + private final float queryScale; + private final float indexScale; + + /** + * Create a new vector similarity computer for optimized scalar quantized vectors. + * + * @param similarityFunction - the similarity function to use. + * @param dimensions - the number of dimensions in each vector. + * @param centroidDotProduct - the dot product of the segment centroid with itself. + * @param bits - the number of bits used for each dimension in [1,8]. 
+ */ + public OptimizedScalarQuantizedVectorSimilarity( + VectorSimilarityFunction similarityFunction, + int dimensions, + float centroidDotProduct, + int bits) { + this(similarityFunction, dimensions, centroidDotProduct, bits, bits); + } + + /** + * Create a new vector similarity computer for optimized scalar quantized vectors. + * + * @param similarityFunction - the similarity function to use. + * @param dimensions - the number of dimensions in each vector. + * @param centroidDotProduct - the dot product of the segment centroid with itself. + * @param queryBits - the number of bits used in the query vector for each dimension in [1,8]. + * @param indexBits - the number of bits used in the index vector for each dimension in [1,8]. + */ + public OptimizedScalarQuantizedVectorSimilarity( + VectorSimilarityFunction similarityFunction, + int dimensions, + float centroidDotProduct, + int queryBits, + int indexBits) { + this.similarityFunction = similarityFunction; + this.dimensions = dimensions; + this.centroidDotProduct = centroidDotProduct; + this.queryScale = SCALE_LUT[queryBits - 1]; + this.indexScale = SCALE_LUT[indexBits - 1]; + } + + /** + * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot + * product of the two vectors and their corrective factors. + * + * @param dotProduct - dot product of the two quantized vectors. + * @param queryCorrections - corrective factors for vector 'y'. + * @param indexCorrections - corrective factors for vector 'x'. + * @return - a similarity score value between 0 and 1; higher values are better. 
+ */ + public float score( + float dotProduct, + OptimizedScalarQuantizer.QuantizationResult queryCorrections, + OptimizedScalarQuantizer.QuantizationResult indexCorrections) { + float x1 = indexCorrections.quantizedComponentSum(); + float ax = indexCorrections.lowerInterval(); + // Here we must scale according to the bits + float lx = (indexCorrections.upperInterval() - ax) * indexScale; + float ay = queryCorrections.lowerInterval(); + float ly = (queryCorrections.upperInterval() - ay) * queryScale; + float y1 = queryCorrections.quantizedComponentSum(); + float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; + // For euclidean, we need to invert the score and apply the additional correction, which is + // assumed to be the squared l2norm of the centroid centered vectors. + if (similarityFunction == EUCLIDEAN) { + score = + queryCorrections.additionalCorrection() + + indexCorrections.additionalCorrection() + - 2 * score; + return Math.max(1 / (1f + score), 0); + } else { + // For cosine and max inner product, we need to apply the additional correction, which is + // assumed to be the non-centered dot-product between the vector and the centroid + score += + queryCorrections.additionalCorrection() + + indexCorrections.additionalCorrection() + - centroidDotProduct; + if (similarityFunction == MAXIMUM_INNER_PRODUCT) { + return VectorUtil.scaleMaxInnerProductScore(score); + } + return Math.max((1f + score) / 2f, 0); + } + } +} From 767a52e38371d6c4ebf50b65f0efc70cfdbf9ca5 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 26 Sep 2025 15:00:31 -0700 Subject: [PATCH 03/21] share with 102 binarized vectors --- .../Lucene102BinaryFlatVectorsScorer.java | 95 ++++++++----------- 1 file changed, 42 insertions(+), 53 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java index 
02e37a9e89d8..65d3c768386f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java @@ -16,10 +16,9 @@ */ package org.apache.lucene.codecs.lucene102; +import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS; import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; -import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; -import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.transposeHalfByte; import java.io.IOException; @@ -31,13 +30,13 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer.QuantizationResult; /** Vector scorer over binarized vector values */ public class Lucene102BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1); public Lucene102BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; @@ -73,10 +72,20 @@ public RandomVectorScorer getRandomVectorScorer( quantizer.scalarQuantize(target, initial, (byte) 4, centroid); transposeHalfByte(initial, quantized); return new RandomVectorScorer.AbstractRandomVectorScorer(binarizedVectors) { + private final OptimizedScalarQuantizedVectorSimilarity similarity = + 
new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + binarizedVectors.dimension(), + binarizedVectors.getCentroidDP(), + QUERY_BITS, + INDEX_BITS); + @Override public float score(int node) throws IOException { - return quantizedScore( - quantized, queryCorrections, binarizedVectors, node, similarityFunction); + var indexVector = binarizedVectors.vectorValue(node); + var indexCorrections = binarizedVectors.getCorrectiveTerms(node); + float dotProduct = VectorUtil.int4BitDotProduct(quantized, indexVector); + return similarity.score(dotProduct, queryCorrections, indexCorrections); } }; } @@ -93,7 +102,8 @@ public RandomVectorScorer getRandomVectorScorer( RandomVectorScorerSupplier getRandomVectorScorerSupplier( VectorSimilarityFunction similarityFunction, Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues scoringVectors, - BinarizedByteVectorValues targetVectors) { + BinarizedByteVectorValues targetVectors) + throws IOException { return new BinarizedRandomVectorScorerSupplier( scoringVectors, targetVectors, similarityFunction); } @@ -108,15 +118,31 @@ static class BinarizedRandomVectorScorerSupplier implements RandomVectorScorerSu private final Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors; private final BinarizedByteVectorValues targetVectors; - private final VectorSimilarityFunction similarityFunction; + private final OptimizedScalarQuantizedVectorSimilarity similarity; + + BinarizedRandomVectorScorerSupplier( + Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors, + BinarizedByteVectorValues targetVectors, + VectorSimilarityFunction similarityFunction) + throws IOException { + this.queryVectors = queryVectors; + this.targetVectors = targetVectors; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + targetVectors.dimension(), + targetVectors.getCentroidDP(), + QUERY_BITS, + INDEX_BITS); + } 
BinarizedRandomVectorScorerSupplier( Lucene102BinaryQuantizedVectorsWriter.OffHeapBinarizedQueryVectorValues queryVectors, BinarizedByteVectorValues targetVectors, - VectorSimilarityFunction similarityFunction) { + OptimizedScalarQuantizedVectorSimilarity similarity) { this.queryVectors = queryVectors; this.targetVectors = targetVectors; - this.similarityFunction = similarityFunction; + this.similarity = similarity; } @Override @@ -139,7 +165,12 @@ public float score(int node) throws IOException { if (vector == null || queryCorrections == null) { throw new IllegalStateException("setScoringOrdinal was not called"); } - return quantizedScore(vector, queryCorrections, targetVectors, node, similarityFunction); + var indexVector = targetVectors.vectorValue(node); + var indexCorrections = targetVectors.getCorrectiveTerms(node); + return similarity.score( + VectorUtil.int4BitDotProduct(vector, indexVector), + queryCorrections, + indexCorrections); } }; } @@ -147,49 +178,7 @@ public float score(int node) throws IOException { @Override public RandomVectorScorerSupplier copy() throws IOException { return new BinarizedRandomVectorScorerSupplier( - queryVectors.copy(), targetVectors.copy(), similarityFunction); - } - } - - static float quantizedScore( - byte[] quantizedQuery, - OptimizedScalarQuantizer.QuantizationResult queryCorrections, - BinarizedByteVectorValues targetVectors, - int targetOrd, - VectorSimilarityFunction similarityFunction) - throws IOException { - byte[] binaryCode = targetVectors.vectorValue(targetOrd); - float qcDist = VectorUtil.int4BitDotProduct(quantizedQuery, binaryCode); - OptimizedScalarQuantizer.QuantizationResult indexCorrections = - targetVectors.getCorrectiveTerms(targetOrd); - float x1 = indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we assume `lx` is simply bit vectors, so the scaling isn't necessary - float lx = indexCorrections.upperInterval() - ax; - float ay = 
queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * FOUR_BIT_SCALE; - float y1 = queryCorrections.quantizedComponentSum(); - float score = - ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist; - // For euclidean, we need to invert the score and apply the additional correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - targetVectors.getCentroidDP(); - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); + queryVectors.copy(), targetVectors.copy(), similarity); } } } From 7511cab23423c6d6f14c220b4ed1af2a45ba4e74 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 08:59:28 -0700 Subject: [PATCH 04/21] make qbvv public so I can use it in the accelerated code path --- .../lucene/codecs/lucene104/QuantizedByteVectorValues.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java index 48d0c4e665f1..a49ad5068373 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java @@ -23,7 +23,7 @@ import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** 
Scalar quantized byte vector values */ -abstract class QuantizedByteVectorValues extends ByteVectorValues { +public abstract class QuantizedByteVectorValues extends ByteVectorValues { /** * Retrieve the corrective terms for the given vector ordinal. For the dot-product family of From 4ca050a71891cfa25ae064dbeb5d7a4e666686e4 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:33:31 -0700 Subject: [PATCH 05/21] feature complete --- ...Lucene104ScalarQuantizedVectorsWriter.java | 8 +- .../OffHeapScalarQuantizedVectorValues.java | 5 + .../lucene104/QuantizedByteVectorValues.java | 3 +- ...orySegmentScalarQuantizedVectorScorer.java | 271 ++++++++++++++++++ .../PanamaVectorizationProvider.java | 9 +- 5 files changed, 290 insertions(+), 6 deletions(-) create mode 100644 lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 84481c0a8e9a..8edb412e44e9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -137,7 +137,8 @@ public FlatFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOExceptio public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { rawVectorDelegate.flush(maxDoc, sortMap); for (FieldWriter field : fields) { - // after raw vectors are written, normalize vectors for clustering and quantization + // after raw vectors are written, normalize vectors for clustering and + // quantization if (VectorSimilarityFunction.COSINE == field.fieldInfo.getVectorSimilarityFunction()) { field.normalizeVectors(); } @@ -728,6 +729,11 @@ public int dimension() { return values.dimension(); } + 
@Override + public IndexInput getSlice() { + return null; + } + @Override public OptimizedScalarQuantizer getQuantizer() { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java index d2c678d8f8ba..8bd050827d54 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java @@ -141,6 +141,11 @@ public int getVectorByteLength() { return dimension; } + @Override + public IndexInput getSlice() { + return slice; + } + static void packNibbles(byte[] unpacked, byte[] packed) { assert unpacked.length == packed.length * 2; for (int i = 0; i < packed.length; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java index a49ad5068373..91637b15fe34 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/QuantizedByteVectorValues.java @@ -18,12 +18,13 @@ import java.io.IOException; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; +import org.apache.lucene.codecs.lucene95.HasIndexSlice; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; /** Scalar quantized byte vector values */ -public abstract class QuantizedByteVectorValues extends ByteVectorValues { +public abstract class QuantizedByteVectorValues extends ByteVectorValues implements HasIndexSlice { /** * Retrieve the corrective terms for the given vector ordinal. 
For the dot-product family of diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java new file mode 100644 index 000000000000..66f7ed71b06a --- /dev/null +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.internal.vectorization; + +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.MemorySegmentAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.VectorUtil; +import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizedVectorSimilarity; +import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; + +class Lucene104MemorySegmentScalarQuantizedVectorScorer implements FlatVectorsScorer { + static final Lucene104MemorySegmentScalarQuantizedVectorScorer INSTANCE = + new Lucene104MemorySegmentScalarQuantizedVectorScorer(); + + private static final FlatVectorsScorer DELEGATE = + new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + + private static final int CORRECTIVE_TERMS_SIZE = Float.BYTES * 3 + Integer.BYTES; + + private Lucene104MemorySegmentScalarQuantizedVectorScorer() {} + + @Override + public RandomVectorScorerSupplier getRandomVectorScorerSupplier( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues) + throws IOException { + if (vectorValues instanceof 
QuantizedByteVectorValues quantized + && quantized.getSlice() instanceof MemorySegmentAccessInput input) { + return new RandomVectorScorerSupplierImpl(similarityFunction, quantized, input); + } + return DELEGATE.getRandomVectorScorerSupplier(similarityFunction, vectorValues); + } + + @Override + public RandomVectorScorer getRandomVectorScorer( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target) + throws IOException { + if (vectorValues instanceof QuantizedByteVectorValues quantized + && quantized.getSlice() instanceof MemorySegmentAccessInput input) { + return new RandomVectorScorerImpl(similarityFunction, quantized, input, target); + } + return DELEGATE.getRandomVectorScorer(similarityFunction, vectorValues, target); + } + + @Override + public RandomVectorScorer getRandomVectorScorer( + VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target) + throws IOException { + return DELEGATE.getRandomVectorScorer(similarityFunction, vectorValues, target); + } + + @Override + public String toString() { + return "Lucene104MemorySegmentScalarQuantizedVectorScorer()"; + } + + private abstract static class RandomVectorScorerBase + extends RandomVectorScorer.AbstractRandomVectorScorer { + + private final QuantizedByteVectorValues values; + private final MemorySegmentAccessInput input; + private final int vectorByteSize; + private final int nodeSize; + private final OptimizedScalarQuantizedVectorSimilarity similarity; + private byte[] scratch = null; + + RandomVectorScorerBase( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + throws IOException { + super(values); + + this.values = values; + this.input = input; + this.vectorByteSize = values.getVectorByteLength(); + this.nodeSize = this.vectorByteSize + CORRECTIVE_TERMS_SIZE; + this.similarity = + new OptimizedScalarQuantizedVectorSimilarity( + similarityFunction, + values.dimension(), + 
values.getCentroidDP(), + values.getScalarEncoding().getBits()); + checkInvariants(); + } + + final void checkInvariants() { + if (input.length() < (long) nodeSize * maxOrd()) { + throw new IllegalArgumentException("input length is less than expected vector data"); + } + } + + final void checkOrdinal(int ord) { + if (ord < 0 || ord >= maxOrd()) { + throw new IllegalArgumentException("illegal ordinal: " + ord); + } + } + + private static final ValueLayout.OfInt INT_UNALIGNED_LE = + JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + @SuppressWarnings("restricted") + MemorySegment getVector(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + if (vector == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + vector = MemorySegment.ofArray(scratch).reinterpret(vectorByteSize); + } + return vector; + } + + @SuppressWarnings("restricted") + OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize + vectorByteSize; + MemorySegment node = input.segmentSliceOrNull(byteOffset, CORRECTIVE_TERMS_SIZE); + if (node == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, CORRECTIVE_TERMS_SIZE); + node = MemorySegment.ofArray(scratch).reinterpret(CORRECTIVE_TERMS_SIZE); + } + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, 0)), + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES)), + Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), + node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); + } + + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { + return similarity; + } + + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding 
getScalarEncoding() { + return values.getScalarEncoding(); + } + } + + private static class RandomVectorScorerImpl extends RandomVectorScorerBase { + private final byte[] query; + private final OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + + RandomVectorScorerImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input, + float[] target) + throws IOException { + super(similarityFunction, values, input); + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = + values.getScalarEncoding(); + OptimizedScalarQuantizer quantizer = values.getQuantizer(); + query = + new byte + [OptimizedScalarQuantizer.discretize( + target.length, scalarEncoding.getDimensionsPerByte())]; + // We make a copy as the quantization process mutates the input + float[] copy = ArrayUtil.copyOfSubArray(target, 0, target.length); + if (similarityFunction == COSINE) { + VectorUtil.l2normalize(copy); + } + target = copy; + queryCorrectiveTerms = + quantizer.scalarQuantize(target, query, scalarEncoding.getBits(), values.getCentroid()); + } + + @Override + public float score(int node) throws IOException { + MemorySegment doc = getVector(node); + float dotProduct = + switch (getScalarEncoding()) { + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc); + }; + // Call getCorrectiveTerms() after computing dot product since corrective terms + // bytes appear + // after the vector bytes, so this sequence of calls is more cache friendly. 
+ return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + } + } + + private record RandomVectorScorerSupplierImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + implements RandomVectorScorerSupplier { + + @Override + public UpdateableRandomVectorScorer scorer() throws IOException { + return new UpdateableRandomVectorScorerImpl(similarityFunction, values, input); + } + + @Override + public RandomVectorScorerSupplier copy() { + return new RandomVectorScorerSupplierImpl(similarityFunction, values, input); + } + } + + private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorerBase + implements UpdateableRandomVectorScorer { + private MemorySegment query; + private OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + + UpdateableRandomVectorScorerImpl( + VectorSimilarityFunction similarityFunction, + QuantizedByteVectorValues values, + MemorySegmentAccessInput input) + throws IOException { + super(similarityFunction, values, input); + } + + @Override + public void setScoringOrdinal(int ord) throws IOException { + checkOrdinal(ord); + query = getVector(ord); + queryCorrectiveTerms = getCorrectiveTerms(ord); + } + + @Override + public float score(int node) throws IOException { + MemorySegment doc = getVector(node); + float dotProduct = + switch (getScalarEncoding()) { + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc); + }; + // Call getCorrectiveTerms() after computing dot product since corrective terms + // bytes appear + // after the vector bytes, so this sequence of calls is more cache friendly. 
+ return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + } + } +} diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index 79a16dd0bc1f..cb177dd93e6a 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -32,8 +32,10 @@ /** A vectorization provider that leverages the Panama Vector API. */ final class PanamaVectorizationProvider extends VectorizationProvider { - // NOTE: Avoid static fields or initializers which rely on the vector API, as these initializers - // would get called before we have a chance to perform sanity checks around the vector API in the + // NOTE: Avoid static fields or initializers which rely on the vector API, as + // these initializers + // would get called before we have a chance to perform sanity checks around the + // vector API in the // constructor of this class. Put them in PanamaVectorConstants instead. 
private final VectorUtilSupport vectorUtilSupport; @@ -87,8 +89,7 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public FlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - // XXX DO NOT MERGE - return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } @Override From 99918a0b64f70cca4178b29365b811245f08574a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:42:25 -0700 Subject: [PATCH 06/21] fix tidy --- .../internal/vectorization/PanamaVectorizationProvider.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index cb177dd93e6a..9eca005367ca 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,9 +21,7 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; -import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; From 2894e4415f5976db935fe01eec753951456dc01f Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 11:48:01 -0700 Subject: [PATCH 07/21] explicitly cast long -> float --- .../codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java index 65d3c768386f..6ffef975c9ce 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryFlatVectorsScorer.java @@ -168,7 +168,7 @@ public float score(int node) throws IOException { var indexVector = targetVectors.vectorValue(node); var indexCorrections = targetVectors.getCorrectiveTerms(node); return similarity.score( - VectorUtil.int4BitDotProduct(vector, indexVector), + (float) VectorUtil.int4BitDotProduct(vector, indexVector), queryCorrections, indexCorrections); } From fd0d85e4a2c9d8cda3b353ed3b1045c8af89464d Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 12:54:31 -0700 Subject: [PATCH 08/21] fix toString tests --- ...ucene104HnswScalarQuantizedVectorsFormat.java | 16 +++++++++++----- ...estLucene104ScalarQuantizedVectorsFormat.java | 10 +++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index 6164b0062f02..91687e200182 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -70,16 +70,20 @@ public KnnVectorsFormat knnVectorsFormat() { "Lucene104HnswScalarQuantizedVectorsFormat(name=Lucene104HnswScalarQuantizedVectorsFormat, maxConn=10, beamWidth=20," + " flatVectorFormat=Lucene104ScalarQuantizedVectorsFormat(name=Lucene104ScalarQuantizedVectorsFormat," + " encoding=UNSIGNED_BYTE," - + " flatVectorScorer=Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=%s())," + + " flatVectorScorer=%s," + " 
rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s())))"; var defaultScorer = - format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer", "DefaultFlatVectorScorer"); + format( + Locale.ROOT, + expectedPattern, + "Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", + "DefaultFlatVectorScorer"); var memSegScorer = format( Locale.ROOT, expectedPattern, - "Lucene99MemorySegmentFlatVectorsScorer", + "Lucene104MemorySegmentScalarQuantizedVectorScorer()", "Lucene99MemorySegmentFlatVectorsScorer"); assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer))); } @@ -112,7 +116,8 @@ public void testSingleVectorCase() throws Exception { Integer.MAX_VALUE); assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); - // When it's the only vector in a segment, the score should be very close to the true + // When it's the only vector in a segment, the score should be very close to the + // true // score assertEquals(trueScore, td.scoreDocs[0].score, 0.01f); } @@ -144,7 +149,8 @@ public void testLimits() { ScalarEncoding.UNSIGNED_BYTE, 20, 100, 1, new SameThreadExecutorService())); } - // Ensures that all expected vector similarity functions are translatable in the format. + // Ensures that all expected vector similarity functions are translatable in the + // format. public void testVectorSimilarityFuncs() { // This does not necessarily have to be all similarity functions, but // differences should be considered carefully. 
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java index 29041b5b07f0..925995e7327c 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java @@ -110,15 +110,19 @@ public KnnVectorsFormat knnVectorsFormat() { "Lucene104ScalarQuantizedVectorsFormat(" + "name=Lucene104ScalarQuantizedVectorsFormat, " + "encoding=UNSIGNED_BYTE, " - + "flatVectorScorer=Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=%s()), " + + "flatVectorScorer=%s, " + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s()))"; var defaultScorer = - format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer", "DefaultFlatVectorScorer"); + format( + Locale.ROOT, + expectedPattern, + "Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", + "DefaultFlatVectorScorer"); var memSegScorer = format( Locale.ROOT, expectedPattern, - "Lucene99MemorySegmentFlatVectorsScorer", + "Lucene104MemorySegmentScalarQuantizedVectorScorer()", "Lucene99MemorySegmentFlatVectorsScorer"); assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer))); } From cc328d59b527e87eb4f30e8debae0f71734985d4 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 14:13:21 -0700 Subject: [PATCH 09/21] fix bug in off heap sqvv that affects invariant checks in memory segment codec --- .../codecs/lucene104/OffHeapScalarQuantizedVectorValues.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java index 
8bd050827d54..71b6a2fbe8f9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/OffHeapScalarQuantizedVectorValues.java @@ -138,7 +138,7 @@ public float[] getCentroid() { @Override public int getVectorByteLength() { - return dimension; + return this.encoding.getPackedLength(dimension); } @Override From 9261f41790b6a29c52d9d547a7307c6ac11cc06c Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 16:03:22 -0700 Subject: [PATCH 10/21] try to create fewer memory segments --- ...orySegmentScalarQuantizedVectorScorer.java | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 66f7ed71b06a..a29fdafe8b50 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -129,6 +129,10 @@ final void checkOrdinal(int ord) { private static final ValueLayout.OfInt INT_UNALIGNED_LE = JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + // XXX I need to return something wraps the MemorySegment and can produce the + // corrective terms + // on demand. rep is probably (MemorySegment, MemorySegment) with a slice for + // the corrective terms. 
@SuppressWarnings("restricted") MemorySegment getVector(int ord) throws IOException { checkOrdinal(ord); @@ -163,6 +167,32 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); } + record Node(MemorySegment vector, MemorySegment correctiveTerms) { + OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, 0)), + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES)), + Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), + correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); + } + } + + @SuppressWarnings("restricted") + Node getNode(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + if (vector == null) { + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + vector = MemorySegment.ofArray(scratch); + } + MemorySegment correctiveTerms = vector.asSlice(vectorByteSize, CORRECTIVE_TERMS_SIZE); + return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); + } + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { return similarity; } @@ -202,17 +232,18 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) throws IOException { - MemorySegment doc = getVector(node); + Node doc = getNode(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, 
doc.vector); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); + case PACKED_NIBBLE -> + PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc.vector); }; // Call getCorrectiveTerms() after computing dot product since corrective terms - // bytes appear - // after the vector bytes, so this sequence of calls is more cache friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + // bytes appear after the vector bytes, so this sequence of calls is more cache + // friendly. + return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.getQuantizationResult()); } } From d012ebf6fd642b00368beee6dd9a7062f7f4817f Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Mon, 29 Sep 2025 16:05:09 -0700 Subject: [PATCH 11/21] nodeSize --- .../Lucene104MemorySegmentScalarQuantizedVectorScorer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index a29fdafe8b50..da79e85bcc76 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -181,7 +181,7 @@ OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { Node getNode(int ord) throws IOException { checkOrdinal(ord); long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); + MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); if (vector == null) { if (scratch == null) { scratch = new byte[nodeSize]; From 78388d39713d2fcc4d21afedb68791773563e84d Mon Sep 17 00:00:00 2001 From: Trevor McCulloch 
Date: Tue, 30 Sep 2025 09:05:26 -0700 Subject: [PATCH 12/21] try to avoid allocating multiple memory segments --- ...orySegmentScalarQuantizedVectorScorer.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index da79e85bcc76..b429c9be3997 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -167,15 +167,8 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); } - record Node(MemorySegment vector, MemorySegment correctiveTerms) { - OptimizedScalarQuantizer.QuantizationResult getQuantizationResult() { - return new OptimizedScalarQuantizer.QuantizationResult( - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, 0)), - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES)), - Float.intBitsToFloat(correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), - correctiveTerms.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); - } - } + record Node( + MemorySegment vector, OptimizedScalarQuantizer.QuantizationResult correctiveTerms) {} @SuppressWarnings("restricted") Node getNode(int ord) throws IOException { @@ -189,7 +182,13 @@ Node getNode(int ord) throws IOException { input.readBytes(byteOffset, scratch, 0, nodeSize); vector = MemorySegment.ofArray(scratch); } - MemorySegment correctiveTerms = vector.asSlice(vectorByteSize, CORRECTIVE_TERMS_SIZE); + var correctiveTerms = + new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), + 
Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), + Float.intBitsToFloat( + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); } @@ -243,7 +242,7 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.getQuantizationResult()); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.correctiveTerms); } } From 5e18d3a6c758825e87d292d03c25e1e2460dab0b Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 11:12:12 -0700 Subject: [PATCH 13/21] try flattening the corrective terms into the node --- ...imizedScalarQuantizedVectorSimilarity.java | 57 ++++++++++++++++++- ...orySegmentScalarQuantizedVectorScorer.java | 34 +++++++---- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java index 8b6795bd11bf..c0e391851f14 100644 --- a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -105,7 +105,8 @@ public float score( float ly = (queryCorrections.upperInterval() - ay) * queryScale; float y1 = queryCorrections.quantizedComponentSum(); float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional correction, which is + // For euclidean, we need to invert 
the score and apply the additional + // correction, which is // assumed to be the squared l2norm of the centroid centered vectors. if (similarityFunction == EUCLIDEAN) { score = @@ -114,8 +115,10 @@ public float score( - 2 * score; return Math.max(1 / (1f + score), 0); } else { - // For cosine and max inner product, we need to apply the additional correction, which is - // assumed to be the non-centered dot-product between the vector and the centroid + // For cosine and max inner product, we need to apply the additional correction, + // which is + // assumed to be the non-centered dot-product between the vector and the + // centroid score += queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() @@ -126,4 +129,52 @@ public float score( return Math.max((1f + score) / 2f, 0); } } + + // XXX DO NOT MERGE duplication with above. + /** + * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot + * product of the two vectors and their corrective factors. + * + * @param dotProduct - dot product of the two quantized vectors. + * @param queryCorrections - corrective factors for vector 'y'. + * @param indexLowerInterval - corrective factors for vector 'x'. + * @param indexUpperInterval - corrective factors for vector 'x'. + * @param indexAdditionalCorrection - corrective factors for vector 'x'. + * @param indexQuantizedComponentSum - corrective factors for vector 'x'. + * @return - a similarity score value between 0 and 1; higher values are better. 
+ */ + public float score( + float dotProduct, + OptimizedScalarQuantizer.QuantizationResult queryCorrections, + float indexLowerInterval, + float indexUpperInterval, + float indexAdditionalCorrection, + int indexQuantizedComponentSum) { + float x1 = indexQuantizedComponentSum; + float ax = indexLowerInterval; + // Here we must scale according to the bits + float lx = (indexUpperInterval - ax) * indexScale; + float ay = queryCorrections.lowerInterval(); + float ly = (queryCorrections.upperInterval() - ay) * queryScale; + float y1 = queryCorrections.quantizedComponentSum(); + float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; + // For euclidean, we need to invert the score and apply the additional + // correction, which is + // assumed to be the squared l2norm of the centroid centered vectors. + if (similarityFunction == EUCLIDEAN) { + score = queryCorrections.additionalCorrection() + indexAdditionalCorrection - 2 * score; + return Math.max(1 / (1f + score), 0); + } else { + // For cosine and max inner product, we need to apply the additional correction, + // which is + // assumed to be the non-centered dot-product between the vector and the + // centroid + score += + queryCorrections.additionalCorrection() + indexAdditionalCorrection - centroidDotProduct; + if (similarityFunction == MAXIMUM_INNER_PRODUCT) { + return VectorUtil.scaleMaxInnerProductScore(score); + } + return Math.max((1f + score) / 2f, 0); + } + } } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index b429c9be3997..70015719518e 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ 
b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -168,7 +168,11 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I } record Node( - MemorySegment vector, OptimizedScalarQuantizer.QuantizationResult correctiveTerms) {} + MemorySegment vector, + float lowerInterval, + float upperInterval, + float additionalCorrection, + int componentSum) {} @SuppressWarnings("restricted") Node getNode(int ord) throws IOException { @@ -182,14 +186,17 @@ Node getNode(int ord) throws IOException { input.readBytes(byteOffset, scratch, 0, nodeSize); vector = MemorySegment.ofArray(scratch); } - var correctiveTerms = - new OptimizedScalarQuantizer.QuantizationResult( - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), - Float.intBitsToFloat( - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); - return new Node(vector.reinterpret(vectorByteSize), correctiveTerms); + // XXX investigate reordering the vector so that corrective terms appear first. + // we're forced to read them immediately to avoid creating a second memory + // segment which is + // not cheap, so they might as well be read first to avoid additional memory + // latency. 
+ return new Node( + vector.reinterpret(vectorByteSize), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), + Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), + vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); } OptimizedScalarQuantizedVectorSimilarity getSimilarity() { @@ -242,7 +249,14 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.correctiveTerms); + return getSimilarity() + .score( + dotProduct, + queryCorrectiveTerms, + doc.lowerInterval, + doc.upperInterval, + doc.additionalCorrection, + doc.componentSum); } } From 955d083a724420b17c0167bd8d65384337510b5a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 11:32:59 -0700 Subject: [PATCH 14/21] cleanup --- ...imizedScalarQuantizedVectorSimilarity.java | 39 ++-------- ...orySegmentScalarQuantizedVectorScorer.java | 77 +++++++------------ 2 files changed, 33 insertions(+), 83 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java index c0e391851f14..0d9ff2d5432c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java @@ -97,40 +97,15 @@ public float score( float dotProduct, OptimizedScalarQuantizer.QuantizationResult queryCorrections, OptimizedScalarQuantizer.QuantizationResult indexCorrections) { - float x1 = 
indexCorrections.quantizedComponentSum(); - float ax = indexCorrections.lowerInterval(); - // Here we must scale according to the bits - float lx = (indexCorrections.upperInterval() - ax) * indexScale; - float ay = queryCorrections.lowerInterval(); - float ly = (queryCorrections.upperInterval() - ay) * queryScale; - float y1 = queryCorrections.quantizedComponentSum(); - float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional - // correction, which is - // assumed to be the squared l2norm of the centroid centered vectors. - if (similarityFunction == EUCLIDEAN) { - score = - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - 2 * score; - return Math.max(1 / (1f + score), 0); - } else { - // For cosine and max inner product, we need to apply the additional correction, - // which is - // assumed to be the non-centered dot-product between the vector and the - // centroid - score += - queryCorrections.additionalCorrection() - + indexCorrections.additionalCorrection() - - centroidDotProduct; - if (similarityFunction == MAXIMUM_INNER_PRODUCT) { - return VectorUtil.scaleMaxInnerProductScore(score); - } - return Math.max((1f + score) / 2f, 0); - } + return score( + dotProduct, + queryCorrections, + indexCorrections.lowerInterval(), + indexCorrections.upperInterval(), + indexCorrections.additionalCorrection(), + indexCorrections.quantizedComponentSum()); } - // XXX DO NOT MERGE duplication with above. /** * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot * product of the two vectors and their corrective factors. 
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 70015719518e..9715313bb738 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -129,44 +129,6 @@ final void checkOrdinal(int ord) { private static final ValueLayout.OfInt INT_UNALIGNED_LE = JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - // XXX I need to return something wraps the MemorySegment and can produce the - // corrective terms - // on demand. rep is probably (MemorySegment, MemorySegment) with a slice for - // the corrective terms. - @SuppressWarnings("restricted") - MemorySegment getVector(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); - if (vector == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, nodeSize); - vector = MemorySegment.ofArray(scratch).reinterpret(vectorByteSize); - } - return vector; - } - - @SuppressWarnings("restricted") - OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize + vectorByteSize; - MemorySegment node = input.segmentSliceOrNull(byteOffset, CORRECTIVE_TERMS_SIZE); - if (node == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, CORRECTIVE_TERMS_SIZE); - node = MemorySegment.ofArray(scratch).reinterpret(CORRECTIVE_TERMS_SIZE); - } - return new OptimizedScalarQuantizer.QuantizationResult( - 
Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, 0)), - Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES)), - Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES * 2)), - node.get(INT_UNALIGNED_LE, Integer.BYTES * 3)); - } - record Node( MemorySegment vector, float lowerInterval, @@ -188,9 +150,8 @@ Node getNode(int ord) throws IOException { } // XXX investigate reordering the vector so that corrective terms appear first. // we're forced to read them immediately to avoid creating a second memory - // segment which is - // not cheap, so they might as well be read first to avoid additional memory - // latency. + // segment which is not cheap, so they might as well be read first to avoid + // additional memory latency. return new Node( vector.reinterpret(vectorByteSize), Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), @@ -260,7 +221,7 @@ public float score(int node) throws IOException { } } - private record RandomVectorScorerSupplierImpl( + record RandomVectorScorerSupplierImpl( VectorSimilarityFunction similarityFunction, QuantizedByteVectorValues values, MemorySegmentAccessInput input) @@ -293,23 +254,37 @@ private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorer @Override public void setScoringOrdinal(int ord) throws IOException { checkOrdinal(ord); - query = getVector(ord); - queryCorrectiveTerms = getCorrectiveTerms(ord); + Node node = getNode(ord); + query = node.vector(); + queryCorrectiveTerms = + new OptimizedScalarQuantizer.QuantizationResult( + node.lowerInterval(), + node.upperInterval(), + node.additionalCorrection(), + node.componentSum()); } @Override public float score(int node) throws IOException { - MemorySegment doc = getVector(node); + Node doc = getNode(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); - case PACKED_NIBBLE -> 
PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); + case PACKED_NIBBLE -> + PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc.vector()); }; // Call getCorrectiveTerms() after computing dot product since corrective terms - // bytes appear - // after the vector bytes, so this sequence of calls is more cache friendly. - return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); + // bytes appear after the vector bytes, so this sequence of calls is more cache + // friendly. + return getSimilarity() + .score( + dotProduct, + queryCorrectiveTerms, + doc.lowerInterval(), + doc.upperInterval(), + doc.additionalCorrection(), + doc.componentSum()); } } } From e7178bc28598f09944a2a4d162b16daa90cb239a Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Tue, 30 Sep 2025 12:39:54 -0700 Subject: [PATCH 15/21] try another formulation of vector handling --- ...orySegmentScalarQuantizedVectorScorer.java | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 9715313bb738..3604eec6e5d3 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -160,6 +160,43 @@ Node getNode(int ord) throws IOException { vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); } + MemorySegment getRawVector(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize; + 
MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); + if (vector != null) { + return vector; + } + + if (scratch == null) { + scratch = new byte[nodeSize]; + } + input.readBytes(byteOffset, scratch, 0, nodeSize); + return MemorySegment.ofArray(scratch); + } + + @SuppressWarnings("restricted") + MemorySegment getVector(MemorySegment rawVector) { + return rawVector.reinterpret(vectorByteSize); + } + + float getLowerInterval(MemorySegment rawVector) { + return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize)); + } + + float getUpperInterval(MemorySegment rawVector) { + return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)); + } + + float getAdditionalCorrection(MemorySegment rawVector) { + return Float.intBitsToFloat( + rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)); + } + + int getComponentSum(MemorySegment rawVector) { + return rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3); + } + OptimizedScalarQuantizedVectorSimilarity getSimilarity() { return similarity; } @@ -199,13 +236,14 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) throws IOException { - Node doc = getNode(node); + MemorySegment rawDoc = getRawVector(node); + MemorySegment docVector = getVector(rawDoc); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); case PACKED_NIBBLE -> - PanamaVectorUtilSupport.int4DotProductSinglePacked(query, doc.vector); + PanamaVectorUtilSupport.int4DotProductSinglePacked(query, docVector); }; // Call getCorrectiveTerms() after computing dot product since corrective terms 
// bytes appear after the vector bytes, so this sequence of calls is more cache @@ -214,10 +252,10 @@ public float score(int node) throws IOException { .score( dotProduct, queryCorrectiveTerms, - doc.lowerInterval, - doc.upperInterval, - doc.additionalCorrection, - doc.componentSum); + getLowerInterval(rawDoc), + getUpperInterval(rawDoc), + getAdditionalCorrection(rawDoc), + getComponentSum(rawDoc)); } } From 151d2395e2c262a8e5a09743792fe57b5113ed05 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Thu, 2 Oct 2025 15:24:33 -0700 Subject: [PATCH 16/21] settle on a single path --- ...orySegmentScalarQuantizedVectorScorer.java | 114 ++++-------------- .../PanamaVectorUtilSupport.java | 58 ++++++--- 2 files changed, 61 insertions(+), 111 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 3604eec6e5d3..0c7735819c8c 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -16,13 +16,10 @@ */ package org.apache.lucene.internal.vectorization; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; import java.io.IOException; import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.nio.ByteOrder; import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; @@ -126,75 +123,30 @@ final void checkOrdinal(int ord) { } } - private static final ValueLayout.OfInt INT_UNALIGNED_LE = - 
JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - - record Node( - MemorySegment vector, - float lowerInterval, - float upperInterval, - float additionalCorrection, - int componentSum) {} - @SuppressWarnings("restricted") - Node getNode(int ord) throws IOException { - checkOrdinal(ord); - long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); - if (vector == null) { - if (scratch == null) { - scratch = new byte[nodeSize]; - } - input.readBytes(byteOffset, scratch, 0, nodeSize); - vector = MemorySegment.ofArray(scratch); - } - // XXX investigate reordering the vector so that corrective terms appear first. - // we're forced to read them immediately to avoid creating a second memory - // segment which is not cheap, so they might as well be read first to avoid - // additional memory latency. - return new Node( - vector.reinterpret(vectorByteSize), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)), - Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)), - vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3)); - } - - MemorySegment getRawVector(int ord) throws IOException { + MemorySegment getVector(int ord) throws IOException { checkOrdinal(ord); long byteOffset = (long) ord * nodeSize; - MemorySegment vector = input.segmentSliceOrNull(byteOffset, nodeSize); + MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize); if (vector != null) { return vector; } if (scratch == null) { - scratch = new byte[nodeSize]; + scratch = new byte[vectorByteSize]; } - input.readBytes(byteOffset, scratch, 0, nodeSize); + input.readBytes(byteOffset, scratch, 0, vectorByteSize); return MemorySegment.ofArray(scratch); } - @SuppressWarnings("restricted") - MemorySegment getVector(MemorySegment rawVector) { - return rawVector.reinterpret(vectorByteSize); - } - - 
float getLowerInterval(MemorySegment rawVector) { - return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize)); - } - - float getUpperInterval(MemorySegment rawVector) { - return Float.intBitsToFloat(rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)); - } - - float getAdditionalCorrection(MemorySegment rawVector) { - return Float.intBitsToFloat( - rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)); - } - - int getComponentSum(MemorySegment rawVector) { - return rawVector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3); + OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException { + checkOrdinal(ord); + long byteOffset = (long) ord * nodeSize + vectorByteSize; + return new OptimizedScalarQuantizer.QuantizationResult( + Float.intBitsToFloat(input.readInt(byteOffset)), + Float.intBitsToFloat(input.readInt(byteOffset + Integer.BYTES)), + Float.intBitsToFloat(input.readInt(byteOffset + Integer.BYTES * 2)), + input.readInt(byteOffset + Integer.BYTES * 3)); } OptimizedScalarQuantizedVectorSimilarity getSimilarity() { @@ -209,6 +161,7 @@ Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() { private static class RandomVectorScorerImpl extends RandomVectorScorerBase { private final byte[] query; private final OptimizedScalarQuantizer.QuantizationResult queryCorrectiveTerms; + private final byte[] scratch; RandomVectorScorerImpl( VectorSimilarityFunction similarityFunction, @@ -220,6 +173,7 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = values.getScalarEncoding(); OptimizedScalarQuantizer quantizer = values.getQuantizer(); + scratch = new byte[values.getVectorByteLength()]; query = new byte [OptimizedScalarQuantizer.discretize( @@ -236,8 +190,7 @@ private static class RandomVectorScorerImpl extends RandomVectorScorerBase { @Override public float score(int node) 
throws IOException { - MemorySegment rawDoc = getRawVector(node); - MemorySegment docVector = getVector(rawDoc); + MemorySegment docVector = getVector(node); float dotProduct = switch (getScalarEncoding()) { case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, docVector); @@ -248,14 +201,7 @@ public float score(int node) throws IOException { // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity() - .score( - dotProduct, - queryCorrectiveTerms, - getLowerInterval(rawDoc), - getUpperInterval(rawDoc), - getAdditionalCorrection(rawDoc), - getComponentSum(rawDoc)); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); } } @@ -292,37 +238,23 @@ private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorer @Override public void setScoringOrdinal(int ord) throws IOException { checkOrdinal(ord); - Node node = getNode(ord); - query = node.vector(); - queryCorrectiveTerms = - new OptimizedScalarQuantizer.QuantizationResult( - node.lowerInterval(), - node.upperInterval(), - node.additionalCorrection(), - node.componentSum()); + query = getVector(ord); + queryCorrectiveTerms = getCorrectiveTerms(ord); } @Override public float score(int node) throws IOException { - Node doc = getNode(node); + MemorySegment doc = getVector(node); float dotProduct = switch (getScalarEncoding()) { - case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); - case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector()); - case PACKED_NIBBLE -> - PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc.vector()); + case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc); + case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductBothPacked(query, 
doc); }; // Call getCorrectiveTerms() after computing dot product since corrective terms // bytes appear after the vector bytes, so this sequence of calls is more cache // friendly. - return getSimilarity() - .score( - dotProduct, - queryCorrectiveTerms, - doc.lowerInterval(), - doc.upperInterval(), - doc.additionalCorrection(), - doc.componentSum()); + return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node)); } } } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index d2e104f92f70..cd870c93813a 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -60,7 +60,8 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport { private static final VectorSpecies FLOAT_SPECIES; private static final VectorSpecies DOUBLE_SPECIES = PanamaVectorConstants.PREFERRED_DOUBLE_SPECIES; - // This create a vector species which we make sure have exact half bits of DOUBLE_SPECIES + // This create a vector species which we make sure have exact half bits of + // DOUBLE_SPECIES private static final VectorSpecies INT_FOR_DOUBLE_SPECIES = VectorSpecies.of(int.class, VectorShape.forBitSize(DOUBLE_SPECIES.vectorBitSize() / 2)); private static final VectorSpecies INT_SPECIES = @@ -110,7 +111,8 @@ public float dotProduct(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += dotProductBody(a, b, i); @@ -154,7 +156,8 @@ private float dotProductBody(float[] a, float[] b, int 
limit) { FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length()); acc4 = fma(vg, vh, acc4); } - // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -173,7 +176,8 @@ public float cosine(float[] a, float[] b) { float norm1 = 0; float norm2 = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); float[] ret = cosineBody(a, b, i); @@ -218,7 +222,8 @@ private float[] cosineBody(float[] a, float[] b, int limit) { norm1_2 = fma(vc, vc, norm1_2); norm2_2 = fma(vd, vd, norm2_2); } - // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -238,7 +243,8 @@ public float squareDistance(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += squareDistanceBody(a, b, i); @@ -287,7 +293,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { FloatVector diff4 = vg.sub(vh); acc4 = fma(diff4, diff4, acc4); } - // vector tail: 
less scalar computations for unaligned sizes, esp with big vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big + // vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -302,8 +309,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { // Binary functions, these all follow a general pattern like this: // - // short intermediate = a * b; - // int accumulator = (int)accumulator + (int)intermediate; + // short intermediate = a * b; + // int accumulator = (int)accumulator + (int)intermediate; // // 256 or 512 bit vectors can process 64 or 128 bits at a time, respectively // intermediate results use 128 or 256 bit vectors, respectively @@ -877,7 +884,8 @@ private static int squareDistanceBody256( ByteVector vb8 = b.load(BYTE_SPECIES, i); // 32-bit sub, multiply, and add into accumulators - // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors on AVX-512? + // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors + // on AVX-512? 
Vector va32 = va8.convertShape(conversion, INT_SPECIES, 0); Vector vb32 = vb8.convertShape(conversion, INT_SPECIES, 0); Vector diff32 = va32.sub(vb32); @@ -890,8 +898,10 @@ private static int squareDistanceBody256( /** vectorized square distance body (128 bit vectors) */ private static int squareDistanceBody128( ByteVectorLoader a, ByteVectorLoader b, int limit, boolean signed) { - // 128-bit implementation, which must "split up" vectors due to widening conversions - // it doesn't help to do the overlapping read trick, due to 32-bit multiply in the formula + // 128-bit implementation, which must "split up" vectors due to widening + // conversions + // it doesn't help to do the overlapping read trick, due to 32-bit multiply in + // the formula IntVector acc1 = IntVector.zero(IntVector.SPECIES_128); IntVector acc2 = IntVector.zero(IntVector.SPECIES_128); var conversion_short = signed ? B2S : ZERO_EXTEND_B2S; @@ -1091,7 +1101,8 @@ private static int int4SquareDistanceBothPackedBody( return sum; } - // Experiments suggest that we need at least 8 lanes so that the overhead of going with the vector + // Experiments suggest that we need at least 8 lanes so that the overhead of + // going with the vector // approach and counting trues on vector masks pays off. private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8; @@ -1099,7 +1110,8 @@ private static int int4SquareDistanceBothPackedBody( public int findNextGEQ(int[] buffer, int target, int from, int to) { if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) { // This effectively implements the V1 intersection algorithm from - // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of Sorted Integers + // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of + // Sorted Integers // with T = INT_SPECIES.length(), ie. 
T=8 with AVX2 and T=16 with AVX-512 // https://arxiv.org/pdf/1401.6399 for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) { @@ -1246,24 +1258,29 @@ public float minMaxScalarQuantize( FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i); // Make sure the value is within the quantile range, cutting off the tails - // see first parenthesis in equation: byte = (float - minQuantile) * 127/(maxQuantile - + // see first parenthesis in equation: byte = (float - minQuantile) * + // 127/(maxQuantile - // minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) - // Math.round rounds to positive infinity, so do the same by +0.5 then truncating to int + // Math.round rounds to positive infinity, so do the same by +0.5 then + // truncating to int Vector roundedDxs = fma(dxc, dxc.broadcast(scale), dxc.broadcast(0.5f)).convert(VectorOperators.F2I, 0); // output this to the array ((ByteVector) roundedDxs.castShape(BYTE_SPECIES, 0)).intoArray(dest, i); - // We multiply by `alpha` here to get the quantized value back into the original range + // We multiply by `alpha` here to get the quantized value back into the original + // range // to aid in calculating the corrective offset FloatVector dxq = ((FloatVector) roundedDxs.castShape(FLOAT_SPECIES, 0)).mul(alpha); // Calculate the corrective offset that needs to be applied to the score // in addition to the `byte * minQuantile * alpha` term in the equation - // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized value + // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized + // value // will be rounded to the nearest whole number and lose some accuracy - // Additionally, we account for the global correction of `minQuantile^2` in the equation + // Additionally, we account for the global correction of 
`minQuantile^2` in the + // equation sum = fma( v.sub(minQuantile / 2f), @@ -1375,7 +1392,8 @@ public float[] l2normalize(float[] v, boolean throwOnZero) { float invNorm = 1.0f / (float) Math.sqrt(l1norm); int i = 0; - // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the + // overhead to vectorize if (v.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(v.length); l2normalizeBody(v, invNorm, i); From 4d262f1d846cd46f4af4b371ba9d1703f7e81d39 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 10:55:37 -0700 Subject: [PATCH 17/21] fix license --- ...ymmetricScalarQuantizeFlatVectorsScorer.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index c77b7b4bbe82..a5af7ad211b3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.lucene.codecs.lucene104; import java.io.IOException; From 2c98eadd5d045870442434293466cb46429da2c5 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 11:00:38 -0700 Subject: [PATCH 18/21] fix handling of asymmetric in memory seg impl --- .../Lucene104MemorySegmentScalarQuantizedVectorScorer.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java index 1f9d3de2d707..296259210903 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; -import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; @@ -42,7 +41,7 @@ class Lucene104MemorySegmentScalarQuantizedVectorScorer static final Lucene104MemorySegmentScalarQuantizedVectorScorer INSTANCE = new 
Lucene104MemorySegmentScalarQuantizedVectorScorer(); - private static final FlatVectorsScorer DELEGATE = + private static final Lucene104ScalarQuantizedVectorScorer DELEGATE = new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); private static final int CORRECTIVE_TERMS_SIZE = Float.BYTES * 3 + Integer.BYTES; @@ -67,7 +66,9 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier( QuantizedByteVectorValues scoringVectors, QuantizedByteVectorValues targetVectors) throws IOException { - throw new UnsupportedOperationException("no asymmetric encodings are supported yet"); + // We do not yet support acceleration for any asymmetric formats. + return DELEGATE.getRandomVectorScorerSupplier( + similarityFunction, scoringVectors, targetVectors); } @Override From 16525957d37ae3229b3f75a9247615a552963955 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 11:30:51 -0700 Subject: [PATCH 19/21] javadoc --- .../lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index a5af7ad211b3..b84726052957 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -22,6 +22,10 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +/** + * Extension of {@link FlatVectorsScorer} that allows using two different vector codings for the + * "scoring" or "query" vectors and the "target" or "doc" vectors. 
+ */ public interface AsymmetricScalarQuantizeFlatVectorsScorer extends FlatVectorsScorer { /** * Returns a {@link RandomVectorScorerSupplier} that can be used to score asymmetric vector From 8fca8a33bb8e15a341fc8e9f2cb9af7d6e5adfbb Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 15:05:23 -0700 Subject: [PATCH 20/21] fix a bug in random vector scoring --- ...metricScalarQuantizeFlatVectorsScorer.java | 2 + .../Lucene104ScalarQuantizedVectorScorer.java | 15 ++--- ...Lucene104ScalarQuantizedVectorsWriter.java | 3 +- .../PanamaVectorUtilSupport.java | 55 +++++++------------ .../PanamaVectorizationProvider.java | 11 ++-- ...ne104HnswScalarQuantizedVectorsFormat.java | 7 +-- 6 files changed, 36 insertions(+), 57 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java index b84726052957..a18f8e411f26 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/AsymmetricScalarQuantizeFlatVectorsScorer.java @@ -15,6 +15,8 @@ * limitations under the License. 
*/ +// XXX DO NOT MERGE FIX NAME + package org.apache.lucene.codecs.lucene104; import java.io.IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java index a5f37896eb82..1e1baa0593d6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java @@ -84,8 +84,7 @@ public RandomVectorScorer getRandomVectorScorer( var targetCorrectiveTerms = quantizer.scalarQuantize( target, scratch, scalarEncoding.getQueryBits(), qv.getCentroid()); - // for single bit query nibble, we need to transpose the nibbles for fast - // scoring comparisons + // for single bit query nibble, we need to transpose the nibbles for fast scoring comparisons if (scalarEncoding == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT_QUERY_NIBBLE) { OptimizedScalarQuantizer.transposeHalfByte(scratch, targetQuantized); @@ -194,8 +193,7 @@ public UpdateableRandomVectorScorer scorer() throws IOException { @Override public float score(int node) throws IOException { - return quantizedScore( - targetVector, targetCorrectiveTerms, targetValues, node, similarity); + return quantizedScore(targetVector, targetCorrectiveTerms, values, node, similarity); } @Override @@ -291,8 +289,7 @@ public static float quantizedScore( float y1 = queryCorrections.quantizedComponentSum(); float score = ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct; - // For euclidean, we need to invert the score and apply the additional - // correction, which is + // For euclidean, we need to invert the score and apply the additional correction, which is // assumed to be the squared l2norm of the centroid centered vectors. 
if (similarityFunction == EUCLIDEAN) { score = @@ -301,10 +298,8 @@ public static float quantizedScore( - 2 * score; return Math.max(1 / (1f + score), 0); } else { - // For cosine and max inner product, we need to apply the additional correction, - // which is - // assumed to be the non-centered dot-product between the vector and the - // centroid + // For cosine and max inner product, we need to apply the additional correction, which is + // assumed to be the non-centered dot-product between the vector and the centroid score += queryCorrections.additionalCorrection() + indexCorrections.additionalCorrection() diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java index 9e62969060dd..10a28a7f0022 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsWriter.java @@ -138,8 +138,7 @@ public FlatFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOExceptio public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { rawVectorDelegate.flush(maxDoc, sortMap); for (FieldWriter field : fields) { - // after raw vectors are written, normalize vectors for clustering and - // quantization + // after raw vectors are written, normalize vectors for clustering and quantization if (VectorSimilarityFunction.COSINE == field.fieldInfo.getVectorSimilarityFunction()) { field.normalizeVectors(); } diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index cd870c93813a..40e79f30cdb8 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ 
b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -60,8 +60,7 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport { private static final VectorSpecies FLOAT_SPECIES; private static final VectorSpecies DOUBLE_SPECIES = PanamaVectorConstants.PREFERRED_DOUBLE_SPECIES; - // This create a vector species which we make sure have exact half bits of - // DOUBLE_SPECIES + // This create a vector species which we make sure have exact half bits of DOUBLE_SPECIES private static final VectorSpecies INT_FOR_DOUBLE_SPECIES = VectorSpecies.of(int.class, VectorShape.forBitSize(DOUBLE_SPECIES.vectorBitSize() / 2)); private static final VectorSpecies INT_SPECIES = @@ -111,8 +110,7 @@ public float dotProduct(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += dotProductBody(a, b, i); @@ -176,8 +174,7 @@ public float cosine(float[] a, float[] b) { float norm1 = 0; float norm2 = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); float[] ret = cosineBody(a, b, i); @@ -222,8 +219,7 @@ private float[] cosineBody(float[] a, float[] b, int limit) { norm1_2 = fma(vc, vc, norm1_2); norm2_2 = fma(vd, vd, norm2_2); } - // vector tail: less scalar computations for unaligned sizes, esp with big - // vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); 
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -243,8 +239,7 @@ public float squareDistance(float[] a, float[] b) { int i = 0; float res = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (a.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(a.length); res += squareDistanceBody(a, b, i); @@ -293,8 +288,7 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { FloatVector diff4 = vg.sub(vh); acc4 = fma(diff4, diff4, acc4); } - // vector tail: less scalar computations for unaligned sizes, esp with big - // vector sizes + // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes for (; i < limit; i += FLOAT_SPECIES.length()) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -309,8 +303,8 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { // Binary functions, these all follow a general pattern like this: // - // short intermediate = a * b; - // int accumulator = (int)accumulator + (int)intermediate; + // short intermediate = a * b; + // int accumulator = (int)accumulator + (int)intermediate; // // 256 or 512 bit vectors can process 64 or 128 bits at a time, respectively // intermediate results use 128 or 256 bit vectors, respectively @@ -884,8 +878,7 @@ private static int squareDistanceBody256( ByteVector vb8 = b.load(BYTE_SPECIES, i); // 32-bit sub, multiply, and add into accumulators - // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors - // on AVX-512? + // TODO: uses AVX-512 heavy multiply on zmm, should we just use 256-bit vectors on AVX-512? 
Vector va32 = va8.convertShape(conversion, INT_SPECIES, 0); Vector vb32 = vb8.convertShape(conversion, INT_SPECIES, 0); Vector diff32 = va32.sub(vb32); @@ -898,10 +891,8 @@ private static int squareDistanceBody256( /** vectorized square distance body (128 bit vectors) */ private static int squareDistanceBody128( ByteVectorLoader a, ByteVectorLoader b, int limit, boolean signed) { - // 128-bit implementation, which must "split up" vectors due to widening - // conversions - // it doesn't help to do the overlapping read trick, due to 32-bit multiply in - // the formula + // 128-bit implementation, which must "split up" vectors due to widening conversions + // it doesn't help to do the overlapping read trick, due to 32-bit multiply in the formula IntVector acc1 = IntVector.zero(IntVector.SPECIES_128); IntVector acc2 = IntVector.zero(IntVector.SPECIES_128); var conversion_short = signed ? B2S : ZERO_EXTEND_B2S; @@ -1101,8 +1092,7 @@ private static int int4SquareDistanceBothPackedBody( return sum; } - // Experiments suggest that we need at least 8 lanes so that the overhead of - // going with the vector + // Experiments suggest that we need at least 8 lanes so that the overhead of going with the vector // approach and counting trues on vector masks pays off. private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8; @@ -1110,8 +1100,7 @@ private static int int4SquareDistanceBothPackedBody( public int findNextGEQ(int[] buffer, int target, int from, int to) { if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) { // This effectively implements the V1 intersection algorithm from - // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of - // Sorted Integers + // D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of Sorted Integers // with T = INT_SPECIES.length(), ie. 
T=8 with AVX2 and T=16 with AVX-512 // https://arxiv.org/pdf/1401.6399 for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) { @@ -1259,28 +1248,23 @@ public float minMaxScalarQuantize( // Make sure the value is within the quantile range, cutting off the tails // see first parenthesis in equation: byte = (float - minQuantile) * - // 127/(maxQuantile - - // minQuantile) + // 127/(maxQuantile - minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) - // Math.round rounds to positive infinity, so do the same by +0.5 then - // truncating to int + // Math.round rounds to positive infinity, so do the same by +0.5 then truncating to int Vector roundedDxs = fma(dxc, dxc.broadcast(scale), dxc.broadcast(0.5f)).convert(VectorOperators.F2I, 0); // output this to the array ((ByteVector) roundedDxs.castShape(BYTE_SPECIES, 0)).intoArray(dest, i); - // We multiply by `alpha` here to get the quantized value back into the original - // range + // We multiply by `alpha` here to get the quantized value back into the original range // to aid in calculating the corrective offset FloatVector dxq = ((FloatVector) roundedDxs.castShape(FLOAT_SPECIES, 0)).mul(alpha); // Calculate the corrective offset that needs to be applied to the score // in addition to the `byte * minQuantile * alpha` term in the equation - // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized - // value + // we add the `(dx - dxq) * dxq` term to account for the fact that the quantized value // will be rounded to the nearest whole number and lose some accuracy - // Additionally, we account for the global correction of `minQuantile^2` in the - // equation + // Additionally, we account for the global correction of `minQuantile^2` in the equation sum = fma( v.sub(minQuantile / 2f), @@ -1392,8 +1376,7 @@ public float[] 
l2normalize(float[] v, boolean throwOnZero) { float invNorm = 1.0f / (float) Math.sqrt(l1norm); int i = 0; - // if the array size is large (> 2x platform vector size), it's worth the - // overhead to vectorize + // if the array size is large (> 2x platform vector size), it's worth the overhead to vectorize if (v.length > 2 * FLOAT_SPECIES.length()) { i += FLOAT_SPECIES.loopBound(v.length); l2normalizeBody(v, invNorm, i); diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index 1a94ce7dcf2c..bbe3defa62ee 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,8 +21,10 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; +import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -31,10 +33,8 @@ /** A vectorization provider that leverages the Panama Vector API. 
*/ final class PanamaVectorizationProvider extends VectorizationProvider { - // NOTE: Avoid static fields or initializers which rely on the vector API, as - // these initializers - // would get called before we have a chance to perform sanity checks around the - // vector API in the + // NOTE: Avoid static fields or initializers which rely on the vector API, as these initializers + // would get called before we have a chance to perform sanity checks around the vector API in the // constructor of this class. Put them in PanamaVectorConstants instead. private final VectorUtilSupport vectorUtilSupport; @@ -88,7 +88,8 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public AsymmetricScalarQuantizeFlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; + // return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; + return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index bfd04a4411aa..6cd536723b61 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -60,6 +60,7 @@ public class TestLucene104HnswScalarQuantizedVectorsFormat extends BaseKnnVector public void setUp() throws Exception { var encodingValues = ScalarEncoding.values(); encoding = encodingValues[random().nextInt(encodingValues.length)]; + System.out.println("ENCODING: " + encoding); format = new Lucene104HnswScalarQuantizedVectorsFormat( encoding, @@ -134,8 +135,7 @@ public void testSingleVectorCase() throws Exception { Integer.MAX_VALUE); 
assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); - // When it's the only vector in a segment, the score should be very close to the - // true + // When it's the only vector in a segment, the score should be very close to the true // score assertEquals(trueScore, td.scoreDocs[0].score, 0.01f); } @@ -167,8 +167,7 @@ public void testLimits() { ScalarEncoding.UNSIGNED_BYTE, 20, 100, 1, new SameThreadExecutorService())); } - // Ensures that all expected vector similarity functions are translatable in the - // format. + // Ensures that all expected vector similarity functions are translatable in the format. public void testVectorSimilarityFuncs() { // This does not necessarily have to be all similarity functions, but // differences should be considered carefully. From 25005563b102ac6db2b57ef2eedb7a581100b8a0 Mon Sep 17 00:00:00 2001 From: Trevor McCulloch Date: Fri, 3 Oct 2025 15:07:21 -0700 Subject: [PATCH 21/21] restore memseg scorer; fix some errant formatting issues --- .../internal/vectorization/PanamaVectorUtilSupport.java | 4 ++-- .../internal/vectorization/PanamaVectorizationProvider.java | 5 +---- .../TestLucene104HnswScalarQuantizedVectorsFormat.java | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index 40e79f30cdb8..425b0e6630be 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -1247,8 +1247,8 @@ public float minMaxScalarQuantize( FloatVector v = FloatVector.fromArray(FLOAT_SPECIES, vector, i); // Make sure the value is within the quantile range, cutting off the tails - // see first parenthesis in equation: byte = (float - minQuantile) * - // 127/(maxQuantile - 
minQuantile) + // see first parenthesis in equation: byte = (float - minQuantile) * 127/(maxQuantile - + // minQuantile) FloatVector dxc = v.min(maxQuantile).max(minQuantile).sub(minQuantile); // Scale the value to the range [0, 127], this is our quantized value // scale = 127/(maxQuantile - minQuantile) diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java index bbe3defa62ee..0ec71b6b7f2f 100644 --- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java +++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java @@ -21,10 +21,8 @@ import java.util.Locale; import java.util.logging.Logger; import jdk.incubator.vector.FloatVector; -import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene104.AsymmetricScalarQuantizeFlatVectorsScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.Constants; @@ -88,8 +86,7 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { @Override public AsymmetricScalarQuantizeFlatVectorsScorer getLucene104ScalarQuantizedVectorsScorer() { - // return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; - return new Lucene104ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + return Lucene104MemorySegmentScalarQuantizedVectorScorer.INSTANCE; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index 6cd536723b61..0e8084391ab5 
100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -60,7 +60,6 @@ public class TestLucene104HnswScalarQuantizedVectorsFormat extends BaseKnnVector public void setUp() throws Exception { var encodingValues = ScalarEncoding.values(); encoding = encodingValues[random().nextInt(encodingValues.length)]; - System.out.println("ENCODING: " + encoding); format = new Lucene104HnswScalarQuantizedVectorsFormat( encoding,