From e858aff6487893feee1d02e5af475aebbcc28265 Mon Sep 17 00:00:00 2001 From: "gesong.samuel" Date: Tue, 30 Sep 2025 17:16:48 +0800 Subject: [PATCH 1/3] implement MultiFieldNormValues.longValues --- .../search/MultiNormsLeafSimScorer.java | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java index 65609921bc93..b85d33bffc8b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -127,8 +128,10 @@ public float score(int doc, float freq) throws IOException { */ public void scoreRange(DocAndFloatFeatureBuffer buffer) throws IOException { normValues = ArrayUtil.growNoCopy(normValues, buffer.size); - for (int i = 0; i < buffer.size; i++) { - normValues[i] = getNormValue(buffer.docs[i]); + if (norms != null) { + norms.longValues(buffer.size, buffer.docs, normValues, 1L); + } else { + Arrays.fill(normValues, 0, buffer.size, 1L); } bulkScorer.score(buffer.size, buffer.features, normValues, buffer.features); } @@ -145,6 +148,7 @@ public Explanation explain(int doc, Explanation freqExpl) throws IOException { private static class MultiFieldNormValues extends NumericDocValues { private final NumericDocValues[] normsArr; + private float[] accBuf = new float[0]; private final float[] weightArr; private long current; private int docID = -1; @@ -193,5 +197,31 @@ public int advance(int target) { public long cost() { throw new UnsupportedOperationException(); } + + @Override + public void longValues(int size, int[] docs, long[] values, long defaultValue) + throws IOException { + if (accBuf.length < size) { + 
accBuf = new float[ArrayUtil.oversize(size, Float.BYTES)]; + } else { + Arrays.fill(accBuf, 0f); + } + + for (int i = 0; i < normsArr.length; i++) { + normsArr[i].longValues(size, docs, values, 0L); + float weight = weightArr[i]; + for (int j = 0; j < size; j++) { + accBuf[j] += weight * LENGTH_TABLE[Byte.toUnsignedInt((byte) values[j])]; + } + } + + for (int i = 0; i < size; i++) { + if (accBuf[i] == 0f) { + values[i] = defaultValue; + } else { + values[i] = SmallFloat.intToByte4(Math.round(accBuf[i])); + } + } + } } } From c6d88f349a91adf0c048b2b93cdf3bfbb68584d4 Mon Sep 17 00:00:00 2001 From: "gesong.samuel" Date: Tue, 30 Sep 2025 17:28:43 +0800 Subject: [PATCH 2/3] change --- lucene/CHANGES.txt | 2 ++ .../java/org/apache/lucene/search/MultiNormsLeafSimScorer.java | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index aec321005770..ef1e808c2559 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -182,6 +182,8 @@ Optimizations * GITHUB#14863: Perform scoring for 4, 7, 8 bit quantized vectors off-heap. 
(Kaival Parikh) +* GITHUB#15261: Implement longValues for MultiFieldNormValues to speed up CombinedFieldQuery (Ge Song) + Bug Fixes --------------------- * GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java index b85d33bffc8b..a4803207c2c5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.internal.hppc.FloatArrayList; import org.apache.lucene.search.CombinedFieldQuery.FieldAndWeight; import org.apache.lucene.search.similarities.Similarity.BulkSimScorer; import org.apache.lucene.search.similarities.Similarity.SimScorer; @@ -148,7 +149,7 @@ public Explanation explain(int doc, Explanation freqExpl) throws IOException { private static class MultiFieldNormValues extends NumericDocValues { private final NumericDocValues[] normsArr; - private float[] accBuf = new float[0]; + private float[] accBuf = FloatArrayList.EMPTY_ARRAY; private final float[] weightArr; private long current; private int docID = -1; From 708e7f2f4e9dedf3d5ab22c6adcca06597fe2ce3 Mon Sep 17 00:00:00 2001 From: "gesong.samuel" Date: Thu, 9 Oct 2025 11:19:54 +0800 Subject: [PATCH 3/3] add comment --- .../java/org/apache/lucene/search/MultiNormsLeafSimScorer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java index a4803207c2c5..179872e52435 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java @@ -209,6 +209,8 @@ public void longValues(int size, int[] docs, long[] values, long defaultValue) } for (int i = 0; i < normsArr.length; i++) { + // this code relies on the assumption that document length can never be equal to 0, + // so we can use 0L to indicate whether we have a norm value or not normsArr[i].longValues(size, docs, values, 0L); float weight = weightArr[i]; for (int j = 0; j < size; j++) {