diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index aec321005770..ef1e808c2559 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -182,6 +182,8 @@ Optimizations * GITHUB#14863: Perform scoring for 4, 7, 8 bit quantized vectors off-heap. (Kaival Parikh) +* GITHUB#15261: Implement longValues for MultiFieldNormValues to speedup CombinedQuery (Ge Song) + Bug Fixes --------------------- * GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java index 65609921bc93..179872e52435 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -25,6 +26,7 @@ import java.util.Set; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.internal.hppc.FloatArrayList; import org.apache.lucene.search.CombinedFieldQuery.FieldAndWeight; import org.apache.lucene.search.similarities.Similarity.BulkSimScorer; import org.apache.lucene.search.similarities.Similarity.SimScorer; @@ -127,8 +129,10 @@ public float score(int doc, float freq) throws IOException { */ public void scoreRange(DocAndFloatFeatureBuffer buffer) throws IOException { normValues = ArrayUtil.growNoCopy(normValues, buffer.size); - for (int i = 0; i < buffer.size; i++) { - normValues[i] = getNormValue(buffer.docs[i]); + if (norms != null) { + norms.longValues(buffer.size, buffer.docs, normValues, 1L); + } else { + Arrays.fill(normValues, 0, buffer.size, 1L); } bulkScorer.score(buffer.size, buffer.features, normValues, buffer.features); } @@ -145,6 +149,7 @@ public Explanation explain(int doc, Explanation freqExpl) throws IOException { private static class MultiFieldNormValues extends NumericDocValues { private final NumericDocValues[] normsArr; + private float[] accBuf = FloatArrayList.EMPTY_ARRAY; private final float[] weightArr; private long current; private int docID = -1; @@ -193,5 +198,33 @@ public int advance(int target) { public long cost() { throw new UnsupportedOperationException(); } + + @Override + public void longValues(int size, int[] docs, long[] values, long defaultValue) + throws IOException { + if (accBuf.length < size) { + accBuf = new float[ArrayUtil.oversize(size, Float.BYTES)]; + } else { + Arrays.fill(accBuf, 0f); + } + + for (int i = 0; i < normsArr.length; i++) { + // this code relies on the assumption that document length can never be equal to 0, + // so we can use 0L to indicate whether we have a norm value or not + normsArr[i].longValues(size, docs, values, 0L); + float weight = weightArr[i]; + for (int j = 0; j < size; j++) { + accBuf[j] += weight * LENGTH_TABLE[Byte.toUnsignedInt((byte) values[j])]; + } + } + + for (int i = 0; i < size; i++) { + if (accBuf[i] == 0f) { + values[i] = defaultValue; + } else { + values[i] = SmallFloat.intToByte4(Math.round(accBuf[i])); + } + } + } } }