From 95d4147d419a57237380743a3144c1825fc76842 Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Wed, 17 Sep 2025 10:27:30 +0900
Subject: [PATCH 1/5] optimize prefix sums

---
 .../lucene101/ForDeltaUtil.java               |   7 +-
 .../lucene101/Lucene101PostingsReader.java    |  11 +-
 .../Lucene50CompressingTermVectorsReader.java |   8 +-
 .../lucene84/Lucene84PostingsReader.java      |   7 +-
 .../lucene90/Lucene90PostingsReader.java      |   7 +-
 .../lucene912/Lucene912PostingsReader.java    |   7 +-
 .../lucene99/Lucene99PostingsReader.java      |   7 +-
 .../Lucene90CompressingTermVectorsReader.java | 127 +++++++++---------
 8 files changed, 95 insertions(+), 86 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java
index 276ed2006239..ae01015a2978 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java
@@ -104,9 +104,10 @@ private static void prefixSum16(int[] arr, int base) {
   }
 
   private static void prefixSum32(int[] arr, int base) {
-    arr[0] += base;
-    for (int i = 1; i < BLOCK_SIZE; ++i) {
-      arr[i] += arr[i - 1];
+    int sum = base;
+    for (int i = 0; i < BLOCK_SIZE; ++i) {
+      sum += arr[i];
+      arr[i] = sum;
     }
   }
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
index 62f9b49f6526..c5155b94e2b1 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
@@ -186,9 +186,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(int[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    int sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }
 
@@ -606,8 +607,10 @@ private void refillFullBlock() throws IOException {
       for (int i = 0; i < numLongs - 1; ++i) {
         docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]);
       }
+      int sum = docCumulativeWordPopCounts[0];
       for (int i = 1; i < numLongs - 1; ++i) {
-        docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1];
+        sum += docCumulativeWordPopCounts[i];
+        docCumulativeWordPopCounts[i] = sum;
       }
       docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE;
       assert docCumulativeWordPopCounts[numLongs - 2]
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
index bee4ea5e3084..3e69d751fd21 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
@@ -547,8 +547,10 @@ public Fields get(int doc) throws IOException {
             // delta-decode start offsets and patch lengths using term lengths
             final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
             lengths[i][positionIndex[i][j]] += termLength;
+            int sum = fStartOffsets[positionIndex[i][j]];
             for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
-              fStartOffsets[k] += fStartOffsets[k - 1];
+              sum += fStartOffsets[k];
+              fStartOffsets[k] = sum;
               fLengths[k] += termLength;
             }
           }
@@ -565,8 +567,10 @@ public Fields get(int doc) throws IOException {
         if (fPositions != null) {
           for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
             // delta-decode start offsets
+            int sum = fPositions[fpositionIndex[j]];
             for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
-              fPositions[k] += fPositions[k - 1];
+              sum += fPositions[k];
+              fPositions[k] = sum;
             }
           }
         }
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java
index 9709c7aa9d7b..2e6d9d7507de 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java
@@ -164,9 +164,10 @@ static void readVIntBlock(
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java
index 190fe5b1d1d5..ea775ac098f6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java
@@ -162,9 +162,10 @@ static void readVIntBlock(
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/Lucene912PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/Lucene912PostingsReader.java
index 77353ed63da6..6e48442d05d8 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/Lucene912PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/Lucene912PostingsReader.java
@@ -196,9 +196,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java
index 1cc41e239263..28617c8cf6ef 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java
@@ -145,9 +145,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index a39112039570..fdc1816b91a8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -98,9 +98,12 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private final long numDirtyDocs; // cumulative number of docs in incomplete chunks
   private final long maxPointer; // end of the data section
   private BlockState blockState = new BlockState(-1, -1, 0);
-  // Cache of recently prefetched block IDs. This helps reduce chances of prefetching the same block
-  // multiple times, which is otherwise likely due to index sorting or recursive graph bisection
-  // clustering similar documents together. NOTE: this cache must be small since it's fully scanned.
+  // Cache of recently prefetched block IDs. This helps reduce chances of
+  // prefetching the same block
+  // multiple times, which is otherwise likely due to index sorting or recursive
+  // graph bisection
+  // clustering similar documents together. NOTE: this cache must be small since
+  // it's fully scanned.
   private final long[] prefetchedBlockIDCache;
   private int prefetchedBlockIDCacheIndex;
 
@@ -114,8 +117,7 @@ private Lucene90CompressingTermVectorsReader(Lucene90CompressingTermVectorsReade
     this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
    this.numDocs = reader.numDocs;
-    this.reader =
-        new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
     this.version = reader.version;
     this.numChunks = reader.numChunks;
     this.numDirtyChunks = reader.numDirtyChunks;
@@ -144,18 +146,13 @@ public Lucene90CompressingTermVectorsReader(
     ChecksumIndexInput metaIn = null;
     try {
       // Open the data file
-      final String vectorsStreamFN =
-          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
-      vectorsStream =
-          d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
-      version =
-          CodecUtil.checkIndexHeader(
-              vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
-      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix)
-          == vectorsStream.getFilePointer();
-
-      final String metaStreamFN =
-          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
+      final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
+      vectorsStream = d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
+      version = CodecUtil.checkIndexHeader(
+          vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix) == vectorsStream.getFilePointer();
+
+      final String metaStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
       metaIn = d.openChecksumInput(metaStreamFN);
       CodecUtil.checkIndexHeader(
           metaIn,
@@ -169,21 +166,22 @@ public Lucene90CompressingTermVectorsReader(
       chunkSize = metaIn.readVInt();
 
       // NOTE: data file is too costly to verify checksum against all the bytes on open,
-      // but for now we at least verify proper structure of the checksum footer: which looks
-      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // but for now we at least verify proper structure of the checksum footer: which
+      // looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of
+      // corruption
       // such as file truncation.
       CodecUtil.retrieveChecksum(vectorsStream);
 
-      FieldsIndexReader fieldsIndexReader =
-          new FieldsIndexReader(
-              d,
-              si.name,
-              segmentSuffix,
-              VECTORS_INDEX_EXTENSION,
-              VECTORS_INDEX_CODEC_NAME,
-              si.getId(),
-              metaIn,
-              context);
+      FieldsIndexReader fieldsIndexReader = new FieldsIndexReader(
+          d,
+          si.name,
+          segmentSuffix,
+          VECTORS_INDEX_EXTENSION,
+          VECTORS_INDEX_CODEC_NAME,
+          si.getId(),
+          metaIn,
+          context);
 
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();
@@ -218,8 +216,7 @@ public Lucene90CompressingTermVectorsReader(
       }
 
       decompressor = compressionMode.newDecompressor();
-      this.reader =
-          new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
 
       CodecUtil.checkFooter(metaIn, null);
       metaIn.close();
@@ -338,7 +335,8 @@ boolean isLoaded(int docID) {
     return blockState.docBase <= docID && docID < blockState.docBase + blockState.chunkDocs;
   }
 
-  private record BlockState(long startPointer, int docBase, int chunkDocs) {}
+  private record BlockState(long startPointer, int docBase, int chunkDocs) {
+  }
 
   @Override
   public void prefetch(int docID) throws IOException {
@@ -418,14 +416,13 @@ public Fields get(int doc) throws IOException {
       totalDistinctFields += vectorsStream.readVInt();
     }
     ++totalDistinctFields;
-    final PackedInts.ReaderIterator it =
-        PackedInts.getReaderIteratorNoHeader(
-            vectorsStream,
-            PackedInts.Format.PACKED,
-            packedIntsVersion,
-            totalDistinctFields,
-            bitsPerFieldNum,
-            1);
+    final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(
+        vectorsStream,
+        PackedInts.Format.PACKED,
+        packedIntsVersion,
+        totalDistinctFields,
+        bitsPerFieldNum,
+        1);
     fieldNums = new int[totalDistinctFields];
     for (int i = 0; i < totalDistinctFields; ++i) {
       fieldNums[i] = (int) it.next();
@@ -493,7 +490,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldPrefixLengths = new int[termCount];
       prefixLengths[i] = fieldPrefixLengths;
-      for (int j = 0; j < termCount; ) {
+      for (int j = 0; j < termCount;) {
        final LongsRef next = reader.next(termCount - j);
        for (int k = 0; k < next.length; ++k) {
          fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
@@ -514,7 +511,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldSuffixLengths = new int[termCount];
       suffixLengths[i] = fieldSuffixLengths;
-      for (int j = 0; j < termCount; ) {
+      for (int j = 0; j < termCount;) {
         final LongsRef next = reader.next(termCount - j);
         for (int k = 0; k < next.length; ++k) {
           fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
@@ -535,7 +532,7 @@ public Fields get(int doc) throws IOException {
     final int[] termFreqs = new int[totalTerms];
     {
       reader.reset(vectorsStream, totalTerms);
-      for (int i = 0; i < totalTerms; ) {
+      for (int i = 0; i < totalTerms;) {
         final LongsRef next = reader.next(totalTerms - i);
         for (int k = 0; k < next.length; ++k) {
           termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
@@ -566,16 +563,15 @@ public Fields get(int doc) throws IOException {
     final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
     final int[][] positions, startOffsets, lengths;
     if (totalPositions > 0) {
-      positions =
-          readPositions(
-              skip,
-              numFields,
-              flags,
-              numTerms,
-              termFreqs,
-              POSITIONS,
-              totalPositions,
-              positionIndex);
+      positions = readPositions(
+          skip,
+          numFields,
+          flags,
+          numTerms,
+          termFreqs,
+          POSITIONS,
+          totalPositions,
+          positionIndex);
     } else {
       positions = new int[numFields][];
     }
@@ -586,12 +582,10 @@ public Fields get(int doc) throws IOException {
       for (int i = 0; i < charsPerTerm.length; ++i) {
         charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
       }
-      startOffsets =
-          readPositions(
-              skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
-      lengths =
-          readPositions(
-              skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+      startOffsets = readPositions(
+          skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+      lengths = readPositions(
+          skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
 
       for (int i = 0; i < numFields; ++i) {
         final int[] fStartOffsets = startOffsets[i];
@@ -608,11 +602,13 @@ public Fields get(int doc) throws IOException {
           final int[] fSuffixLengths = suffixLengths[i];
           final int[] fLengths = lengths[i];
           for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
-           // delta-decode start offsets and patch lengths using term lengths
+            // delta-decode start offsets and patch lengths using term lengths
            final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
            lengths[i][positionIndex[i][j]] += termLength;
+            int sum = fStartOffsets[positionIndex[i][j]];
            for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
-              fStartOffsets[k] += fStartOffsets[k - 1];
+              sum += fStartOffsets[k];
+              fStartOffsets[k] = sum;
              fLengths[k] += termLength;
            }
          }
@@ -629,8 +625,10 @@ public Fields get(int doc) throws IOException {
         if (fPositions != null) {
           for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
             // delta-decode start offsets
+            int sum = fPositions[fpositionIndex[j]];
             for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
-              fPositions[k] += fPositions[k - 1];
+              sum += fPositions[k];
+              fPositions[k] = sum;
             }
           }
         }
@@ -709,8 +707,7 @@ public Fields get(int doc) throws IOException {
           docLen + payloadLen,
           suffixBytes);
       suffixBytes.length = docLen;
-      final BytesRef payloadBytes =
-          new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
+      final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
 
       final int[] fieldFlags = new int[numFields];
       for (int i = 0; i < numFields; ++i) {
@@ -812,7 +809,7 @@ private int[][] readPositions(
       final int totalFreq = positionIndex[i][termCount];
       final int[] fieldPositions = new int[totalFreq];
       positions[i] = fieldPositions;
-      for (int j = 0; j < totalFreq; ) {
+      for (int j = 0; j < totalFreq;) {
         final LongsRef nextPositions = reader.next(totalFreq - j);
         for (int k = 0; k < nextPositions.length; ++k) {
           fieldPositions[j++] = (int) nextPositions.longs[nextPositions.offset + k];
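The substantive change in this first patch is one transformation applied to every prefixSum variant (and, in the same spirit, to the cumulative word-popcount loop in refillFullBlock): instead of updating the array in place and re-reading the slot written on the previous iteration, the running total is carried in a local variable and only stored back. Presumably this lets the JIT keep the loop-carried value in a register rather than routing the dependency chain through memory; that motivation is an inference here, not something stated in the patch. Below is a minimal, self-contained sketch of the two shapes (class and method names invented for illustration, not code from the patch):

public class PrefixSumSketch {

  // Old shape: the loop-carried dependency goes through the array slot
  // written on the previous iteration.
  static void prefixSumOld(int[] arr, int base) {
    arr[0] += base;
    for (int i = 1; i < arr.length; ++i) {
      arr[i] += arr[i - 1];
    }
  }

  // New shape (as in the patch): the running total lives in a local variable,
  // so only plain stores touch the array.
  static void prefixSumNew(int[] arr, int base) {
    int sum = base;
    for (int i = 0; i < arr.length; ++i) {
      sum += arr[i];
      arr[i] = sum;
    }
  }

  public static void main(String[] args) {
    int[] a = {3, 1, 4, 1, 5};
    int[] b = a.clone();
    prefixSumOld(a, 10);
    prefixSumNew(b, 10);
    // Both print [13, 14, 18, 19, 24]: the rewrite preserves behavior.
    System.out.println(java.util.Arrays.toString(a));
    System.out.println(java.util.Arrays.toString(b));
  }
}

The remaining patches in the series are formatting churn from the author's editor, its cleanup, and two small follow-up fixes.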
From de5e2a459f0ead26918782e20bc0ebdf0218a1c8 Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Wed, 17 Sep 2025 10:29:52 +0900
Subject: [PATCH 2/5] tidy

---
 .../Lucene90CompressingTermVectorsReader.java | 102 ++++++++++--------
 1 file changed, 57 insertions(+), 45 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index fdc1816b91a8..948a5f6beae4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -117,7 +117,8 @@ private Lucene90CompressingTermVectorsReader(Lucene90CompressingTermVectorsReade
     this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
-    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+    this.reader =
+        new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
     this.version = reader.version;
     this.numChunks = reader.numChunks;
     this.numDirtyChunks = reader.numDirtyChunks;
@@ -146,13 +147,18 @@ public Lucene90CompressingTermVectorsReader(
     ChecksumIndexInput metaIn = null;
     try {
       // Open the data file
-      final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
-      vectorsStream = d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
-      version = CodecUtil.checkIndexHeader(
-          vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
-      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix) == vectorsStream.getFilePointer();
-
-      final String metaStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
+      final String vectorsStreamFN =
+          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
+      vectorsStream =
+          d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
+      version =
+          CodecUtil.checkIndexHeader(
+              vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix)
+          == vectorsStream.getFilePointer();
+
+      final String metaStreamFN =
+          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
       metaIn = d.openChecksumInput(metaStreamFN);
       CodecUtil.checkIndexHeader(
           metaIn,
@@ -173,15 +179,16 @@ public Lucene90CompressingTermVectorsReader(
       // such as file truncation.
       CodecUtil.retrieveChecksum(vectorsStream);
 
-      FieldsIndexReader fieldsIndexReader = new FieldsIndexReader(
-          d,
-          si.name,
-          segmentSuffix,
-          VECTORS_INDEX_EXTENSION,
-          VECTORS_INDEX_CODEC_NAME,
-          si.getId(),
-          metaIn,
-          context);
+      FieldsIndexReader fieldsIndexReader =
+          new FieldsIndexReader(
+              d,
+              si.name,
+              segmentSuffix,
+              VECTORS_INDEX_EXTENSION,
+              VECTORS_INDEX_CODEC_NAME,
+              si.getId(),
+              metaIn,
+              context);
 
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();
@@ -216,7 +223,8 @@ public Lucene90CompressingTermVectorsReader(
       }
 
       decompressor = compressionMode.newDecompressor();
-      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+      this.reader =
+          new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
 
       CodecUtil.checkFooter(metaIn, null);
       metaIn.close();
@@ -335,8 +343,7 @@ boolean isLoaded(int docID) {
     return blockState.docBase <= docID && docID < blockState.docBase + blockState.chunkDocs;
   }
 
-  private record BlockState(long startPointer, int docBase, int chunkDocs) {
-  }
+  private record BlockState(long startPointer, int docBase, int chunkDocs) {}
 
   @Override
   public void prefetch(int docID) throws IOException {
@@ -416,13 +423,14 @@ public Fields get(int doc) throws IOException {
       totalDistinctFields += vectorsStream.readVInt();
     }
     ++totalDistinctFields;
-    final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(
-        vectorsStream,
-        PackedInts.Format.PACKED,
-        packedIntsVersion,
-        totalDistinctFields,
-        bitsPerFieldNum,
-        1);
+    final PackedInts.ReaderIterator it =
+        PackedInts.getReaderIteratorNoHeader(
+            vectorsStream,
+            PackedInts.Format.PACKED,
+            packedIntsVersion,
+            totalDistinctFields,
+            bitsPerFieldNum,
+            1);
     fieldNums = new int[totalDistinctFields];
     for (int i = 0; i < totalDistinctFields; ++i) {
       fieldNums[i] = (int) it.next();
@@ -490,7 +498,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldPrefixLengths = new int[termCount];
       prefixLengths[i] = fieldPrefixLengths;
-      for (int j = 0; j < termCount;) {
+      for (int j = 0; j < termCount; ) {
         final LongsRef next = reader.next(termCount - j);
         for (int k = 0; k < next.length; ++k) {
           fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
@@ -511,7 +519,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldSuffixLengths = new int[termCount];
       suffixLengths[i] = fieldSuffixLengths;
-      for (int j = 0; j < termCount;) {
+      for (int j = 0; j < termCount; ) {
         final LongsRef next = reader.next(termCount - j);
         for (int k = 0; k < next.length; ++k) {
           fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
@@ -532,7 +540,7 @@ public Fields get(int doc) throws IOException {
     final int[] termFreqs = new int[totalTerms];
     {
       reader.reset(vectorsStream, totalTerms);
-      for (int i = 0; i < totalTerms;) {
+      for (int i = 0; i < totalTerms; ) {
         final LongsRef next = reader.next(totalTerms - i);
         for (int k = 0; k < next.length; ++k) {
           termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
@@ -563,15 +571,16 @@ public Fields get(int doc) throws IOException {
     final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
     final int[][] positions, startOffsets, lengths;
     if (totalPositions > 0) {
-      positions = readPositions(
-          skip,
-          numFields,
-          flags,
-          numTerms,
-          termFreqs,
-          POSITIONS,
-          totalPositions,
-          positionIndex);
+      positions =
+          readPositions(
+              skip,
+              numFields,
+              flags,
+              numTerms,
+              termFreqs,
+              POSITIONS,
+              totalPositions,
+              positionIndex);
     } else {
       positions = new int[numFields][];
     }
@@ -582,10 +591,12 @@ public Fields get(int doc) throws IOException {
       for (int i = 0; i < charsPerTerm.length; ++i) {
         charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
       }
-      startOffsets = readPositions(
-          skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
-      lengths = readPositions(
-          skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+      startOffsets =
+          readPositions(
+              skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+      lengths =
+          readPositions(
+              skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
 
       for (int i = 0; i < numFields; ++i) {
         final int[] fStartOffsets = startOffsets[i];
@@ -707,7 +718,8 @@ public Fields get(int doc) throws IOException {
           docLen + payloadLen,
           suffixBytes);
       suffixBytes.length = docLen;
-      final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
+      final BytesRef payloadBytes =
+          new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
 
       final int[] fieldFlags = new int[numFields];
       for (int i = 0; i < numFields; ++i) {
@@ -809,7 +821,7 @@ private int[][] readPositions(
       final int totalFreq = positionIndex[i][termCount];
       final int[] fieldPositions = new int[totalFreq];
       positions[i] = fieldPositions;
-      for (int j = 0; j < totalFreq;) {
+      for (int j = 0; j < totalFreq; ) {
         final LongsRef nextPositions = reader.next(totalFreq - j);
         for (int k = 0; k < nextPositions.length; ++k) {
           fieldPositions[j++] = (int) nextPositions.longs[nextPositions.offset + k];

From 247a61d3364fa1d7067bf3be776095bec926f46a Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Wed, 17 Sep 2025 10:32:08 +0900
Subject: [PATCH 3/5] revert formatting change

---
 .../Lucene90CompressingTermVectorsReader.java | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index 948a5f6beae4..30fac85a3fb2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -98,12 +98,9 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private final long numDirtyDocs; // cumulative number of docs in incomplete chunks
   private final long maxPointer; // end of the data section
   private BlockState blockState = new BlockState(-1, -1, 0);
-  // Cache of recently prefetched block IDs. This helps reduce chances of
-  // prefetching the same block
-  // multiple times, which is otherwise likely due to index sorting or recursive
-  // graph bisection
-  // clustering similar documents together. NOTE: this cache must be small since
-  // it's fully scanned.
+  // Cache of recently prefetched block IDs. This helps reduce chances of prefetching the same block
+  // multiple times, which is otherwise likely due to index sorting or recursive graph bisection
+  // clustering similar documents together. NOTE: this cache must be small since it's fully scanned.
   private final long[] prefetchedBlockIDCache;
   private int prefetchedBlockIDCacheIndex;
 
@@ -172,10 +169,8 @@ public Lucene90CompressingTermVectorsReader(
       chunkSize = metaIn.readVInt();
 
       // NOTE: data file is too costly to verify checksum against all the bytes on open,
-      // but for now we at least verify proper structure of the checksum footer: which
-      // looks
-      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of
-      // corruption
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
       // such as file truncation.
       CodecUtil.retrieveChecksum(vectorsStream);
 
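PATCH 4/5 below exists because ForDeltaUtil.java is a generated file (produced from gen_ForDeltaUtil.py), and the build records a digest of each generated source so it can detect when the checked-in file drifts from its generator; hand-editing prefixSum32 therefore requires updating the recorded value. The 40-hex-character digests look like SHA-1, though that, and whatever normalization the real Gradle task applies, are assumptions here. A hedged, standalone sketch of recomputing such a digest (hypothetical helper, not part of the Lucene build):

import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;

public class DigestSketch {
  public static void main(String[] args) throws Exception {
    // Read the file verbatim and hash it with SHA-1 (assumed algorithm).
    byte[] bytes = Files.readAllBytes(Path.of(args[0]));
    byte[] sha1 = MessageDigest.getInstance("SHA-1").digest(bytes);
    // Render as lowercase hex for comparison against the JSON value.
    StringBuilder hex = new StringBuilder(sha1.length * 2);
    for (byte b : sha1) {
      hex.append(Character.forDigit((b >> 4) & 0xf, 16));
      hex.append(Character.forDigit(b & 0xf, 16));
    }
    System.out.println(hex);
  }
}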
From 76ba5198b6fc14e63d8c62f48367ed246b03dffd Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Wed, 17 Sep 2025 10:38:02 +0900
Subject: [PATCH 4/5] modify checksum

---
 .../src/generated/checksums/generateForDeltaUtil101.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/backward-codecs/src/generated/checksums/generateForDeltaUtil101.json b/lucene/backward-codecs/src/generated/checksums/generateForDeltaUtil101.json
index b08480a3eda1..5c9a5c7926f5 100644
--- a/lucene/backward-codecs/src/generated/checksums/generateForDeltaUtil101.json
+++ b/lucene/backward-codecs/src/generated/checksums/generateForDeltaUtil101.json
@@ -1,4 +1,4 @@
 {
-    "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java": "29fd59e9847d3012497c3079fa7ebc4f68fa6a94",
+    "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java": "19ddcb5229ae0a3b5c14f9c8a2fca5e811a7b172",
     "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/gen_ForDeltaUtil.py": "147b81236bc0373fe97e808c0ae7a2cb6276ae5a"
 }

From 7ec09bee748dae7b68f692f6dc1bd96851ab8b30 Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Wed, 17 Sep 2025 10:46:32 +0900
Subject: [PATCH 5/5] fix error

---
 .../backward_codecs/lucene101/Lucene101PostingsReader.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
index c5155b94e2b1..0bc94899b5bd 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java
@@ -185,7 +185,7 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
     }
   }
 
-  static void prefixSum(int[] buffer, int count, long base) {
+  static void prefixSum(int[] buffer, int count, int base) {
     int sum = base;
     for (int i = 0; i < count; ++i) {
      sum += buffer[i];
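PATCH 5/5 above repairs a compile error that PATCH 1/5 introduced in Lucene101PostingsReader: the int[] overload of prefixSum still declared long base, but the rewritten body assigned it to int sum, and Java performs no implicit narrowing conversion from long to int. Narrowing the parameter type itself is the minimal fix, since this overload only accumulates int-valued deltas. A tiny sketch of the failure mode (illustrative only, not patch code):

public class NarrowingSketch {
  static void prefixSumBroken(int[] buffer, int count, long base) {
    // int sum = base; // does not compile: "incompatible types:
    //                 // possible lossy conversion from long to int"
    int sum = (int) base; // an explicit cast would compile, but the patch
    for (int i = 0; i < count; ++i) { // instead changes the parameter to int
      sum += buffer[i];
      buffer[i] = sum;
    }
  }

  public static void main(String[] args) {
    int[] deltas = {1, 2, 3};
    prefixSumBroken(deltas, deltas.length, 100L);
    System.out.println(java.util.Arrays.toString(deltas)); // [101, 103, 106]
  }
}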