From d76c8aeb10ca1e561de259075c26bdd9b6c31cfa Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Tue, 16 Dec 2025 19:20:09 +0000 Subject: [PATCH] avoids unneeded meta index read CachableBlockFile.Reader.getBCFile() always read the RFile root block from the index cache even if would not use it. Modified the method to only read from the index cache when the data is actually needed. Noticed this while working on #6010, saw for every data block read from a rfile it would also read from the index cache. Adjusted ScanTracingIT because before this change there was an rfile index cache read per rfile data block read. After this change the number of rfile index reads align w/ the number of rfiles opened, which in this case aligns with the number of tablets in the test tables. The existing behavior was likely not a performance problem if the data was in cache, but could potentially cause thread contention and extra CPU usage. So its probably good to fix, also nice to make the tracing stats align better w/ expectations. --- .../blockfile/impl/CachableBlockFile.java | 29 +++++++++++-------- .../accumulo/test/tracing/ScanTracingIT.java | 22 +++++++------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java b/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java index f694740d964..f13c155641c 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java +++ b/core/src/main/java/org/apache/accumulo/core/file/blockfile/impl/CachableBlockFile.java @@ -179,13 +179,14 @@ private long getCachedFileLen() throws IOException { } } - private BCFile.Reader getBCFile(byte[] serializedMetadata) throws IOException { + private BCFile.Reader getBCFile(Supplier cachedMetadataSupplier) throws IOException { BCFile.Reader reader = bcfr.get(); if (reader == null) { RateLimitedInputStream fsIn = new RateLimitedInputStream((InputStream & Seekable) inputSupplier.get(), readLimiter); BCFile.Reader tmpReader = null; + byte[] serializedMetadata = cachedMetadataSupplier.get(); if (serializedMetadata == null) { if (fileLenCache == null) { tmpReader = new BCFile.Reader(fsIn, lengthSupplier.get(), conf, cryptoService); @@ -221,15 +222,19 @@ private BCFile.Reader getBCFile(byte[] serializedMetadata) throws IOException { } private BCFile.Reader getBCFile() throws IOException { - BlockCache _iCache = cacheProvider.getIndexCache(); - if (_iCache != null) { - CacheEntry mce = _iCache.getBlock(cacheId + ROOT_BLOCK_NAME, new BCFileLoader()); - if (mce != null) { - return getBCFile(mce.getBuffer()); + + Supplier cachedMetadataSupplier = () -> { + BlockCache _iCache = cacheProvider.getIndexCache(); + if (_iCache != null) { + CacheEntry mce = _iCache.getBlock(cacheId + ROOT_BLOCK_NAME, new BCFileLoader()); + if (mce != null) { + return mce.getBuffer(); + } } - } + return null; + }; - return getBCFile(null); + return getBCFile(cachedMetadataSupplier); } private class BCFileLoader implements Loader { @@ -242,7 +247,7 @@ public Map getDependencies() { @Override public byte[] load(int maxSize, Map dependencies) { try { - return getBCFile(null).serializeMetadata(maxSize); + return getBCFile(() -> null).serializeMetadata(maxSize); } catch (IOException e) { throw new UncheckedIOException(e); } @@ -348,7 +353,7 @@ public byte[] load(int maxSize, Map dependencies) { if (reader == null) { if (loadingMetaBlock) { byte[] serializedMetadata = dependencies.get(cacheId + ROOT_BLOCK_NAME); - reader = getBCFile(serializedMetadata); + reader = getBCFile(() -> serializedMetadata); } else { reader = getBCFile(); } @@ -407,7 +412,7 @@ public CachedBlockRead getMetaBlock(String blockName) throws IOException { } } - BlockReader _currBlock = getBCFile(null).getMetaBlock(blockName); + BlockReader _currBlock = getBCFile(() -> null).getMetaBlock(blockName); incrementCacheBypass(CacheType.INDEX); return new CachedBlockRead(_currBlock); } @@ -424,7 +429,7 @@ public CachedBlockRead getMetaBlock(long offset, long compressedSize, long rawSi } } - BlockReader _currBlock = getBCFile(null).getDataBlock(offset, compressedSize, rawSize); + BlockReader _currBlock = getBCFile(() -> null).getDataBlock(offset, compressedSize, rawSize); incrementCacheBypass(CacheType.INDEX); return new CachedBlockRead(_currBlock); } diff --git a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java index 8d0fe70bba0..898915999d7 100644 --- a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java +++ b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java @@ -94,14 +94,17 @@ public void stopCollector() throws Exception { @Test public void test() throws Exception { - var names = getUniqueNames(7); + var names = getUniqueNames(8); runTest(names[0], 0, false, false, -1, -1, -1); runTest(names[1], 10, false, false, -1, -1, -1); - runTest(names[2], 0, true, false, -1, -1, -1); - runTest(names[3], 0, false, false, -1, -1, 2); - runTest(names[4], 0, false, false, 32, 256, -1); - runTest(names[5], 0, true, true, 32, 256, -1); - runTest(names[6], 0, true, false, -1, -1, 2); + // when the tables tablets are spread across two tablet servers, then all the tables data will + // fit in cache + runTest(names[2], 10, true, true, -1, -1, -1); + runTest(names[3], 0, true, false, -1, -1, -1); + runTest(names[4], 0, false, false, -1, -1, 2); + runTest(names[5], 0, false, false, 32, 256, -1); + runTest(names[6], 0, true, true, 32, 256, -1); + runTest(names[7], 0, true, false, -1, -1, 2); } private void runTest(String tableName, int numSplits, boolean cacheData, @@ -239,15 +242,14 @@ private void runTest(String tableName, int numSplits, boolean cacheData, if (stats.getFileBytesRead() == 0) { assertEquals(0L, stats.getDataCacheMisses(), stats::toString); } - // When caching data, does not seem to hit the cache much - var cacheSum = stats.getIndexCacheHits() + stats.getIndexCacheMisses(); - assertTrue(cacheSum == 0 || cacheSum == 1, stats::toString); } else { assertEquals(0, stats.getDataCacheHits(), stats::toString); assertEquals(0, stats.getDataCacheMisses(), stats::toString); assertTrue(stats.getDataCacheBypasses() > stats.getSeeks(), stats::toString); - assertClose(stats.getDataCacheBypasses(), stats.getIndexCacheHits(), .05); } + // May see rfile metadata reads for each tablet + var cacheSum = stats.getIndexCacheHits() + stats.getIndexCacheMisses(); + assertTrue(cacheSum <= (numSplits + 1) * 2L, stats::toString); assertEquals(0, stats.getIndexCacheBypasses(), stats::toString); }