-
Notifications
You must be signed in to change notification settings - Fork 588
Open
Description
There is a race condition in Ehcache's off-heap eviction logic that causes AssertionError during concurrent put() operations when off-heap memory is at or near capacity.
When the cache is configured as heap + off-heap, with the off-heap tier sized about the same as the data (leaving very little room for metadata overhead), an AssertionError is thrown during put operations.
The JMH benchmark also shows the heap + off-heap configuration having the worst performance (throughput and latency) of all the configurations tested.
JMH BENCHMARK CONFIGURATION:
------------------------------------------------------------
Forks: 1 (separate JVM processes)
Threads per fork: 8
Warmup: 3 iterations × 1 s
Measurement: 8 iterations × 2 s
Modes: Throughput, AverageTime
JVM Arguments:
-Xms2g
-Xmx2g
-XX:+UseG1GC
-XX:MaxDirectMemorySize=256m
============================================================
BENCHMARK: benchmarkCacheMixed (90% Hits / 10% Misses)
============================================================
TIER CONFIGURATIONS:
------------------------------------------------------------
Total Data Size: 7.75 MB (104 entries)
------------------------------------------------------------
Config Name Heap Entries (Actual Size) Off-heap Disk
------------------------------------------------------------
HEAP_ONLY 104 entries (7.75 MB ) 0 MB 0 MB
HEAP_OFFHEAP 94 entries (1.99 MB ) 6 MB 0 MB
HEAP_DISK 94 entries (1.99 MB ) 0 MB 16 MB
THREE_TIER_TINY 82 entries (0.99 MB ) 3 MB 16 MB
THROUGHPUT (ops/sec - HIGHER is better):
--------------------------------------------------------------------------------------------
Config Name ZIPFIAN UNIFORM
--------------------------------------------------------------------------------------------
HEAP_ONLY 4,535,330 ± 479,886 (± 10.6%) 2,199,778 ± 130,353 (± 5.9%)
HEAP_OFFHEAP 7,716 ± 323 (± 4.2%) 6,939 ± 390 (± 5.6%)
HEAP_DISK 333,987 ± 14,978 (± 4.5%) 260,309 ± 67,632 (± 26.0%)
THREE_TIER_TINY 139,686 ± 6,038 (± 4.3%) 137,248 ± 10,951 (± 8.0%)
LATENCY (µs/op - LOWER is better):
--------------------------------------------------------------------------------------------
Config Name ZIPFIAN UNIFORM
--------------------------------------------------------------------------------------------
HEAP_ONLY 1.697 ± 0.058 (± 3.4%) 4.167 ± 0.184 (± 4.4%)
HEAP_OFFHEAP 1003.665 ± 39.293 (± 3.9%) 1152.306 ± 53.827 (± 4.7%)
HEAP_DISK 23.709 ± 1.449 (± 6.1%) 27.531 ± 1.077 (± 3.9%)
THREE_TIER_TINY 55.848 ± 5.310 (± 9.5%) 58.942 ± 6.290 (± 10.7%)
java.lang.AssertionError
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.release(OffHeapStorageArea.java:592)
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.shrink(OffHeapStorageArea.java:696)
at org.ehcache.shadow.org.terracotta.offheapstore.storage.OffHeapBufferStorageEngine.shrink(OffHeapBufferStorageEngine.java:250)
at org.ehcache.shadow.org.terracotta.offheapstore.AbstractLockedOffHeapHashMap.shrink(AbstractLockedOffHeapHashMap.java:501)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.handleOversizeMappingException(AbstractConcurrentOffHeapMap.java:714)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.computeWithMetadata(AbstractConcurrentOffHeapMap.java:744)
at org.ehcache.impl.internal.store.offheap.EhcacheConcurrentOffHeapClockCache.compute(EhcacheConcurrentOffHeapClockCache.java:153)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.computeWithRetry(AbstractOffHeapStore.java:1051)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.put(AbstractOffHeapStore.java:251)
at org.ehcache.impl.internal.store.tiering.TieredStore.put(TieredStore.java:114)
at org.ehcache.core.Ehcache.doPut(Ehcache.java:94)
at org.ehcache.core.EhcacheBase.put(EhcacheBase.java:188)
at com.test.benchmark.EhcacheTierBenchmark.benchmarkCacheMixed(EhcacheTierBenchmark.java:397)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_thrpt_jmhStub(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:145)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_Throughput(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:84)
at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:527)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:504)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
========================================
<failure>
java.lang.RuntimeException: Benchmark failed - stopping execution
at com.test.benchmark.EhcacheTierBenchmark.benchmarkCacheMixed(EhcacheTierBenchmark.java:407)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_thrpt_jmhStub(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:145)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_Throughput(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:84)
at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:527)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:504)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
Caused by: java.lang.AssertionError
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.release(OffHeapStorageArea.java:592)
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.shrink(OffHeapStorageArea.java:696)
at org.ehcache.shadow.org.terracotta.offheapstore.storage.OffHeapBufferStorageEngine.shrink(OffHeapBufferStorageEngine.java:250)
at org.ehcache.shadow.org.terracotta.offheapstore.AbstractLockedOffHeapHashMap.shrink(AbstractLockedOffHeapHashMap.java:501)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.handleOversizeMappingException(AbstractConcurrentOffHeapMap.java:714)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.computeWithMetadata(AbstractConcurrentOffHeapMap.java:744)
at org.ehcache.impl.internal.store.offheap.EhcacheConcurrentOffHeapClockCache.compute(EhcacheConcurrentOffHeapClockCache.java:153)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.computeWithRetry(AbstractOffHeapStore.java:1051)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.put(AbstractOffHeapStore.java:251)
at org.ehcache.impl.internal.store.tiering.TieredStore.put(TieredStore.java:114)
at org.ehcache.core.Ehcache.doPut(Ehcache.java:94)
at org.ehcache.core.EhcacheBase.put(EhcacheBase.java:188)
at com.test.benchmark.EhcacheTierBenchmark.benchmarkCacheMixed(EhcacheTierBenchmark.java:397)
... 12 more
Benchmark had encountered error, and fail on error was requested
ERROR: org.openjdk.jmh.runner.RunnerException: Benchmark caught the exception
at org.openjdk.jmh.runner.Runner.runBenchmarks(Runner.java:572)
at org.openjdk.jmh.runner.Runner.internalRun(Runner.java:309)
at org.openjdk.jmh.runner.Runner.run(Runner.java:208)
at org.openjdk.jmh.Main.main(Main.java:71)
Caused by: org.openjdk.jmh.runner.BenchmarkException: Benchmark error during the run
at org.openjdk.jmh.runner.BenchmarkHandler.runIteration(BenchmarkHandler.java:440)
at org.openjdk.jmh.runner.BaseRunner.runBenchmark(BaseRunner.java:262)
at org.openjdk.jmh.runner.BaseRunner.runBenchmark(BaseRunner.java:233)
at org.openjdk.jmh.runner.BaseRunner.doSingle(BaseRunner.java:138)
at org.openjdk.jmh.runner.BaseRunner.runBenchmarksForked(BaseRunner.java:75)
at org.openjdk.jmh.runner.ForkedRunner.run(ForkedRunner.java:72)
at org.openjdk.jmh.runner.ForkedMain.main(ForkedMain.java:86)
Suppressed: java.lang.RuntimeException: Benchmark failed - stopping execution
at com.test.benchmark.EhcacheTierBenchmark.benchmarkCacheMixed(EhcacheTierBenchmark.java:407)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_thrpt_jmhStub(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:145)
at com.test.benchmark.jmh_generated.EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.benchmarkCacheMixed_Throughput(EhcacheTierBenchmark_benchmarkCacheMixed_jmhTest.java:84)
at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:527)
at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:504)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
Caused by: java.lang.AssertionError
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.release(OffHeapStorageArea.java:592)
at org.ehcache.shadow.org.terracotta.offheapstore.paging.OffHeapStorageArea.shrink(OffHeapStorageArea.java:696)
at org.ehcache.shadow.org.terracotta.offheapstore.storage.OffHeapBufferStorageEngine.shrink(OffHeapBufferStorageEngine.java:250)
at org.ehcache.shadow.org.terracotta.offheapstore.AbstractLockedOffHeapHashMap.shrink(AbstractLockedOffHeapHashMap.java:501)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.handleOversizeMappingException(AbstractConcurrentOffHeapMap.java:714)
at org.ehcache.shadow.org.terracotta.offheapstore.concurrent.AbstractConcurrentOffHeapMap.computeWithMetadata(AbstractConcurrentOffHeapMap.java:744)
at org.ehcache.impl.internal.store.offheap.EhcacheConcurrentOffHeapClockCache.compute(EhcacheConcurrentOffHeapClockCache.java:153)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.computeWithRetry(AbstractOffHeapStore.java:1051)
at org.ehcache.impl.internal.store.offheap.AbstractOffHeapStore.put(AbstractOffHeapStore.java:251)
at org.ehcache.impl.internal.store.tiering.TieredStore.put(TieredStore.java:114)
at org.ehcache.core.Ehcache.doPut(Ehcache.java:94)
at org.ehcache.core.EhcacheBase.put(EhcacheBase.java:188)
at com.test.benchmark.EhcacheTierBenchmark.benchmarkCacheMixed(EhcacheTierBenchmark.java:397)
... 12 more
Here is a unit test that reproduces the race condition:
class EhcacheOffHeapMemoryExceptionTest {
private CacheManager cacheManager;
private Cache<String, CachedData> cache;
private Map<String, CachedData> testData;
private ExecutorService executorService;
private int totalEntries;
private long totalDataBytes;
private long offHeapBytes;
@BeforeEach
void setUp() {
Random random = new Random(42);
List<Integer> bodySizes = generateRealisticDistribution(100, random);
totalEntries = bodySizes.size();
totalDataBytes = bodySizes.stream().mapToLong(Integer::longValue).sum();
System.out.println("=== Realistic HTTP Response Distribution ===");
System.out.println("Total entries: " + totalEntries);
System.out.println("Total data size: " + String.format("%.2f MB", totalDataBytes / (1024.0 * 1024.0)));
System.out.println("Average entry size: " + String.format("%.2f KB", (totalDataBytes / totalEntries) / 1024.0));
System.out.println("Min size: " + String.format("%.2f KB", bodySizes.stream().min(Integer::compareTo).orElse(0) / 1024.0));
System.out.println("Max size: " + String.format("%.2f KB", bodySizes.stream().max(Integer::compareTo).orElse(0) / 1024.0));
offHeapBytes = (long) Math.ceil(totalDataBytes / (1024.0 * 1024.0));
cacheManager = CacheManagerBuilder.newCacheManagerBuilder().build(true);
// Generate test data with real body sizes
testData = new HashMap<>();
Random rnd = new Random(42);
for (int i = 0; i < bodySizes.size(); i++) {
String key = "key-" + i;
byte[] payload = new byte[bodySizes.get(i)];
rnd.nextBytes(payload); // Random data, but real size distribution
testData.put(key, new CachedData(payload));
}
executorService = Executors.newFixedThreadPool(16);
}
@AfterEach
void tearDown() {
if (executorService != null) {
executorService.shutdownNow();
}
if (cacheManager != null) {
cacheManager.close();
}
}
@Test
void testWithOffHeapExtraHeapSize() throws InterruptedException {
long offHeapSize = offHeapBytes * 2;
ResourcePoolsBuilder pools = ResourcePoolsBuilder.newResourcePoolsBuilder()
.heap(2, EntryUnit.ENTRIES)
.offheap(offHeapSize, MemoryUnit.MB);
cache = cacheManager.createCache("benchmark-cache",
CacheConfigurationBuilder.newCacheConfigurationBuilder(
String.class,
CachedData.class,
pools
)
.build()
);
for (Map.Entry<String, CachedData> entry : testData.entrySet())
cache.put(entry.getKey(), entry.getValue());
int numThreads = 8;
int operationsPerThread = 100000;
printConfiguration(numThreads, operationsPerThread, offHeapSize);
runTest(numThreads, operationsPerThread, offHeapSize, true);
}
@Test
void testWithOffHeapMatchingDataSize() throws InterruptedException {
ResourcePoolsBuilder pools = ResourcePoolsBuilder.newResourcePoolsBuilder()
.heap(2, EntryUnit.ENTRIES)
.offheap(offHeapBytes, MemoryUnit.MB);
cache = cacheManager.createCache("benchmark-cache",
CacheConfigurationBuilder.newCacheConfigurationBuilder(
String.class,
CachedData.class,
pools
)
.build()
);
for (Map.Entry<String, CachedData> entry : testData.entrySet())
cache.put(entry.getKey(), entry.getValue());
int numThreads = 8;
int operationsPerThread = 100000;
printConfiguration(numThreads, operationsPerThread, offHeapBytes);
runTest(numThreads, operationsPerThread, offHeapBytes, true);
}
@Test
void testWithOffHeapHalfDataSize() throws InterruptedException {
long offHeapSize = offHeapBytes / 2;
ResourcePoolsBuilder pools = ResourcePoolsBuilder.newResourcePoolsBuilder()
.heap(2, EntryUnit.ENTRIES)
.offheap(offHeapSize, MemoryUnit.MB);
cache = cacheManager.createCache("benchmark-cache",
CacheConfigurationBuilder.newCacheConfigurationBuilder(
String.class,
CachedData.class,
pools
)
.build()
);
for (Map.Entry<String, CachedData> entry : testData.entrySet())
cache.put(entry.getKey(), entry.getValue());
int numThreads = 10;
int operationsPerThread = 200;
printConfiguration(numThreads, operationsPerThread, offHeapBytes);
for (int i = 0; i < 50; i++)
runTest(numThreads, operationsPerThread, offHeapSize, false);
}
private void runTest(int numThreads, int operationsPerThread, long offHeapBytes, boolean simulateMiss) throws InterruptedException {
AtomicBoolean stopFlag = new AtomicBoolean(false);
AtomicReference<Throwable> firstError = new AtomicReference<>(null);
CountDownLatch startLatch = new CountDownLatch(1);
CountDownLatch completionLatch = new CountDownLatch(numThreads);
// Launch threads that mimic benchmarkCacheMixed behavior
for (int threadId = 0; threadId < numThreads; threadId++) {
final int finalThreadId = threadId;
executorService.submit(() -> {
try {
try {
startLatch.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
Random random = new Random(42 + finalThreadId);
String[] keys = testData.keySet().toArray(new String[0]);
for (int i = 0; i < operationsPerThread; i++) {
if (stopFlag.get())
break;
try {
// Select key uniformly (simplified from Zipfian for reproducibility)
String key = keys[random.nextInt(keys.length)];
// 30% of the time, simulate cache miss (3x more aggressive than benchmark's 10%)
if (simulateMiss && random.nextInt(100) < 30)
cache.remove(key);
// Try to get from cache (70% hit, 30% miss)
CachedData data = cache.get(key);
if (data == null) {
// Cache miss - fetch from "backend" and populate cache
data = testData.get(key);
cache.put(key, data); // THIS IS WHERE THE EXCEPTION OCCURS
}
if (i > 0 && i % 10000 == 0)
System.out.println("Thread-" + finalThreadId + ": " + i + " operations completed");
} catch (AssertionError e) {
firstError.compareAndSet(null, e);
stopFlag.set(true);
executorService.shutdownNow();
break;
}
}
} finally {
completionLatch.countDown();
}
});
}
startLatch.countDown();
completionLatch.await(300, TimeUnit.SECONDS); // Longer timeout for more operations
// If an error was detected, fail the test by re-throwing it
Throwable error = firstError.get();
if (error != null) {
System.err.println("\nTest FAILED due to AssertionError in worker thread.");
if (error instanceof AssertionError) {
throw (AssertionError) error;
} else {
throw new AssertionError("Test failed with unexpected error", error);
}
}
}
private void printConfiguration(int numThreads, int operationsPerThread, long offHeapBytes) {
System.out.println("========================================");
System.out.println("Configuration:");
System.out.println(" Heap: 12 entries");
System.out.println(" Off-heap: " + offHeapBytes + " MB");
System.out.println(" Working set: " + totalEntries + " entries (" +
String.format("%.2f", totalDataBytes / (1024.0 * 1024.0)) + " MB)");
System.out.println(" Threads: " + numThreads);
System.out.println(" Operations per thread: " + operationsPerThread);
System.out.println(" Total operations: " + (numThreads * operationsPerThread));
System.out.println("========================================\n");
}
/**
* Generates body sizes with realistic HTTP response distribution.
* 40% small (1KB - 20KB) - typical JSON/API responses
* 35% medium (20KB - 100KB) - HTML pages, small images
* 20% large (100KB - 500KB) - larger responses
* 5% very large (500KB - 2MB) - big payloads
*/
private List<Integer> generateRealisticDistribution(int count, Random random) {
List<Integer> sizes = new ArrayList<>();
for (int i = 0; i < count; i++) {
int category = random.nextInt(100);
int size;
if (category < 40)
size = 1024 + random.nextInt(19 * 1024);
else if (category < 75)
size = 20 * 1024 + random.nextInt(80 * 1024);
else if (category < 95)
size = 100 * 1024 + random.nextInt(400 * 1024);
else
size = 500 * 1024 + random.nextInt(1536 * 1024);
sizes.add(size);
}
return sizes;
}
record CachedData(
byte[] body
) implements java.io.Serializable {
}
}Metadata
Metadata
Assignees
Labels
No labels