From a4d380e4e76bd0d0ec1344ef29f533b7574cd027 Mon Sep 17 00:00:00 2001 From: patlego Date: Mon, 26 Jan 2026 10:35:23 -0500 Subject: [PATCH 1/2] JCRVLT-837 adding cached namespace impl to prevent thrashing in the repository --- .../vault/fs/impl/AggregateImpl.java | 17 ++- .../vault/fs/impl/AggregateManagerImpl.java | 136 ++++++++++++++++++ 2 files changed, 152 insertions(+), 1 deletion(-) diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java index 525660fbb..ed73bb914 100644 --- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java +++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java @@ -185,6 +185,8 @@ public void invalidate() { nodeRef = null; relPath = null; state = STATE_INITIAL; + // Invalidate the cached namespace prefixes for this aggregate + mgr.invalidateAggregatePrefixes(path); } public Aggregate getParent() { @@ -410,7 +412,7 @@ public String[] getNamespacePrefixes() { } public String getNamespaceURI(String prefix) throws RepositoryException { - return mgr.getNamespaceURI(prefix); + return mgr.getCachedNamespaceURI(prefix); } public Collection getBinaries() { @@ -610,6 +612,8 @@ private void addNamespace(Set prefixes, String name) throws RepositoryEx String pfx = name.substring(0, idx); if (!prefixes.contains(pfx)) { prefixes.add(pfx); + // Cache the prefix in the manager to avoid repeated JCR lookups + mgr.cacheNamespacePrefix(pfx); } } } @@ -623,6 +627,14 @@ private void addNamespacePath(Set prefixes, String path) throws Reposito private void loadNamespaces() { if (namespacePrefixes == null) { + // Check if this aggregate's namespaces are already cached + String[] cachedPrefixes = mgr.getCachedAggregatePrefixes(path); + if (cachedPrefixes != null) { + log.debug("Using cached namespace prefixes for '{}': {}", path, cachedPrefixes); + namespacePrefixes = cachedPrefixes; + return; + } + if (log.isDebugEnabled()) { log.trace("loading namespaces of aggregate {}", path); } @@ -635,6 +647,9 @@ private void loadNamespaces() { loadNamespaces(prefixes, "", getNode()); namespacePrefixes = prefixes.toArray(new String[prefixes.size()]); + // Cache the discovered prefixes for this aggregate path + mgr.cacheAggregatePrefixes(path, namespacePrefixes); + // set if and only if in DEBUG level if (start >= 0) { Duration duration = Duration.ofNanos(System.nanoTime() - start); diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java index 0b3137cc1..229d9c693 100644 --- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java +++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java @@ -38,6 +38,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.apache.jackrabbit.vault.fs.api.AggregateManager; import org.apache.jackrabbit.vault.fs.api.Aggregator; @@ -85,6 +86,11 @@ public class AggregateManagerImpl implements AggregateManager { */ private static final String DEFAULT_NODETYPES = "" + "org/apache/jackrabbit/vault/fs/config/nodetypes.cnd"; + /** + * default logger + */ + private static final Logger log = LoggerFactory.getLogger(AggregateManagerImpl.class); + /** * the repository session for this manager */ @@ -124,6 +130,18 @@ public class AggregateManagerImpl implements AggregateManager { */ private final Set nodeTypes = new HashSet(); + /** + * Cache of namespace prefixes to URIs. This cache is shared across all aggregates + * to avoid expensive JCR tree traversals for namespace discovery. + */ + private final ConcurrentHashMap namespacePrefixCache = new ConcurrentHashMap<>(); + + /** + * Cache of namespace prefixes per aggregate path. This cache stores the discovered prefixes + * for each aggregate path to avoid re-walking the same subtrees. + */ + private final ConcurrentHashMap aggregateNamespaceCache = new ConcurrentHashMap<>(); + /** * config */ @@ -301,6 +319,9 @@ private AggregateManagerImpl( // setup node types initNodeTypes(); + + // pre-populate namespace cache with standard JCR namespaces + initNamespaceCache(); } public Set getNodeTypes() { @@ -324,6 +345,100 @@ public String getNamespaceURI(String prefix) throws RepositoryException { return session.getNamespaceURI(prefix); } + /** + * Gets a namespace URI from the cache or from the session if not cached. + * This method caches the prefix-to-URI mapping to avoid repeated JCR lookups. + * + * @param prefix the namespace prefix + * @return the namespace URI + * @throws RepositoryException if an error occurs + */ + public String getCachedNamespaceURI(String prefix) throws RepositoryException { + return namespacePrefixCache.computeIfAbsent(prefix, p -> { + try { + return session.getNamespaceURI(p); + } catch (RepositoryException e) { + throw new RuntimeException("Failed to get namespace URI for prefix: " + p, e); + } + }); + } + + /** + * Adds a namespace prefix to the cache. + * + * @param prefix the namespace prefix to cache + */ + public void cacheNamespacePrefix(String prefix) { + if (prefix != null && !prefix.isEmpty() && !namespacePrefixCache.containsKey(prefix)) { + try { + String uri = session.getNamespaceURI(prefix); + namespacePrefixCache.put(prefix, uri); + } catch (RepositoryException e) { + // Log but don't fail - the prefix might be checked later + log.debug("Could not resolve namespace URI for prefix: {}", prefix, e); + } + } + } + + /** + * Gets the cached namespace prefixes. + * + * @return a set of all cached namespace prefixes + */ + public Set getCachedNamespacePrefixes() { + return namespacePrefixCache.keySet(); + } + + /** + * Gets cached namespace prefixes for a specific aggregate path. + * + * @param path the aggregate path + * @return the cached prefixes, or null if not cached + */ + public String[] getCachedAggregatePrefixes(String path) { + return aggregateNamespaceCache.get(path); + } + + /** + * Caches namespace prefixes for a specific aggregate path. + * + * @param path the aggregate path + * @param prefixes the namespace prefixes to cache + */ + public void cacheAggregatePrefixes(String path, String[] prefixes) { + if (path != null && prefixes != null) { + aggregateNamespaceCache.put(path, prefixes); + } + } + + /** + * Invalidates the namespace caches. This should be called if namespace + * definitions are added or modified in the repository. + */ + public void invalidateNamespaceCaches() { + log.info( + "Invalidating namespace caches ({} prefix mappings, {} aggregate caches)", + namespacePrefixCache.size(), + aggregateNamespaceCache.size()); + namespacePrefixCache.clear(); + aggregateNamespaceCache.clear(); + // Re-initialize the prefix cache with current repository namespaces + initNamespaceCache(); + } + + /** + * Invalidates the aggregate namespace cache for a specific path. + * This should be called when content at that path is modified. + * + * @param path the aggregate path to invalidate + */ + public void invalidateAggregatePrefixes(String path) { + if (path != null) { + aggregateNamespaceCache.remove(path); + log.debug("Invalidated namespace cache for path: {}", path); + } + } + public void startTracking(ProgressTrackerListener pTracker) { tracker = new AggregatorTracker(pTracker); } @@ -426,6 +541,27 @@ private void initNodeTypes() throws RepositoryException { } } + /** + * Pre-populates the namespace cache with all namespaces registered in the repository. + * This optimization reduces expensive JCR tree traversals during namespace discovery. + */ + private void initNamespaceCache() { + try { + String[] prefixes = session.getNamespacePrefixes(); + for (String prefix : prefixes) { + try { + String uri = session.getNamespaceURI(prefix); + namespacePrefixCache.put(prefix, uri); + } catch (RepositoryException e) { + log.debug("Could not cache namespace prefix '{}': {}", prefix, e.getMessage()); + } + } + log.info("Initialized namespace cache with {} prefixes", namespacePrefixCache.size()); + } catch (RepositoryException e) { + log.warn("Could not initialize namespace cache", e); + } + } + public Aggregator getAggregator(Node node, String path) throws RepositoryException { return aggregatorProvider.getAggregator(node, path); } From da206755193cd0c1be87aa25ee4f5bfefaf38d75 Mon Sep 17 00:00:00 2001 From: patlego Date: Mon, 26 Jan 2026 16:43:24 -0500 Subject: [PATCH 2/2] JCRVLT-837 adding tests and bounded LRU cache --- .../vault/fs/impl/AggregateImpl.java | 4 +- .../vault/fs/impl/AggregateManagerImpl.java | 88 +++++-- .../fs/impl/BoundedAggregateCacheTest.java | 230 ++++++++++++++++++ 3 files changed, 303 insertions(+), 19 deletions(-) create mode 100644 vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/BoundedAggregateCacheTest.java diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java index ed73bb914..cfecc6849 100644 --- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java +++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateImpl.java @@ -627,7 +627,7 @@ private void addNamespacePath(Set prefixes, String path) throws Reposito private void loadNamespaces() { if (namespacePrefixes == null) { - // Check if this aggregate's namespaces are already cached + // Check if this aggregate's namespaces are already cached (bounded LRU cache) String[] cachedPrefixes = mgr.getCachedAggregatePrefixes(path); if (cachedPrefixes != null) { log.debug("Using cached namespace prefixes for '{}': {}", path, cachedPrefixes); @@ -647,7 +647,7 @@ private void loadNamespaces() { loadNamespaces(prefixes, "", getNode()); namespacePrefixes = prefixes.toArray(new String[prefixes.size()]); - // Cache the discovered prefixes for this aggregate path + // Cache the discovered prefixes for this aggregate path (bounded LRU cache) mgr.cacheAggregatePrefixes(path, namespacePrefixes); // set if and only if in DEBUG level diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java index 229d9c693..0ff616e59 100644 --- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java +++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/AggregateManagerImpl.java @@ -36,7 +36,9 @@ import java.util.Collections; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -137,10 +139,17 @@ public class AggregateManagerImpl implements AggregateManager { private final ConcurrentHashMap namespacePrefixCache = new ConcurrentHashMap<>(); /** - * Cache of namespace prefixes per aggregate path. This cache stores the discovered prefixes - * for each aggregate path to avoid re-walking the same subtrees. + * Default maximum size for the aggregate namespace prefix cache. + * This limits memory usage while still providing performance benefits for frequently accessed paths. */ - private final ConcurrentHashMap aggregateNamespaceCache = new ConcurrentHashMap<>(); + private static final int DEFAULT_AGGREGATE_CACHE_SIZE = 1000; + + /** + * Bounded LRU cache of namespace prefixes per aggregate path. + * This cache stores the discovered prefixes for frequently accessed aggregate paths + * to avoid re-walking the same subtrees, while limiting memory usage through LRU eviction. + */ + private final Map aggregateNamespaceCache; /** * config @@ -313,6 +322,16 @@ private AggregateManagerImpl( aggregatorProvider = new AggregatorProvider(config.getAggregators()); artifactHandlers = Collections.unmodifiableList(config.getHandlers()); + // Initialize bounded LRU cache for aggregate namespace prefixes + int cacheSize = getAggregateCacheSizeFromConfig(); + this.aggregateNamespaceCache = + Collections.synchronizedMap(new LinkedHashMap(cacheSize + 1, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > cacheSize; + } + }); + // init root node Aggregator rootAggregator = rootNode.getDepth() == 0 ? new RootAggregator() : getAggregator(rootNode, null); root = new AggregateImpl(this, rootNode.getPath(), rootAggregator); @@ -389,8 +408,33 @@ public Set getCachedNamespacePrefixes() { return namespacePrefixCache.keySet(); } + /** + * Gets the cache size configuration for the aggregate namespace cache. + * + * @return the maximum cache size, defaulting to {@link #DEFAULT_AGGREGATE_CACHE_SIZE} + */ + private int getAggregateCacheSizeFromConfig() { + String cacheSizeStr = config.getProperty("aggregateNamespaceCacheSize"); + if (cacheSizeStr != null) { + try { + int size = Integer.parseInt(cacheSizeStr); + if (size > 0) { + log.info("Using configured aggregate namespace cache size: {}", size); + return size; + } + } catch (NumberFormatException e) { + log.warn( + "Invalid aggregate cache size '{}', using default: {}", + cacheSizeStr, + DEFAULT_AGGREGATE_CACHE_SIZE); + } + } + return DEFAULT_AGGREGATE_CACHE_SIZE; + } + /** * Gets cached namespace prefixes for a specific aggregate path. + * Uses a bounded LRU cache to prevent unbounded memory growth. * * @param path the aggregate path * @return the cached prefixes, or null if not cached @@ -401,6 +445,7 @@ public String[] getCachedAggregatePrefixes(String path) { /** * Caches namespace prefixes for a specific aggregate path. + * Uses a bounded LRU cache with automatic eviction of least recently used entries. * * @param path the aggregate path * @param prefixes the namespace prefixes to cache @@ -408,11 +453,33 @@ public String[] getCachedAggregatePrefixes(String path) { public void cacheAggregatePrefixes(String path, String[] prefixes) { if (path != null && prefixes != null) { aggregateNamespaceCache.put(path, prefixes); + if (log.isTraceEnabled()) { + log.trace( + "Cached namespace prefixes for path '{}': {} (cache size: {})", + path, + prefixes, + aggregateNamespaceCache.size()); + } + } + } + + /** + * Invalidates the aggregate namespace cache for a specific path. + * This should be called when content at that path is modified. + * + * @param path the aggregate path to invalidate + */ + public void invalidateAggregatePrefixes(String path) { + if (path != null) { + String[] removed = aggregateNamespaceCache.remove(path); + if (removed != null) { + log.debug("Invalidated namespace cache for path: {}", path); + } } } /** - * Invalidates the namespace caches. This should be called if namespace + * Invalidates all namespace caches. This should be called if namespace * definitions are added or modified in the repository. */ public void invalidateNamespaceCaches() { @@ -426,19 +493,6 @@ public void invalidateNamespaceCaches() { initNamespaceCache(); } - /** - * Invalidates the aggregate namespace cache for a specific path. - * This should be called when content at that path is modified. - * - * @param path the aggregate path to invalidate - */ - public void invalidateAggregatePrefixes(String path) { - if (path != null) { - aggregateNamespaceCache.remove(path); - log.debug("Invalidated namespace cache for path: {}", path); - } - } - public void startTracking(ProgressTrackerListener pTracker) { tracker = new AggregatorTracker(pTracker); } diff --git a/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/BoundedAggregateCacheTest.java b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/BoundedAggregateCacheTest.java new file mode 100644 index 000000000..ad77eefb7 --- /dev/null +++ b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/BoundedAggregateCacheTest.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.vault.fs.impl; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Tests for the bounded LRU cache behavior used for aggregate namespace prefixes. + * This tests the memory-safe caching strategy that prevents unbounded growth + * while still providing performance benefits through LRU eviction. + */ +public class BoundedAggregateCacheTest { + + private Map cache; + private static final int CACHE_SIZE = 10; + + @Before + public void setUp() { + // Create a bounded LRU cache similar to what AggregateManagerImpl uses + cache = Collections.synchronizedMap(new LinkedHashMap(CACHE_SIZE + 1, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > CACHE_SIZE; + } + }); + } + + @Test + public void testCacheHitAndMiss() { + String path1 = "/content/test/page1"; + String[] prefixes1 = new String[] {"jcr", "nt", "cq"}; + + // Cache miss initially + assertNull("Cache should be empty initially", cache.get(path1)); + + // Cache the prefixes + cache.put(path1, prefixes1); + + // Cache hit + String[] cached = cache.get(path1); + assertNotNull("Cached prefixes should not be null", cached); + assertArrayEquals("Cached prefixes should match", prefixes1, cached); + } + + @Test + public void testCacheInvalidation() { + String path1 = "/content/test/page1"; + String[] prefixes1 = new String[] {"jcr", "nt"}; + + // Cache the prefixes + cache.put(path1, prefixes1); + assertNotNull("Cache should contain entry", cache.get(path1)); + + // Invalidate specific path + cache.remove(path1); + assertNull("Cache should be empty after invalidation", cache.get(path1)); + } + + @Test + public void testCacheBoundedSize() { + // Fill the cache to max capacity + for (int i = 0; i < CACHE_SIZE; i++) { + String path = "/content/test/page" + i; + String[] prefixes = new String[] {"jcr", "nt"}; + cache.put(path, prefixes); + } + + assertEquals("Cache size should be at max capacity", CACHE_SIZE, cache.size()); + + // Add one more entry - should trigger LRU eviction + String newPath = "/content/test/page" + CACHE_SIZE; + String[] newPrefixes = new String[] {"jcr", "sling"}; + cache.put(newPath, newPrefixes); + + // Cache size should still be at max (oldest entry evicted) + assertEquals("Cache size should not exceed max capacity", CACHE_SIZE, cache.size()); + + // The newest entry should be present + assertNotNull("Newest entry should be in cache", cache.get(newPath)); + + // The oldest entry (page0) should have been evicted + assertNull("Oldest entry should have been evicted", cache.get("/content/test/page0")); + } + + @Test + public void testLRUBehavior() { + // Add entries up to capacity + for (int i = 0; i < CACHE_SIZE; i++) { + cache.put("/path" + i, new String[] {"jcr"}); + } + + // Access path0 to make it "recently used" + cache.get("/path0"); + + // Add one more entry, forcing eviction + cache.put("/path" + CACHE_SIZE, new String[] {"jcr"}); + + // path0 should still be present because we accessed it (LRU) + // path1 should have been evicted (least recently used after path0 was accessed) + assertNotNull("Accessed entry should still be in cache", cache.get("/path0")); + assertNull("Least recently used entry should be evicted", cache.get("/path1")); + } + + @Test + public void testCacheWithMultipleEvictions() { + // Fill cache to capacity + for (int i = 0; i < CACHE_SIZE; i++) { + cache.put("/path" + i, new String[] {"jcr"}); + } + + // Add 5 more entries, which should evict the 5 oldest + for (int i = CACHE_SIZE; i < CACHE_SIZE + 5; i++) { + cache.put("/path" + i, new String[] {"jcr"}); + } + + assertEquals("Cache size should still be at max", CACHE_SIZE, cache.size()); + + // First 5 entries should be evicted + for (int i = 0; i < 5; i++) { + assertNull("Old entry should be evicted", cache.get("/path" + i)); + } + + // Entries 5-14 should still be present + for (int i = 5; i < CACHE_SIZE + 5; i++) { + assertNotNull("Recent entry should still be in cache", cache.get("/path" + i)); + } + } + + @Test + public void testConcurrentAccess() throws Exception { + final int threadCount = 10; + final int operationsPerThread = 100; + + Thread[] threads = new Thread[threadCount]; + for (int t = 0; t < threadCount; t++) { + final int threadId = t; + threads[t] = new Thread(() -> { + for (int i = 0; i < operationsPerThread; i++) { + String path = "/content/thread" + threadId + "/path" + i; + String[] prefixes = new String[] {"jcr", "nt", "thread" + threadId}; + + // Cache operation + cache.put(path, prefixes); + + // Retrieve operation + cache.get(path); + + // Invalidate some entries + if (i % 10 == 0) { + cache.remove(path); + } + } + }); + } + + // Start all threads + for (Thread thread : threads) { + thread.start(); + } + + // Wait for all threads to complete + for (Thread thread : threads) { + thread.join(5000); // 5 second timeout + } + + // Verify cache is still functional and bounded + assertTrue("Cache size should not exceed max", cache.size() <= CACHE_SIZE); + cache.put("/test", new String[] {"jcr"}); + assertNotNull("Cache should still be functional", cache.get("/test")); + } + + @Test + public void testAccessOrderPreservation() { + // Fill cache + for (int i = 0; i < CACHE_SIZE; i++) { + cache.put("/path" + i, new String[] {"prefix" + i}); + } + + // Access some entries in a specific order to make them more recent + cache.get("/path5"); + cache.get("/path3"); + cache.get("/path7"); + + // Add 3 new entries, forcing eviction of the 3 least recently used + cache.put("/newpath1", new String[] {"new1"}); + cache.put("/newpath2", new String[] {"new2"}); + cache.put("/newpath3", new String[] {"new3"}); + + // The accessed entries should still be present + assertNotNull("Accessed entry should survive", cache.get("/path5")); + assertNotNull("Accessed entry should survive", cache.get("/path3")); + assertNotNull("Accessed entry should survive", cache.get("/path7")); + + // Some of the non-accessed entries should be evicted + int evictedCount = 0; + for (int i = 0; i < CACHE_SIZE; i++) { + if (cache.get("/path" + i) == null) { + evictedCount++; + } + } + assertEquals("Should have evicted 3 old entries", 3, evictedCount); + } +}