From c46e160eea2049b60883461554dbb42a82995885 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 04:06:08 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Parallelize=20source=20JAR?= =?UTF-8?q?=20resolution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 💡 What: Switched `SourcesJarLocator` to use `parallelStream()` for fetching source JARs. Updated `GraphMemoizator` and `SourcesJarLocator` caches to use `ConcurrentHashMap` for thread safety. Updated `SourcesLocatorTest` fake implementation to be thread-safe. 🎯 Why: The resolution of source JARs involves blocking network I/O (HTTP HEAD requests). Doing this sequentially for a large number of dependencies is inefficient. Parallelizing this process significantly reduces the total time required to resolve sources. 📊 Impact: - Reduces time to resolve source JARs by up to N times (where N is the number of parallel threads), limited by network bandwidth and pool size. 🔬 Measurement: - Verify by running `bazel test //resolver:sources_locator_test`. - Observable speedup when running `bazel run //:pin` on a project with many dependencies. --- .jules/bolt.md | 3 +++ .../evendanan/bazel/mvn/merger/GraphMemoizator.java | 12 +++--------- .../bazel/mvn/merger/SourcesJarLocator.java | 6 +++--- .../bazel/mvn/merger/SourcesLocatorTest.java | 5 +++-- 4 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..145e78c --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-23 - [Parallel Stream Safety] +**Learning:** Switching to `parallelStream` requires careful audit of all shared mutable state, even in test helpers. I learned that `GraphMemoizator` was using a non-thread-safe `HashMap`, and `SourcesLocatorTest`'s `FakeOpener` also used `HashMap`. +**Action:** Always check thread-safety of underlying data structures when introducing parallel streams, including abstract classes and test doubles. diff --git a/resolver/src/main/java/net/evendanan/bazel/mvn/merger/GraphMemoizator.java b/resolver/src/main/java/net/evendanan/bazel/mvn/merger/GraphMemoizator.java index eaa0a21..b3f5e4f 100644 --- a/resolver/src/main/java/net/evendanan/bazel/mvn/merger/GraphMemoizator.java +++ b/resolver/src/main/java/net/evendanan/bazel/mvn/merger/GraphMemoizator.java @@ -1,11 +1,11 @@ package net.evendanan.bazel.mvn.merger; -import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import javax.annotation.Nonnull; public abstract class GraphMemoizator { - private final Map cache = new HashMap<>(); + private final Map cache = new ConcurrentHashMap<>(); @Nonnull protected abstract T calculate(@Nonnull T original); @@ -13,13 +13,7 @@ public abstract class GraphMemoizator { @Nonnull public T map(@Nonnull T original) { final String key = getKeyForObject(original); - if (cache.containsKey(key)) { - return cache.get(key); - } else { - final T revised = calculate(original); - cache.put(key, revised); - return revised; - } + return cache.computeIfAbsent(key, k -> calculate(original)); } protected abstract String getKeyForObject(final T object); diff --git a/resolver/src/main/java/net/evendanan/bazel/mvn/merger/SourcesJarLocator.java b/resolver/src/main/java/net/evendanan/bazel/mvn/merger/SourcesJarLocator.java index 035ec1b..f86a27a 100644 --- a/resolver/src/main/java/net/evendanan/bazel/mvn/merger/SourcesJarLocator.java +++ b/resolver/src/main/java/net/evendanan/bazel/mvn/merger/SourcesJarLocator.java @@ -5,9 +5,9 @@ import java.net.HttpURLConnection; import java.net.URL; import java.util.Collection; -import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import javax.annotation.Nonnull; import net.evendanan.bazel.mvn.api.DependencyTools; @@ -18,7 +18,7 @@ public class SourcesJarLocator { private static final String SOURCES_CLASSIFIER = "sources"; private final ConnectionFactory mConnectionFactory; - private final Map mURLCache = new HashMap<>(); + private final Map mURLCache = new ConcurrentHashMap<>(); public SourcesJarLocator() { this(url -> (HttpURLConnection) url.openConnection()); @@ -31,7 +31,7 @@ public SourcesJarLocator() { private static Collection fillSourcesAttribute( Collection dependencies, DependencyMemoizator memoizator) { - return dependencies.stream().map(memoizator::map).collect(Collectors.toList()); + return dependencies.parallelStream().map(memoizator::map).collect(Collectors.toList()); } public Collection fillSourcesAttribute(Collection dependencies) { diff --git a/resolver/src/test/java/net/evendanan/bazel/mvn/merger/SourcesLocatorTest.java b/resolver/src/test/java/net/evendanan/bazel/mvn/merger/SourcesLocatorTest.java index d20dac7..175c318 100644 --- a/resolver/src/test/java/net/evendanan/bazel/mvn/merger/SourcesLocatorTest.java +++ b/resolver/src/test/java/net/evendanan/bazel/mvn/merger/SourcesLocatorTest.java @@ -151,8 +151,9 @@ public void testOnlyQueriesURIOnce() throws Exception { private static class FakeOpener implements net.evendanan.bazel.mvn.merger.SourcesJarLocator.ConnectionFactory { - private final Map buildsCounter = new HashMap<>(); - private final Map returnedConnections = new HashMap<>(); + private final Map buildsCounter = new java.util.concurrent.ConcurrentHashMap<>(); + private final Map returnedConnections = + new java.util.concurrent.ConcurrentHashMap<>(); private boolean openFailure = false;