From fc9c55fe7c1ca60f8c28d892bc3f561eb5d46091 Mon Sep 17 00:00:00 2001
From: Manish Patel
Date: Tue, 20 May 2025 12:36:29 -0700
Subject: [PATCH] Use ProcessPoolExecutor instead of ThreadPoolExecutor.

With the ThreadPoolExecutor, the compile commands generator was
observed to run effectively single-threaded, most likely due to the
GIL; CPU utilization was only about one core. Using a
ProcessPoolExecutor to work around the GIL provides a substantial
performance benefit: it was 6x faster when running on a powerful
machine against a very large repo.
---
 refresh.template.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/refresh.template.py b/refresh.template.py
index 194f365e..4ab78648 100644
--- a/refresh.template.py
+++ b/refresh.template.py
@@ -1142,8 +1142,8 @@ def _convert_compile_commands(aquery_output):
     # Process each action from Bazelisms -> file paths and their clang commands
-    # Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved
-    with concurrent.futures.ThreadPoolExecutor(
-        max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
+    # Processes instead of threads to sidestep the GIL: this stage was observed to run effectively single-threaded under a ThreadPoolExecutor.
+    with concurrent.futures.ProcessPoolExecutor(
+        max_workers=os.cpu_count() # Default before Python 3.13, after which it is os.process_cpu_count().
     ) as threadpool:
         outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)
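
Below is a minimal, self-contained sketch (not part of the patch) of why the
executor swap pays off. cpu_bound_task and time_pool are hypothetical names
introduced for illustration; cpu_bound_task stands in for
_get_cpp_command_for_files as pure-Python work that holds the GIL, so a
ThreadPoolExecutor serializes it while a ProcessPoolExecutor spreads it
across cores.

import concurrent.futures
import os
import time

def cpu_bound_task(n):
    # Illustrative stand-in, not code from refresh.template.py. A pure-Python
    # loop holds the GIL throughout, so threads cannot overlap this work,
    # but separate processes can.
    total = 0
    for i in range(n):
        total += i * i
    return total

def time_pool(executor_cls, inputs):
    # Same max_workers policy as the patched code.
    with executor_cls(max_workers=os.cpu_count()) as pool:
        start = time.perf_counter()
        list(pool.map(cpu_bound_task, inputs))  # Drain all results before stopping the clock.
        return time.perf_counter() - start

if __name__ == "__main__":  # Guard required: process workers re-import this module.
    inputs = [2_000_000] * (os.cpu_count() or 4)
    print(f"threads:   {time_pool(concurrent.futures.ThreadPoolExecutor, inputs):.2f}s")
    print(f"processes: {time_pool(concurrent.futures.ProcessPoolExecutor, inputs):.2f}s")

On a multi-core machine the process-pool run should finish several times
faster, consistent with the 6x speedup reported in the commit message. One
constraint the swap introduces: ProcessPoolExecutor pickles the mapped
function and its arguments to send them to workers, so the function must be
defined at module top level, as module-level functions like
_get_cpp_command_for_files are.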