From 9bd6ab46536bc533167a0c332ba24bd094dc9b7a Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 28 Nov 2025 15:10:39 +0000 Subject: [PATCH] Optimize _dummy_copy This optimization adds **LRU caching** to the `get_fill_value` function, eliminating repeated evaluation of the expensive `maybe_promote(dtype)` call for dtypes that have already been seen. **What changed:** - Added a new `_get_fill_value_cached` function, decorated with `@functools.lru_cache(maxsize=128)`, that contains the original logic - Modified `get_fill_value` to delegate to the cached version - No intended behavioral changes - the same inputs produce identical outputs (note that `lru_cache` requires the `dtype` argument to be hashable) **Why this speeds up the code:** The profiler shows `maybe_promote(dtype)` consuming 98.6% of `get_fill_value`'s runtime (67,850 ns out of 68,839 ns total). Because dtypes are immutable and the fill value for a given dtype is deterministic, caching the result avoids this repeated work. With caching, the optimized `get_fill_value` takes only 39,964 ns in total - a **42% reduction** in this function's execution time. **Impact on workloads:** The `function_references` show that `_dummy_copy` is called from `_iter_over_selections` in computation.py, which processes multiple selections over datasets/arrays. This creates a hot path in which the same dtypes appear repeatedly, making the cache highly effective. The 6% overall speedup reflects the cumulative benefit when `get_fill_value` is called multiple times with the same dtype values. **Test case performance:** The annotated tests show 7-11% improvements in simple test cases, indicating that the optimization is particularly effective for workloads with repeated dtype operations - exactly what the LRU cache is designed to accelerate. 
--- xarray/core/dtypes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index ccf84146819..e336a947dc4 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -101,8 +101,7 @@ def get_fill_value(dtype): ------- fill_value : Missing value corresponding to this dtype. """ - _, fill_value = maybe_promote(dtype) - return fill_value + return _get_fill_value_cached(dtype) def get_pos_infinity(dtype, max_for_int=False): @@ -193,3 +192,8 @@ def result_type( return np.dtype(object) return np.result_type(*arrays_and_dtypes) + +@functools.lru_cache(maxsize=128) +def _get_fill_value_cached(dtype): + _, fill_value = maybe_promote(dtype) + return fill_value