From b9300267f7060e954f2eea576e42997790490c33 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 12:38:24 +0000 Subject: [PATCH] Optimize compute_conv_output_shape The optimized code achieves a 94% speedup by eliminating expensive NumPy array operations and replacing them with efficient Python list comprehensions and native arithmetic. **Key optimizations:** 1. **Eliminated unnecessary NumPy array conversions**: The original code converted `spatial_shape`, `kernel_shape[:-2]`, and `dilation_rate` to NumPy arrays, even when only basic indexing and arithmetic were needed. The optimized version keeps these as native Python tuples/lists. 2. **Replaced vectorized NumPy operations with list comprehensions**: The most expensive operations were the NumPy vectorized calculations for `output_spatial_shape`. These are now computed element-wise using list comprehensions with explicit indexing, avoiding NumPy's overhead for small arrays (typically 1-3 dimensions). 3. **Streamlined None dimension handling**: Instead of mutating a NumPy array in a loop to handle None dimensions, the optimized version uses a single list comprehension to identify None positions and a tuple comprehension to create the calculation-ready spatial shape. 4. **Eliminated redundant array operations**: Removed the final `[int(i) for i in output_spatial_shape]` conversion since the list comprehensions already produce integers directly. **Why this works**: For small arrays (1-3D convolutions are most common), NumPy's vectorization overhead outweighs its benefits. The function references show this is called from convolutional layer constructors during model building, where the 94% speedup significantly improves model initialization time. The optimization is particularly effective for the common test cases with valid/same padding, showing 70-100% improvements across different input configurations. --- keras/src/ops/operation_utils.py | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/keras/src/ops/operation_utils.py b/keras/src/ops/operation_utils.py index b1ac2621de0a..231c9a2b4924 100644 --- a/keras/src/ops/operation_utils.py +++ b/keras/src/ops/operation_utils.py @@ -197,25 +197,18 @@ def compute_conv_output_shape( f"`dilation_rate={dilation_rate}` and " f"input of shape {input_shape}." ) - none_dims = [] - spatial_shape = np.array(spatial_shape) - for i in range(len(spatial_shape)): - if spatial_shape[i] is None: - # Set `None` shape to a manual value so that we can run numpy - # computation on `spatial_shape`. - spatial_shape[i] = -1 - none_dims.append(i) + none_dims = [i for i, s in enumerate(spatial_shape) if s is None] + spatial_calc = tuple(-1 if s is None else s for s in spatial_shape) - kernel_spatial_shape = np.array(kernel_shape[:-2]) - dilation_rate = np.array(dilation_rate) + kernel_spatial_shape = kernel_shape[:-2] if padding == "valid": - output_spatial_shape = ( - np.floor( - (spatial_shape - dilation_rate * (kernel_spatial_shape - 1) - 1) - / strides - ) - + 1 - ) + output_spatial_shape = [ + int(np.floor( + (spatial_calc[i] - dilation_rate[i] * (kernel_spatial_shape[i] - 1) - 1) + / strides[i] + 1 + )) if spatial_calc[i] != -1 else -1 + for i in range(len(spatial_shape)) + ] for i in range(len(output_spatial_shape)): if i not in none_dims and output_spatial_shape[i] < 0: raise ValueError( @@ -225,13 +218,16 @@ def compute_conv_output_shape( f"`dilation_rate={dilation_rate}`." ) elif padding == "same" or padding == "causal": - output_spatial_shape = np.floor((spatial_shape - 1) / strides) + 1 + output_spatial_shape = [ + int(np.floor((spatial_calc[i] - 1) / strides[i]) + 1) + if spatial_calc[i] != -1 else -1 + for i in range(len(spatial_shape)) + ] else: raise ValueError( "`padding` must be either `'valid'` or `'same'`. Received " f"{padding}." ) - output_spatial_shape = [int(i) for i in output_spatial_shape] for i in none_dims: output_spatial_shape[i] = None output_spatial_shape = tuple(output_spatial_shape)