@@ -105,14 +105,6 @@ def _normalize_cvcuda(
105105 elif len (std ) != channels :
106106 raise ValueError (f"Std should have { channels } elements. Got { len (std )} ." )
107107
108- # CV-CUDA requires float32 tensors for the mean/std parameters
109- # at small batches, this is costly relative to normalize operation
110- # if CV-CUDA is known to be a backend, could optimize this
111- # For Normalize class:
112- # by creating tensors at class initialization time
113- # For functional API:
114- # by storing cached tensors in helper function with functools.lru_cache (would it even be worth it?)
115- # Since CV-CUDA is 1) not default backend, 2) only strictly faster at large batch size, ignore
116108 mt = torch .as_tensor (mean , dtype = torch .float32 ).reshape (1 , 1 , 1 , channels ).cuda ()
117109 st = torch .as_tensor (std , dtype = torch .float32 ).reshape (1 , 1 , 1 , channels ).cuda ()
118110 mean_cv = cvcuda .as_tensor (mt , cvcuda .TensorLayout .NHWC )
@@ -122,7 +114,7 @@ def _normalize_cvcuda(
122114
123115
124116if CVCUDA_AVAILABLE :
125- _normalize_cvcuda_registered = _register_kernel_internal (normalize , _import_cvcuda ().Tensor )(_normalize_cvcuda )
117+ _register_kernel_internal (normalize , _import_cvcuda ().Tensor )(_normalize_cvcuda )
126118
127119
128120def gaussian_blur (inpt : torch .Tensor , kernel_size : list [int ], sigma : Optional [list [float ]] = None ) -> torch .Tensor :
0 commit comments