gaussian_noise cvcuda backend

justincdavis · justincdavis · commit 01460443f648 · 2025-12-04T13:26:15.000-08:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -4075,14 +4075,28 @@ def test_kernel_uint8(self, make_input):
 
     @pytest.mark.parametrize(
         "make_input",
-        [make_image_tensor, make_image, make_video],
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
+        ],
     )
     def test_functional_float(self, make_input):
         check_functional(F.gaussian_noise, make_input(dtype=torch.float32))
 
     @pytest.mark.parametrize(
         "make_input",
-        [make_image_tensor, make_image, make_video],
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
+        ],
     )
     def test_functional_uint8(self, make_input):
         check_functional(F.gaussian_noise, make_input(dtype=torch.uint8))
@@ -4093,14 +4107,28 @@ def test_functional_uint8(self, make_input):
             (F.gaussian_noise, torch.Tensor),
             (F.gaussian_noise_image, tv_tensors.Image),
             (F.gaussian_noise_video, tv_tensors.Video),
+            pytest.param(
+                F._misc._gaussian_noise_cvcuda,
+                "cvcuda.Tensor",
+                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available"),
+            ),
         ],
     )
     def test_functional_signature(self, kernel, input_type):
+        if input_type == "cvcuda.Tensor":
+            input_type = _import_cvcuda().Tensor
         check_functional_kernel_signature_match(F.gaussian_noise, kernel=kernel, input_type=input_type)
 
     @pytest.mark.parametrize(
         "make_input",
-        [make_image_tensor, make_image, make_video],
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
+        ],
     )
     def test_transform_float(self, make_input):
         def adapter(_, input, __):
@@ -4118,7 +4146,14 @@ def adapter(_, input, __):
 
     @pytest.mark.parametrize(
         "make_input",
-        [make_image_tensor, make_image, make_video],
+        [
+            make_image_tensor,
+            make_image,
+            make_video,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CV-CUDA not available")
+            ),
+        ],
     )
     def test_transform_uint8(self, make_input):
         def adapter(_, input, __):
diff --git a/torchvision/transforms/v2/functional/_misc.py b/torchvision/transforms/v2/functional/_misc.py
@@ -238,6 +238,35 @@ def _gaussian_noise_pil(
     raise ValueError("Gaussian Noise is not implemented for PIL images.")
 
 
+def _gaussian_noise_cvcuda(
+    image: "cvcuda.Tensor",
+    mean: float = 0.0,
+    sigma: float = 0.1,
+    clip: bool = True,
+) -> "cvcuda.Tensor":
+    cvcuda = _import_cvcuda()
+
+    batch_size = image.shape[0]
+    mu_tensor = cvcuda.as_tensor(torch.full((batch_size,), mean, dtype=torch.float32).cuda(), "N")
+    sigma_tensor = cvcuda.as_tensor(torch.full((batch_size,), sigma, dtype=torch.float32).cuda(), "N")
+
+    # per_channel=True gives every channel its own random noise, the same behavior as torch.randn_like.
+    # The seed is drawn from the torch RNG, so the noise is deterministic whenever the torch seed is set manually.
+    # NOTE: `clip` is not forwarded because CV-CUDA does not support it, so no explicit clamping is done here.
+    # By default CV-CUDA clamps float outputs, and uint8 outputs that overflow are clamped to 0-255 anyway.
+    return cvcuda.gaussiannoise(
+        image,
+        mu=mu_tensor,
+        sigma=sigma_tensor,
+        per_channel=True,
+        seed=int(torch.empty((), dtype=torch.int64).random_().item()),
+    )
+
+
+if CVCUDA_AVAILABLE:
+    _register_kernel_internal(gaussian_noise, _import_cvcuda().Tensor)(_gaussian_noise_cvcuda)
+
+
 def to_dtype(inpt: torch.Tensor, dtype: torch.dtype = torch.float, scale: bool = False) -> torch.Tensor:
     """See :func:`~torchvision.transforms.v2.ToDtype` for details."""
     if torch.jit.is_scripting():