initial cvcuda normalize kernel implementation

justincdavis · justincdavis · commit 1e864d86fbd8 · 2025-12-04T11:07:40.000-08:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -5633,6 +5633,85 @@ def test_correctness_image(self, mean, std, dtype, fn):
         assert_equal(actual, expected)
 
 
+@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+@needs_cuda
+class TestNormalizeCVCUDA:
+    """Tests for ``F.normalize`` and ``transforms.Normalize`` on CV-CUDA images."""
+
+    # (mean, std) pairs to exercise, keyed by the color space of the test image.
+    MEANS_STDS = {
+        "RGB": TestNormalize.MEANS_STDS,
+        "GRAY": [([0.5], [2.0])],
+    }
+    # First (mean, std) pair of each color space, for tests that need only one.
+    MEAN_STD = {
+        "RGB": MEANS_STDS["RGB"][0],
+        "GRAY": MEANS_STDS["GRAY"][0],
+    }
+
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
+    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
+    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
+    def test_functional(self, color_space, batch_dims, dtype):
+        for mean, std in self.MEANS_STDS[color_space]:
+            image = make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims)
+            check_functional(F.normalize, image, mean=mean, std=std)
+
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
+    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
+    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
+    def test_functional_scalar(self, color_space, batch_dims, dtype):
+        # Scalar mean/std should be accepted for any channel count.
+        image = make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims)
+        check_functional(F.normalize, image, mean=0.5, std=2.0)
+
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
+    @pytest.mark.parametrize("batch_dims", [(1,)])
+    def test_functional_error(self, dtype, batch_dims):
+        rgb_mean, rgb_std = self.MEAN_STD["RGB"]
+        gray_mean, gray_std = self.MEAN_STD["GRAY"]
+
+        # ``match`` is a regex, so the literal dots in the messages are escaped.
+        image = make_image_cvcuda(batch_dims=batch_dims, dtype=dtype)
+        with pytest.raises(ValueError, match=r"Inplace normalization is not supported for CVCUDA\."):
+            F.normalize(image, mean=rgb_mean, std=rgb_std, inplace=True)
+
+        # (image color space, mean, std, expected message) for each length mismatch.
+        mismatches = [
+            ("RGB", gray_mean, rgb_std, r"Mean should have 3 elements\. Got 1\."),
+            ("RGB", rgb_mean, gray_std, r"Std should have 3 elements\. Got 1\."),
+            ("GRAY", rgb_mean, gray_std, r"Mean should have 1 elements\. Got 3\."),
+            ("GRAY", gray_mean, rgb_std, r"Std should have 1 elements\. Got 3\."),
+        ]
+        for color_space, mean, std, message in mismatches:
+            image = make_image_cvcuda(batch_dims=batch_dims, color_space=color_space, dtype=dtype)
+            with pytest.raises(ValueError, match=message):
+                F.normalize(image, mean=mean, std=std)
+
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.uint16, torch.float32])
+    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
+    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
+    def test_transform(self, dtype, color_space, batch_dims):
+        for mean, std in self.MEANS_STDS[color_space]:
+            check_transform(
+                transforms.Normalize(mean=mean, std=std),
+                make_image_cvcuda(color_space=color_space, dtype=dtype, batch_dims=batch_dims),
+            )
+
+    @pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
+    def test_correctness_image(self, batch_dims):
+        mean, std = self.MEAN_STD["RGB"]
+        torch_image = make_image(batch_dims=batch_dims, dtype=torch.float32, device="cuda")
+        cvc_image = F.to_cvcuda_tensor(torch_image)
+
+        expected = F.normalize(torch_image, mean=mean, std=std)
+        actual = F.cvcuda_to_tensor(F.normalize(cvc_image, mean=mean, std=std))
+
+        # The two kernels are separate float32 implementations, so allow a few
+        # ULPs of rounding difference rather than demanding near-bitwise equality.
+        assert_close(actual, expected, rtol=1e-6, atol=1e-6)
+
+
 class TestClampBoundingBoxes:
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     @pytest.mark.parametrize("clamping_mode", ("soft", "hard", None))
diff --git a/torchvision/transforms/v2/functional/__init__.py b/torchvision/transforms/v2/functional/__init__.py
@@ -153,6 +153,7 @@
     gaussian_noise_image,
     gaussian_noise_video,
     normalize,
+    normalize_cvcuda,
     normalize_image,
     normalize_video,
     sanitize_bounding_boxes,
diff --git a/torchvision/transforms/v2/functional/_misc.py b/torchvision/transforms/v2/functional/_misc.py
@@ -79,6 +79,71 @@ def normalize_video(video: torch.Tensor, mean: list[float], std: list[float], in
     return normalize_image(video, mean, std, inplace=inplace)
 
 
+def normalize_cvcuda(
+    image: "cvcuda.Tensor",
+    mean: Sequence[float | int] | float | int,
+    std: Sequence[float | int] | float | int,
+    inplace: bool = False,
+) -> "cvcuda.Tensor":
+    """Normalize a CV-CUDA tensor channel-wise using ``mean`` and ``std``.
+
+    The channel count is read from ``image.shape[3]``, so the input is treated
+    as a 4-D, channels-last tensor; ``mean`` and ``std`` are wrapped as ``NHWC``
+    tensors of shape ``(1, 1, 1, C)`` and handed to ``cvcuda.normalize``.
+
+    Args:
+        image: CV-CUDA tensor to normalize.
+        mean: Per-channel means, or one number applied to every channel.
+        std: Per-channel standard deviations, or one number applied to every
+            channel.
+        inplace: Must be ``False``. Accepted so the signature lines up with
+            ``normalize_image`` / ``normalize_video``.
+
+    Returns:
+        The tensor returned by ``cvcuda.normalize`` called with the
+        ``SCALE_IS_STDDEV`` flag.
+
+    Raises:
+        ValueError: If ``inplace`` is ``True``, if ``mean`` or ``std`` is a
+            sequence whose length differs from the channel count, or if any
+            ``std`` entry is zero after conversion to ``float32``.
+    """
+    if inplace:
+        raise ValueError("Inplace normalization is not supported for CVCUDA.")
+
+    channels = image.shape[3]
+    if isinstance(mean, (float, int)):
+        mean = [mean] * channels
+    elif len(mean) != channels:
+        raise ValueError(f"Mean should have {channels} elements. Got {len(mean)}.")
+    if isinstance(std, (float, int)):
+        std = [std] * channels
+    elif len(std) != channels:
+        raise ValueError(f"Std should have {channels} elements. Got {len(std)}.")
+
+    mean_t = torch.as_tensor(mean, dtype=torch.float32)
+    std_t = torch.as_tensor(std, dtype=torch.float32)
+    # With SCALE_IS_STDDEV the std acts as a divisor, so a zero entry would
+    # presumably fill the output with inf/nan instead of failing; reject it up front.
+    if (std_t == 0).any():
+        raise ValueError(f"std evaluated to zero after conversion to {std_t.dtype}, leading to division by zero.")
+
+    # NOTE(review): ``.cuda()`` selects the current CUDA device; this assumes
+    # ``image`` lives on that device -- confirm for multi-GPU setups.
+    mean_t = mean_t.reshape(1, 1, 1, channels).cuda()
+    std_t = std_t.reshape(1, 1, 1, channels).cuda()
+    mean_cv = cvcuda.as_tensor(mean_t, cvcuda.TensorLayout.NHWC)
+    std_cv = cvcuda.as_tensor(std_t, cvcuda.TensorLayout.NHWC)
+
+    return cvcuda.normalize(image, base=mean_cv, scale=std_cv, flags=cvcuda.NormalizeFlags.SCALE_IS_STDDEV)
+
+
+# Registered only when CV-CUDA is available: the decorator argument evaluates
+# ``cvcuda.Tensor`` at import time.
+if CVCUDA_AVAILABLE:
+    _normalize_cvcuda = _register_kernel_internal(normalize, cvcuda.Tensor)(normalize_cvcuda)
+
+
 def gaussian_blur(inpt: torch.Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None) -> torch.Tensor:
     """See :class:`~torchvision.transforms.v2.GaussianBlur` for details."""
     if torch.jit.is_scripting():