Skip to content

Commit 42019eb

Browse files
committed
add docstring to explain scaling setup, combine correctness checks
1 parent 3e387fb commit 42019eb

File tree

2 files changed

+82
-67
lines changed

2 files changed

+82
-67
lines changed

test/test_transforms_v2.py

Lines changed: 61 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2712,21 +2712,74 @@ def fn(value):
27122712
@pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
27132713
@pytest.mark.parametrize("device", cpu_and_cuda())
27142714
@pytest.mark.parametrize("scale", (True, False))
2715-
def test_image_correctness(self, input_dtype, output_dtype, device, scale):
2715+
@pytest.mark.parametrize(
2716+
"make_input",
2717+
[
2718+
make_image,
2719+
pytest.param(
2720+
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
2721+
),
2722+
],
2723+
)
2724+
def test_image_correctness(self, input_dtype, output_dtype, device, scale, make_input):
27162725
if input_dtype.is_floating_point and output_dtype == torch.int64:
27172726
pytest.xfail("float to int64 conversion is not supported")
27182727
if input_dtype == torch.uint8 and output_dtype == torch.uint16 and device == "cuda":
27192728
pytest.xfail("uint8 to uint16 conversion is not supported on cuda")
2729+
if input_dtype == torch.uint8 and output_dtype == torch.uint16 and scale and make_input == make_image_cvcuda:
2730+
pytest.xfail("uint8 to uint16 conversion with scale is not supported in F._misc._to_dtype_cvcuda")
27202731

2721-
input = make_image(dtype=input_dtype, device=device)
2722-
2732+
input = make_input(dtype=input_dtype, device=device)
27232733
out = F.to_dtype(input, dtype=output_dtype, scale=scale)
2724-
expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale)
27252734

2726-
if input_dtype.is_floating_point and not output_dtype.is_floating_point and scale:
2727-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2728-
else:
2729-
torch.testing.assert_close(out, expected)
2735+
if isinstance(input, torch.Tensor):
2736+
expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale)
2737+
if input_dtype.is_floating_point and not output_dtype.is_floating_point and scale:
2738+
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2739+
else:
2740+
torch.testing.assert_close(out, expected)
2741+
else: # cvcuda.Tensor
2742+
expected = self.reference_convert_dtype_image_tensor(
2743+
F.cvcuda_to_tensor(input), dtype=output_dtype, scale=scale
2744+
)
2745+
out = F.cvcuda_to_tensor(out)
2746+
# there are some differences in dtype conversion between torchvision and cvcuda
2747+
# due to different rounding behavior when converting between types with different bit widths
2748+
# Check if we're converting to a type with more bits (without scaling)
2749+
in_bits = torch.iinfo(input_dtype).bits if not input_dtype.is_floating_point else None
2750+
out_bits = torch.iinfo(output_dtype).bits if not output_dtype.is_floating_point else None
2751+
2752+
if scale:
2753+
if input_dtype.is_floating_point and not output_dtype.is_floating_point:
2754+
# float -> int with scaling: allow for rounding differences
2755+
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2756+
elif input_dtype == torch.uint16 and output_dtype == torch.uint8:
2757+
# uint16 -> uint8 with scaling: allow large differences
2758+
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2759+
else:
2760+
torch.testing.assert_close(out, expected)
2761+
else:
2762+
if in_bits is not None and out_bits is not None and out_bits > in_bits:
2763+
# uint to larger uint without scaling: allow large differences due to bit expansion
2764+
if input_dtype == torch.uint8 and output_dtype == torch.uint16:
2765+
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2766+
else:
2767+
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2768+
elif not input_dtype.is_floating_point and not output_dtype.is_floating_point:
2769+
# uint to uint without scaling (same or smaller bits): allow for rounding
2770+
if input_dtype == torch.uint16 and output_dtype == torch.uint8:
2771+
# uint16 -> uint8 can have large differences due to bit reduction
2772+
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2773+
else:
2774+
torch.testing.assert_close(out, expected)
2775+
elif input_dtype.is_floating_point and not output_dtype.is_floating_point:
2776+
# float -> uint without scaling: allow for rounding differences
2777+
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2778+
elif not input_dtype.is_floating_point and output_dtype.is_floating_point:
2779+
# uint -> float without scaling: allow for rounding differences
2780+
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2781+
else:
2782+
torch.testing.assert_close(out, expected)
27302783

27312784
def was_scaled(self, inpt):
27322785
# this assumes the target dtype is float
@@ -2825,65 +2878,6 @@ def test_uint16(self):
28252878
assert_equal(F.to_dtype(img_float32, torch.uint8, scale=True), img_uint8)
28262879
assert_close(F.to_dtype(img_uint8, torch.float32, scale=True), img_float32, rtol=0, atol=1e-2)
28272880

2828-
@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
2829-
@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
2830-
@pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
2831-
@pytest.mark.parametrize("device", cpu_and_cuda())
2832-
@pytest.mark.parametrize("scale", (True, False))
2833-
def test_cvcuda_parity(self, input_dtype, output_dtype, device, scale):
2834-
if input_dtype.is_floating_point and output_dtype == torch.int64:
2835-
pytest.xfail("float to int64 conversion is not supported")
2836-
if input_dtype == torch.uint8 and output_dtype == torch.uint16 and device == "cuda":
2837-
pytest.xfail("uint8 to uint16 conversion is not supported on cuda")
2838-
if input_dtype == torch.uint8 and output_dtype == torch.uint16 and scale:
2839-
pytest.xfail("uint8 to uint16 conversion with scale is not supported in F.to_dtype_image")
2840-
2841-
cvc_input = make_image_cvcuda(batch_dims=(1,), dtype=input_dtype, device=device)
2842-
torch_input = F.cvcuda_to_tensor(cvc_input)
2843-
2844-
out = F.to_dtype(cvc_input, dtype=output_dtype, scale=scale)
2845-
out = F.cvcuda_to_tensor(out)
2846-
2847-
expected = F.to_dtype(torch_input, dtype=output_dtype, scale=scale)
2848-
2849-
# there are some differences in dtype conversion between torchvision and cvcuda
2850-
# due to different rounding behavior when converting between types with different bit widths
2851-
# Check if we're converting to a type with more bits (without scaling)
2852-
in_bits = torch.iinfo(input_dtype).bits if not input_dtype.is_floating_point else None
2853-
out_bits = torch.iinfo(output_dtype).bits if not output_dtype.is_floating_point else None
2854-
2855-
if scale:
2856-
if input_dtype.is_floating_point and not output_dtype.is_floating_point:
2857-
# float -> int with scaling: allow for rounding differences
2858-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2859-
elif input_dtype == torch.uint16 and output_dtype == torch.uint8:
2860-
# uint16 -> uint8 with scaling: allow large differences
2861-
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2862-
else:
2863-
torch.testing.assert_close(out, expected)
2864-
else:
2865-
if in_bits is not None and out_bits is not None and out_bits > in_bits:
2866-
# uint to larger uint without scaling: allow large differences due to bit expansion
2867-
if input_dtype == torch.uint8 and output_dtype == torch.uint16:
2868-
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2869-
else:
2870-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2871-
elif not input_dtype.is_floating_point and not output_dtype.is_floating_point:
2872-
# uint to uint without scaling (same or smaller bits): allow for rounding
2873-
if input_dtype == torch.uint16 and output_dtype == torch.uint8:
2874-
# uint16 -> uint8 can have large differences due to bit reduction
2875-
torch.testing.assert_close(out, expected, atol=255, rtol=0)
2876-
else:
2877-
torch.testing.assert_close(out, expected)
2878-
elif input_dtype.is_floating_point and not output_dtype.is_floating_point:
2879-
# float -> uint without scaling: allow for rounding differences
2880-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2881-
elif not input_dtype.is_floating_point and output_dtype.is_floating_point:
2882-
# uint -> float without scaling: allow for rounding differences
2883-
torch.testing.assert_close(out, expected, atol=1, rtol=0)
2884-
else:
2885-
torch.testing.assert_close(out, expected)
2886-
28872881

28882882
class TestAdjustBrightness:
28892883
_CORRECTNESS_BRIGHTNESS_FACTORS = [0.5, 0.0, 1.0, 5.0]

torchvision/transforms/v2/functional/_misc.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,27 @@ def _to_dtype_cvcuda(
376376
dtype: torch.dtype,
377377
scale: bool = False,
378378
) -> "cvcuda.Tensor":
379+
"""
380+
Convert the dtype of a CV-CUDA tensor based on a torch.dtype.
381+
382+
Args:
383+
inpt: The CV-CUDA tensor to convert the dtype of.
384+
dtype: The torch.dtype to convert the dtype to.
385+
scale: Whether to scale the values to the new dtype.
386+
There are four cases for the scaling setup:
387+
1. float -> float
388+
2. int -> int
389+
3. float -> int
390+
4. int -> float
391+
If scale is True, the values will be scaled to the new dtype.
392+
If scale is False, the values will not be scaled.
393+
The scale values for float -> float and int -> int are 1.0 and 0.0 respectively.
394+
The scale values for float -> int and int -> float are the maximum value of the new dtype.
395+
396+
Returns:
397+
out (cvcuda.Tensor): The CV-CUDA tensor with the converted dtype.
398+
399+
"""
379400
cvcuda = _import_cvcuda()
380401

381402
dtype_in = _cvcuda_to_torch_dtypes[inpt.dtype]

0 commit comments

Comments
 (0)