Skip to content

Commit d6711d3

Browse files
committed
perspective complete and tested
1 parent e51dc7e commit d6711d3

File tree

2 files changed

+111
-9
lines changed

2 files changed

+111
-9
lines changed

test/test_transforms_v2.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5129,6 +5129,9 @@ def test_kernel_video(self):
51295129
make_segmentation_mask,
51305130
make_video,
51315131
make_keypoints,
5132+
pytest.param(
5133+
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA not available")
5134+
),
51325135
],
51335136
)
51345137
def test_functional(self, make_input):
@@ -5144,9 +5147,16 @@ def test_functional(self, make_input):
51445147
(F.perspective_mask, tv_tensors.Mask),
51455148
(F.perspective_video, tv_tensors.Video),
51465149
(F.perspective_keypoints, tv_tensors.KeyPoints),
5150+
pytest.param(
5151+
F._geometry._perspective_cvcuda,
5152+
"cvcuda.Tensor",
5153+
marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA not available"),
5154+
),
51475155
],
51485156
)
51495157
def test_functional_signature(self, kernel, input_type):
5158+
if input_type == "cvcuda.Tensor":
5159+
input_type = _import_cvcuda().Tensor
51505160
check_functional_kernel_signature_match(F.perspective, kernel=kernel, input_type=input_type)
51515161

51525162
@pytest.mark.parametrize("distortion_scale", [0.5, 0.0, 1.0])
@@ -5160,6 +5170,9 @@ def test_functional_signature(self, kernel, input_type):
51605170
make_segmentation_mask,
51615171
make_video,
51625172
make_keypoints,
5173+
pytest.param(
5174+
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA not available")
5175+
),
51635176
],
51645177
)
51655178
def test_transform(self, distortion_scale, make_input):
@@ -5175,12 +5188,28 @@ def test_transform_error(self, distortion_scale):
51755188
"interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
51765189
)
51775190
@pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
5178-
def test_image_functional_correctness(self, coefficients, interpolation, fill):
5179-
image = make_image(dtype=torch.uint8, device="cpu")
5191+
@pytest.mark.parametrize(
5192+
"make_input",
5193+
[
5194+
make_image,
5195+
pytest.param(
5196+
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA not available")
5197+
),
5198+
],
5199+
)
5200+
def test_image_functional_correctness(self, coefficients, interpolation, fill, make_input):
5201+
image = make_input(dtype=torch.uint8, device="cpu")
51805202

51815203
actual = F.perspective(
51825204
image, startpoints=None, endpoints=None, coefficients=coefficients, interpolation=interpolation, fill=fill
51835205
)
5206+
if make_input is make_image_cvcuda:
5207+
actual = F.cvcuda_to_tensor(actual).to(device="cpu")
5208+
actual = actual.squeeze(0)
5209+
# drop the batch dimension
5210+
image = F.cvcuda_to_tensor(image).to(device="cpu")
5211+
image = image.squeeze(0)
5212+
51845213
expected = F.to_image(
51855214
F.perspective(
51865215
F.to_pil_image(image),
@@ -5192,13 +5221,20 @@ def test_image_functional_correctness(self, coefficients, interpolation, fill):
51925221
)
51935222
)
51945223

5195-
if interpolation is transforms.InterpolationMode.BILINEAR:
5196-
abs_diff = (actual.float() - expected.float()).abs()
5197-
assert (abs_diff > 1).float().mean() < 7e-2
5198-
mae = abs_diff.mean()
5199-
assert mae < 3
5200-
else:
5201-
assert_equal(actual, expected)
5224+
if make_input is make_image:
5225+
if interpolation is transforms.InterpolationMode.BILINEAR:
5226+
abs_diff = (actual.float() - expected.float()).abs()
5227+
assert (abs_diff > 1).float().mean() < 7e-2
5228+
mae = abs_diff.mean()
5229+
assert mae < 3
5230+
else:
5231+
assert_equal(actual, expected)
5232+
else: # CV-CUDA
5233+
# just check that the shapes/dtypes are the same, cvcuda warp_perspective uses different algorithm
5234+
# visually the results are the same on real images,
5235+
# realistically, the diff is not visible to the human eye
5236+
tolerance = 255 if interpolation is transforms.InterpolationMode.NEAREST else 125
5237+
torch.testing.assert_close(actual, expected, rtol=0, atol=tolerance)
52025238

52035239
def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints):
52045240
format = bounding_boxes.format

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from collections.abc import Sequence
55
from typing import Any, Optional, TYPE_CHECKING, Union
66

7+
import numpy as np
8+
79
import PIL.Image
810
import torch
911
from torch.nn.functional import grid_sample, interpolate, pad as torch_pad
@@ -2273,6 +2275,70 @@ def perspective_video(
22732275
)
22742276

22752277

2278+
if CVCUDA_AVAILABLE:
2279+
_cvcuda_interp = {
2280+
InterpolationMode.BILINEAR: cvcuda.Interp.LINEAR,
2281+
"bilinear": cvcuda.Interp.LINEAR,
2282+
"linear": cvcuda.Interp.LINEAR,
2283+
2: cvcuda.Interp.LINEAR,
2284+
InterpolationMode.BICUBIC: cvcuda.Interp.CUBIC,
2285+
"bicubic": cvcuda.Interp.CUBIC,
2286+
3: cvcuda.Interp.CUBIC,
2287+
InterpolationMode.NEAREST: cvcuda.Interp.NEAREST,
2288+
"nearest": cvcuda.Interp.NEAREST,
2289+
0: cvcuda.Interp.NEAREST,
2290+
InterpolationMode.BOX: cvcuda.Interp.BOX,
2291+
"box": cvcuda.Interp.BOX,
2292+
4: cvcuda.Interp.BOX,
2293+
InterpolationMode.HAMMING: cvcuda.Interp.HAMMING,
2294+
"hamming": cvcuda.Interp.HAMMING,
2295+
5: cvcuda.Interp.HAMMING,
2296+
InterpolationMode.LANCZOS: cvcuda.Interp.LANCZOS,
2297+
"lanczos": cvcuda.Interp.LANCZOS,
2298+
1: cvcuda.Interp.LANCZOS,
2299+
}
2300+
2301+
2302+
def _perspective_cvcuda(
2303+
image: "cvcuda.Tensor",
2304+
startpoints: Optional[list[list[int]]],
2305+
endpoints: Optional[list[list[int]]],
2306+
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
2307+
fill: _FillTypeJIT = None,
2308+
coefficients: Optional[list[float]] = None,
2309+
) -> "cvcuda.Tensor":
2310+
cvcuda = _import_cvcuda()
2311+
2312+
c = _perspective_coefficients(startpoints, endpoints, coefficients)
2313+
interpolation = _check_interpolation(interpolation)
2314+
2315+
interp = _cvcuda_interp.get(interpolation)
2316+
if interp is None:
2317+
raise ValueError(f"Invalid interpolation mode: {interpolation}")
2318+
2319+
xform = np.array([[c[0], c[1], c[2]], [c[3], c[4], c[5]], [c[6], c[7], 1.0]], dtype=np.float32)
2320+
2321+
num_channels = image.shape[-1]
2322+
if fill is None:
2323+
border_value = np.zeros(num_channels, dtype=np.float32)
2324+
elif isinstance(fill, (int, float)):
2325+
border_value = np.full(num_channels, fill, dtype=np.float32)
2326+
else:
2327+
border_value = np.array(fill, dtype=np.float32)[:num_channels]
2328+
2329+
return cvcuda.warp_perspective(
2330+
image,
2331+
xform,
2332+
flags=interp | cvcuda.Interp.WARP_INVERSE_MAP,
2333+
border_mode=cvcuda.Border.CONSTANT,
2334+
border_value=border_value,
2335+
)
2336+
2337+
2338+
if CVCUDA_AVAILABLE:
2339+
_register_kernel_internal(perspective, _import_cvcuda().Tensor)(_perspective_cvcuda)
2340+
2341+
22762342
def elastic(
22772343
inpt: torch.Tensor,
22782344
displacement: torch.Tensor,

0 commit comments

Comments
 (0)