Skip to content

Commit 06a0fe7

Browse files
committed
update rotate to use correct logic
1 parent 4ffc14e commit 06a0fe7

File tree

2 files changed

+48
-80
lines changed

2 files changed

+48
-80
lines changed

test/test_transforms_v2.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
assert_equal,
2626
cache,
2727
cpu_and_cuda,
28+
cvcuda_to_pil_compatible_tensor,
2829
freeze_rng_state,
2930
ignore_jit_no_profile_information_warning,
3031
make_bounding_boxes,
@@ -2152,10 +2153,7 @@ def test_functional_image_correctness(self, angle, center, interpolation, expand
21522153
actual = F.rotate(image, angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill)
21532154

21542155
if make_input == make_image_cvcuda:
2155-
actual = F.cvcuda_to_tensor(actual).to(device="cpu")
2156-
image = F.cvcuda_to_tensor(image)
2157-
# drop the batch dimensions
2158-
image = image.squeeze(0)
2156+
image = cvcuda_to_pil_compatible_tensor(image)
21592157

21602158
expected = F.to_image(
21612159
F.rotate(

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 46 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,13 +1557,18 @@ def _rotate_cvcuda(
15571557
) -> "cvcuda.Tensor":
15581558
cvcuda = _import_cvcuda()
15591559

1560+
angle = angle % 360
1561+
1562+
if angle == 0:
1563+
return inpt
1564+
1565+
if angle == 180:
1566+
return cvcuda.flip(inpt, flipCode=-1)
1567+
15601568
interp = _cvcuda_interp.get(interpolation)
15611569
if interp is None:
15621570
raise ValueError(f"Interpolation mode {interpolation} is not supported with CV-CUDA")
15631571

1564-
if center is not None and len(center) != 2:
1565-
raise ValueError("Center must be a list of two floats")
1566-
15671572
input_height, input_width = inpt.shape[1], inpt.shape[2]
15681573
num_channels = inpt.shape[3]
15691574

@@ -1574,85 +1579,50 @@ def _rotate_cvcuda(
15741579
else:
15751580
fill_value = [float(f) for f in fill]
15761581

1577-
# Compute center offset (shift from image center)
1578-
# CV-CUDA's shift parameter is the offset from the image center
1582+
# Determine the rotation center
1583+
# torchvision uses image center by default, cvcuda rotates around upper-left (0,0)
1584+
# We need to calculate a shift to effectively rotate around the desired center
15791585
if center is None:
1580-
center_offset = (0.0, 0.0)
1586+
cx, cy = input_width / 2.0, input_height / 2.0
15811587
else:
1582-
center_offset = (center[0] - input_width / 2.0, center[1] - input_height / 2.0)
1588+
cx, cy = float(center[0]), float(center[1])
15831589

1584-
if expand:
1585-
# Calculate the expanded output size using the same logic as torch
1586-
center_f = [0.0, 0.0]
1587-
if center is not None:
1588-
center_f = [(c - s * 0.5) for c, s in zip(center, [input_width, input_height])]
1589-
matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
1590-
output_width, output_height = _compute_affine_output_size(matrix, input_width, input_height)
1591-
1592-
# compute padding
1593-
pad_left = (output_width - input_width) // 2
1594-
pad_right = output_width - input_width - pad_left
1595-
pad_top = (output_height - input_height) // 2
1596-
pad_bottom = output_height - input_height - pad_top
1597-
padded = cvcuda.copymakeborder(
1598-
inpt,
1599-
border_mode=cvcuda.Border.CONSTANT,
1600-
border_value=fill_value,
1601-
top=pad_top,
1602-
bottom=pad_bottom,
1603-
left=pad_left,
1604-
right=pad_right,
1605-
)
1590+
angle_rad = math.radians(angle)
1591+
cos_angle = math.cos(angle_rad)
1592+
sin_angle = math.sin(angle_rad)
16061593

1607-
# get the new center offset
1608-
# The center of the original image has moved by (pad_left, pad_top)
1609-
new_center_x = (input_width / 2.0 + center_offset[0]) + pad_left
1610-
new_center_y = (input_height / 2.0 + center_offset[1]) + pad_top
1611-
padded_shift = (new_center_x - output_width / 2.0, new_center_y - output_height / 2.0)
1612-
1613-
return cvcuda.rotate(padded, angle_deg=angle, shift=padded_shift, interpolation=interp)
1614-
1615-
elif fill is not None and fill_value != [0.0] * num_channels:
1616-
# For non-zero fill without expand:
1617-
# 1. Pad with fill value to create a larger canvas
1618-
# 2. Rotate around the appropriate center
1619-
# 3. Crop back to original size
1620-
1621-
# compute padding
1622-
diag = int(math.ceil(math.sqrt(input_width**2 + input_height**2)))
1623-
pad_left = (diag - input_width) // 2
1624-
pad_right = diag - input_width - pad_left
1625-
pad_top = (diag - input_height) // 2
1626-
pad_bottom = diag - input_height - pad_top
1627-
padded = cvcuda.copymakeborder(
1628-
inpt,
1629-
border_mode=cvcuda.Border.CONSTANT,
1630-
border_value=fill_value,
1631-
top=pad_top,
1632-
bottom=pad_bottom,
1633-
left=pad_left,
1634-
right=pad_right,
1635-
)
1594+
# Simple case (no expand): rotate the input as-is, with a shift chosen so the rotation is effectively about (cx, cy)
1595+
if not expand:
1596+
shift_x = (1 - cos_angle) * cx - sin_angle * cy
1597+
shift_y = sin_angle * cx + (1 - cos_angle) * cy
16361598

1637-
# get the new center offset
1638-
padded_width, padded_height = padded.shape[2], padded.shape[1]
1639-
new_center_x = (input_width / 2.0 + center_offset[0]) + pad_left
1640-
new_center_y = (input_height / 2.0 + center_offset[1]) + pad_top
1641-
padded_shift = (new_center_x - padded_width / 2.0, new_center_y - padded_height / 2.0)
1642-
1643-
# rotate the padded image
1644-
rotated = cvcuda.rotate(padded, angle_deg=angle, shift=padded_shift, interpolation=interp)
1645-
1646-
# crop back to original size
1647-
crop_left = (rotated.shape[2] - input_width) // 2
1648-
crop_top = (rotated.shape[1] - input_height) // 2
1649-
return cvcuda.customcrop(
1650-
rotated,
1651-
rect=cvcuda.RectI(x=crop_left, y=crop_top, width=input_width, height=input_height),
1652-
)
1599+
return cvcuda.rotate(inpt, angle_deg=angle, shift=(shift_x, shift_y), interpolation=interp)
16531600

1654-
else:
1655-
return cvcuda.rotate(inpt, angle_deg=angle, shift=center_offset, interpolation=interp)
1601+
# Expand case: compute the output size with the same affine helpers torchvision uses, then pad the input to that size before rotating
1602+
matrix = _get_inverse_affine_matrix([0.0, 0.0], -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
1603+
output_width, output_height = _compute_affine_output_size(matrix, input_width, input_height)
1604+
1605+
pad_left = (output_width - input_width) // 2
1606+
pad_right = output_width - input_width - pad_left
1607+
pad_top = (output_height - input_height) // 2
1608+
pad_bottom = output_height - input_height - pad_top
1609+
1610+
padded = cvcuda.copymakeborder(
1611+
inpt,
1612+
top=pad_top,
1613+
left=pad_left,
1614+
bottom=pad_bottom,
1615+
right=pad_right,
1616+
border_mode=cvcuda.Border.CONSTANT,
1617+
border_value=fill_value,
1618+
)
1619+
1620+
new_cx = pad_left + cx
1621+
new_cy = pad_top + cy
1622+
shift_x = (1 - cos_angle) * new_cx - sin_angle * new_cy
1623+
shift_y = sin_angle * new_cx + (1 - cos_angle) * new_cy
1624+
1625+
return cvcuda.rotate(padded, angle_deg=angle, shift=(shift_x, shift_y), interpolation=interp)
16561626

16571627

16581628
if CVCUDA_AVAILABLE:

0 commit comments

Comments
 (0)