Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion imutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,37 @@
from .convenience import url_to_image
from .convenience import auto_canny
from .convenience import grab_contours
from .convenience import find_contours
from .convenience import is_cv2
from .convenience import is_cv3
from .convenience import is_cv4
from .convenience import check_opencv_version
from .convenience import is_cv5
from .convenience import build_montages
from .convenience import adjust_brightness_contrast
from .meta import find_function

# import bbox module functions
from .bbox import iou
from .bbox import xywh_to_xyxy
from .bbox import xyxy_to_xywh
from .bbox import center_xywh_to_xyxy
from .bbox import xyxy_to_center_xywh
from .bbox import draw_bbox
from .bbox import resize_bbox
from .bbox import clip_bbox
from .bbox import bbox_area
from .bbox import batch_iou

# import augmentation module functions
from .augmentation import random_brightness_contrast
from .augmentation import add_gaussian_noise
from .augmentation import add_salt_pepper_noise
from .augmentation import cutout
from .augmentation import random_flip
from .augmentation import random_rotate
from .augmentation import random_crop
from .augmentation import random_blur
from .augmentation import color_jitter
from .augmentation import mixup
from .augmentation import random_perspective
from .augmentation import apply_augmentations
326 changes: 326 additions & 0 deletions imutils/augmentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
# author: PyImageSearch
# website: http://www.pyimagesearch.com

# import the necessary packages
import numpy as np
import cv2
import random

def random_brightness_contrast(image, brightness_range=(-30, 30), contrast_range=(0.8, 1.2)):
    """
    Randomly adjust the brightness and contrast of an image.

    Every output value is ``pixel * contrast + brightness``, rounded to the
    nearest integer and saturated to the 8-bit range [0, 255].

    :param image: input image (OpenCV BGR format)
    :param brightness_range: (low, high) range the additive brightness delta is drawn from
    :param contrast_range: (low, high) range the contrast multiplier is drawn from
    :return: the adjusted image as a uint8 array with the same shape as the input
    """
    # draw the random brightness delta first, then the contrast multiplier
    brightness = random.uniform(brightness_range[0], brightness_range[1])
    contrast = random.uniform(contrast_range[0], contrast_range[1])

    # contrast adjustment: image * contrast
    # brightness adjustment: + brightness
    scaled = np.asarray(image, dtype=np.float32) * contrast + brightness

    # Saturate rather than take the absolute value: with a negative
    # brightness delta, dark pixels go below zero and must clamp to 0
    # (an absolute value would fold them back up and brighten them).
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def add_gaussian_noise(image, mean=0, std=25):
    """
    Add Gaussian noise to an image.

    :param image: input image
    :param mean: mean of the noise distribution
    :param std: standard deviation of the noise distribution
    :return: the noisy image as a uint8 array
    """
    # one independent noise sample per array element
    gaussian = np.random.normal(mean, std, image.shape).astype(np.float32)

    # add in float32 so the sum can leave [0, 255] before it is clamped
    corrupted = np.clip(image.astype(np.float32) + gaussian, 0, 255)

    # back to 8-bit once the values are inside the valid range
    return corrupted.astype(np.uint8)


def add_salt_pepper_noise(image, salt_prob=0.01, pepper_prob=0.01):
    """
    Add salt-and-pepper noise to an image.

    :param image: input image
    :param salt_prob: fraction of pixels to turn into salt (white dots)
    :param pepper_prob: fraction of pixels to turn into pepper (black dots)
    :return: a noisy copy of the image
    """
    output = image.copy()
    rows, cols = image.shape[0], image.shape[1]
    pixel_count = rows * cols

    def _speckle(probability, value):
        # Pick random (row, col) positions -- repeats are possible -- and
        # overwrite them with the given value.
        count = int(pixel_count * probability)
        ys = np.random.randint(0, rows, count)
        xs = np.random.randint(0, cols, count)
        output[ys, xs] = value

    # salt (white) goes first; pepper (black) may overwrite some salt pixels
    _speckle(salt_prob, 255)
    _speckle(pepper_prob, 0)

    return output


def cutout(image, num_holes=1, max_h_size=8, max_w_size=8, fill_value=0):
    """
    Cutout augmentation - mask random rectangular regions of an image.

    Reference paper: "Improved Regularization of Convolutional Neural
    Networks with Cutout"

    :param image: input image
    :param num_holes: number of rectangles to mask
    :param max_h_size: maximum rectangle height
    :param max_w_size: maximum rectangle width
    :param fill_value: value written into the masked area (default 0, black)
    :return: a copy of the image with the rectangles filled in
    """
    height, width = image.shape[:2]
    masked = image.copy()

    for _ in range(num_holes):
        # random rectangle size, at least 1 pixel per side
        patch_h = random.randint(1, max_h_size)
        patch_w = random.randint(1, max_w_size)

        # random top-left corner; a rectangle that does not fit inside the
        # image is anchored at 0 and truncated by the min() below
        top = 0
        if height > patch_h:
            top = random.randint(0, height - patch_h)
        left = 0
        if width > patch_w:
            left = random.randint(0, width - patch_w)

        bottom = min(top + patch_h, height)
        right = min(left + patch_w, width)

        # slicing the two leading axes covers every channel of a colour
        # image as well as a plain 2-D image
        masked[top:bottom, left:right] = fill_value

    return masked


def random_flip(image, flip_code=1):
    """
    Flip an image with roughly 50% probability.

    :param image: input image
    :param flip_code: flip code passed to cv2.flip (0=vertical, 1=horizontal, -1=both)
    :return: the flipped image, or the input object itself when no flip is applied
    """
    # a draw of 0.5 or below means "leave the image alone"
    if random.random() <= 0.5:
        return image

    return cv2.flip(image, flip_code)


def random_rotate(image, angle_range=(-15, 15), scale=1.0):
    """
    Rotate an image by a random angle.

    :param image: input image
    :param angle_range: (low, high) range the rotation angle is drawn from
    :param scale: scale factor passed to the rotation matrix
    :return: the rotated image, same width and height as the input
    """
    theta = random.uniform(angle_range[0], angle_range[1])

    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)

    # build the 2x3 affine matrix for a rotation about the image centre
    matrix = cv2.getRotationMatrix2D(pivot, theta, scale)

    # keep the original canvas size; areas uncovered by the rotation are
    # filled with mid-grey
    return cv2.warpAffine(
        image,
        matrix,
        (width, height),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(128, 128, 128),
    )


def random_crop(image, crop_ratio=(0.8, 1.0)):
    """
    Crop a random window out of an image and resize it back to the
    original size.

    :param image: input image
    :param crop_ratio: (low, high) range of the crop size relative to the original
    :return: the cropped region, resized to the input's width and height
    """
    h, w = image.shape[:2]

    # pick a random crop ratio
    ratio = random.uniform(crop_ratio[0], crop_ratio[1])

    # int() truncates, so a small image or a small ratio could give a
    # zero-pixel side; the crop would then be empty and cv2.resize would
    # reject it. Keep at least one pixel per side.
    new_h = max(1, int(h * ratio))
    new_w = max(1, int(w * ratio))

    # pick a random top-left corner that keeps the window inside the image
    y1 = random.randint(0, h - new_h) if h > new_h else 0
    x1 = random.randint(0, w - new_w) if w > new_w else 0

    cropped = image[y1:y1 + new_h, x1:x1 + new_w]

    # scale back up to the original size
    return cv2.resize(cropped, (w, h))


def random_blur(image, kernel_size_range=(3, 7)):
    """
    Blur an image with roughly 50% probability.

    :param image: input image
    :param kernel_size_range: (low, high) range of the blur kernel size (odd numbers)
    :return: the blurred image, or the input object itself when no blur is applied
    """
    # a draw of 0.5 or below means "leave the image alone"
    if random.random() <= 0.5:
        return image

    # draw the half-size and expand it, so the kernel size is always odd
    half_min = kernel_size_range[0] // 2
    half_max = kernel_size_range[1] // 2
    ksize = 2 * random.randint(half_min, half_max) + 1

    # second coin flip chooses between Gaussian and median blur
    use_gaussian = random.random() > 0.5
    if use_gaussian:
        return cv2.GaussianBlur(image, (ksize, ksize), 0)
    return cv2.medianBlur(image, ksize)


def color_jitter(image, hue_range=(-10, 10), saturation_range=(-30, 30), value_range=(-30, 30)):
    """
    Colour jitter - randomly shift hue, saturation and value.

    :param image: input image (BGR format)
    :param hue_range: (low, high) range of the hue shift
    :param saturation_range: (low, high) range of the saturation shift
    :param value_range: (low, high) range of the value (brightness) shift
    :return: the adjusted image in BGR format
    """
    # work in HSV, in float so the shifts can leave the 8-bit range
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float32)

    # one random offset per channel, drawn in H, S, V order
    hue_shift = random.uniform(hue_range[0], hue_range[1])
    sat_shift = random.uniform(saturation_range[0], saturation_range[1])
    val_shift = random.uniform(value_range[0], value_range[1])

    # hue wraps around modulo 180; saturation and value are clamped
    hsv[..., 0] = (hsv[..., 0] + hue_shift) % 180
    hsv[..., 1] = np.clip(hsv[..., 1] + sat_shift, 0, 255)
    hsv[..., 2] = np.clip(hsv[..., 2] + val_shift, 0, 255)

    # back to 8-bit BGR
    jittered = hsv.astype(np.uint8)
    return cv2.cvtColor(jittered, cv2.COLOR_HSV2BGR)


def mixup(image1, image2, alpha=0.4):
    """
    Mixup augmentation - blend two images with a random weight.

    Reference paper: "mixup: Beyond Empirical Risk Minimization"

    :param image1: first image
    :param image2: second image
    :param alpha: parameter of the Beta(alpha, alpha) distribution the blend weight is drawn from
    :return: the blended image as a uint8 array
    """
    # bring the second image to the first one's width and height if the
    # shapes differ
    if image1.shape != image2.shape:
        target_size = (image1.shape[1], image1.shape[0])
        image2 = cv2.resize(image2, target_size)

    # blend weight for the first image; the second gets the remainder
    weight = np.random.beta(alpha, alpha)

    first = image1.astype(np.float32)
    second = image2.astype(np.float32)
    blended = weight * first + (1 - weight) * second

    return blended.astype(np.uint8)


def random_perspective(image, distortion_scale=0.2):
    """
    Apply a random perspective transform.

    :param image: input image
    :param distortion_scale: maximum corner displacement, as a fraction of the shorter image side
    :return: the warped image, same width and height as the input
    """
    h, w = image.shape[:2]

    # largest distance any corner may be pulled inwards
    margin = min(h, w) * distortion_scale

    # source quadrilateral: the four image corners, clockwise from top-left
    corners = np.float32([[0, 0], [w, 0], [w, h], [0, h]])

    # eight inward offsets, one x and one y per corner, in corner order
    jitter = [random.uniform(0, margin) for _ in range(8)]
    warped_corners = np.float32([
        [jitter[0], jitter[1]],
        [w - jitter[2], jitter[3]],
        [w - jitter[4], h - jitter[5]],
        [jitter[6], h - jitter[7]],
    ])

    # homography that maps the original corners onto the jittered ones
    matrix = cv2.getPerspectiveTransform(corners, warped_corners)

    # keep the canvas size; uncovered areas are filled with mid-grey
    return cv2.warpPerspective(
        image,
        matrix,
        (w, h),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(128, 128, 128),
    )


def apply_augmentations(image, aug_list=None):
    """
    Apply a sequence of augmentations to an image.

    :param image: input image
    :param aug_list: list of augmentation names, e.g.
        ['flip', 'rotate', 'brightness', 'cutout']; defaults to
        ['flip', 'brightness', 'cutout'] when None
    :return: the augmented image
    """
    if aug_list is None:
        aug_list = ['flip', 'brightness', 'cutout']

    # augmentation name -> function; each one is called with its defaults
    handlers = {
        'flip': random_flip,
        'rotate': random_rotate,
        'brightness': random_brightness_contrast,
        'cutout': cutout,
        'gaussian_noise': add_gaussian_noise,
        'salt_pepper': add_salt_pepper_noise,
        'blur': random_blur,
        'color_jitter': color_jitter,
        'crop': random_crop,
        'perspective': random_perspective,
    }

    # start from a copy of the input
    result = image.copy()

    for aug in aug_list:
        handler = handlers.get(aug)
        # names that are not in the table are skipped
        if handler is not None:
            result = handler(result)

    return result
Loading