maooc · maooc · Mar 12, 2026
diff --git a/imutils/__init__.py b/imutils/__init__.py
@@ -14,10 +14,37 @@
 from .convenience import url_to_image
 from .convenience import auto_canny
 from .convenience import grab_contours
+from .convenience import find_contours
 from .convenience import is_cv2
 from .convenience import is_cv3
 from .convenience import is_cv4
-from .convenience import check_opencv_version
+from .convenience import is_cv5
 from .convenience import build_montages
 from .convenience import adjust_brightness_contrast
 from .meta import find_function
+
+# import bbox module functions
+from .bbox import iou
+from .bbox import xywh_to_xyxy
+from .bbox import xyxy_to_xywh
+from .bbox import center_xywh_to_xyxy
+from .bbox import xyxy_to_center_xywh
+from .bbox import draw_bbox
+from .bbox import resize_bbox
+from .bbox import clip_bbox
+from .bbox import bbox_area
+from .bbox import batch_iou
+
+# import augmentation module functions
+from .augmentation import random_brightness_contrast
+from .augmentation import add_gaussian_noise
+from .augmentation import add_salt_pepper_noise
+from .augmentation import cutout
+from .augmentation import random_flip
+from .augmentation import random_rotate
+from .augmentation import random_crop
+from .augmentation import random_blur
+from .augmentation import color_jitter
+from .augmentation import mixup
+from .augmentation import random_perspective
+from .augmentation import apply_augmentations
diff --git a/imutils/augmentation.py b/imutils/augmentation.py
@@ -0,0 +1,326 @@
+# author:    PyImageSearch
+# website:   http://www.pyimagesearch.com
+
+# import the necessary packages
+import numpy as np
+import cv2
+import random
+
+def random_brightness_contrast(image, brightness_range=(-30, 30), contrast_range=(0.8, 1.2)):
+    """
+    Randomly adjust the brightness and contrast of an image.
+
+    Computes ``image * contrast + brightness`` and saturates to [0, 255].
+
+    :param image: input image (OpenCV BGR format)
+    :param brightness_range: (min, max) of the additive brightness delta
+    :param contrast_range: (min, max) of the multiplicative contrast gain
+    :return: adjusted image as a new uint8 array
+    """
+    # the offset is drawn first and the gain second, both from ``random``
+    brightness = random.uniform(brightness_range[0], brightness_range[1])
+    contrast = random.uniform(contrast_range[0], contrast_range[1])
+
+    # cv2.convertScaleAbs takes the absolute value before saturating, so dark
+    # pixels pushed below zero came back brighter; clip at 0 instead
+    adjusted = image.astype(np.float32) * contrast + brightness
+    return np.clip(np.rint(adjusted), 0, 255).astype(np.uint8)
+
+
+def add_gaussian_noise(image, mean=0, std=25):
+    """
+    Add Gaussian noise to an image.
+
+    :param image: input image
+    :param mean: mean of the noise distribution
+    :param std: standard deviation of the noise distribution
+    :return: noisy image as a uint8 array, saturated to [0, 255]
+    """
+    as_float = image.astype(np.float32)
+
+    # one independent sample per array element, kept in float32 so the sum
+    # can leave the 8-bit range before it is saturated
+    samples = np.random.normal(mean, std, size=as_float.shape)
+    perturbed = as_float + samples.astype(np.float32)
+
+    # saturate, then drop back to 8 bits
+    saturated = np.clip(perturbed, 0, 255)
+    return saturated.astype(np.uint8)
+
+
+def add_salt_pepper_noise(image, salt_prob=0.01, pepper_prob=0.01):
+    """
+    Add salt-and-pepper noise to an image.
+
+    Pixel positions are drawn with replacement, so fewer than
+    ``rows * cols * prob`` distinct pixels may end up changed.
+
+    :param image: input image
+    :param salt_prob: number of white (255) draws as a fraction of rows * cols
+    :param pepper_prob: number of black (0) draws as a fraction of rows * cols
+    :return: noisy copy of the image
+    """
+    rows, cols = image.shape[0], image.shape[1]
+    pixel_count = rows * cols
+    # work on a copy so the caller's array is never modified
+    output = image.copy()
+
+    # salt first, pepper second, so a pepper draw landing on a salted pixel
+    # wins; each pass draws its row indices before its column indices
+    for level, prob in ((255, salt_prob), (0, pepper_prob)):
+        count = int(pixel_count * prob)
+        ys = np.random.randint(0, rows, count)
+        xs = np.random.randint(0, cols, count)
+
+        # indexing only the first two axes writes every channel of a
+        # colour pixel as well as a single grayscale sample
+        output[ys, xs] = level
+
+    return output
+
+
+def cutout(image, num_holes=1, max_h_size=8, max_w_size=8, fill_value=0):
+    """
+    Cutout augmentation: mask random rectangles of the image.
+
+    Reference: "Improved Regularization of Convolutional Neural Networks
+    with Cutout".
+
+    :param image: input image
+    :param num_holes: number of rectangles to mask
+    :param max_h_size: maximum rectangle height in pixels
+    :param max_w_size: maximum rectangle width in pixels
+    :param fill_value: value written into masked pixels (default 0, black)
+    :return: masked copy of the image
+    """
+    img_h, img_w = image.shape[:2]
+    masked = image.copy()
+
+    for _ in range(num_holes):
+        # rectangle size: height is drawn before width
+        rect_h = random.randint(1, max_h_size)
+        rect_w = random.randint(1, max_w_size)
+        # a rectangle not smaller than the image is pinned to the origin
+        top = 0
+        if img_h > rect_h:
+            top = random.randint(0, img_h - rect_h)
+        left = 0
+        if img_w > rect_w:
+            left = random.randint(0, img_w - rect_w)
+
+        # clamp to the image; slicing two axes also fills every channel
+        bottom = min(top + rect_h, img_h)
+        right = min(left + rect_w, img_w)
+        masked[top:bottom, left:right] = fill_value
+
+    return masked
+
+
+def random_flip(image, flip_code=1):
+    """
+    Flip an image with 50% probability.
+
+    :param image: input image
+    :param flip_code: cv2.flip code (0 = vertical, 1 = horizontal, -1 = both)
+    :return: flipped image, or the input object itself when no flip is drawn
+    """
+    # a single uniform draw in [0, 1) decides whether the flip happens
+    keep_original = random.random() <= 0.5
+    return image if keep_original else cv2.flip(image, flip_code)
+
+
+def random_rotate(image, angle_range=(-15, 15), scale=1.0):
+    """
+    Rotate an image about its centre by a random angle.
+
+    :param image: input image
+    :param angle_range: (min, max) rotation angle passed to OpenCV, in degrees
+    :param scale: isotropic scale factor applied with the rotation
+    :return: rotated image with the same width and height as the input
+    """
+    low, high = angle_range[0], angle_range[1]
+    theta = random.uniform(low, high)
+
+    height, width = image.shape[:2]
+    out_size = (width, height)
+
+    # integer pivot, x before y as OpenCV expects
+    pivot = (width // 2, height // 2)
+    affine = cv2.getRotationMatrix2D(pivot, theta, scale)
+
+    # areas uncovered by the rotation are filled with the constant 128
+    return cv2.warpAffine(image, affine, out_size, borderValue=(128, 128, 128),
+                          borderMode=cv2.BORDER_CONSTANT)
+
+
+def random_crop(image, crop_ratio=(0.8, 1.0)):
+    """
+    Crop a random window from the image and resize it back to full size.
+
+    :param image: input image
+    :param crop_ratio: (min, max) side length of the window as a fraction
+        of the original height and width
+    :return: cropped region resized to the original width and height
+    """
+    h, w = image.shape[:2]
+
+    # one ratio is shared by both axes, so the window keeps the aspect ratio
+    ratio = random.uniform(crop_ratio[0], crop_ratio[1])
+
+    # keep the window between 1 pixel and the full side: int() truncation of
+    # a small ratio (or a tiny image) used to give a 0-pixel window, which
+    # produced an empty crop that cv2.resize rejects
+    new_h = min(max(int(h * ratio), 1), h)
+    new_w = min(max(int(w * ratio), 1), w)
+
+    # pick the top-left corner; a full-size window needs no random draw
+    y1 = random.randint(0, h - new_h) if h > new_h else 0
+    x1 = random.randint(0, w - new_w) if w > new_w else 0
+
+    cropped = image[y1:y1 + new_h, x1:x1 + new_w]
+    return cv2.resize(cropped, (w, h))
+
+
+def random_blur(image, kernel_size_range=(3, 7)):
+    """
+    Blur an image with 50% probability.
+
+    :param image: input image
+    :param kernel_size_range: (min, max) kernel size; each bound is halved
+        with floor division and the drawn value k gives a kernel of 2k + 1
+    :return: blurred image, or the input object itself when no blur is drawn
+    """
+    if random.random() <= 0.5:
+        return image
+
+    # drawing the half-width keeps the kernel size odd
+    half_min, half_max = kernel_size_range[0] // 2, kernel_size_range[1] // 2
+    ksize = 1 + 2 * random.randint(half_min, half_max)
+    # a second coin flip picks Gaussian blur over median blur
+    use_gaussian = random.random() > 0.5
+    return (cv2.GaussianBlur(image, (ksize, ksize), 0) if use_gaussian
+            else cv2.medianBlur(image, ksize))
+
+
+def color_jitter(image, hue_range=(-10, 10), saturation_range=(-30, 30), value_range=(-30, 30)):
+    """
+    Colour jitter: randomly shift hue, saturation and value.
+
+    :param image: input image (BGR format)
+    :param hue_range: (min, max) shift added to the hue channel
+    :param saturation_range: (min, max) shift added to the saturation channel
+    :param value_range: (min, max) shift added to the value channel
+    :return: jittered image converted back to BGR
+    """
+    # float32 copy so the shifts can leave the 8-bit range before wrapping
+    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float32)
+
+    # draw order is hue, saturation, value
+    ranges = (hue_range, saturation_range, value_range)
+    d_hue, d_sat, d_val = [random.uniform(lo_hi[0], lo_hi[1]) for lo_hi in ranges]
+
+    # hue wraps modulo 180; saturation and value saturate at [0, 255]
+    hsv[..., 0] = (hsv[..., 0] + d_hue) % 180
+    hsv[..., 1] = np.clip(hsv[..., 1] + d_sat, 0, 255)
+    hsv[..., 2] = np.clip(hsv[..., 2] + d_val, 0, 255)
+
+    jittered = hsv.astype(np.uint8)
+    return cv2.cvtColor(jittered, cv2.COLOR_HSV2BGR)
+
+
+def mixup(image1, image2, alpha=0.4):
+    """
+    Mixup augmentation: blend two images with a random convex weight.
+
+    Reference: "mixup: Beyond Empirical Risk Minimization".
+
+    :param image1: first image; its shape defines the output shape
+    :param image2: second image, resized to image1's width/height if needed
+    :param alpha: parameter of the symmetric Beta(alpha, alpha) distribution
+    :return: blended image as a uint8 array
+    """
+    # bring the second image to the first one's spatial size
+    if image1.shape != image2.shape:
+        image2 = cv2.resize(image2, (image1.shape[1], image1.shape[0]))
+
+    lam = np.random.beta(alpha, alpha)
+    mixed = lam * image1.astype(np.float32) + (1 - lam) * image2.astype(np.float32)
+
+    # round before the cast: astype(uint8) truncates, so float error such as
+    # 254.99998 became 254 and blending an image with itself could darken it;
+    # the clip keeps non-8-bit inputs from wrapping around in the cast
+    return np.clip(np.rint(mixed), 0, 255).astype(np.uint8)
+
+
+def random_perspective(image, distortion_scale=0.2):
+    """
+    Apply a random perspective warp.
+
+    Each corner of the image is moved inwards by a random amount and the
+    whole image is warped onto the resulting quadrilateral.
+
+    :param image: input image
+    :param distortion_scale: largest corner shift, as a fraction of the
+        shorter image side
+    :return: warped image with the same width and height as the input
+    """
+    rows, cols = image.shape[:2]
+    max_shift = min(rows, cols) * distortion_scale
+
+    def shift():
+        # inward offset for one coordinate of one corner
+        return random.uniform(0, max_shift)
+
+    # source corners: top-left, top-right, bottom-right, bottom-left
+    src = np.float32([[0, 0], [cols, 0], [cols, rows], [0, rows]])
+
+    # destination corners in the same order; x is drawn before y each time
+    top_left = [shift(), shift()]
+    top_right = [cols - shift(), shift()]
+    bottom_right = [cols - shift(), rows - shift()]
+    bottom_left = [shift(), rows - shift()]
+    dst = np.float32([top_left, top_right, bottom_right, bottom_left])
+
+    homography = cv2.getPerspectiveTransform(src, dst)
+
+    # pixels outside the warped quadrilateral get the constant value 128
+    return cv2.warpPerspective(image, homography, (cols, rows),
+                               borderMode=cv2.BORDER_CONSTANT, borderValue=(128, 128, 128))
+
+
+def apply_augmentations(image, aug_list=None):
+    """
+    Apply a sequence of augmentations to an image, in the order given.
+
+    :param image: input image
+    :param aug_list: names among 'flip', 'rotate', 'brightness', 'cutout',
+        'gaussian_noise', 'salt_pepper', 'blur', 'color_jitter', 'crop' and
+        'perspective'; None means ['flip', 'brightness', 'cutout']
+    :return: augmented image; each step runs with its default parameters
+    :raises ValueError: if aug_list contains an unsupported name
+    """
+    if aug_list is None:
+        aug_list = ['flip', 'brightness', 'cutout']
+
+    # dispatch table: augmentation name -> function
+    operations = {
+        'flip': random_flip,
+        'rotate': random_rotate,
+        'brightness': random_brightness_contrast,
+        'cutout': cutout,
+        'gaussian_noise': add_gaussian_noise,
+        'salt_pepper': add_salt_pepper_noise,
+        'blur': random_blur,
+        'color_jitter': color_jitter,
+        'crop': random_crop,
+        'perspective': random_perspective,
+    }
+
+    result = image.copy()
+    for aug in aug_list:
+        # a misspelled name used to be skipped silently; fail loudly instead
+        if aug not in operations:
+            raise ValueError("unknown augmentation: %r" % (aug,))
+        result = operations[aug](result)
+
+    return result