maooc · maooc · Mar 12, 2026
diff --git a/imutils/__init__.py b/imutils/__init__.py
@@ -17,7 +17,10 @@
 from .convenience import is_cv2
 from .convenience import is_cv3
 from .convenience import is_cv4
+from .convenience import is_cv5
 from .convenience import check_opencv_version
 from .convenience import build_montages
 from .convenience import adjust_brightness_contrast
 from .meta import find_function
+from . import bbox
+from . import augmentation
diff --git a/imutils/augmentation/__init__.py b/imutils/augmentation/__init__.py
@@ -0,0 +1,5 @@
+# author:    Adrian Rosebrock
+# website:   http://www.pyimagesearch.com
+
+# import the necessary packages
+from .augmentation import *
diff --git a/imutils/augmentation/augmentation.py b/imutils/augmentation/augmentation.py
@@ -0,0 +1,107 @@
+# author:    Adrian Rosebrock
+# website:   http://www.pyimagesearch.com
+
+# import the necessary packages
+import cv2
+import numpy as np
+
+def random_brightness_contrast(image, brightness_range=(-50, 50), contrast_range=(0.5, 1.5)):
+    """
+    Randomly adjust brightness and contrast of an image.
+
+    A brightness offset and a contrast gain are drawn uniformly from the
+    given ranges and every pixel is mapped to
+    ``contrast * pixel + brightness``. The result is rounded and clipped
+    to ``[0, 255]``.
+
+    Args:
+        image: OpenCV BGR image (assumed to be 8-bit)
+        brightness_range: Tuple of (min, max) brightness adjustment
+        contrast_range: Tuple of (min, max) contrast adjustment
+
+    Returns:
+        numpy.ndarray: Augmented ``uint8`` image; the input is not modified
+    """
+    # randomly generate brightness and contrast values (brightness is drawn
+    # first so seeded runs consume the random stream in a fixed order)
+    brightness = np.random.uniform(brightness_range[0], brightness_range[1])
+    contrast = np.random.uniform(contrast_range[0], contrast_range[1])
+
+    # apply the adjustment in floating point and clamp to the 8-bit range;
+    # cv2.convertScaleAbs is not suitable here because it takes the absolute
+    # value of negative results, which turns dark pixels bright instead of
+    # clamping them to 0
+    adjusted = image.astype(np.float32) * contrast + brightness
+    adjusted = np.clip(np.rint(adjusted), 0, 255).astype(np.uint8)
+
+    return adjusted
+
+def add_gaussian_noise(image, mean=0, std=25):
+    """
+    Add Gaussian noise to an image.
+
+    The noise is added in floating point, then the result is rounded and
+    clipped to ``[0, 255]`` so pixels can be both darkened and brightened.
+
+    Args:
+        image: OpenCV BGR image (assumed to be 8-bit)
+        mean: Mean of Gaussian distribution
+        std: Standard deviation of Gaussian distribution
+
+    Returns:
+        numpy.ndarray: ``uint8`` image with Gaussian noise added; the input
+        is not modified
+    """
+    # generate signed Gaussian noise; it must stay in floating point because
+    # casting it to uint8 would wrap/truncate the negative samples, so the
+    # noise could only ever brighten the image
+    noise = np.random.normal(mean, std, image.shape)
+
+    # add noise to the image, then round and clamp back to the 8-bit range
+    noisy = image.astype(np.float64) + noise
+    noisy = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
+
+    return noisy
+
+def add_salt_pepper_noise(image, salt_prob=0.02, pepper_prob=0.02):
+    """
+    Corrupt an image with salt (white) and pepper (black) pixels.
+
+    Two independent random masks over the first two image dimensions are
+    drawn. Pepper is applied after salt, so a pixel selected by both masks
+    ends up black.
+
+    Args:
+        image: OpenCV BGR image
+        salt_prob: Probability that a pixel is set to 255
+        pepper_prob: Probability that a pixel is set to 0
+
+    Returns:
+        numpy.ndarray: Noisy copy of the image; the input is not modified
+    """
+    # work on a copy so the caller's image stays untouched
+    result = image.copy()
+    plane = result.shape[:2]
+
+    # salt: selected pixels become white
+    result[np.random.random(plane) < salt_prob] = 255
+
+    # pepper: selected pixels become black
+    result[np.random.random(plane) < pepper_prob] = 0
+
+    return result
+
+def cutout(image, num_holes=1, max_size=0.3):
+    """
+    Apply Cutout augmentation to an image.
+
+    Square regions of random size and position are filled with black.
+
+    Args:
+        image: OpenCV BGR image
+        num_holes: Number of holes to cut out
+        max_size: Maximum side length of a hole as a fraction of the
+            shorter image side; values <= 0 leave the image unchanged
+
+    Returns:
+        numpy.ndarray: Copy of the image with cutout holes; the input is
+        not modified
+    """
+    # make a copy of the image
+    image = image.copy()
+
+    h, w = image.shape[:2]
+    short_side = min(w, h)
+
+    # nothing can be cut from an empty image or with a non-positive fraction
+    if short_side < 1 or max_size <= 0:
+        return image
+
+    # largest allowed hole side, clamped to [1, short_side] so that tiny
+    # images, small fractions and fractions above 1 all yield a valid
+    # sampling range instead of raising ValueError
+    max_hole_size = min(max(int(short_side * max_size), 1), short_side)
+
+    for _ in range(num_holes):
+        # np.random.randint excludes its upper bound, hence the "+ 1":
+        # holes may reach the maximum size and touch the right/bottom edge
+        hole_size = np.random.randint(1, max_hole_size + 1)
+        x = np.random.randint(0, w - hole_size + 1)
+        y = np.random.randint(0, h - hole_size + 1)
+
+        # fill hole with black
+        image[y:y + hole_size, x:x + hole_size] = 0
+
+    return image
diff --git a/imutils/bbox/__init__.py b/imutils/bbox/__init__.py
@@ -0,0 +1,5 @@
+# author:    Adrian Rosebrock
+# website:   http://www.pyimagesearch.com
+
+# import the necessary packages
+from .bbox import *
diff --git a/imutils/bbox/bbox.py b/imutils/bbox/bbox.py
@@ -0,0 +1,129 @@
+# author:    Adrian Rosebrock
+# website:   http://www.pyimagesearch.com
+
+# import the necessary packages
+import cv2
+import numpy as np
+
+def iou(boxA, boxB):
+    """
+    Calculate the Intersection over Union (IoU) of two bounding boxes.
+
+    Coordinates are treated as inclusive pixel indices, so a box spans
+    ``x2 - x1 + 1`` by ``y2 - y1 + 1`` pixels.
+
+    Args:
+        boxA: Bounding box in format [x1, y1, x2, y2]
+        boxB: Bounding box in format [x1, y1, x2, y2]
+
+    Returns:
+        float: IoU value between 0 and 1 for well-formed boxes; 0.0 when
+        the union area is not positive (degenerate or inverted boxes)
+    """
+    # determine the (x, y)-coordinates of the intersection rectangle
+    xA = max(boxA[0], boxB[0])
+    yA = max(boxA[1], boxB[1])
+    xB = min(boxA[2], boxB[2])
+    yB = min(boxA[3], boxB[3])
+
+    # compute the area of the intersection rectangle (zero if disjoint)
+    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
+
+    # compute the area of both the prediction and ground-truth
+    # rectangles
+    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
+    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
+
+    # the union is the sum of both areas minus the intersection area
+    unionArea = boxAArea + boxBArea - interArea
+
+    # degenerate input would otherwise divide by zero
+    if unionArea <= 0:
+        return 0.0
+
+    # return the intersection over union value
+    return interArea / float(unionArea)
+
+def xywh_to_xyxy(box):
+    """
+    Turn a top-left/size box into a corner-to-corner box.
+
+    Args:
+        box: Bounding box in format [x, y, w, h] where (x, y) is the top-left corner
+
+    Returns:
+        list: Bounding box in format [x1, y1, x2, y2]
+    """
+    left, top, width, height = box
+
+    # the opposite corner is the top-left corner offset by the box size
+    right = left + width
+    bottom = top + height
+
+    return [left, top, right, bottom]
+
+def xyxy_to_xywh(box):
+    """
+    Turn a corner-to-corner box into a top-left/size box.
+
+    Args:
+        box: Bounding box in format [x1, y1, x2, y2]
+
+    Returns:
+        list: Bounding box in format [x, y, w, h] where (x, y) is the top-left corner
+    """
+    left, top, right, bottom = box
+
+    # the size is the distance between the two corners
+    width = right - left
+    height = bottom - top
+
+    return [left, top, width, height]
+
+def draw_bbox(image, box, label=None, color=(0, 255, 0), thickness=2, font_scale=0.5):
+    """
+    Render a bounding box, and optionally a text label, onto a copy of an image.
+
+    The label is drawn in black on a filled background of ``color``. It is
+    placed above the box when there is enough room, otherwise just below
+    the top edge of the box.
+
+    Args:
+        image: OpenCV image
+        box: Bounding box in format [x1, y1, x2, y2]
+        label: Optional label text; nothing is drawn for a falsy label
+        color: BGR color tuple used for the box and the label background
+        thickness: Line thickness of the box outline
+        font_scale: Font scale for label
+
+    Returns:
+        numpy.ndarray: Copy of the image with the bounding box drawn
+    """
+    # draw on a copy so the caller's image is left untouched
+    canvas = image.copy()
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    left, top = box[0], box[1]
+
+    # box outline
+    cv2.rectangle(canvas, (left, top), (box[2], box[3]), color, thickness)
+
+    # without a label there is nothing more to draw
+    if not label:
+        return canvas
+
+    # measure the rendered label
+    (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, 1)
+
+    # bottom edge of the label: above the box if it fits, otherwise inside
+    if top - 10 > text_height:
+        text_bottom = top - 10
+    else:
+        text_bottom = top + text_height + 5
+
+    # filled background behind the text
+    background_start = (left, text_bottom - text_height - 5)
+    background_end = (left + text_width + 5, text_bottom)
+    cv2.rectangle(canvas, background_start, background_end, color, -1)
+
+    # the label itself, in black
+    cv2.putText(canvas, label, (left + 5, text_bottom - 5), font, font_scale, (0, 0, 0), 1)
+
+    return canvas
+
+def scale_bbox(box, original_size, new_size):
+    """
+    Map bounding box coordinates from an image to its resized version.
+
+    Each coordinate is multiplied by the matching width or height ratio
+    and truncated to an integer with ``int()``.
+
+    Args:
+        box: Bounding box in format [x1, y1, x2, y2]
+        original_size: Tuple of (original_width, original_height)
+        new_size: Tuple of (new_width, new_height)
+
+    Returns:
+        list: Scaled bounding box in format [x1, y1, x2, y2]
+    """
+    # ratio between the new and the original size along each axis
+    ratio_x = new_size[0] / original_size[0]
+    ratio_y = new_size[1] / original_size[1]
+
+    # x coordinates use the width ratio, y coordinates the height ratio
+    factors = (ratio_x, ratio_y, ratio_x, ratio_y)
+
+    return [int(box[idx] * factor) for idx, factor in enumerate(factors)]
diff --git a/imutils/convenience.py b/imutils/convenience.py
@@ -62,7 +62,7 @@ def rotate_bound(image, angle):
     # perform the actual rotation and return the image
     return cv2.warpAffine(image, M, (nW, nH))
 
-def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
+def resize(image, width=None, height=None, inter=cv2.INTER_AREA, bboxes=None):
     # initialize the dimensions of the image to be resized and
     # grab the image size
     dim = None
@@ -90,6 +90,15 @@ def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
     # resize the image
     resized = cv2.resize(image, dim, interpolation=inter)
 
+    # if bounding boxes are provided, scale them too
+    if bboxes is not None:
+        from . import bbox
+        scaled_bboxes = []
+        for box in bboxes:
+            scaled_box = bbox.scale_bbox(box, (w, h), dim)
+            scaled_bboxes.append(scaled_box)
+        return resized, scaled_bboxes
+
     # return the resized image
     return resized
 
@@ -207,6 +216,17 @@ def is_cv4(or_better=False):
     # otherwise we want to check for *strictly* OpenCV 4
     return major == 4
 
+def is_cv5(or_better=False):
+    # look up the major version number of the installed OpenCV
+    version = get_opencv_major_version()
+
+    # with or_better set, any major version from 5 upwards qualifies;
+    # otherwise the major version must be *exactly* 5
+    return version >= 5 if or_better else version == 5
+
 def get_opencv_major_version(lib=None):
     # if the supplied library is None, import OpenCV
     if lib is None:

diff --git a/imutils/feature/rootsift.py b/imutils/feature/rootsift.py
@@ -10,7 +10,7 @@ def __init__(self):
 		if is_cv2():
 			self.extractor = cv2.DescriptorExtractor_create("SIFT")
 
-		# otherwise initialize the SIFT feature extractor for OpenCV 3+
+		# otherwise initialize the SIFT feature extractor for OpenCV 3+, 4+, or 5+
 		else:
 			self.extractor = cv2.xfeatures2d.SIFT_create()
 

diff --git a/imutils/video/count_frames.py b/imutils/video/count_frames.py
@@ -1,5 +1,5 @@
 # import the necessary packages
-from ..convenience import is_cv3
+from ..convenience import is_cv3, is_cv4, is_cv5
 import cv2
 
 def count_frames(path, override=False):
@@ -21,8 +21,8 @@ def count_frames(path, override=False):
 		# or may fail entirely based on your which video codecs
 		# you have installed
 		try:
-			# check if we are using OpenCV 3
-			if is_cv3():
+			# check if we are using OpenCV 3, 4, or 5
+			if is_cv3() or is_cv4() or is_cv5():
 				total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 
 			# otherwise, we are using OpenCV 2.4

diff --git a/imutils/video/videostream.py b/imutils/video/videostream.py
@@ -3,7 +3,7 @@
 
 class VideoStream:
 	def __init__(self, src=0, usePiCamera=False, resolution=(320, 240),
-		framerate=32, **kwargs):
+		framerate=32, retry_interval=2, timeout=10, skip_frames=0, **kwargs):
 		# check to see if the picamera module should be used
 		if usePiCamera:
 			# only import the picamera packages unless we are
@@ -20,7 +20,7 @@ def __init__(self, src=0, usePiCamera=False, resolution=(320, 240),
 		# otherwise, we are using OpenCV so initialize the webcam
 		# stream
 		else:
-			self.stream = WebcamVideoStream(src=src)
+			self.stream = WebcamVideoStream(src=src, retry_interval=retry_interval, timeout=timeout, skip_frames=skip_frames)
 
 	def start(self):
 		# start the threaded video stream