From b3f966e3dfc9995bce05f110b5b4e599936c6643 Mon Sep 17 00:00:00 2001 From: lwk <3098293798@qq.com> Date: Thu, 12 Mar 2026 21:42:30 +0800 Subject: [PATCH] feat: apply 01-seed model changes --- imutils/__init__.py | 3 + imutils/augmentation/__init__.py | 5 ++ imutils/augmentation/augmentation.py | 107 ++++++++++++++++++++++ imutils/bbox/__init__.py | 5 ++ imutils/bbox/bbox.py | 129 +++++++++++++++++++++++++++ imutils/convenience.py | 22 ++++- imutils/feature/rootsift.py | 2 +- imutils/video/count_frames.py | 6 +- imutils/video/videostream.py | 4 +- imutils/video/webcamvideostream.py | 108 +++++++++++++++++++++- 10 files changed, 381 insertions(+), 10 deletions(-) create mode 100644 imutils/augmentation/__init__.py create mode 100644 imutils/augmentation/augmentation.py create mode 100644 imutils/bbox/__init__.py create mode 100644 imutils/bbox/bbox.py diff --git a/imutils/__init__.py b/imutils/__init__.py index e5643ca..efc6c59 100755 --- a/imutils/__init__.py +++ b/imutils/__init__.py @@ -17,7 +17,10 @@ from .convenience import is_cv2 from .convenience import is_cv3 from .convenience import is_cv4 +from .convenience import is_cv5 from .convenience import check_opencv_version from .convenience import build_montages from .convenience import adjust_brightness_contrast from .meta import find_function +from . import bbox +from . 
# author: Adrian Rosebrock
# website: http://www.pyimagesearch.com

# import the necessary packages
import numpy as np


def random_brightness_contrast(image, brightness_range=(-50, 50), contrast_range=(0.5, 1.5)):
    """Randomly adjust the brightness and contrast of an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        brightness_range: (min, max) additive brightness offset.
        contrast_range: (min, max) multiplicative contrast factor.

    Returns:
        numpy.ndarray: New augmented image (the input is not modified).
    """
    # OpenCV is only required by this drawing/scaling helper, so import it
    # lazily -- the noise and cutout helpers below are pure NumPy
    import cv2

    # draw a random brightness offset and contrast gain
    brightness = np.random.uniform(brightness_range[0], brightness_range[1])
    contrast = np.random.uniform(contrast_range[0], contrast_range[1])

    # convertScaleAbs computes saturate(alpha * image + beta) and returns a
    # brand-new array, so no defensive copy of the input is needed
    return cv2.convertScaleAbs(image, alpha=contrast, beta=brightness)


def add_gaussian_noise(image, mean=0, std=25):
    """Add Gaussian noise to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        mean: Mean of the Gaussian distribution.
        std: Standard deviation of the Gaussian distribution.

    Returns:
        numpy.ndarray: New uint8 image with Gaussian noise added.
    """
    # draw the noise in float space: casting the signed noise straight to
    # uint8 (as the previous implementation did) wraps negative samples
    # around to large positive values and corrupts the noise distribution,
    # so add first and clip back to the valid pixel range instead
    noise = np.random.normal(mean, std, image.shape)
    noisy = np.clip(image.astype(np.float64) + noise, 0, 255)

    return noisy.astype(np.uint8)


def add_salt_pepper_noise(image, salt_prob=0.02, pepper_prob=0.02):
    """Add salt and pepper noise to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        salt_prob: Probability of salt noise (white pixels).
        pepper_prob: Probability of pepper noise (black pixels).

    Returns:
        numpy.ndarray: New image with salt and pepper noise added.
    """
    # copy so the caller's image is left untouched (masks mutate in place)
    image = image.copy()

    # salt: independently flip each pixel location to white
    salt_mask = np.random.random(image.shape[:2]) < salt_prob
    image[salt_mask] = 255

    # pepper: independently flip each pixel location to black (a pepper
    # hit on a salted pixel wins, which is the conventional behavior)
    pepper_mask = np.random.random(image.shape[:2]) < pepper_prob
    image[pepper_mask] = 0

    return image


def cutout(image, num_holes=1, max_size=0.3):
    """Apply Cutout augmentation (random black rectangles) to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        num_holes: Number of holes to cut out.
        max_size: Maximum hole size as a fraction of the shorter image side.

    Returns:
        numpy.ndarray: New image with cutout holes filled with black.
    """
    # copy so the caller's image is left untouched
    image = image.copy()

    h, w = image.shape[:2]
    max_hole_size = int(min(w, h) * max_size)

    # np.random.randint(1, high) requires high > 1; for tiny images (or a
    # tiny max_size) there is no room for a hole, so return the unmodified
    # copy instead of raising ValueError as the previous version did
    if max_hole_size < 2:
        return image

    for _ in range(num_holes):
        # random hole size in [1, max_hole_size) and a top-left corner
        # chosen so the hole stays fully inside the image
        hole_size = np.random.randint(1, max_hole_size)
        x = np.random.randint(0, w - hole_size)
        y = np.random.randint(0, h - hole_size)

        # fill the hole with black
        image[y:y + hole_size, x:x + hole_size] = 0

    return image
# author: Adrian Rosebrock
# website: http://www.pyimagesearch.com


def iou(boxA, boxB):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    Args:
        boxA: Bounding box in format [x1, y1, x2, y2].
        boxB: Bounding box in format [x1, y1, x2, y2].

    Returns:
        float: IoU value between 0 and 1.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of the intersection rectangle; the +1 treats the
    # coordinates as pixel-inclusive, which also keeps both box areas
    # strictly positive (no division by zero below)
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # compute the area of both the prediction and ground-truth rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # intersection over union: the intersection area divided by the union
    # area (sum of both areas minus the intersection area)
    return interArea / float(boxAArea + boxBArea - interArea)


def xywh_to_xyxy(box):
    """Convert a bounding box from [x, y, w, h] to [x1, y1, x2, y2].

    Args:
        box: Bounding box in format [x, y, w, h] where (x, y) is the
            top-left corner.

    Returns:
        list: Bounding box in format [x1, y1, x2, y2].
    """
    x, y, w, h = box
    return [x, y, x + w, y + h]


def xyxy_to_xywh(box):
    """Convert a bounding box from [x1, y1, x2, y2] to [x, y, w, h].

    Args:
        box: Bounding box in format [x1, y1, x2, y2].

    Returns:
        list: Bounding box in format [x, y, w, h] where (x, y) is the
            top-left corner.
    """
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]


def draw_bbox(image, box, label=None, color=(0, 255, 0), thickness=2, font_scale=0.5):
    """Draw a bounding box with an optional label on an image.

    Args:
        image: OpenCV image.
        box: Bounding box in format [x1, y1, x2, y2].
        label: Optional label text.
        color: BGR color tuple.
        thickness: Line thickness.
        font_scale: Font scale for the label.

    Returns:
        numpy.ndarray: New image with the bounding box drawn (the input is
        not modified).
    """
    # OpenCV is only needed for drawing; importing it lazily keeps the
    # pure-geometry helpers in this module usable without OpenCV
    import cv2

    # draw on a copy to avoid modifying the caller's image
    image = image.copy()

    # draw the bounding box
    cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, thickness)

    # if a label is provided, draw it with a filled background for contrast
    if label:
        # measure the label so the background rectangle fits it
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, 1)

        # place the label above the box, or just inside it when the box
        # touches the top edge of the image
        text_x = box[0]
        text_y = box[1] - 10 if box[1] - 10 > text_height else box[1] + text_height + 5

        # filled background rectangle, then the label text on top
        cv2.rectangle(image, (text_x, text_y - text_height - 5), (text_x + text_width + 5, text_y), color, -1)
        cv2.putText(image, label, (text_x + 5, text_y - 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), 1)

    return image


def scale_bbox(box, original_size, new_size):
    """Scale bounding-box coordinates to match a resized image.

    Args:
        box: Bounding box in format [x1, y1, x2, y2].
        original_size: Tuple of (original_width, original_height).
        new_size: Tuple of (new_width, new_height).

    Returns:
        list: Scaled bounding box in format [x1, y1, x2, y2]; coordinates
        are truncated to int.
    """
    # per-axis scaling factors between the two image sizes
    width_scale = new_size[0] / original_size[0]
    height_scale = new_size[1] / original_size[1]

    # apply the factors to the corner coordinates (x with width scale,
    # y with height scale)
    return [
        int(box[0] * width_scale),
        int(box[1] * height_scale),
        int(box[2] * width_scale),
        int(box[3] * height_scale),
    ]
def is_cv5(or_better=False):
    """Return True when the installed OpenCV major version is 5.

    Args:
        or_better: When True, also accept any major version newer than 5.

    Returns:
        bool: Whether the OpenCV version check passes.
    """
    # grab the OpenCV major version number
    major = get_opencv_major_version()

    # "or better" accepts the whole 5.x-and-newer range; otherwise the
    # caller wants strictly OpenCV 5
    return major >= 5 if or_better else major == 5
4f9de0b..7422c8b 100644 --- a/imutils/video/videostream.py +++ b/imutils/video/videostream.py @@ -3,7 +3,7 @@ class VideoStream: def __init__(self, src=0, usePiCamera=False, resolution=(320, 240), - framerate=32, **kwargs): + framerate=32, retry_interval=2, timeout=10, skip_frames=0, **kwargs): # check to see if the picamera module should be used if usePiCamera: # only import the picamera packages unless we are @@ -20,7 +20,7 @@ def __init__(self, src=0, usePiCamera=False, resolution=(320, 240), # otherwise, we are using OpenCV so initialize the webcam # stream else: - self.stream = WebcamVideoStream(src=src) + self.stream = WebcamVideoStream(src=src, retry_interval=retry_interval, timeout=timeout, skip_frames=skip_frames) def start(self): # start the threaded video stream diff --git a/imutils/video/webcamvideostream.py b/imutils/video/webcamvideostream.py index dbe8751..67594e4 100644 --- a/imutils/video/webcamvideostream.py +++ b/imutils/video/webcamvideostream.py @@ -1,11 +1,13 @@ # import the necessary packages from threading import Thread import cv2 +import time class WebcamVideoStream: - def __init__(self, src=0, name="WebcamVideoStream"): + def __init__(self, src=0, name="WebcamVideoStream", retry_interval=2, timeout=10, skip_frames=0): # initialize the video camera stream and read the first frame # from the stream + self.src = src self.stream = cv2.VideoCapture(src) (self.grabbed, self.frame) = self.stream.read() @@ -15,6 +17,14 @@ def __init__(self, src=0, name="WebcamVideoStream"): # initialize the variable used to indicate if the thread should # be stopped self.stopped = False + + # network fault tolerance parameters + self.retry_interval = retry_interval # seconds between reconnection attempts + self.timeout = timeout # seconds before timing out a connection attempt + + # frame skipping parameters + self.skip_frames = skip_frames # number of frames to skip between captures + self.frame_count = 0 def start(self): # start the thread to read frames from 
def _reconnect(self):
    """Attempt to re-open the video stream after a failed read.

    Releases the current capture handle, waits ``retry_interval`` seconds,
    re-opens ``self.src``, then polls for up to ``timeout`` seconds until a
    frame can be grabbed. On success the fresh frame is published on the
    instance; on failure a diagnostic is printed and the next read cycle
    may retry.
    """
    print(f"Attempting to reconnect to stream {self.src}...")

    # drop the stale capture handle before re-opening
    if self.stream.isOpened():
        self.stream.release()

    # back off before hammering the source again
    time.sleep(self.retry_interval)

    # re-open the stream from the original source
    self.stream = cv2.VideoCapture(self.src)

    # poll for a frame until the timeout elapses to verify the connection
    deadline = time.time() + self.timeout
    while time.time() < deadline:
        (grabbed, frame) = self.stream.read()
        if grabbed:
            # publish the fresh frame so readers see live data again
            self.grabbed = grabbed
            self.frame = frame
            print(f"Successfully reconnected to stream {self.src}")
            return
        time.sleep(0.1)

    print(f"Failed to reconnect to stream {self.src}")