From b3f966e3dfc9995bce05f110b5b4e599936c6643 Mon Sep 17 00:00:00 2001 From: lwk <3098293798@qq.com> Date: Thu, 12 Mar 2026 21:42:30 +0800 Subject: [PATCH] feat: apply 01-seed model changes --- imutils/__init__.py | 3 + imutils/augmentation/__init__.py | 5 ++ imutils/augmentation/augmentation.py | 107 ++++++++++++++++++++++ imutils/bbox/__init__.py | 5 ++ imutils/bbox/bbox.py | 129 +++++++++++++++++++++++++++ imutils/convenience.py | 22 ++++- imutils/feature/rootsift.py | 2 +- imutils/video/count_frames.py | 6 +- imutils/video/videostream.py | 4 +- imutils/video/webcamvideostream.py | 108 +++++++++++++++++++++- 10 files changed, 381 insertions(+), 10 deletions(-) create mode 100644 imutils/augmentation/__init__.py create mode 100644 imutils/augmentation/augmentation.py create mode 100644 imutils/bbox/__init__.py create mode 100644 imutils/bbox/bbox.py diff --git a/imutils/__init__.py b/imutils/__init__.py index e5643ca..efc6c59 100755 --- a/imutils/__init__.py +++ b/imutils/__init__.py @@ -17,7 +17,10 @@ from .convenience import is_cv2 from .convenience import is_cv3 from .convenience import is_cv4 +from .convenience import is_cv5 from .convenience import check_opencv_version from .convenience import build_montages from .convenience import adjust_brightness_contrast from .meta import find_function +from . import bbox +from . 
# author: Adrian Rosebrock
# website: http://www.pyimagesearch.com

# import the necessary packages
import numpy as np


def random_brightness_contrast(image, brightness_range=(-50, 50), contrast_range=(0.5, 1.5)):
    """Randomly adjust the brightness and contrast of an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        brightness_range: (min, max) additive brightness offset.
        contrast_range: (min, max) multiplicative contrast factor.

    Returns:
        numpy.ndarray: New augmented image (the input is not modified).
    """
    # OpenCV is only required by this drawing/scaling helper, so import it
    # lazily -- the noise and cutout helpers below are pure NumPy
    import cv2

    # draw a random brightness offset and contrast gain
    brightness = np.random.uniform(brightness_range[0], brightness_range[1])
    contrast = np.random.uniform(contrast_range[0], contrast_range[1])

    # convertScaleAbs computes saturate(alpha * image + beta) and returns a
    # brand-new array, so no defensive copy of the input is needed
    return cv2.convertScaleAbs(image, alpha=contrast, beta=brightness)


def add_gaussian_noise(image, mean=0, std=25):
    """Add Gaussian noise to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        mean: Mean of the Gaussian distribution.
        std: Standard deviation of the Gaussian distribution.

    Returns:
        numpy.ndarray: New uint8 image with Gaussian noise added.
    """
    # draw the noise in float space: casting the signed noise straight to
    # uint8 (as the previous implementation did) wraps negative samples
    # around to large positive values and corrupts the noise distribution,
    # so add first and clip back to the valid pixel range instead
    noise = np.random.normal(mean, std, image.shape)
    noisy = np.clip(image.astype(np.float64) + noise, 0, 255)

    return noisy.astype(np.uint8)


def add_salt_pepper_noise(image, salt_prob=0.02, pepper_prob=0.02):
    """Add salt and pepper noise to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        salt_prob: Probability of salt noise (white pixels).
        pepper_prob: Probability of pepper noise (black pixels).

    Returns:
        numpy.ndarray: New image with salt and pepper noise added.
    """
    # copy so the caller's image is left untouched (masks mutate in place)
    image = image.copy()

    # salt: independently flip each pixel location to white
    salt_mask = np.random.random(image.shape[:2]) < salt_prob
    image[salt_mask] = 255

    # pepper: independently flip each pixel location to black (a pepper
    # hit on a salted pixel wins, which is the conventional behavior)
    pepper_mask = np.random.random(image.shape[:2]) < pepper_prob
    image[pepper_mask] = 0

    return image


def cutout(image, num_holes=1, max_size=0.3):
    """Apply Cutout augmentation (random black rectangles) to an image.

    Args:
        image: OpenCV BGR image (uint8 numpy array).
        num_holes: Number of holes to cut out.
        max_size: Maximum hole size as a fraction of the shorter image side.

    Returns:
        numpy.ndarray: New image with cutout holes filled with black.
    """
    # copy so the caller's image is left untouched
    image = image.copy()

    h, w = image.shape[:2]
    max_hole_size = int(min(w, h) * max_size)

    # np.random.randint(1, high) requires high > 1; for tiny images (or a
    # tiny max_size) there is no room for a hole, so return the unmodified
    # copy instead of raising ValueError as the previous version did
    if max_hole_size < 2:
        return image

    for _ in range(num_holes):
        # random hole size in [1, max_hole_size) and a top-left corner
        # chosen so the hole stays fully inside the image
        hole_size = np.random.randint(1, max_hole_size)
        x = np.random.randint(0, w - hole_size)
        y = np.random.randint(0, h - hole_size)

        # fill the hole with black
        image[y:y + hole_size, x:x + hole_size] = 0

    return image
# author: Adrian Rosebrock
# website: http://www.pyimagesearch.com


def iou(boxA, boxB):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    Args:
        boxA: Bounding box in format [x1, y1, x2, y2].
        boxB: Bounding box in format [x1, y1, x2, y2].

    Returns:
        float: IoU value between 0 and 1.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of the intersection rectangle; the +1 treats the
    # coordinates as pixel-inclusive, which also keeps both box areas
    # strictly positive (no division by zero below)
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # compute the area of both the prediction and ground-truth rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # intersection over union: the intersection area divided by the union
    # area (sum of both areas minus the intersection area)
    return interArea / float(boxAArea + boxBArea - interArea)


def xywh_to_xyxy(box):
    """Convert a bounding box from [x, y, w, h] to [x1, y1, x2, y2].

    Args:
        box: Bounding box in format [x, y, w, h] where (x, y) is the
            top-left corner.

    Returns:
        list: Bounding box in format [x1, y1, x2, y2].
    """
    x, y, w, h = box
    return [x, y, x + w, y + h]


def xyxy_to_xywh(box):
    """Convert a bounding box from [x1, y1, x2, y2] to [x, y, w, h].

    Args:
        box: Bounding box in format [x1, y1, x2, y2].

    Returns:
        list: Bounding box in format [x, y, w, h] where (x, y) is the
            top-left corner.
    """
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]


def draw_bbox(image, box, label=None, color=(0, 255, 0), thickness=2, font_scale=0.5):
    """Draw a bounding box with an optional label on an image.

    Args:
        image: OpenCV image.
        box: Bounding box in format [x1, y1, x2, y2].
        label: Optional label text.
        color: BGR color tuple.
        thickness: Line thickness.
        font_scale: Font scale for the label.

    Returns:
        numpy.ndarray: New image with the bounding box drawn (the input is
        not modified).
    """
    # OpenCV is only needed for drawing; importing it lazily keeps the
    # pure-geometry helpers in this module usable without OpenCV
    import cv2

    # draw on a copy to avoid modifying the caller's image
    image = image.copy()

    # draw the bounding box
    cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, thickness)

    # if a label is provided, draw it with a filled background for contrast
    if label:
        # measure the label so the background rectangle fits it
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, 1)

        # place the label above the box, or just inside it when the box
        # touches the top edge of the image
        text_x = box[0]
        text_y = box[1] - 10 if box[1] - 10 > text_height else box[1] + text_height + 5

        # filled background rectangle, then the label text on top
        cv2.rectangle(image, (text_x, text_y - text_height - 5), (text_x + text_width + 5, text_y), color, -1)
        cv2.putText(image, label, (text_x + 5, text_y - 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), 1)

    return image


def scale_bbox(box, original_size, new_size):
    """Scale bounding-box coordinates to match a resized image.

    Args:
        box: Bounding box in format [x1, y1, x2, y2].
        original_size: Tuple of (original_width, original_height).
        new_size: Tuple of (new_width, new_height).

    Returns:
        list: Scaled bounding box in format [x1, y1, x2, y2]; coordinates
        are truncated to int.
    """
    # per-axis scaling factors between the two image sizes
    width_scale = new_size[0] / original_size[0]
    height_scale = new_size[1] / original_size[1]

    # apply the factors to the corner coordinates (x with width scale,
    # y with height scale)
    return [
        int(box[0] * width_scale),
        int(box[1] * height_scale),
        int(box[2] * width_scale),
        int(box[3] * height_scale),
    ]
def is_cv5(or_better=False):
    """Return True when the installed OpenCV major version is 5.

    Args:
        or_better: When True, also accept any major version newer than 5.

    Returns:
        bool: Whether the OpenCV version check passes.
    """
    # grab the OpenCV major version number
    major = get_opencv_major_version()

    # "or better" accepts the whole 5.x-and-newer range; otherwise the
    # caller wants strictly OpenCV 5
    return major >= 5 if or_better else major == 5
4f9de0b..7422c8b 100644 --- a/imutils/video/videostream.py +++ b/imutils/video/videostream.py @@ -3,7 +3,7 @@ class VideoStream: def __init__(self, src=0, usePiCamera=False, resolution=(320, 240), - framerate=32, **kwargs): + framerate=32, retry_interval=2, timeout=10, skip_frames=0, **kwargs): # check to see if the picamera module should be used if usePiCamera: # only import the picamera packages unless we are @@ -20,7 +20,7 @@ def __init__(self, src=0, usePiCamera=False, resolution=(320, 240), # otherwise, we are using OpenCV so initialize the webcam # stream else: - self.stream = WebcamVideoStream(src=src) + self.stream = WebcamVideoStream(src=src, retry_interval=retry_interval, timeout=timeout, skip_frames=skip_frames) def start(self): # start the threaded video stream diff --git a/imutils/video/webcamvideostream.py b/imutils/video/webcamvideostream.py index dbe8751..67594e4 100644 --- a/imutils/video/webcamvideostream.py +++ b/imutils/video/webcamvideostream.py @@ -1,11 +1,13 @@ # import the necessary packages from threading import Thread import cv2 +import time class WebcamVideoStream: - def __init__(self, src=0, name="WebcamVideoStream"): + def __init__(self, src=0, name="WebcamVideoStream", retry_interval=2, timeout=10, skip_frames=0): # initialize the video camera stream and read the first frame # from the stream + self.src = src self.stream = cv2.VideoCapture(src) (self.grabbed, self.frame) = self.stream.read() @@ -15,6 +17,14 @@ def __init__(self, src=0, name="WebcamVideoStream"): # initialize the variable used to indicate if the thread should # be stopped self.stopped = False + + # network fault tolerance parameters + self.retry_interval = retry_interval # seconds between reconnection attempts + self.timeout = timeout # seconds before timing out a connection attempt + + # frame skipping parameters + self.skip_frames = skip_frames # number of frames to skip between captures + self.frame_count = 0 def start(self): # start the thread to read frames from 
def _reconnect(self):
    """Attempt to re-open the video stream after a failed read.

    Releases the current capture handle, waits ``retry_interval`` seconds,
    re-opens ``self.src``, then polls for up to ``timeout`` seconds until a
    frame can be grabbed. On success the fresh frame is published on the
    instance; on failure a diagnostic is printed and the next read cycle
    may retry.
    """
    print(f"Attempting to reconnect to stream {self.src}...")

    # drop the stale capture handle before re-opening
    if self.stream.isOpened():
        self.stream.release()

    # back off before hammering the source again
    time.sleep(self.retry_interval)

    # re-open the stream from the original source
    self.stream = cv2.VideoCapture(self.src)

    # poll for a frame until the timeout elapses to verify the connection
    deadline = time.time() + self.timeout
    while time.time() < deadline:
        (grabbed, frame) = self.stream.read()
        if grabbed:
            # publish the fresh frame so readers see live data again
            self.grabbed = grabbed
            self.frame = frame
            print(f"Successfully reconnected to stream {self.src}")
            return
        time.sleep(0.1)

    print(f"Failed to reconnect to stream {self.src}")