From 0d87c66211ab306f13c6e3a25e12cc89a7ec6ebc Mon Sep 17 00:00:00 2001 From: JairdanC Date: Sat, 8 Feb 2025 03:00:02 -0500 Subject: [PATCH 01/17] Changes to .gitignore: Personal changes to make my dev env easier to branch with, not to be merged to prod Creation of kitti_segmentation_visualizer: - Implements a rough draft for overlaying panoptic segmentation masks on KITTI images - Uses OpenCV to load images, generate masks, and create semi-transparent overlays - Provides a basic framework for verifying segmentation accuracy before augmentation - Optimization needed for real-time performance --- .gitignore | 9 ++- .../kitti_segmentation_visualizer.py | 70 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 datasets/kitti/kitti_step/kitti_segmentation_visualizer.py diff --git a/.gitignore b/.gitignore index 0598689..fab0c01 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,14 @@ results/ # Ignore datasets (you may want to keep them separate) datasets/coco/ -datasets/kitti/ +#personal change not to be added to prod, but kept in my dev branch +datasets/kitti/kitti_step +!datasets/kitti/kitti_step/kitti_segmentation_visualizer.py +datasets/kitti/raw_data + + +# Ignore zipped datasets when aquired and clean data is added to repo (prevents redownloads) +*.zip # Ignore model checkpoints and trained models *.ckpt diff --git a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py new file mode 100644 index 0000000..0400b34 --- /dev/null +++ b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py @@ -0,0 +1,70 @@ +# Panoptic Map + Image -> Mask Visualizer +# Rough draft for a segmentation visualizer that takes in an image and panoptic image, then overlays them +# This will be needed and useful for verifying that image augmentation does not cause errors in the masks + +import cv2 as cv +import numpy as np +import sys + +# Numpy array of colour constants used for mask 
matrix, uses BRG (bc of OpenCV), key values are set to the Semantic Labeling Convention Used in Kitti +LABEL_BGR = { + 0: np.array([128, 64, 128], dtype=np.uint8), # road + 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk + 2: np.array([70, 70, 70], dtype=np.uint8), # building + 3: np.array([102, 102, 156], dtype=np.uint8), # wall + 4: np.array([190, 153, 153], dtype=np.uint8), # fence + 5: np.array([153, 153, 153], dtype=np.uint8), # pole + 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light + 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign + 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation + 9: np.array([152, 251, 152], dtype=np.uint8), # terrain + 10: np.array([180, 130, 70], dtype=np.uint8), # sky + 11: np.array([60, 20, 220], dtype=np.uint8), # person + 12: np.array([0, 0, 255], dtype=np.uint8), # rider + 13: np.array([142, 0, 0], dtype=np.uint8), # car + 14: np.array([70, 0, 0], dtype=np.uint8), # truck + 15: np.array([100, 60, 0], dtype=np.uint8), # bus + 16: np.array([100, 80, 0], dtype=np.uint8), # train + 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle + 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle + 255: np.array([0, 0, 0], dtype=np.uint8) # void +} + +# Takes in path of image file and returns an openCV matrix +def file_loader(path): + imgMatrix = cv.imread(path) + if (np.count_nonzero(imgMatrix) == 0): + raise RuntimeError("Image was unable to be read") + return imgMatrix + + +# Extremely slow, works for now, but if the mask is generated in real time this needs to be optimized +# Takes in image matrix of panoptic map, returns mask +def generate_mask(imgMatrix): + height, width, channels = np.shape(imgMatrix) + maskMatrix = np.zeros((height, width, channels), dtype=np.uint8) + for i in range(height): + for j in range(width): + pixel = imgMatrix[i,j] #OpenCV uses BGR values + maskMatrix[i,j] = LABEL_BGR.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) + return maskMatrix + +def 
create_overlay(imgMatrix, maskMatrix): + alpha = 0.2 + segmentationMatrix = cv.addWeighted(maskMatrix, alpha, imgMatrix, 1 - alpha, 0) + return segmentationMatrix + +image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" +panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" + + +imgMatrix = file_loader(image_path) +maskMatrix = file_loader(panoptic_path) +maskMatrix = generate_mask(maskMatrix) + +segmentationMask = create_overlay(imgMatrix, maskMatrix) + +cv.imshow("Display window", segmentationMask) +k = cv.waitKey(0) + + From 445e278bb43bf11393d61f3439fb75b6b4d163d6 Mon Sep 17 00:00:00 2001 From: Tygo Date: Sun, 23 Feb 2025 17:51:32 -0500 Subject: [PATCH 02/17] created detection_model.py file --- main.py | 1 + src/models/detection_model.py | 0 src/models/lane_detection.py | 4 ++-- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 src/models/detection_model.py diff --git a/main.py b/main.py index 592ea2c..77ddebb 100644 --- a/main.py +++ b/main.py @@ -28,6 +28,7 @@ def main(): num_workers=2 ) + model = nn.Sequential( nn.Flatten(), nn.Linear(3*224*224, 100), diff --git a/src/models/detection_model.py b/src/models/detection_model.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/lane_detection.py b/src/models/lane_detection.py index f7f070a..e328a9c 100644 --- a/src/models/lane_detection.py +++ b/src/models/lane_detection.py @@ -39,9 +39,9 @@ # Canny edge detection, black and white representation of edges in a frame canny = cv2.Canny(grayFrame, 100, 200) cv2.imshow("Canny edge detection", canny) - + # Display the resulting frame - #cv2.imshow('F1tenth Onboard Video', frame) + # cv2.imshow('F1tenth Onboard Video', frame) # define q as the exit button if cv2.waitKey(25) & 0xFF == ord('q'): From 862c85b0849707ff6b2bc0bc9468e5a105a901fa Mon Sep 17 00:00:00 2001 From: JairdanC Date: Tue, 25 Feb 2025 19:55:17 
-0500 Subject: [PATCH 03/17] Adding the segmentation visualizer for the kitti step date set to utils --- utils/kitti_seg_visualizer.py | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 utils/kitti_seg_visualizer.py diff --git a/utils/kitti_seg_visualizer.py b/utils/kitti_seg_visualizer.py new file mode 100644 index 0000000..e485016 --- /dev/null +++ b/utils/kitti_seg_visualizer.py @@ -0,0 +1,81 @@ +import cv2 as cv +import numpy as np + +# Dictionary mapping KITTI semantic labels to their corresponding BGR color values. +# Note: OpenCV uses BGR format; the label index is assumed to be encoded in the red channel. +KITTI_LABEL_COLORS = { + 0: np.array([128, 64, 128], dtype=np.uint8), # road + 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk + 2: np.array([70, 70, 70], dtype=np.uint8), # building + 3: np.array([102, 102, 156], dtype=np.uint8), # wall + 4: np.array([190, 153, 153], dtype=np.uint8), # fence + 5: np.array([153, 153, 153], dtype=np.uint8), # pole + 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light + 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign + 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation + 9: np.array([152, 251, 152], dtype=np.uint8), # terrain + 10: np.array([180, 130, 70], dtype=np.uint8), # sky + 11: np.array([60, 20, 220], dtype=np.uint8), # person + 12: np.array([0, 0, 255], dtype=np.uint8), # rider + 13: np.array([142, 0, 0], dtype=np.uint8), # car + 14: np.array([70, 0, 0], dtype=np.uint8), # truck + 15: np.array([100, 60, 0], dtype=np.uint8), # bus + 16: np.array([100, 80, 0], dtype=np.uint8), # train + 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle + 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle + 255: np.array([0, 0, 0], dtype=np.uint8) # void +} + +def load_image(path): + + #Loads an image from the given file path using OpenCV. + #Raises a RuntimeError if the image cannot be read. 
+ + image = cv.imread(path) + if image is None or np.count_nonzero(image) == 0: + raise RuntimeError("Image at path '{}' was unable to be read".format(path)) + return image + +def generate_mask(panoptic_image): + + #Generates a segmentation mask from the panoptic image. + #For each pixel in the panoptic image, the red channel value (index 2) is used as a key + #to retrieve the corresponding BGR color from KITTI_LABEL_COLORS. + #If the label is not found, it defaults to black. + #Note: This implementation is slow and may need optimization for real-time applications. + + height, width, channels = panoptic_image.shape + mask = np.zeros((height, width, channels), dtype=np.uint8) + for row in range(height): + for col in range(width): + pixel = panoptic_image[row, col] # OpenCV uses BGR; label is in red channel + mask[row, col] = KITTI_LABEL_COLORS.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) + return mask + +def create_overlay(image, mask): + + #Creates an overlay by blending the original image with the segmentation mask. + #The blending factor (alpha) determines the transparency of the mask. + + alpha = 0.5 + overlay = cv.addWeighted(mask, alpha, image, 1 - alpha, 0) + return overlay + +# Absolute path to the input image and its corresponding panoptic map. +image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" +panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" + +# Load the original image and panoptic map. +image = load_image(image_path) +panoptic_image = load_image(panoptic_path) + +# Generate the segmentation mask and create an overlay. +mask = generate_mask(panoptic_image) +segmentation_overlay = create_overlay(image, mask) + +# Display the resulting overlay. 
+cv.imshow("Segmentation Overlay", segmentation_overlay) +cv.waitKey(0) +cv.destroyAllWindows() + + From 1a7ed7d52005967a37f5e2516a53c239249a3071 Mon Sep 17 00:00:00 2001 From: JairdanC Date: Tue, 25 Feb 2025 20:15:08 -0500 Subject: [PATCH 04/17] removed duplicate file --- .../kitti_segmentation_visualizer.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 datasets/kitti/kitti_step/kitti_segmentation_visualizer.py diff --git a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py deleted file mode 100644 index 0400b34..0000000 --- a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py +++ /dev/null @@ -1,70 +0,0 @@ -# Panoptic Map + Image -> Mask Visualizer -# Rough draft for a segmentation visualizer that takes in an image and panoptic image, then overlays them -# This will be needed and useful for verifying that image augmentation does not cause errors in the masks - -import cv2 as cv -import numpy as np -import sys - -# Numpy array of colour constants used for mask matrix, uses BRG (bc of OpenCV), key values are set to the Semantic Labeling Convention Used in Kitti -LABEL_BGR = { - 0: np.array([128, 64, 128], dtype=np.uint8), # road - 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk - 2: np.array([70, 70, 70], dtype=np.uint8), # building - 3: np.array([102, 102, 156], dtype=np.uint8), # wall - 4: np.array([190, 153, 153], dtype=np.uint8), # fence - 5: np.array([153, 153, 153], dtype=np.uint8), # pole - 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light - 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign - 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation - 9: np.array([152, 251, 152], dtype=np.uint8), # terrain - 10: np.array([180, 130, 70], dtype=np.uint8), # sky - 11: np.array([60, 20, 220], dtype=np.uint8), # person - 12: np.array([0, 0, 255], dtype=np.uint8), # rider - 13: np.array([142, 0, 0], dtype=np.uint8), # car - 14: 
np.array([70, 0, 0], dtype=np.uint8), # truck - 15: np.array([100, 60, 0], dtype=np.uint8), # bus - 16: np.array([100, 80, 0], dtype=np.uint8), # train - 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle - 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle - 255: np.array([0, 0, 0], dtype=np.uint8) # void -} - -# Takes in path of image file and returns an openCV matrix -def file_loader(path): - imgMatrix = cv.imread(path) - if (np.count_nonzero(imgMatrix) == 0): - raise RuntimeError("Image was unable to be read") - return imgMatrix - - -# Extremely slow, works for now, but if the mask is generated in real time this needs to be optimized -# Takes in image matrix of panoptic map, returns mask -def generate_mask(imgMatrix): - height, width, channels = np.shape(imgMatrix) - maskMatrix = np.zeros((height, width, channels), dtype=np.uint8) - for i in range(height): - for j in range(width): - pixel = imgMatrix[i,j] #OpenCV uses BGR values - maskMatrix[i,j] = LABEL_BGR.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) - return maskMatrix - -def create_overlay(imgMatrix, maskMatrix): - alpha = 0.2 - segmentationMatrix = cv.addWeighted(maskMatrix, alpha, imgMatrix, 1 - alpha, 0) - return segmentationMatrix - -image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" -panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" - - -imgMatrix = file_loader(image_path) -maskMatrix = file_loader(panoptic_path) -maskMatrix = generate_mask(maskMatrix) - -segmentationMask = create_overlay(imgMatrix, maskMatrix) - -cv.imshow("Display window", segmentationMask) -k = cv.waitKey(0) - - From cf12ad82b6516be76a8498e937017493a9482d03 Mon Sep 17 00:00:00 2001 From: Jairdan Chopra Date: Tue, 25 Feb 2025 20:18:19 -0500 Subject: [PATCH 05/17] Update .gitignore --- .gitignore | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.gitignore b/.gitignore index 
6c6ce00..b6b9905 100644 --- a/.gitignore +++ b/.gitignore @@ -14,21 +14,8 @@ __pycache__/ logs/ results/ # Ignore datasets (you may want to keep them separate) -<<<<<<< HEAD -datasets/coco/ -#personal change not to be added to prod, but kept in my dev branch -datasets/kitti/kitti_step -!datasets/kitti/kitti_step/kitti_segmentation_visualizer.py -datasets/kitti/raw_data - - -# Ignore zipped datasets when aquired and clean data is added to repo (prevents redownloads) -*.zip - -======= coco/ google-cloud-sdk/ ->>>>>>> dev # Ignore model checkpoints and trained models *.ckpt *.pth From 86088ab80822717f1b4ad595415350986041620f Mon Sep 17 00:00:00 2001 From: Guojia La Date: Tue, 25 Feb 2025 20:51:42 -0500 Subject: [PATCH 06/17] Imported dataloader to detection_model.py and created loader --- src/models/detection_model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e69de29..749b6a1 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -0,0 +1,6 @@ +import datasets.data_loader as dataloader + +loader = dataloader.create_loader("datasets/stop_signs_dataset") + + + From 3e9b9f40fc9408b1c2806ca9182a52561106056e Mon Sep 17 00:00:00 2001 From: Tygo Date: Wed, 26 Feb 2025 12:41:02 -0500 Subject: [PATCH 07/17] detection model iteration 1 --- src/models/detection_model.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e69de29..217f48a 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -0,0 +1,25 @@ +import torch +import torch.nn as nn + +# we want 7 layers - conv1, pool1, relu1, conv2, pool2, relu2, fc1 (fully connected) +class SmallCNN(nn.Module): + def __init__(self) -> None: + super().__init__(self) + self.flatten = nn.Flatten() + self.conv1 = nn.Conv2d() + self.stop_sign_detection = nn.Sequential( + # we want 3 channels because our input size 
is 128x128x3 + # 64 features, square kernel of size 3 + nn.functional.conv2d(3, 64, 3, 3), + nn.pool2d(kernel_size=2,stride=2), + nn.ReLU(), + nn.functional.Conv2d(3, 32, 3, 3), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.ReLU(), + nn.Linear(9216, 32) + nn.Linear(32,2) + ) + + def forward(): + +# detection_model = SmallCNN() From acd47c1aa9ef54fb32e3f5e48f4b0e6aed25f024 Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 13:00:40 -0500 Subject: [PATCH 08/17] Updated data_loader.py with dataset preprocessing --- datasets/data_loader.py | 107 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index 1720a45..fc59b86 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -1,8 +1,110 @@ """ -Encapsulates DataLoader-related logic, including splitting and parallel loading. +Encapsulates DataLoader-related logic, including splitting and parallel loading and transformations. """ import torch -from torch.utils.data import DataLoader, random_split +import cv2 +import numpy as np +import torchvision.transforms as transforms +from PIL import Image +from torch.utils.data import DataLoader, random_split, Dataset + +def apply_cv2_transforms(img): + """ + Applies OpenCV-based augmentations dynamically when images are loaded. 
+ :param img: PIL Image (converted to OpenCV format) + :return: Transformed PIL Image + """ + img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) + + # **Brightness & Contrast Adjustments** + alpha = np.random.uniform(0.8, 1.5) # Contrast factor + beta = np.random.randint(-30, 30) # Brightness offset + img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) + + # **Saturation & Hue Adjustments** + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) # Convert to HSV + img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.7, 1.3) # Modify saturation + img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-10, 10) # Modify hue + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) # Convert back to RGB + + # **Gaussian Blur & Motion Blur** + if np.random.rand() < 0.3: + img = cv2.GaussianBlur(img, (5, 5), 0) # Simulate blur + + if np.random.rand() < 0.2: + kernel_size = 3 + kernel_motion_blur = np.zeros((kernel_size, kernel_size)) + kernel_motion_blur[int((kernel_size - 1) / 2), :] = np.ones(kernel_size) + kernel_motion_blur = kernel_motion_blur / kernel_size + img = cv2.filter2D(img, -1, kernel_motion_blur) + + # **JPEG Compression & Gaussian Noise** + if np.random.rand() < 0.3: + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(30, 90)] + _, enc_img = cv2.imencode('.jpg', img, encode_param) + img = cv2.imdecode(enc_img, cv2.IMREAD_UNCHANGED) + + if np.random.rand() < 0.3: + noise = np.random.normal(0, 10, img.shape).astype(np.uint8) + img = cv2.add(img, noise) + + # **Selective Red Boosting** + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + img_hsv[..., 1] = img_hsv[..., 1] * 1.1 # Slightly boost red saturation + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) + + # **Weather Simulation (Fog & Shadows)** + if np.random.rand() < 0.2: + fog_intensity = np.random.uniform(0.3, 0.7) + fog = np.full_like(img, 255, dtype=np.uint8) + img = cv2.addWeighted(img, 1 - fog_intensity, fog, fog_intensity, 0) + + if np.random.rand() < 0.2: + h, w, _ = 
img.shape + shadow = np.random.uniform(0.3, 0.7, (h, w, 3)) * 255 + img = cv2.addWeighted(img, 1, shadow.astype(np.uint8), -0.5, 0) + + return Image.fromarray(img) # Convert back to PIL format + +# ========== APPLY TRANSFORMATION PIPELINE ========== +def get_stop_sign_transforms(): + """ + Returns the best transformation pipeline for dynamic augmentation during training. + """ + return transforms.Compose([ + transforms.Lambda(lambda img: apply_cv2_transforms(img)), # Apply OpenCV-based augmentations dynamically + + # Torchvision Transformations + transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), + transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.ToTensor(), + transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + ]) + +# ========== MODIFIED DATASET CLASS ========== +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that returns both the original and augmented images. + """ + def __init__(self, image_paths, labels, transform=None): + self.image_paths = image_paths # List of image file paths + self.labels = labels # Corresponding labels + self.transform = transform # Transformation pipeline + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image = Image.open(self.image_paths[idx]).convert("RGB") # Load image + label = self.labels[idx] # Get label + + original_image = transforms.ToTensor()(image) # Convert original image to tensor + + augmented_image = self.transform(image) if self.transform else original_image # Apply augmentation + + return original_image, augmented_image, label # Return both images and label + def create_dataloader(dataset, batch_size=8, shuffle=True, num_workers=4): """ @@ -44,6 +146,7 @@ def create_train_val_test_loaders(dataset, batch_size=8, train_ratio=0.7, test_r Combines dataset splitting and DataLoader creation. 
:return: (train_loader, test_loader, val_loader) """ + dataset = StopSignDataset(image_paths=image_paths, labels=labels, transform=get_stop_sign_transforms()) train_dataset, test_dataset, val_dataset = split_dataset(dataset, train_ratio, test_ratio) train_loader = create_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_loader = create_dataloader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) From 991a650286d7dc0a0bcb4c599245b5e460b183d4 Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 13:23:30 -0500 Subject: [PATCH 09/17] Updated .gitignore to ignore WEAP_CV/ --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b6b9905..8114324 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ + # Ignore the virtual environment venv/ - +WEAP_CV/ # Ignore Python cache files __pycache__/ *.pyc From 0c237dc84f06664d7cf0b7a9db75cbce8704602a Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 16:18:29 -0500 Subject: [PATCH 10/17] Updated dataset pipeline with augmentations --- datasets/data_loader.py | 84 ++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index fc59b86..536e747 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -8,6 +8,7 @@ from PIL import Image from torch.utils.data import DataLoader, random_split, Dataset + def apply_cv2_transforms(img): """ Applies OpenCV-based augmentations dynamically when images are loaded. 
@@ -16,96 +17,69 @@ def apply_cv2_transforms(img): """ img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) - # **Brightness & Contrast Adjustments** - alpha = np.random.uniform(0.8, 1.5) # Contrast factor - beta = np.random.randint(-30, 30) # Brightness offset + alpha = np.random.uniform(0.9, 1.2) # Reduced contrast variation + beta = np.random.randint(-15, 15) # Reduced brightness shift img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) - # **Saturation & Hue Adjustments** - img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) # Convert to HSV - img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.7, 1.3) # Modify saturation - img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-10, 10) # Modify hue - img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) # Convert back to RGB - - # **Gaussian Blur & Motion Blur** - if np.random.rand() < 0.3: - img = cv2.GaussianBlur(img, (5, 5), 0) # Simulate blur + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.85, 1.15) + img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-5, 5) + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) if np.random.rand() < 0.2: + img = cv2.GaussianBlur(img, (3, 3), 0) + + if np.random.rand() < 0.15: kernel_size = 3 kernel_motion_blur = np.zeros((kernel_size, kernel_size)) kernel_motion_blur[int((kernel_size - 1) / 2), :] = np.ones(kernel_size) kernel_motion_blur = kernel_motion_blur / kernel_size img = cv2.filter2D(img, -1, kernel_motion_blur) - # **JPEG Compression & Gaussian Noise** - if np.random.rand() < 0.3: - encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(30, 90)] + if np.random.rand() < 0.2: + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(50, 90)] _, enc_img = cv2.imencode('.jpg', img, encode_param) img = cv2.imdecode(enc_img, cv2.IMREAD_UNCHANGED) - if np.random.rand() < 0.3: - noise = np.random.normal(0, 10, img.shape).astype(np.uint8) - img = cv2.add(img, noise) - - # **Selective 
Red Boosting** - img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) - img_hsv[..., 1] = img_hsv[..., 1] * 1.1 # Slightly boost red saturation - img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) - - # **Weather Simulation (Fog & Shadows)** - if np.random.rand() < 0.2: - fog_intensity = np.random.uniform(0.3, 0.7) - fog = np.full_like(img, 255, dtype=np.uint8) - img = cv2.addWeighted(img, 1 - fog_intensity, fog, fog_intensity, 0) - if np.random.rand() < 0.2: - h, w, _ = img.shape - shadow = np.random.uniform(0.3, 0.7, (h, w, 3)) * 255 - img = cv2.addWeighted(img, 1, shadow.astype(np.uint8), -0.5, 0) + noise = np.random.normal(0, 5, img.shape).astype(np.uint8) + img = cv2.add(img, noise) - return Image.fromarray(img) # Convert back to PIL format + return Image.fromarray(img) -# ========== APPLY TRANSFORMATION PIPELINE ========== def get_stop_sign_transforms(): """ - Returns the best transformation pipeline for dynamic augmentation during training. + Returns the transformation pipeline for dynamic augmentation during training. """ return transforms.Compose([ - transforms.Lambda(lambda img: apply_cv2_transforms(img)), # Apply OpenCV-based augmentations dynamically - - # Torchvision Transformations - transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), - transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)), - transforms.RandomHorizontalFlip(p=0.5), + transforms.Lambda(lambda img: apply_cv2_transforms(img)), + transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.05), + transforms.RandomAffine(degrees=7, translate=(0.05, 0.05), scale=(0.95, 1.05)), + transforms.RandomHorizontalFlip(p=0.3), transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) -# ========== MODIFIED DATASET CLASS ========== class StopSignDataset(Dataset): """ - Custom dataset class for Stop Sign detection that returns both the original and augmented images. 
+ Custom dataset class for Stop Sign detection that applies transformations during training. """ def __init__(self, image_paths, labels, transform=None): - self.image_paths = image_paths # List of image file paths - self.labels = labels # Corresponding labels - self.transform = transform # Transformation pipeline + self.image_paths = image_paths + self.labels = labels + self.transform = transform def __len__(self): return len(self.image_paths) def __getitem__(self, idx): - image = Image.open(self.image_paths[idx]).convert("RGB") # Load image - label = self.labels[idx] # Get label + image = Image.open(self.image_paths[idx]).convert("RGB") + label = self.labels[idx] - original_image = transforms.ToTensor()(image) # Convert original image to tensor + augmented_image = self.transform(image) if self.transform else transforms.ToTensor()(image) - augmented_image = self.transform(image) if self.transform else original_image # Apply augmentation - - return original_image, augmented_image, label # Return both images and label + return augmented_image, label # Only return augmented image and label - def create_dataloader(dataset, batch_size=8, shuffle=True, num_workers=4): """ Creates and returns a DataLoader @@ -141,7 +115,7 @@ def split_dataset(dataset, train_ratio=0.7, test_ratio=0.15, seed=42): torch.manual_seed(seed) return random_split(dataset, [train_size, test_size, val_size]) -def create_train_val_test_loaders(dataset, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): +def create_train_val_test_loaders(image_paths, labels, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): """ Combines dataset splitting and DataLoader creation. 
:return: (train_loader, test_loader, val_loader) From 7be75fc16328d6bcbc9ab70fdf2d9b0eb67d8e9c Mon Sep 17 00:00:00 2001 From: Tygo Date: Thu, 27 Feb 2025 20:30:38 -0500 Subject: [PATCH 11/17] fix indent --- src/models/detection_model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index 217f48a..b2f9f3c 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -16,10 +16,8 @@ def __init__(self) -> None: nn.functional.Conv2d(3, 32, 3, 3), nn.MaxPool2d(kernel_size=2, stride=2), nn.ReLU(), - nn.Linear(9216, 32) + nn.Linear(9216, 32), nn.Linear(32,2) ) - - def forward(): # detection_model = SmallCNN() From 6550f12c3a14f8dc00ce9a63004ee2aab08c0276 Mon Sep 17 00:00:00 2001 From: Tygo Date: Fri, 7 Mar 2025 14:21:19 -0500 Subject: [PATCH 12/17] cleanup + review --- datasets/data_loader.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index 536e747..d057223 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -15,11 +15,9 @@ def apply_cv2_transforms(img): :param img: PIL Image (converted to OpenCV format) :return: Transformed PIL Image """ + img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) - - alpha = np.random.uniform(0.9, 1.2) # Reduced contrast variation - beta = np.random.randint(-15, 15) # Reduced brightness shift - img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) + img = cv2.convertScaleAbs(img, alpha=np.random.uniform(0.9, 1.2), beta=np.random.randint(-15, 15)) # Reduced contrast variation + Reduced brightness shift img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.85, 1.15) From a72603a84a6e4f007c8842b7c7f62926a1ba066f Mon Sep 17 00:00:00 2001 From: Tygo Date: Fri, 7 Mar 2025 18:37:44 -0500 Subject: [PATCH 13/17] finish first iteration of model --- src/models/detection_model.py | 151 
++++++++++++++++++++++++++++++---- 1 file changed, 137 insertions(+), 14 deletions(-) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index b2f9f3c..e2067fe 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -1,23 +1,146 @@ import torch import torch.nn as nn +import torch.optim as optim +from torch.optim import lr_scheduler +import torch.backends.cudnn as cudnn +import numpy as np -# we want 7 layers - conv1, pool1, relu1, conv2, pool2, relu2, fc1 (fully connected) +''' +Define a small CNN. +We want 8 layers - conv1, batch + poo1, relu1, conv2, batch + pool2, relu2, fc1, fc2 +Split up the two parts 1.Model 2.Classifier +''' class SmallCNN(nn.Module): - def __init__(self) -> None: - super().__init__(self) - self.flatten = nn.Flatten() - self.conv1 = nn.Conv2d() - self.stop_sign_detection = nn.Sequential( - # we want 3 channels because our input size is 128x128x3 - # 64 features, square kernel of size 3 - nn.functional.conv2d(3, 64, 3, 3), - nn.pool2d(kernel_size=2,stride=2), + def __init__(self, num_classes=2, input_size=128) -> None: + super().__init__() + + # calculate size after convolution and pooling + feature_size = ((input_size - 2) // 2 - 2) // 2 + # Calculate the flattened feature size + self.flat_features = 64 * feature_size * feature_size + + # Small CNN part + self.stop_sign_cnn = nn.Sequential( + # First conv block + nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=0), # 64 features/outputs, input channels = 3 (RGB), square kernel of size 3, + nn.BatchNorm2d(32), + nn.MaxPool2d(kernel_size=2,stride=2), nn.ReLU(), - nn.functional.Conv2d(3, 32, 3, 3), + + # Second conv block + nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0), + nn.BatchNorm2d(64), nn.MaxPool2d(kernel_size=2, stride=2), nn.ReLU(), - nn.Linear(9216, 32), - nn.Linear(32,2) + + # Dropout for regularization to avoid overfitting + nn.Dropout2d(0.20) + ) + + # Fully connected part + self.stop_sign_classifier = nn.Sequential( + 
nn.Flatten(), + nn.Linear(self.flat_features, 128), + nn.ReLU(), + nn.Dropout(0.5), + nn.Linear(128, num_classes) ) -# detection_model = SmallCNN() + def forward(self, x): + x = self.stop_sign_cnn(x) + x = self.stop_sign_classifier(x) + return x + +# Create the model +detection_model = SmallCNN() + +# Check if GPU is available +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +detection_model.to(device) + +# define a loss function and optimizer +loss_fcn = nn.CrossEntropyLoss() +optimizer = optim.Adam(detection_model.parameters(), lr=0.001, betas=(0.5, 0.999), weight_decay=1e-4) # weight decay for regularization + +# learning rate scheduler +scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1) + +def train_model(model, train_loader, val_loader, num_epochs = 25, ): + best = 0.0 # holds best accuracy + + for epoch in range(num_epochs): + print(f'Epoch {epoch + 1}/{num_epochs}') + print('-' * 10) + running_loss = 0.0 + + model.train() + running_loss = 0.0 + running_corrects = 0 + + for inputs, labels in train_loader: + print('.', end='', flush=True) + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) # outputs is a tensor of shape [batch_size,2] and torch.max ignores the first value + loss = loss_fcn(outputs, labels) + + # backward pass and optimize + loss.backward() + optimizer.step() + + # loss and corrects + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + + epoch_loss = running_loss / len(train_loader.dataset) + epoch_acc = running_corrects.double() / len(train_loader.dataset) + + print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + + # Validation phase + model.eval() # Set model to evaluate mode + val_loss = 0.0 + val_corrects = 0 + + # No gradient calculation needed for validation + with
torch.no_grad(): + for inputs, labels in val_loader: + inputs = inputs.to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = loss_fcn(outputs, labels) + + # Statistics + val_loss += loss.item() * inputs.size(0) + val_corrects += torch.sum(preds == labels.data) + + val_loss = val_loss / len(val_loader.dataset) + val_acc = val_corrects.double() / len(val_loader.dataset) + + print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}') + + # Update learning rate based on validation loss + scheduler.step(val_loss) + + # Save the best model + if val_acc > best_acc: + best_acc = val_acc + torch.save(model.state_dict(), 'best_model.pth') + print(f'New best model saved with accuracy: {val_acc:.4f}') + + print(f'Best validation accuracy: {best_acc:.4f}') + return model + + + + + From e892902319a80c48fd7399e56ccb72045fdf8327 Mon Sep 17 00:00:00 2001 From: TCrawley11 Date: Wed, 12 Mar 2025 14:50:13 -0400 Subject: [PATCH 14/17] Added Ubuntu Environment, start lidar and cam fusion --- src/3D/cam_to_lidar.py | 6 ++ src/models/detection_model.py | 2 +- ubuntuEnv.yml | 139 ++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 src/3D/cam_to_lidar.py create mode 100644 ubuntuEnv.yml diff --git a/src/3D/cam_to_lidar.py b/src/3D/cam_to_lidar.py new file mode 100644 index 0000000..3a4ede2 --- /dev/null +++ b/src/3D/cam_to_lidar.py @@ -0,0 +1,6 @@ +import numpy as np + +# draw center line, +center_coord = [0,0] + +# get some test pics from KITTI \ No newline at end of file diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e2067fe..a1bdbe2 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -101,7 +101,7 @@ def train_model(model, train_loader, val_loader, num_epochs = 25, ): epoch_loss = running_loss / len(train_loader.dataset) epoch_acc = running_corrects.double() / len(train_loader.dataset) - print(f'Train 
Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') # Validation phase model.eval() # Set model to evaluate mode diff --git a/ubuntuEnv.yml b/ubuntuEnv.yml new file mode 100644 index 0000000..be5dcd2 --- /dev/null +++ b/ubuntuEnv.yml @@ -0,0 +1,139 @@ +name: ubuntuEnv +channels: + - defaults + - conda-forge + - https://repo.anaconda.com/pkgs/main + - https://repo.anaconda.com/pkgs/r +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - aom=3.6.0=h6a678d5_0 + - blas=1.1=openblas + - brotli-python=1.0.9=py310hd8f1fbe_7 + - bzip2=1.0.8=h5eee18b_6 + - c-ares=1.19.1=h5eee18b_0 + - ca-certificates=2025.2.25=h06a4308_0 + - cairo=1.16.0=hb05425b_5 + - certifi=2025.1.31=pyhd8ed1ab_0 + - cffi=1.15.0=py310h0fdd8cc_0 + - charset-normalizer=3.4.1=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_1 + - cpython=3.10.16=py310hd8ed1ab_1 + - cyrus-sasl=2.1.28=h52b45da_1 + - dav1d=1.2.1=h5eee18b_0 + - dbus=1.13.18=hb2f20db_0 + - eigen=3.4.0=h4bd325d_0 + - expat=2.6.4=h6a678d5_0 + - ffmpeg=6.1.1=h2a67f75_2 + - filelock=3.17.0=pyhd8ed1ab_0 + - fontconfig=2.14.1=h55d465d_3 + - freetype=2.10.4=h0708190_1 + - fsspec=2025.3.0=pyhd8ed1ab_0 + - giflib=5.2.1=h36c2ea0_2 + - glib=2.78.4=h6a678d5_0 + - glib-tools=2.78.4=h6a678d5_0 + - gmp=6.2.1=h58526e2_0 + - gmpy2=2.1.2=py310h92f7908_0 + - graphite2=1.3.14=h295c915_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - h2=4.2.0=pyhd8ed1ab_0 + - harfbuzz=10.2.0=hf296adc_0 + - hdf5=1.14.5=h2b7332f_2 + - hpack=4.1.0=pyhd8ed1ab_0 + - hyperframe=6.1.0=pyhd8ed1ab_0 + - icu=73.1=h6a678d5_0 + - idna=3.10=pyhd8ed1ab_1 + - jinja2=3.1.6=pyhd8ed1ab_0 + - jpeg=9e=h166bdaf_1 + - kagglehub=0.3.8=pyhd8ed1ab_0 + - krb5=1.20.1=h143b758_1 + - lame=3.100=h7f98852_1001 + - lcms2=2.16=hb9589c4_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - leptonica=1.82.0=h42c8aad_2 + - lerc=4.0.0=h6a678d5_0 + - libabseil=20240116.2=cxx17_h6a678d5_0 + - libarchive=3.7.7=hfab0078_0 + - 
libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libclang=14.0.6=default_hc6dbbc7_2 + - libclang13=14.0.6=default_he11475f_2 + - libcups=2.4.2=h2d74bed_1 + - libcurl=8.12.1=hc9e6f67_0 + - libdeflate=1.22=h5eee18b_0 + - libedit=3.1.20230828=h5eee18b_0 + - libev=4.33=h516909a_1 + - libffi=3.4.4=h6a678d5_1 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran=3.0.0=1 + - libgfortran-ng=13.2.0=h69a702a_0 + - libgfortran5=13.2.0=ha4646dd_0 + - libglib=2.78.4=hdc74915_0 + - libgomp=11.2.0=h1234567_1 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libllvm14=14.0.6=hecde1de_4 + - libnghttp2=1.57.0=h2d74bed_0 + - libogg=1.3.4=h7f98852_1 + - libopenblas=0.3.21=h043d6bf_0 + - libopus=1.3.1=h7f98852_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=17.4=hdbd6064_0 + - libprotobuf=4.25.3=he621ea3_0 + - libssh2=1.11.1=h251f7ec_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libtheora=1.1.1=h7f98852_1005 + - libtiff=4.5.1=hffd6297_1 + - libuuid=1.41.5=h5eee18b_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libvpx=1.13.1=h6a678d5_0 + - libwebp=1.3.2=h11a3e52_0 + - libwebp-base=1.3.2=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.3=he3ba5ed_0 + - libxml2=2.13.5=hfdd30dd_0 + - lz4-c=1.9.4=h6a678d5_1 + - markupsafe=2.1.1=py310h5764c6d_1 + - mpc=1.2.1=h9f54685_0 + - mpfr=4.1.0=h9202a9a_1 + - mpmath=1.3.0=pyhd8ed1ab_1 + - mysql=8.4.0=h29a9f33_1 + - ncurses=6.4=h6a678d5_0 + - networkx=3.4=pyhd8ed1ab_0 + - numpy=1.22.3=py310h4ef5377_2 + - openblas=0.3.4=ha44fe06_0 + - opencv=4.10.0=py310h2484693_2 + - openh264=2.1.1=h780b84a_0 + - openjpeg=2.5.2=he7f1fd0_0 + - openldap=2.6.4=h42fbc30_0 + - openssl=3.0.16=h5eee18b_0 + - packaging=24.2=pyhd8ed1ab_2 + - pcre2=10.42=hebb0a14_1 + - pillow=11.1.0=py310hcea889d_0 + - pip=25.0=py310h06a4308_0 + - pixman=0.40.0=h36c2ea0_0 + - pycparser=2.22=pyh29332c3_1 + - pysocks=1.7.1=pyha55dd90_7 + - python=3.10.16=he870216_1 + - python_abi=3.10=2_cp310 + - pytorch=2.3.0=cpu_py310h1ce4368_1 + - qt-main=5.15.2=hb6262e9_12 + - 
readline=8.2=h5eee18b_0 + - requests=2.32.3=pyhd8ed1ab_1 + - setuptools=75.8.0=py310h06a4308_0 + - sqlite=3.45.3=h5eee18b_0 + - sympy=1.13.3=pyh2585a3b_105 + - tesseract=5.2.0=h6a678d5_2 + - tk=8.6.14=h39e8969_0 + - torchvision=0.18.1=cpu_py310h54128f0_0 + - tqdm=4.67.1=pyhd8ed1ab_1 + - typing_extensions=4.12.2=pyha770c72_1 + - tzdata=2025a=h04d1e81_0 + - urllib3=2.3.0=pyhd8ed1ab_0 + - wheel=0.45.1=py310h06a4308_0 + - xz=5.6.4=h5eee18b_1 + - zlib=1.2.13=h5eee18b_1 + - zstandard=0.23.0=py310h2c38b39_1 + - zstd=1.5.6=hc292b87_0 +prefix: /home/tcrawley11/miniconda3/envs/ubuntuEnv From c96b8ccf086a270beea5c951d9059d61cc51524b Mon Sep 17 00:00:00 2001 From: TCrawley11 Date: Wed, 12 Mar 2025 16:08:39 -0400 Subject: [PATCH 15/17] Implemented script to download dataset from roboflow url --- datasets/data_loader.py | 45 +++++++++++++++++++++++++++++++++++++++++ main.py | 6 ++++++ 2 files changed, 51 insertions(+) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index d057223..a2fc708 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -8,6 +8,51 @@ from PIL import Image from torch.utils.data import DataLoader, random_split, Dataset +import os +import requests +import zipfile +from tqdm import tqdm # used for progress bars in downloading the dataset + +# this function will download the unaugmented train and val datasets from roboflow +def download_roboflow_dataset(dataset_url, output_dir='stop_sign_dataset'): + """ + Download and extract dataset from Roboflow + + Args: + dataset_url: URL to download the dataset from + output_dir: Directory where the dataset will be extracted + """ + # Create full paths + base_dir = os.path.dirname(os.path.abspath(__file__)) + output_path = os.path.join(base_dir, output_dir) + zip_path = os.path.join(output_path, "temp_dataset.zip") + + # Create directory if it doesn't exist + os.makedirs(output_path, exist_ok=True) + + # Console messages + print(f'Downloading dataset to {zip_path}...') + try: + response = 
requests.get(dataset_url, stream=True) + response.raise_for_status() # raise status to check for bad response + + dataset_size = int(response.headers.get('content-length', 0)) + + with open(zip_path, 'wb') as file: + for data in tqdm(response.iter_content(chunk_size=1024), total=dataset_size, unit='B', unit_scale=True): + file.write(data) + + print("\nDownload complete. Extracting files...") + + # extract zips + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(output_path) + + # delete downloaded zip file + os.remove(zip_path) + print(f'Extraction complete. Files extracted to {output_path}') + except requests.exceptions.RequestException as e: + print(f"Error downloading the dataset: {e}") def apply_cv2_transforms(img): """ diff --git a/main.py b/main.py index 77ddebb..c676d5b 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,10 @@ from datasets.data_loader import create_train_val_test_loaders from torchvision import transforms from train import train_model +import datasets.data_loader as data_loader def main(): + ''' data_dir = "./coco" transform = transforms.Compose([ @@ -45,6 +47,10 @@ def main(): lr=1e-3, device=device ) + ''' + + # testing the dataloader + data_loader.download_roboflow_dataset('https://app.roboflow.com/ds/LXl5gthuky?key=9cEzxHzAiX') if __name__ == '__main__': main() From 339c4a198498c9467a1d40345f4cd8f76510de1a Mon Sep 17 00:00:00 2001 From: Zayan Date: Sat, 15 Mar 2025 04:47:45 -0400 Subject: [PATCH 16/17] Fixed dataset integration and transformation issues --- datasets/data_loader.py | 43 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index a2fc708..0b941ab 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -54,6 +54,7 @@ def download_roboflow_dataset(dataset_url, output_dir='stop_sign_dataset'): except requests.exceptions.RequestException as e: print(f"Error downloading the dataset: {e}") + def 
apply_cv2_transforms(img): """ Applies OpenCV-based augmentations dynamically when images are loaded. @@ -168,4 +169,44 @@ def create_train_val_test_loaders(image_paths, labels, batch_size=8, train_ratio train_loader = create_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_loader = create_dataloader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) val_loader = create_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) - return train_loader, test_loader, val_loader \ No newline at end of file + return train_loader, test_loader, val_loader + + +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that applies transformations during training. + """ + def __init__(self, img_dir, transform=None): + self.img_dir = img_dir + self.transform = transform + self.img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")] + + def __len__(self): + return len(self.img_paths) + + def __getitem__(self, idx): + img_path = self.img_paths[idx] + img = Image.open(img_path).convert("RGB") + if self.transform: + img = self.transform(img) + return img + +def connect_dataset_with_transforms(dataset_path): + """ + Connects the downloaded dataset with transformations. + Ensures dataset is properly loaded with augmentations before training. 
+ """ + print("Applying transformations and preparing dataloaders...") + + transform_pipeline = get_stop_sign_transforms() + dataset = StopSignDataset(dataset_path, transform=transform_pipeline) + + train_size = int(0.8 * len(dataset)) + val_size = len(dataset) - train_size + train_dataset, val_dataset = random_split(dataset, [train_size, val_size]) + + train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True, persistent_workers=True) + val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True) + + print("Transformations applied successfully! Training and validation dataloaders ready.") + return train_loader, val_loader From 2189e04bc63d4b8adb72b988931d12980a4470c8 Mon Sep 17 00:00:00 2001 From: Zain Syed <145395172+zsyed44@users.noreply.github.com> Date: Sat, 15 Mar 2025 05:11:39 -0400 Subject: [PATCH 17/17] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8114324..b56e306 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Ignore the virtual environment +.venv/ venv/ WEAP_CV/ # Ignore Python cache files