From 0d87c66211ab306f13c6e3a25e12cc89a7ec6ebc Mon Sep 17 00:00:00 2001 From: JairdanC Date: Sat, 8 Feb 2025 03:00:02 -0500 Subject: [PATCH 01/17] Changes to .gitignore: Personal changes to make my dev env easier to branch with, not to be merged to prod Creation of kitti_segmentation_visualizer: - Implements a rough draft for overlaying panoptic segmentation masks on KITTI images - Uses OpenCV to load images, generate masks, and create semi-transparent overlays - Provides a basic framework for verifying segmentation accuracy before augmentation - Optimization needed for real-time performance --- .gitignore | 9 ++- .../kitti_segmentation_visualizer.py | 70 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 datasets/kitti/kitti_step/kitti_segmentation_visualizer.py diff --git a/.gitignore b/.gitignore index 0598689..fab0c01 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,14 @@ results/ # Ignore datasets (you may want to keep them separate) datasets/coco/ -datasets/kitti/ +#personal change not to be added to prod, but kept in my dev branch +datasets/kitti/kitti_step +!datasets/kitti/kitti_step/kitti_segmentation_visualizer.py +datasets/kitti/raw_data + + +# Ignore zipped datasets when aquired and clean data is added to repo (prevents redownloads) +*.zip # Ignore model checkpoints and trained models *.ckpt diff --git a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py new file mode 100644 index 0000000..0400b34 --- /dev/null +++ b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py @@ -0,0 +1,70 @@ +# Panoptic Map + Image -> Mask Visualizer +# Rough draft for a segmentation visualizer that takes in an image and panoptic image, then overlays them +# This will be needed and useful for verifying that image augmentation does not cause errors in the masks + +import cv2 as cv +import numpy as np +import sys + +# Numpy array of colour constants used for mask 
matrix, uses BRG (bc of OpenCV), key values are set to the Semantic Labeling Convention Used in Kitti +LABEL_BGR = { + 0: np.array([128, 64, 128], dtype=np.uint8), # road + 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk + 2: np.array([70, 70, 70], dtype=np.uint8), # building + 3: np.array([102, 102, 156], dtype=np.uint8), # wall + 4: np.array([190, 153, 153], dtype=np.uint8), # fence + 5: np.array([153, 153, 153], dtype=np.uint8), # pole + 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light + 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign + 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation + 9: np.array([152, 251, 152], dtype=np.uint8), # terrain + 10: np.array([180, 130, 70], dtype=np.uint8), # sky + 11: np.array([60, 20, 220], dtype=np.uint8), # person + 12: np.array([0, 0, 255], dtype=np.uint8), # rider + 13: np.array([142, 0, 0], dtype=np.uint8), # car + 14: np.array([70, 0, 0], dtype=np.uint8), # truck + 15: np.array([100, 60, 0], dtype=np.uint8), # bus + 16: np.array([100, 80, 0], dtype=np.uint8), # train + 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle + 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle + 255: np.array([0, 0, 0], dtype=np.uint8) # void +} + +# Takes in path of image file and returns an openCV matrix +def file_loader(path): + imgMatrix = cv.imread(path) + if (np.count_nonzero(imgMatrix) == 0): + raise RuntimeError("Image was unable to be read") + return imgMatrix + + +# Extremely slow, works for now, but if the mask is generated in real time this needs to be optimized +# Takes in image matrix of panoptic map, returns mask +def generate_mask(imgMatrix): + height, width, channels = np.shape(imgMatrix) + maskMatrix = np.zeros((height, width, channels), dtype=np.uint8) + for i in range(height): + for j in range(width): + pixel = imgMatrix[i,j] #OpenCV uses BGR values + maskMatrix[i,j] = LABEL_BGR.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) + return maskMatrix + +def 
create_overlay(imgMatrix, maskMatrix): + alpha = 0.2 + segmentationMatrix = cv.addWeighted(maskMatrix, alpha, imgMatrix, 1 - alpha, 0) + return segmentationMatrix + +image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" +panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" + + +imgMatrix = file_loader(image_path) +maskMatrix = file_loader(panoptic_path) +maskMatrix = generate_mask(maskMatrix) + +segmentationMask = create_overlay(imgMatrix, maskMatrix) + +cv.imshow("Display window", segmentationMask) +k = cv.waitKey(0) + + From 445e278bb43bf11393d61f3439fb75b6b4d163d6 Mon Sep 17 00:00:00 2001 From: Tygo Date: Sun, 23 Feb 2025 17:51:32 -0500 Subject: [PATCH 02/17] created detection_model.py file --- main.py | 1 + src/models/detection_model.py | 0 src/models/lane_detection.py | 4 ++-- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 src/models/detection_model.py diff --git a/main.py b/main.py index 592ea2c..77ddebb 100644 --- a/main.py +++ b/main.py @@ -28,6 +28,7 @@ def main(): num_workers=2 ) + model = nn.Sequential( nn.Flatten(), nn.Linear(3*224*224, 100), diff --git a/src/models/detection_model.py b/src/models/detection_model.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/lane_detection.py b/src/models/lane_detection.py index f7f070a..e328a9c 100644 --- a/src/models/lane_detection.py +++ b/src/models/lane_detection.py @@ -39,9 +39,9 @@ # Canny edge detection, black and white representation of edges in a frame canny = cv2.Canny(grayFrame, 100, 200) cv2.imshow("Canny edge detection", canny) - + # Display the resulting frame - #cv2.imshow('F1tenth Onboard Video', frame) + # cv2.imshow('F1tenth Onboard Video', frame) # define q as the exit button if cv2.waitKey(25) & 0xFF == ord('q'): From 862c85b0849707ff6b2bc0bc9468e5a105a901fa Mon Sep 17 00:00:00 2001 From: JairdanC Date: Tue, 25 Feb 2025 19:55:17 
-0500 Subject: [PATCH 03/17] Adding the segmentation visualizer for the kitti step date set to utils --- utils/kitti_seg_visualizer.py | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 utils/kitti_seg_visualizer.py diff --git a/utils/kitti_seg_visualizer.py b/utils/kitti_seg_visualizer.py new file mode 100644 index 0000000..e485016 --- /dev/null +++ b/utils/kitti_seg_visualizer.py @@ -0,0 +1,81 @@ +import cv2 as cv +import numpy as np + +# Dictionary mapping KITTI semantic labels to their corresponding BGR color values. +# Note: OpenCV uses BGR format; the label index is assumed to be encoded in the red channel. +KITTI_LABEL_COLORS = { + 0: np.array([128, 64, 128], dtype=np.uint8), # road + 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk + 2: np.array([70, 70, 70], dtype=np.uint8), # building + 3: np.array([102, 102, 156], dtype=np.uint8), # wall + 4: np.array([190, 153, 153], dtype=np.uint8), # fence + 5: np.array([153, 153, 153], dtype=np.uint8), # pole + 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light + 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign + 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation + 9: np.array([152, 251, 152], dtype=np.uint8), # terrain + 10: np.array([180, 130, 70], dtype=np.uint8), # sky + 11: np.array([60, 20, 220], dtype=np.uint8), # person + 12: np.array([0, 0, 255], dtype=np.uint8), # rider + 13: np.array([142, 0, 0], dtype=np.uint8), # car + 14: np.array([70, 0, 0], dtype=np.uint8), # truck + 15: np.array([100, 60, 0], dtype=np.uint8), # bus + 16: np.array([100, 80, 0], dtype=np.uint8), # train + 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle + 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle + 255: np.array([0, 0, 0], dtype=np.uint8) # void +} + +def load_image(path): + + #Loads an image from the given file path using OpenCV. + #Raises a RuntimeError if the image cannot be read. 
+ + image = cv.imread(path) + if image is None or np.count_nonzero(image) == 0: + raise RuntimeError("Image at path '{}' was unable to be read".format(path)) + return image + +def generate_mask(panoptic_image): + + #Generates a segmentation mask from the panoptic image. + #For each pixel in the panoptic image, the red channel value (index 2) is used as a key + #to retrieve the corresponding BGR color from KITTI_LABEL_COLORS. + #If the label is not found, it defaults to black. + #Note: This implementation is slow and may need optimization for real-time applications. + + height, width, channels = panoptic_image.shape + mask = np.zeros((height, width, channels), dtype=np.uint8) + for row in range(height): + for col in range(width): + pixel = panoptic_image[row, col] # OpenCV uses BGR; label is in red channel + mask[row, col] = KITTI_LABEL_COLORS.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) + return mask + +def create_overlay(image, mask): + + #Creates an overlay by blending the original image with the segmentation mask. + #The blending factor (alpha) determines the transparency of the mask. + + alpha = 0.5 + overlay = cv.addWeighted(mask, alpha, image, 1 - alpha, 0) + return overlay + +# Absolute path to the input image and its corresponding panoptic map. +image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" +panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" + +# Load the original image and panoptic map. +image = load_image(image_path) +panoptic_image = load_image(panoptic_path) + +# Generate the segmentation mask and create an overlay. +mask = generate_mask(panoptic_image) +segmentation_overlay = create_overlay(image, mask) + +# Display the resulting overlay. 
+cv.imshow("Segmentation Overlay", segmentation_overlay) +cv.waitKey(0) +cv.destroyAllWindows() + + From 1a7ed7d52005967a37f5e2516a53c239249a3071 Mon Sep 17 00:00:00 2001 From: JairdanC Date: Tue, 25 Feb 2025 20:15:08 -0500 Subject: [PATCH 04/17] removed duplicate file --- .../kitti_segmentation_visualizer.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 datasets/kitti/kitti_step/kitti_segmentation_visualizer.py diff --git a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py b/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py deleted file mode 100644 index 0400b34..0000000 --- a/datasets/kitti/kitti_step/kitti_segmentation_visualizer.py +++ /dev/null @@ -1,70 +0,0 @@ -# Panoptic Map + Image -> Mask Visualizer -# Rough draft for a segmentation visualizer that takes in an image and panoptic image, then overlays them -# This will be needed and useful for verifying that image augmentation does not cause errors in the masks - -import cv2 as cv -import numpy as np -import sys - -# Numpy array of colour constants used for mask matrix, uses BRG (bc of OpenCV), key values are set to the Semantic Labeling Convention Used in Kitti -LABEL_BGR = { - 0: np.array([128, 64, 128], dtype=np.uint8), # road - 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk - 2: np.array([70, 70, 70], dtype=np.uint8), # building - 3: np.array([102, 102, 156], dtype=np.uint8), # wall - 4: np.array([190, 153, 153], dtype=np.uint8), # fence - 5: np.array([153, 153, 153], dtype=np.uint8), # pole - 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light - 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign - 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation - 9: np.array([152, 251, 152], dtype=np.uint8), # terrain - 10: np.array([180, 130, 70], dtype=np.uint8), # sky - 11: np.array([60, 20, 220], dtype=np.uint8), # person - 12: np.array([0, 0, 255], dtype=np.uint8), # rider - 13: np.array([142, 0, 0], dtype=np.uint8), # car - 14: 
np.array([70, 0, 0], dtype=np.uint8), # truck - 15: np.array([100, 60, 0], dtype=np.uint8), # bus - 16: np.array([100, 80, 0], dtype=np.uint8), # train - 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle - 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle - 255: np.array([0, 0, 0], dtype=np.uint8) # void -} - -# Takes in path of image file and returns an openCV matrix -def file_loader(path): - imgMatrix = cv.imread(path) - if (np.count_nonzero(imgMatrix) == 0): - raise RuntimeError("Image was unable to be read") - return imgMatrix - - -# Extremely slow, works for now, but if the mask is generated in real time this needs to be optimized -# Takes in image matrix of panoptic map, returns mask -def generate_mask(imgMatrix): - height, width, channels = np.shape(imgMatrix) - maskMatrix = np.zeros((height, width, channels), dtype=np.uint8) - for i in range(height): - for j in range(width): - pixel = imgMatrix[i,j] #OpenCV uses BGR values - maskMatrix[i,j] = LABEL_BGR.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) - return maskMatrix - -def create_overlay(imgMatrix, maskMatrix): - alpha = 0.2 - segmentationMatrix = cv.addWeighted(maskMatrix, alpha, imgMatrix, 1 - alpha, 0) - return segmentationMatrix - -image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" -panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" - - -imgMatrix = file_loader(image_path) -maskMatrix = file_loader(panoptic_path) -maskMatrix = generate_mask(maskMatrix) - -segmentationMask = create_overlay(imgMatrix, maskMatrix) - -cv.imshow("Display window", segmentationMask) -k = cv.waitKey(0) - - From cf12ad82b6516be76a8498e937017493a9482d03 Mon Sep 17 00:00:00 2001 From: Jairdan Chopra Date: Tue, 25 Feb 2025 20:18:19 -0500 Subject: [PATCH 05/17] Update .gitignore --- .gitignore | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.gitignore b/.gitignore index 
6c6ce00..b6b9905 100644 --- a/.gitignore +++ b/.gitignore @@ -14,21 +14,8 @@ __pycache__/ logs/ results/ # Ignore datasets (you may want to keep them separate) -<<<<<<< HEAD -datasets/coco/ -#personal change not to be added to prod, but kept in my dev branch -datasets/kitti/kitti_step -!datasets/kitti/kitti_step/kitti_segmentation_visualizer.py -datasets/kitti/raw_data - - -# Ignore zipped datasets when aquired and clean data is added to repo (prevents redownloads) -*.zip - -======= coco/ google-cloud-sdk/ ->>>>>>> dev # Ignore model checkpoints and trained models *.ckpt *.pth From 86088ab80822717f1b4ad595415350986041620f Mon Sep 17 00:00:00 2001 From: Guojia La Date: Tue, 25 Feb 2025 20:51:42 -0500 Subject: [PATCH 06/17] Imported dataloader to detection_model.py and created loader --- src/models/detection_model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e69de29..749b6a1 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -0,0 +1,6 @@ +import datasets.data_loader as dataloader + +loader = dataloader.create_loader("datasets/stop_signs_dataset") + + + From 3e9b9f40fc9408b1c2806ca9182a52561106056e Mon Sep 17 00:00:00 2001 From: Tygo Date: Wed, 26 Feb 2025 12:41:02 -0500 Subject: [PATCH 07/17] detection model iteration 1 --- src/models/detection_model.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e69de29..217f48a 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -0,0 +1,25 @@ +import torch +import torch.nn as nn + +# we want 7 layers - conv1, pool1, relu1, conv2, pool2, relu2, fc1 (fully connected) +class SmallCNN(nn.Module): + def __init__(self) -> None: + super().__init__(self) + self.flatten = nn.Flatten() + self.conv1 = nn.Conv2d() + self.stop_sign_detection = nn.Sequential( + # we want 3 channels because our input size 
is 128x128x3 + # 64 features, square kernel of size 3 + nn.functional.conv2d(3, 64, 3, 3), + nn.pool2d(kernel_size=2,stride=2), + nn.ReLU(), + nn.functional.Conv2d(3, 32, 3, 3), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.ReLU(), + nn.Linear(9216, 32) + nn.Linear(32,2) + ) + + def forward(): + +# detection_model = SmallCNN() From acd47c1aa9ef54fb32e3f5e48f4b0e6aed25f024 Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 13:00:40 -0500 Subject: [PATCH 08/17] Updated data_loader.py with dataset preprocessing --- datasets/data_loader.py | 107 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index 1720a45..fc59b86 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -1,8 +1,110 @@ """ -Encapsulates DataLoader-related logic, including splitting and parallel loading. +Encapsulates DataLoader-related logic, including splitting and parallel loading and transformations. """ import torch -from torch.utils.data import DataLoader, random_split +import cv2 +import numpy as np +import torchvision.transforms as transforms +from PIL import Image +from torch.utils.data import DataLoader, random_split, Dataset + +def apply_cv2_transforms(img): + """ + Applies OpenCV-based augmentations dynamically when images are loaded. 
+ :param img: PIL Image (converted to OpenCV format) + :return: Transformed PIL Image + """ + img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) + + # **Brightness & Contrast Adjustments** + alpha = np.random.uniform(0.8, 1.5) # Contrast factor + beta = np.random.randint(-30, 30) # Brightness offset + img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) + + # **Saturation & Hue Adjustments** + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) # Convert to HSV + img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.7, 1.3) # Modify saturation + img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-10, 10) # Modify hue + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) # Convert back to RGB + + # **Gaussian Blur & Motion Blur** + if np.random.rand() < 0.3: + img = cv2.GaussianBlur(img, (5, 5), 0) # Simulate blur + + if np.random.rand() < 0.2: + kernel_size = 3 + kernel_motion_blur = np.zeros((kernel_size, kernel_size)) + kernel_motion_blur[int((kernel_size - 1) / 2), :] = np.ones(kernel_size) + kernel_motion_blur = kernel_motion_blur / kernel_size + img = cv2.filter2D(img, -1, kernel_motion_blur) + + # **JPEG Compression & Gaussian Noise** + if np.random.rand() < 0.3: + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(30, 90)] + _, enc_img = cv2.imencode('.jpg', img, encode_param) + img = cv2.imdecode(enc_img, cv2.IMREAD_UNCHANGED) + + if np.random.rand() < 0.3: + noise = np.random.normal(0, 10, img.shape).astype(np.uint8) + img = cv2.add(img, noise) + + # **Selective Red Boosting** + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + img_hsv[..., 1] = img_hsv[..., 1] * 1.1 # Slightly boost red saturation + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) + + # **Weather Simulation (Fog & Shadows)** + if np.random.rand() < 0.2: + fog_intensity = np.random.uniform(0.3, 0.7) + fog = np.full_like(img, 255, dtype=np.uint8) + img = cv2.addWeighted(img, 1 - fog_intensity, fog, fog_intensity, 0) + + if np.random.rand() < 0.2: + h, w, _ = 
img.shape + shadow = np.random.uniform(0.3, 0.7, (h, w, 3)) * 255 + img = cv2.addWeighted(img, 1, shadow.astype(np.uint8), -0.5, 0) + + return Image.fromarray(img) # Convert back to PIL format + +# ========== APPLY TRANSFORMATION PIPELINE ========== +def get_stop_sign_transforms(): + """ + Returns the best transformation pipeline for dynamic augmentation during training. + """ + return transforms.Compose([ + transforms.Lambda(lambda img: apply_cv2_transforms(img)), # Apply OpenCV-based augmentations dynamically + + # Torchvision Transformations + transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), + transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.ToTensor(), + transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + ]) + +# ========== MODIFIED DATASET CLASS ========== +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that returns both the original and augmented images. + """ + def __init__(self, image_paths, labels, transform=None): + self.image_paths = image_paths # List of image file paths + self.labels = labels # Corresponding labels + self.transform = transform # Transformation pipeline + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image = Image.open(self.image_paths[idx]).convert("RGB") # Load image + label = self.labels[idx] # Get label + + original_image = transforms.ToTensor()(image) # Convert original image to tensor + + augmented_image = self.transform(image) if self.transform else original_image # Apply augmentation + + return original_image, augmented_image, label # Return both images and label + def create_dataloader(dataset, batch_size=8, shuffle=True, num_workers=4): """ @@ -44,6 +146,7 @@ def create_train_val_test_loaders(dataset, batch_size=8, train_ratio=0.7, test_r Combines dataset splitting and DataLoader creation. 
:return: (train_loader, test_loader, val_loader) """ + dataset = StopSignDataset(image_paths=image_paths, labels=labels, transform=get_stop_sign_transforms()) train_dataset, test_dataset, val_dataset = split_dataset(dataset, train_ratio, test_ratio) train_loader = create_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_loader = create_dataloader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) From 991a650286d7dc0a0bcb4c599245b5e460b183d4 Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 13:23:30 -0500 Subject: [PATCH 09/17] Updated .gitignore to ignore WEAP_CV/ --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b6b9905..8114324 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ + # Ignore the virtual environment venv/ - +WEAP_CV/ # Ignore Python cache files __pycache__/ *.pyc From 0c237dc84f06664d7cf0b7a9db75cbce8704602a Mon Sep 17 00:00:00 2001 From: Zayan Date: Thu, 27 Feb 2025 16:18:29 -0500 Subject: [PATCH 10/17] Updated dataset pipeline with augmentations --- datasets/data_loader.py | 84 ++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index fc59b86..536e747 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -8,6 +8,7 @@ from PIL import Image from torch.utils.data import DataLoader, random_split, Dataset + def apply_cv2_transforms(img): """ Applies OpenCV-based augmentations dynamically when images are loaded. 
@@ -16,96 +17,69 @@ def apply_cv2_transforms(img): """ img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) - # **Brightness & Contrast Adjustments** - alpha = np.random.uniform(0.8, 1.5) # Contrast factor - beta = np.random.randint(-30, 30) # Brightness offset + alpha = np.random.uniform(0.9, 1.2) # Reduced contrast variation + beta = np.random.randint(-15, 15) # Reduced brightness shift img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) - # **Saturation & Hue Adjustments** - img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) # Convert to HSV - img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.7, 1.3) # Modify saturation - img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-10, 10) # Modify hue - img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) # Convert back to RGB - - # **Gaussian Blur & Motion Blur** - if np.random.rand() < 0.3: - img = cv2.GaussianBlur(img, (5, 5), 0) # Simulate blur + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.85, 1.15) + img_hsv[..., 0] = img_hsv[..., 0] + np.random.randint(-5, 5) + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) if np.random.rand() < 0.2: + img = cv2.GaussianBlur(img, (3, 3), 0) + + if np.random.rand() < 0.15: kernel_size = 3 kernel_motion_blur = np.zeros((kernel_size, kernel_size)) kernel_motion_blur[int((kernel_size - 1) / 2), :] = np.ones(kernel_size) kernel_motion_blur = kernel_motion_blur / kernel_size img = cv2.filter2D(img, -1, kernel_motion_blur) - # **JPEG Compression & Gaussian Noise** - if np.random.rand() < 0.3: - encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(30, 90)] + if np.random.rand() < 0.2: + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(50, 90)] _, enc_img = cv2.imencode('.jpg', img, encode_param) img = cv2.imdecode(enc_img, cv2.IMREAD_UNCHANGED) - if np.random.rand() < 0.3: - noise = np.random.normal(0, 10, img.shape).astype(np.uint8) - img = cv2.add(img, noise) - - # **Selective 
Red Boosting** - img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) - img_hsv[..., 1] = img_hsv[..., 1] * 1.1 # Slightly boost red saturation - img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) - - # **Weather Simulation (Fog & Shadows)** - if np.random.rand() < 0.2: - fog_intensity = np.random.uniform(0.3, 0.7) - fog = np.full_like(img, 255, dtype=np.uint8) - img = cv2.addWeighted(img, 1 - fog_intensity, fog, fog_intensity, 0) - if np.random.rand() < 0.2: - h, w, _ = img.shape - shadow = np.random.uniform(0.3, 0.7, (h, w, 3)) * 255 - img = cv2.addWeighted(img, 1, shadow.astype(np.uint8), -0.5, 0) + noise = np.random.normal(0, 5, img.shape).astype(np.uint8) + img = cv2.add(img, noise) - return Image.fromarray(img) # Convert back to PIL format + return Image.fromarray(img) -# ========== APPLY TRANSFORMATION PIPELINE ========== def get_stop_sign_transforms(): """ - Returns the best transformation pipeline for dynamic augmentation during training. + Returns the transformation pipeline for dynamic augmentation during training. """ return transforms.Compose([ - transforms.Lambda(lambda img: apply_cv2_transforms(img)), # Apply OpenCV-based augmentations dynamically - - # Torchvision Transformations - transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), - transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)), - transforms.RandomHorizontalFlip(p=0.5), + transforms.Lambda(lambda img: apply_cv2_transforms(img)), + transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.05), + transforms.RandomAffine(degrees=7, translate=(0.05, 0.05), scale=(0.95, 1.05)), + transforms.RandomHorizontalFlip(p=0.3), transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) -# ========== MODIFIED DATASET CLASS ========== class StopSignDataset(Dataset): """ - Custom dataset class for Stop Sign detection that returns both the original and augmented images. 
+ Custom dataset class for Stop Sign detection that applies transformations during training. """ def __init__(self, image_paths, labels, transform=None): - self.image_paths = image_paths # List of image file paths - self.labels = labels # Corresponding labels - self.transform = transform # Transformation pipeline + self.image_paths = image_paths + self.labels = labels + self.transform = transform def __len__(self): return len(self.image_paths) def __getitem__(self, idx): - image = Image.open(self.image_paths[idx]).convert("RGB") # Load image - label = self.labels[idx] # Get label + image = Image.open(self.image_paths[idx]).convert("RGB") + label = self.labels[idx] - original_image = transforms.ToTensor()(image) # Convert original image to tensor + augmented_image = self.transform(image) if self.transform else transforms.ToTensor()(image) - augmented_image = self.transform(image) if self.transform else original_image # Apply augmentation - - return original_image, augmented_image, label # Return both images and label + return augmented_image, label # Only return augmented image and label - def create_dataloader(dataset, batch_size=8, shuffle=True, num_workers=4): """ Creates and returns a DataLoader @@ -141,7 +115,7 @@ def split_dataset(dataset, train_ratio=0.7, test_ratio=0.15, seed=42): torch.manual_seed(seed) return random_split(dataset, [train_size, test_size, val_size]) -def create_train_val_test_loaders(dataset, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): +def create_train_val_test_loaders(image_paths, labels, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): """ Combines dataset splitting and DataLoader creation. 
:return: (train_loader, test_loader, val_loader) From 7be75fc16328d6bcbc9ab70fdf2d9b0eb67d8e9c Mon Sep 17 00:00:00 2001 From: Tygo Date: Thu, 27 Feb 2025 20:30:38 -0500 Subject: [PATCH 11/17] fix indent --- src/models/detection_model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index 217f48a..b2f9f3c 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -16,10 +16,8 @@ def __init__(self) -> None: nn.functional.Conv2d(3, 32, 3, 3), nn.MaxPool2d(kernel_size=2, stride=2), nn.ReLU(), - nn.Linear(9216, 32) + nn.Linear(9216, 32), nn.Linear(32,2) ) - - def forward(): # detection_model = SmallCNN() From 6550f12c3a14f8dc00ce9a63004ee2aab08c0276 Mon Sep 17 00:00:00 2001 From: Tygo Date: Fri, 7 Mar 2025 14:21:19 -0500 Subject: [PATCH 12/17] cleanup + review --- datasets/data_loader.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index 536e747..d057223 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -15,11 +15,9 @@ def apply_cv2_transforms(img): :param img: PIL Image (converted to OpenCV format) :return: Transformed PIL Image """ + img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) - - alpha = np.random.uniform(0.9, 1.2) # Reduced contrast variation - beta = np.random.randint(-15, 15) # Reduced brightness shift - img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta) + img = cv2.convertScaleAbs(img, alpha=np.random.uniform(0.9, 1.2), beta=np.random.randint(-15, 15)) # Reduced contrast variation + Reduced brightness shift img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) img_hsv[..., 1] = img_hsv[..., 1] * np.random.uniform(0.85, 1.15) From a72603a84a6e4f007c8842b7c7f62926a1ba066f Mon Sep 17 00:00:00 2001 From: Tygo Date: Fri, 7 Mar 2025 18:37:44 -0500 Subject: [PATCH 13/17] finish first iteration of model --- src/models/detection_model.py | 151 
++++++++++++++++++++++++++++++---- 1 file changed, 137 insertions(+), 14 deletions(-) diff --git a/src/models/detection_model.py b/src/models/detection_model.py index b2f9f3c..e2067fe 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -1,23 +1,146 @@ import torch import torch.nn as nn +import torch.optim as optim +from torch.optim import lr_scheduler +import torch.backends.cudnn as cudnn +import numpy as np -# we want 7 layers - conv1, pool1, relu1, conv2, pool2, relu2, fc1 (fully connected) +''' +Define a small CNN. +We want 8 layers - conv1, batch + poo1, relu1, conv2, batch + pool2, relu2, fc1, fc2 +Split up the two parts 1.Model 2.Classifier +''' class SmallCNN(nn.Module): - def __init__(self) -> None: - super().__init__(self) - self.flatten = nn.Flatten() - self.conv1 = nn.Conv2d() - self.stop_sign_detection = nn.Sequential( - # we want 3 channels because our input size is 128x128x3 - # 64 features, square kernel of size 3 - nn.functional.conv2d(3, 64, 3, 3), - nn.pool2d(kernel_size=2,stride=2), + def __init__(self, num_classes=2, input_size=128) -> None: + super().__init__() + + # calculate size after convolution and pooling + feature_size = ((input_size - 2) // 2 - 2) // 2 + # Calculate the flattened feature size + self.flat_features = 64 * feature_size * feature_size + + # Small CNN part + self.stop_sign_cnn = nn.Sequential( + # First conv block + nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=0), # 64 features/outputs, input channels = 3 (RGB), square kernel of size 3, + nn.BatchNorm2d(32), + nn.MaxPool2d(kernel_size=2,stride=2), nn.ReLU(), - nn.functional.Conv2d(3, 32, 3, 3), + + # Second conv block + nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0), + nn.BatchNorm2d(64), nn.MaxPool2d(kernel_size=2, stride=2), nn.ReLU(), - nn.Linear(9216, 32), - nn.Linear(32,2) + + # Dropout for regularization to avoid overfitting + nn.Dropout2d(0.20) + ) + + # Fully connected part + self.stop_sign_classifier = nn.Sequential( + 
nn.Flatten(), + nn.Linear(self.flat_features, 128), + nn.ReLU(), + nn.Dropout(0.5), + nn.Linear(128, num_classes) ) -# detection_model = SmallCNN() + def forward(self, x): + x = self.stop_sign_cnn(x) + x = self.stop_sign_classifier(x) + return x + +# Create the model +detection_model = SmallCNN() + +# Check if GPU is available +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +detection_model.to(device) + +# define a loss function and optimizer +loss_fcn = nn.CrossEntropyLoss() +optimizer = optim.Adam(detection_model.parameters(), lr=0.001, betas=(0.5, 0.999), weight_decay=1e-4) # weight decay for regularization + +# learning rate scheduler +scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1) + +def train_model(model, train_loader, val_loader, num_epochs = 25, ): + best = 0.0 # holds best accuracy + + for epoch in range(num_epochs): + print(f'Epoch {epoch + 1}/{num_epochs}') + print('-' * 10) + running_loss = 0.0 + + model.train() + running_loss = 0.0 + running_corrects = 0 + + for inputs, labels in train_loader: + print('.', end='', flush=True) + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) # outputs is a tensor of shape [batch_size,2] and torch.max ignores the first value + loss = loss_fcn(outputs, labels) + + # backward pass and optimize + loss.backward() + optimizer.step() + + # loss and corrects + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + + epoch_loss = running_loss / len(train_loader.dataset) + epoch_acc = running_corrects.double() / len(train_loader.dataset) + + print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + + # Validation phase + model.eval() # Set model to evaluate mode + val_loss = 0.0 + val_corrects = 0 + + # No gradient calculation needed for validation + with
torch.no_grad(): + for inputs, labels in val_loader: + inputs = inputs.to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = loss_fcn(outputs, labels) + + # Statistics + val_loss += loss.item() * inputs.size(0) + val_corrects += torch.sum(preds == labels.data) + + val_loss = val_loss / len(val_loader.dataset) + val_acc = val_corrects.double() / len(val_loader.dataset) + + print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}') + + # Update learning rate based on validation loss + scheduler.step(val_loss) + + # Save the best model + if val_acc > best_acc: + best_acc = val_acc + torch.save(model.state_dict(), 'best_model.pth') + print(f'New best model saved with accuracy: {val_acc:.4f}') + + print(f'Best validation accuracy: {best_acc:.4f}') + return model + + + + + From e892902319a80c48fd7399e56ccb72045fdf8327 Mon Sep 17 00:00:00 2001 From: TCrawley11 Date: Wed, 12 Mar 2025 14:50:13 -0400 Subject: [PATCH 14/17] Added Ubuntu Environment, start lidar and cam fusion --- src/3D/cam_to_lidar.py | 6 ++ src/models/detection_model.py | 2 +- ubuntuEnv.yml | 139 ++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 src/3D/cam_to_lidar.py create mode 100644 ubuntuEnv.yml diff --git a/src/3D/cam_to_lidar.py b/src/3D/cam_to_lidar.py new file mode 100644 index 0000000..3a4ede2 --- /dev/null +++ b/src/3D/cam_to_lidar.py @@ -0,0 +1,6 @@ +import numpy as np + +# draw center line, +center_coord = [0,0] + +# get some test pics from KITTI \ No newline at end of file diff --git a/src/models/detection_model.py b/src/models/detection_model.py index e2067fe..a1bdbe2 100644 --- a/src/models/detection_model.py +++ b/src/models/detection_model.py @@ -101,7 +101,7 @@ def train_model(model, train_loader, val_loader, num_epochs = 25, ): epoch_loss = running_loss / len(train_loader.dataset) epoch_acc = running_corrects.double() / len(train_loader.dataset) - print(f'Train 
Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') # Validation phase model.eval() # Set model to evaluate mode diff --git a/ubuntuEnv.yml b/ubuntuEnv.yml new file mode 100644 index 0000000..be5dcd2 --- /dev/null +++ b/ubuntuEnv.yml @@ -0,0 +1,139 @@ +name: ubuntuEnv +channels: + - defaults + - conda-forge + - https://repo.anaconda.com/pkgs/main + - https://repo.anaconda.com/pkgs/r +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - aom=3.6.0=h6a678d5_0 + - blas=1.1=openblas + - brotli-python=1.0.9=py310hd8f1fbe_7 + - bzip2=1.0.8=h5eee18b_6 + - c-ares=1.19.1=h5eee18b_0 + - ca-certificates=2025.2.25=h06a4308_0 + - cairo=1.16.0=hb05425b_5 + - certifi=2025.1.31=pyhd8ed1ab_0 + - cffi=1.15.0=py310h0fdd8cc_0 + - charset-normalizer=3.4.1=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_1 + - cpython=3.10.16=py310hd8ed1ab_1 + - cyrus-sasl=2.1.28=h52b45da_1 + - dav1d=1.2.1=h5eee18b_0 + - dbus=1.13.18=hb2f20db_0 + - eigen=3.4.0=h4bd325d_0 + - expat=2.6.4=h6a678d5_0 + - ffmpeg=6.1.1=h2a67f75_2 + - filelock=3.17.0=pyhd8ed1ab_0 + - fontconfig=2.14.1=h55d465d_3 + - freetype=2.10.4=h0708190_1 + - fsspec=2025.3.0=pyhd8ed1ab_0 + - giflib=5.2.1=h36c2ea0_2 + - glib=2.78.4=h6a678d5_0 + - glib-tools=2.78.4=h6a678d5_0 + - gmp=6.2.1=h58526e2_0 + - gmpy2=2.1.2=py310h92f7908_0 + - graphite2=1.3.14=h295c915_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - h2=4.2.0=pyhd8ed1ab_0 + - harfbuzz=10.2.0=hf296adc_0 + - hdf5=1.14.5=h2b7332f_2 + - hpack=4.1.0=pyhd8ed1ab_0 + - hyperframe=6.1.0=pyhd8ed1ab_0 + - icu=73.1=h6a678d5_0 + - idna=3.10=pyhd8ed1ab_1 + - jinja2=3.1.6=pyhd8ed1ab_0 + - jpeg=9e=h166bdaf_1 + - kagglehub=0.3.8=pyhd8ed1ab_0 + - krb5=1.20.1=h143b758_1 + - lame=3.100=h7f98852_1001 + - lcms2=2.16=hb9589c4_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - leptonica=1.82.0=h42c8aad_2 + - lerc=4.0.0=h6a678d5_0 + - libabseil=20240116.2=cxx17_h6a678d5_0 + - libarchive=3.7.7=hfab0078_0 + - 
libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libclang=14.0.6=default_hc6dbbc7_2 + - libclang13=14.0.6=default_he11475f_2 + - libcups=2.4.2=h2d74bed_1 + - libcurl=8.12.1=hc9e6f67_0 + - libdeflate=1.22=h5eee18b_0 + - libedit=3.1.20230828=h5eee18b_0 + - libev=4.33=h516909a_1 + - libffi=3.4.4=h6a678d5_1 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran=3.0.0=1 + - libgfortran-ng=13.2.0=h69a702a_0 + - libgfortran5=13.2.0=ha4646dd_0 + - libglib=2.78.4=hdc74915_0 + - libgomp=11.2.0=h1234567_1 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libllvm14=14.0.6=hecde1de_4 + - libnghttp2=1.57.0=h2d74bed_0 + - libogg=1.3.4=h7f98852_1 + - libopenblas=0.3.21=h043d6bf_0 + - libopus=1.3.1=h7f98852_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=17.4=hdbd6064_0 + - libprotobuf=4.25.3=he621ea3_0 + - libssh2=1.11.1=h251f7ec_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libtheora=1.1.1=h7f98852_1005 + - libtiff=4.5.1=hffd6297_1 + - libuuid=1.41.5=h5eee18b_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libvpx=1.13.1=h6a678d5_0 + - libwebp=1.3.2=h11a3e52_0 + - libwebp-base=1.3.2=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.3=he3ba5ed_0 + - libxml2=2.13.5=hfdd30dd_0 + - lz4-c=1.9.4=h6a678d5_1 + - markupsafe=2.1.1=py310h5764c6d_1 + - mpc=1.2.1=h9f54685_0 + - mpfr=4.1.0=h9202a9a_1 + - mpmath=1.3.0=pyhd8ed1ab_1 + - mysql=8.4.0=h29a9f33_1 + - ncurses=6.4=h6a678d5_0 + - networkx=3.4=pyhd8ed1ab_0 + - numpy=1.22.3=py310h4ef5377_2 + - openblas=0.3.4=ha44fe06_0 + - opencv=4.10.0=py310h2484693_2 + - openh264=2.1.1=h780b84a_0 + - openjpeg=2.5.2=he7f1fd0_0 + - openldap=2.6.4=h42fbc30_0 + - openssl=3.0.16=h5eee18b_0 + - packaging=24.2=pyhd8ed1ab_2 + - pcre2=10.42=hebb0a14_1 + - pillow=11.1.0=py310hcea889d_0 + - pip=25.0=py310h06a4308_0 + - pixman=0.40.0=h36c2ea0_0 + - pycparser=2.22=pyh29332c3_1 + - pysocks=1.7.1=pyha55dd90_7 + - python=3.10.16=he870216_1 + - python_abi=3.10=2_cp310 + - pytorch=2.3.0=cpu_py310h1ce4368_1 + - qt-main=5.15.2=hb6262e9_12 + - 
readline=8.2=h5eee18b_0 + - requests=2.32.3=pyhd8ed1ab_1 + - setuptools=75.8.0=py310h06a4308_0 + - sqlite=3.45.3=h5eee18b_0 + - sympy=1.13.3=pyh2585a3b_105 + - tesseract=5.2.0=h6a678d5_2 + - tk=8.6.14=h39e8969_0 + - torchvision=0.18.1=cpu_py310h54128f0_0 + - tqdm=4.67.1=pyhd8ed1ab_1 + - typing_extensions=4.12.2=pyha770c72_1 + - tzdata=2025a=h04d1e81_0 + - urllib3=2.3.0=pyhd8ed1ab_0 + - wheel=0.45.1=py310h06a4308_0 + - xz=5.6.4=h5eee18b_1 + - zlib=1.2.13=h5eee18b_1 + - zstandard=0.23.0=py310h2c38b39_1 + - zstd=1.5.6=hc292b87_0 +prefix: /home/tcrawley11/miniconda3/envs/ubuntuEnv From c96b8ccf086a270beea5c951d9059d61cc51524b Mon Sep 17 00:00:00 2001 From: TCrawley11 Date: Wed, 12 Mar 2025 16:08:39 -0400 Subject: [PATCH 15/17] Implemented script to download dataset from roboflow url --- datasets/data_loader.py | 45 +++++++++++++++++++++++++++++++++++++++++ main.py | 6 ++++++ 2 files changed, 51 insertions(+) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index d057223..a2fc708 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -8,6 +8,51 @@ from PIL import Image from torch.utils.data import DataLoader, random_split, Dataset +import os +import requests +import zipfile +from tqdm import tqdm # used for progress bars in downloading the dataset + +# this function will download the unaugmented train and val datasets from roboflow +def download_roboflow_dataset(dataset_url, output_dir='stop_sign_dataset'): + """ + Download and extract dataset from Roboflow + + Args: + dataset_url: URL to download the dataset from + output_dir: Directory where the dataset will be extracted + """ + # Create full paths + base_dir = os.path.dirname(os.path.abspath(__file__)) + output_path = os.path.join(base_dir, output_dir) + zip_path = os.path.join(output_path, "temp_dataset.zip") + + # Create directory if it doesn't exist + os.makedirs(output_path, exist_ok=True) + + # Console messages + print(f'Downloading dataset to {zip_path}...') + try: + response = 
requests.get(dataset_url, stream=True) + response.raise_for_status() # raise status to check for bad response + + dataset_size = int(response.headers.get('content-length', 0)) + + with open(zip_path, 'wb') as file: + for data in tqdm(response.iter_content(chunk_size=1024), total=dataset_size, unit='B', unit_scale=True): + file.write(data) + + print("\nDownload complete. Extracting files...") + + # extract zips + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(output_path) + + # delete downloaded zip file + os.remove(zip_path) + print(f'Extraction complete. Files extracted to {output_path}') + except requests.exceptions.RequestException as e: + print(f"Error downloading the dataset: {e}") def apply_cv2_transforms(img): """ diff --git a/main.py b/main.py index 77ddebb..c676d5b 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,10 @@ from datasets.data_loader import create_train_val_test_loaders from torchvision import transforms from train import train_model +import datasets.data_loader as data_loader def main(): + ''' data_dir = "./coco" transform = transforms.Compose([ @@ -45,6 +47,10 @@ def main(): lr=1e-3, device=device ) + ''' + + # testing the dataloader + data_loader.download_roboflow_dataset('https://app.roboflow.com/ds/LXl5gthuky?key=9cEzxHzAiX') if __name__ == '__main__': main() From 339c4a198498c9467a1d40345f4cd8f76510de1a Mon Sep 17 00:00:00 2001 From: Zayan Date: Sat, 15 Mar 2025 04:47:45 -0400 Subject: [PATCH 16/17] Fixed dataset integration and transformation issues --- datasets/data_loader.py | 43 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/datasets/data_loader.py b/datasets/data_loader.py index a2fc708..0b941ab 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -54,6 +54,7 @@ def download_roboflow_dataset(dataset_url, output_dir='stop_sign_dataset'): except requests.exceptions.RequestException as e: print(f"Error downloading the dataset: {e}") + def 
apply_cv2_transforms(img): """ Applies OpenCV-based augmentations dynamically when images are loaded. @@ -168,4 +169,44 @@ def create_train_val_test_loaders(image_paths, labels, batch_size=8, train_ratio train_loader = create_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_loader = create_dataloader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) val_loader = create_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) - return train_loader, test_loader, val_loader \ No newline at end of file + return train_loader, test_loader, val_loader + + +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that applies transformations during training. + """ + def __init__(self, img_dir, transform=None): + self.img_dir = img_dir + self.transform = transform + self.img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")] + + def __len__(self): + return len(self.img_paths) + + def __getitem__(self, idx): + img_path = self.img_paths[idx] + img = Image.open(img_path).convert("RGB") + if self.transform: + img = self.transform(img) + return img + +def connect_dataset_with_transforms(dataset_path): + """ + Connects the downloaded dataset with transformations. + Ensures dataset is properly loaded with augmentations before training. 
+ """ + print("Applying transformations and preparing dataloaders...") + + transform_pipeline = get_stop_sign_transforms() + dataset = StopSignDataset(dataset_path, transform=transform_pipeline) + + train_size = int(0.8 * len(dataset)) + val_size = len(dataset) - train_size + train_dataset, val_dataset = random_split(dataset, [train_size, val_size]) + + train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True, persistent_workers=True) + val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True) + + print("Transformations applied successfully! Training and validation dataloaders ready.") + return train_loader, val_loader From 2189e04bc63d4b8adb72b988931d12980a4470c8 Mon Sep 17 00:00:00 2001 From: Zain Syed <145395172+zsyed44@users.noreply.github.com> Date: Sat, 15 Mar 2025 05:11:39 -0400 Subject: [PATCH 17/17] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8114324..b56e306 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Ignore the virtual environment +.venv/ venv/ WEAP_CV/ # Ignore Python cache files