diff --git a/.gitignore b/.gitignore index b6b9905..b56e306 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ + # Ignore the virtual environment +.venv/ venv/ - +WEAP_CV/ # Ignore Python cache files __pycache__/ *.pyc diff --git a/datasets/data_loader.py b/datasets/data_loader.py index 1720a45..0b941ab 100644 --- a/datasets/data_loader.py +++ b/datasets/data_loader.py @@ -1,9 +1,129 @@ """ -Encapsulates DataLoader-related logic, including splitting and parallel loading. +Encapsulates DataLoader-related logic, including splitting and parallel loading and transformations. """ import torch -from torch.utils.data import DataLoader, random_split +import cv2 +import numpy as np +import torchvision.transforms as transforms +from PIL import Image +from torch.utils.data import DataLoader, random_split, Dataset +import os +import requests +import zipfile +from tqdm import tqdm # used for progress bars in downloading the dataset + +# this function will download the unaugmented train and val datasets from roboflow +def download_roboflow_dataset(dataset_url, output_dir='stop_sign_dataset'): + """ + Download and extract dataset from Roboflow + + Args: + dataset_url: URL to download the dataset from + output_dir: Directory where the dataset will be extracted + """ + # Create full paths + base_dir = os.path.dirname(os.path.abspath(__file__)) + output_path = os.path.join(base_dir, output_dir) + zip_path = os.path.join(output_path, "temp_dataset.zip") + + # Create directory if it doesn't exist + os.makedirs(output_path, exist_ok=True) + + # Console messages + print(f'Downloading dataset to {zip_path}...') + try: + response = requests.get(dataset_url, stream=True) + response.raise_for_status() # raise status to check for bad response + + dataset_size = int(response.headers.get('content-length', 0)) + + with open(zip_path, 'wb') as file: + for data in tqdm(response.iter_content(chunk_size=1024), total=dataset_size, unit='B', unit_scale=True): + file.write(data) + + 
print("\nDownload complete. Extracting files...") + + # extract zips + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(output_path) + + # delete downloaded zip file + os.remove(zip_path) + print(f'Extraction complete. Files extracted to {output_path}') + except requests.exceptions.RequestException as e: + print(f"Error downloading the dataset: {e}") + + +def apply_cv2_transforms(img): + """ + Applies OpenCV-based augmentations dynamically when images are loaded. + :param img: PIL Image (converted to OpenCV format) + :return: Transformed PIL Image + """ + + img = np.array(img) # Convert PIL image to OpenCV format (NumPy array) + img = cv2.convertScaleAbs(img, alpha=np.random.uniform(0.9, 1.2), beta=np.random.randint(-15, 15)) # Reduced contrast variation + Reduced brightness shift + + img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.int16) # int16 to avoid uint8 wraparound + img_hsv[..., 1] = np.clip(img_hsv[..., 1] * np.random.uniform(0.85, 1.15), 0, 255) + img_hsv[..., 0] = (img_hsv[..., 0] + np.random.randint(-5, 5)) % 180 # OpenCV hue range is 0-179 + img = cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2RGB) + + if np.random.rand() < 0.2: + img = cv2.GaussianBlur(img, (3, 3), 0) + + if np.random.rand() < 0.15: + kernel_size = 3 + kernel_motion_blur = np.zeros((kernel_size, kernel_size)) + kernel_motion_blur[int((kernel_size - 1) / 2), :] = np.ones(kernel_size) + kernel_motion_blur = kernel_motion_blur / kernel_size + img = cv2.filter2D(img, -1, kernel_motion_blur) + + if np.random.rand() < 0.2: + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), np.random.randint(50, 90)] + _, enc_img = cv2.imencode('.jpg', img, encode_param) + img = cv2.imdecode(enc_img, cv2.IMREAD_UNCHANGED) + + if np.random.rand() < 0.2: + noise = np.random.normal(0, 5, img.shape) # keep signed floats; uint8 cast wrapped negatives to ~250 + img = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8) + + return Image.fromarray(img) + +def get_stop_sign_transforms(): + """ + Returns the transformation pipeline for dynamic augmentation during training. 
+ """ + return transforms.Compose([ + transforms.Lambda(lambda img: apply_cv2_transforms(img)), + transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.05), + transforms.RandomAffine(degrees=7, translate=(0.05, 0.05), scale=(0.95, 1.05)), + transforms.RandomHorizontalFlip(p=0.3), + transforms.ToTensor(), + transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + ]) + +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that applies transformations during training. + """ + def __init__(self, image_paths, labels, transform=None): + self.image_paths = image_paths + self.labels = labels + self.transform = transform + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image = Image.open(self.image_paths[idx]).convert("RGB") + label = self.labels[idx] + + augmented_image = self.transform(image) if self.transform else transforms.ToTensor()(image) + + return augmented_image, label # Only return augmented image and label + def create_dataloader(dataset, batch_size=8, shuffle=True, num_workers=4): """ Creates and returns a DataLoader @@ -39,13 +159,54 @@ def split_dataset(dataset, train_ratio=0.7, test_ratio=0.15, seed=42): torch.manual_seed(seed) return random_split(dataset, [train_size, test_size, val_size]) -def create_train_val_test_loaders(dataset, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): +def create_train_val_test_loaders(image_paths, labels, batch_size=8, train_ratio=0.7, test_ratio=0.15, num_workers=4): """ Combines dataset splitting and DataLoader creation. 
:return: (train_loader, test_loader, val_loader) """ + dataset = StopSignDataset(image_paths=image_paths, labels=labels, transform=get_stop_sign_transforms()) train_dataset, test_dataset, val_dataset = split_dataset(dataset, train_ratio, test_ratio) train_loader = create_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_loader = create_dataloader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) val_loader = create_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) - return train_loader, test_loader, val_loader \ No newline at end of file + return train_loader, test_loader, val_loader + + +class StopSignDataset(Dataset): + """ + Custom dataset class for Stop Sign detection that applies transformations during training. + """ + def __init__(self, img_dir, transform=None): + self.img_dir = img_dir + self.transform = transform + self.img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")] + + def __len__(self): + return len(self.img_paths) + + def __getitem__(self, idx): + img_path = self.img_paths[idx] + img = Image.open(img_path).convert("RGB") + if self.transform: + img = self.transform(img) + return img + +def connect_dataset_with_transforms(dataset_path): + """ + Connects the downloaded dataset with transformations. + Ensures dataset is properly loaded with augmentations before training. 
+ """ + print("Applying transformations and preparing dataloaders...") + + transform_pipeline = get_stop_sign_transforms() + dataset = StopSignDataset(dataset_path, transform=transform_pipeline) + + train_size = int(0.8 * len(dataset)) + val_size = len(dataset) - train_size + train_dataset, val_dataset = random_split(dataset, [train_size, val_size]) + + train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True, persistent_workers=True) + val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True) + + print("Transformations applied successfully! Training and validation dataloaders ready.") + return train_loader, val_loader diff --git a/main.py b/main.py index 592ea2c..c676d5b 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,10 @@ from datasets.data_loader import create_train_val_test_loaders from torchvision import transforms from train import train_model +import datasets.data_loader as data_loader def main(): + ''' data_dir = "./coco" transform = transforms.Compose([ @@ -28,6 +30,7 @@ def main(): num_workers=2 ) + model = nn.Sequential( nn.Flatten(), nn.Linear(3*224*224, 100), @@ -44,6 +47,10 @@ def main(): lr=1e-3, device=device ) + ''' + + # testing the dataloader + data_loader.download_roboflow_dataset('https://app.roboflow.com/ds/LXl5gthuky?key=9cEzxHzAiX') if __name__ == '__main__': main() diff --git a/src/3D/cam_to_lidar.py b/src/3D/cam_to_lidar.py new file mode 100644 index 0000000..3a4ede2 --- /dev/null +++ b/src/3D/cam_to_lidar.py @@ -0,0 +1,6 @@ +import numpy as np + +# draw center line, +center_coord = [0,0] + +# get some test pics from KITTI \ No newline at end of file diff --git a/src/models/detection_model.py b/src/models/detection_model.py new file mode 100644 index 0000000..a1bdbe2 --- /dev/null +++ b/src/models/detection_model.py @@ -0,0 +1,146 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim import 
lr_scheduler +import torch.backends.cudnn as cudnn +import numpy as np + +''' +Define a small CNN. +We want 8 layers - conv1, batch + poo1, relu1, conv2, batch + pool2, relu2, fc1, fc2 +Split up the two parts 1.Model 2.Classifier +''' +class SmallCNN(nn.Module): + def __init__(self, num_classes=2, input_size=128) -> None: + super().__init__() + + # calculate size after convolution and pooling + feature_size = ((input_size - 2) // 2 - 2) // 2 + # Calculate the flattened feature size + self.flat_features = 64 * feature_size * feature_size + + # Small CNN part + self.stop_sign_cnn = nn.Sequential( + # First conv block + nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=0), # 64 features/outputs, input channels = 3 (RGB), square kernel of size 3, + nn.BatchNorm2d(32), + nn.MaxPool2d(kernel_size=2,stride=2), + nn.ReLU(), + + # Second conv block + nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0), + nn.BatchNorm2d(64), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.ReLU(), + + # Dropout for regularization to avoid overfitting + nn.Dropout2d(0.20) + ) + + # Fully connected part + self.stop_sign_classifier = nn.Sequential( + nn.Flatten(), + nn.Linear(self.flat_features, 128), + nn.ReLU(), + nn.Dropout(0.5), + nn.Linear(128, num_classes) + ) + + def forward(self, x): + x = self.stop_sign_cnn(x) + x = self.stop_sign_classifier(x) + return x + +# Create the model +detection_model = SmallCNN() + +# Check if GPU is available +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +detection_model.to(device) + +# define a loss function and optimizer +loss_fcn = nn.CrossEntropyLoss() +optimizer = optim.Adam(detection_model.parameters(), lr=0.001, betas=(0.5, 0.999), weight_decay=1e-4) # weigh decay for regularization + +# learning rate scheduler +scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1) + +def train_model(model, train_loader, val_loader, num_epochs = 25, ): + best = 0.0 # holds best accuracy + + for epoch in 
range(num_epochs): + print(f'Epoch {epoch + 1}/{num_epochs}') + print('-' * 10) + running_loss = 0.0 + + model.train() + running_loss = 0.0 + running_corrects = 0 + + for inputs, labels in train_loader: + print('.', end='', flush=True) + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) # outputs is a tensor of shape [batch_size,2] and torch.max ignores the first value + loss = loss_fcn(outputs, labels) + + # backward pass and optimize + loss.backward() + optimizer.step() + + # loss and corrects + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + + epoch_loss = running_loss / len(train_loader.dataset) + epoch_acc = running_corrects.double() / len(train_loader.dataset) + + print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}') + + # Validation phase + model.eval() # Set model to evaluate mode + val_loss = 0.0 + val_corrects = 0 + + # No gradient calculation needed for validation + with torch.no_grad(): + for inputs, labels in val_loader: + inputs = inputs.to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = loss_fcn(outputs, labels) + + # Statistics + val_loss += loss.item() * inputs.size(0) + val_corrects += torch.sum(preds == labels.data) + + val_loss = val_loss / len(val_loader.dataset) + val_acc = val_corrects.double() / len(val_loader.dataset) + + print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}') + + # Update learning rate based on validation loss + scheduler.step(val_loss) + + # Save the best model (was `best_acc`, which is never defined -> NameError) + if val_acc > best: + best = val_acc + torch.save(model.state_dict(), 'best_model.pth') + print(f'New best model saved with accuracy: {val_acc:.4f}') + + print(f'Best validation accuracy: {best:.4f}') + return model + + + + + diff --git a/src/models/lane_detection.py 
b/src/models/lane_detection.py index f7f070a..e328a9c 100644 --- a/src/models/lane_detection.py +++ b/src/models/lane_detection.py @@ -39,9 +39,9 @@ # Canny edge detection, black and white representation of edges in a frame canny = cv2.Canny(grayFrame, 100, 200) cv2.imshow("Canny edge detection", canny) - + # Display the resulting frame - #cv2.imshow('F1tenth Onboard Video', frame) + # cv2.imshow('F1tenth Onboard Video', frame) # define q as the exit button if cv2.waitKey(25) & 0xFF == ord('q'): diff --git a/ubuntuEnv.yml b/ubuntuEnv.yml new file mode 100644 index 0000000..be5dcd2 --- /dev/null +++ b/ubuntuEnv.yml @@ -0,0 +1,139 @@ +name: ubuntuEnv +channels: + - defaults + - conda-forge + - https://repo.anaconda.com/pkgs/main + - https://repo.anaconda.com/pkgs/r +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - aom=3.6.0=h6a678d5_0 + - blas=1.1=openblas + - brotli-python=1.0.9=py310hd8f1fbe_7 + - bzip2=1.0.8=h5eee18b_6 + - c-ares=1.19.1=h5eee18b_0 + - ca-certificates=2025.2.25=h06a4308_0 + - cairo=1.16.0=hb05425b_5 + - certifi=2025.1.31=pyhd8ed1ab_0 + - cffi=1.15.0=py310h0fdd8cc_0 + - charset-normalizer=3.4.1=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_1 + - cpython=3.10.16=py310hd8ed1ab_1 + - cyrus-sasl=2.1.28=h52b45da_1 + - dav1d=1.2.1=h5eee18b_0 + - dbus=1.13.18=hb2f20db_0 + - eigen=3.4.0=h4bd325d_0 + - expat=2.6.4=h6a678d5_0 + - ffmpeg=6.1.1=h2a67f75_2 + - filelock=3.17.0=pyhd8ed1ab_0 + - fontconfig=2.14.1=h55d465d_3 + - freetype=2.10.4=h0708190_1 + - fsspec=2025.3.0=pyhd8ed1ab_0 + - giflib=5.2.1=h36c2ea0_2 + - glib=2.78.4=h6a678d5_0 + - glib-tools=2.78.4=h6a678d5_0 + - gmp=6.2.1=h58526e2_0 + - gmpy2=2.1.2=py310h92f7908_0 + - graphite2=1.3.14=h295c915_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - h2=4.2.0=pyhd8ed1ab_0 + - harfbuzz=10.2.0=hf296adc_0 + - hdf5=1.14.5=h2b7332f_2 + - hpack=4.1.0=pyhd8ed1ab_0 + - hyperframe=6.1.0=pyhd8ed1ab_0 + - icu=73.1=h6a678d5_0 + - idna=3.10=pyhd8ed1ab_1 + - 
jinja2=3.1.6=pyhd8ed1ab_0 + - jpeg=9e=h166bdaf_1 + - kagglehub=0.3.8=pyhd8ed1ab_0 + - krb5=1.20.1=h143b758_1 + - lame=3.100=h7f98852_1001 + - lcms2=2.16=hb9589c4_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - leptonica=1.82.0=h42c8aad_2 + - lerc=4.0.0=h6a678d5_0 + - libabseil=20240116.2=cxx17_h6a678d5_0 + - libarchive=3.7.7=hfab0078_0 + - libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libclang=14.0.6=default_hc6dbbc7_2 + - libclang13=14.0.6=default_he11475f_2 + - libcups=2.4.2=h2d74bed_1 + - libcurl=8.12.1=hc9e6f67_0 + - libdeflate=1.22=h5eee18b_0 + - libedit=3.1.20230828=h5eee18b_0 + - libev=4.33=h516909a_1 + - libffi=3.4.4=h6a678d5_1 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran=3.0.0=1 + - libgfortran-ng=13.2.0=h69a702a_0 + - libgfortran5=13.2.0=ha4646dd_0 + - libglib=2.78.4=hdc74915_0 + - libgomp=11.2.0=h1234567_1 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libllvm14=14.0.6=hecde1de_4 + - libnghttp2=1.57.0=h2d74bed_0 + - libogg=1.3.4=h7f98852_1 + - libopenblas=0.3.21=h043d6bf_0 + - libopus=1.3.1=h7f98852_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=17.4=hdbd6064_0 + - libprotobuf=4.25.3=he621ea3_0 + - libssh2=1.11.1=h251f7ec_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libtheora=1.1.1=h7f98852_1005 + - libtiff=4.5.1=hffd6297_1 + - libuuid=1.41.5=h5eee18b_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libvpx=1.13.1=h6a678d5_0 + - libwebp=1.3.2=h11a3e52_0 + - libwebp-base=1.3.2=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.3=he3ba5ed_0 + - libxml2=2.13.5=hfdd30dd_0 + - lz4-c=1.9.4=h6a678d5_1 + - markupsafe=2.1.1=py310h5764c6d_1 + - mpc=1.2.1=h9f54685_0 + - mpfr=4.1.0=h9202a9a_1 + - mpmath=1.3.0=pyhd8ed1ab_1 + - mysql=8.4.0=h29a9f33_1 + - ncurses=6.4=h6a678d5_0 + - networkx=3.4=pyhd8ed1ab_0 + - numpy=1.22.3=py310h4ef5377_2 + - openblas=0.3.4=ha44fe06_0 + - opencv=4.10.0=py310h2484693_2 + - openh264=2.1.1=h780b84a_0 + - openjpeg=2.5.2=he7f1fd0_0 + - openldap=2.6.4=h42fbc30_0 + - openssl=3.0.16=h5eee18b_0 + - 
packaging=24.2=pyhd8ed1ab_2 + - pcre2=10.42=hebb0a14_1 + - pillow=11.1.0=py310hcea889d_0 + - pip=25.0=py310h06a4308_0 + - pixman=0.40.0=h36c2ea0_0 + - pycparser=2.22=pyh29332c3_1 + - pysocks=1.7.1=pyha55dd90_7 + - python=3.10.16=he870216_1 + - python_abi=3.10=2_cp310 + - pytorch=2.3.0=cpu_py310h1ce4368_1 + - qt-main=5.15.2=hb6262e9_12 + - readline=8.2=h5eee18b_0 + - requests=2.32.3=pyhd8ed1ab_1 + - setuptools=75.8.0=py310h06a4308_0 + - sqlite=3.45.3=h5eee18b_0 + - sympy=1.13.3=pyh2585a3b_105 + - tesseract=5.2.0=h6a678d5_2 + - tk=8.6.14=h39e8969_0 + - torchvision=0.18.1=cpu_py310h54128f0_0 + - tqdm=4.67.1=pyhd8ed1ab_1 + - typing_extensions=4.12.2=pyha770c72_1 + - tzdata=2025a=h04d1e81_0 + - urllib3=2.3.0=pyhd8ed1ab_0 + - wheel=0.45.1=py310h06a4308_0 + - xz=5.6.4=h5eee18b_1 + - zlib=1.2.13=h5eee18b_1 + - zstandard=0.23.0=py310h2c38b39_1 + - zstd=1.5.6=hc292b87_0 +prefix: /home/tcrawley11/miniconda3/envs/ubuntuEnv diff --git a/utils/kitti_seg_visualizer.py b/utils/kitti_seg_visualizer.py new file mode 100644 index 0000000..e485016 --- /dev/null +++ b/utils/kitti_seg_visualizer.py @@ -0,0 +1,81 @@ +import cv2 as cv +import numpy as np + +# Dictionary mapping KITTI semantic labels to their corresponding BGR color values. +# Note: OpenCV uses BGR format; the label index is assumed to be encoded in the red channel. 
+KITTI_LABEL_COLORS = { + 0: np.array([128, 64, 128], dtype=np.uint8), # road + 1: np.array([244, 35, 232], dtype=np.uint8), # sidewalk + 2: np.array([70, 70, 70], dtype=np.uint8), # building + 3: np.array([102, 102, 156], dtype=np.uint8), # wall + 4: np.array([190, 153, 153], dtype=np.uint8), # fence + 5: np.array([153, 153, 153], dtype=np.uint8), # pole + 6: np.array([30, 170, 250], dtype=np.uint8), # traffic light + 7: np.array([0, 220, 220], dtype=np.uint8), # traffic sign + 8: np.array([35, 142, 107], dtype=np.uint8), # vegetation + 9: np.array([152, 251, 152], dtype=np.uint8), # terrain + 10: np.array([180, 130, 70], dtype=np.uint8), # sky + 11: np.array([60, 20, 220], dtype=np.uint8), # person + 12: np.array([0, 0, 255], dtype=np.uint8), # rider + 13: np.array([142, 0, 0], dtype=np.uint8), # car + 14: np.array([70, 0, 0], dtype=np.uint8), # truck + 15: np.array([100, 60, 0], dtype=np.uint8), # bus + 16: np.array([100, 80, 0], dtype=np.uint8), # train + 17: np.array([230, 0, 0], dtype=np.uint8), # motorcycle + 18: np.array([32, 11, 119], dtype=np.uint8), # bicycle + 255: np.array([0, 0, 0], dtype=np.uint8) # void +} + +def load_image(path): + + #Loads an image from the given file path using OpenCV. + #Raises a RuntimeError if the image cannot be read. + + image = cv.imread(path) + if image is None or np.count_nonzero(image) == 0: + raise RuntimeError("Image at path '{}' was unable to be read".format(path)) + return image + +def generate_mask(panoptic_image): + + #Generates a segmentation mask from the panoptic image. + #For each pixel in the panoptic image, the red channel value (index 2) is used as a key + #to retrieve the corresponding BGR color from KITTI_LABEL_COLORS. + #If the label is not found, it defaults to black. + #Note: This implementation is slow and may need optimization for real-time applications. 
+ + height, width, channels = panoptic_image.shape + mask = np.zeros((height, width, channels), dtype=np.uint8) + for row in range(height): + for col in range(width): + pixel = panoptic_image[row, col] # OpenCV uses BGR; label is in red channel + mask[row, col] = KITTI_LABEL_COLORS.get(pixel[2], np.array([0, 0, 0], dtype=np.uint8)) + return mask + +def create_overlay(image, mask): + + #Creates an overlay by blending the original image with the segmentation mask. + #The blending factor (alpha) determines the transparency of the mask. + + alpha = 0.5 + overlay = cv.addWeighted(mask, alpha, image, 1 - alpha, 0) + return overlay + +# Absolute path to the input image and its corresponding panoptic map. +image_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/images/training/0000/000000.png" +panoptic_path = "C:/Users/Jairdan C/Desktop/WEAP/WEAP_CV/datasets/kitti/kitti_step/panoptic_maps/train/0000/000000.png" + +# Load the original image and panoptic map. +image = load_image(image_path) +panoptic_image = load_image(panoptic_path) + +# Generate the segmentation mask and create an overlay. +mask = generate_mask(panoptic_image) +segmentation_overlay = create_overlay(image, mask) + +# Display the resulting overlay. +cv.imshow("Segmentation Overlay", segmentation_overlay) +cv.waitKey(0) +cv.destroyAllWindows() + +