diff --git a/build/docker-compose.yml b/build/docker-compose.yml
index 4580d9ea..378ba714 100644
--- a/build/docker-compose.yml
+++ b/build/docker-compose.yml
@@ -69,8 +69,7 @@ services:
       # in ``build/docker/build/Dockerfile
       - ../:/workspace
       # mount git config for dvc
-      - ../.gitconfig:/home/carla/.gitconfig
-      - ../:/workspace/
+      - "~/.gitconfig:/home/carla/.gitconfig"
     networks:
       - carla
       - ros
diff --git a/build/docker/agent/Dockerfile b/build/docker/agent/Dockerfile
index 41dc7e4e..97959ca6 100644
--- a/build/docker/agent/Dockerfile
+++ b/build/docker/agent/Dockerfile
@@ -74,7 +74,9 @@ RUN apt-get update && apt-get install -y \
     ros-noetic-carla-msgs ros-noetic-pcl-conversions \
     ros-noetic-rviz ros-noetic-rqt ros-noetic-pcl-ros ros-noetic-rosbridge-suite ros-noetic-rosbridge-server \
     ros-noetic-robot-pose-ekf ros-noetic-ros-numpy \
-    ros-noetic-py-trees-ros ros-noetic-rqt-py-trees ros-noetic-rqt-reconfigure
+    ros-noetic-py-trees-ros ros-noetic-rqt-py-trees ros-noetic-rqt-reconfigure \
+    python3-tk
+
 
 SHELL ["/bin/bash", "-c"]
 
diff --git a/code/perception/src/traffic_sign_detection/.gitignore b/code/perception/src/traffic_sign_detection/.gitignore
new file mode 100644
index 00000000..69a85cd9
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/.gitignore
@@ -0,0 +1,2 @@
+/models
+/data
diff --git a/code/perception/src/traffic_sign_detection/Readme.md b/code/perception/src/traffic_sign_detection/Readme.md
new file mode 100644
index 00000000..89723a23
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/Readme.md
@@ -0,0 +1,26 @@
+# Traffic Sign Detection
+
+The training code is inspired by [this tutorial](https://debuggercafe.com/traffic-sign-recognition-using-pytorch-and-deep-learning/).
+
+## Run training
+
+Update the training dataset (read the [DVC docs](../../../../doc/02_development/11_dvc.md) first):
+
+```shell
+cd code/perception/src/traffic_sign_detection/
+dvc pull
+```
+
+```shell
+b5 shell
+cd perception/src/traffic_sign_detection/
+python src/train.py
+```
+
+## Dataset
+
+Since no large dataset of CARLA traffic signs exists,
+the [German Traffic Sign Recognition Benchmark](https://benchmark.ini.rub.de/gtsrb_news.html)
+was used.
+
+Although these are real-world images, they look quite similar to the CARLA ones.
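The `src/train.py` script introduced later in this diff exposes a few command-line flags (`--epochs`, `--learning-rate`, `--pretrained`, `--fine-tune`). A minimal sketch of a training call that uses them, with illustrative values only:

```shell
b5 shell
cd perception/src/traffic_sign_detection/
# Example flag values; adjust to the experiment at hand.
python src/train.py --epochs 20 --learning-rate 0.001 --pretrained --fine-tune
```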
diff --git a/code/perception/src/traffic_sign_detection/data.dvc b/code/perception/src/traffic_sign_detection/data.dvc
new file mode 100644
index 00000000..b84d7e77
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/data.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 1eee983d5b361f353ec7d438da66c2a9.dir
+  size: 486648472
+  nfiles: 51885
+  path: data
diff --git a/code/perception/src/traffic_sign_detection/dvc.yaml b/code/perception/src/traffic_sign_detection/dvc.yaml
new file mode 100644
index 00000000..d8869486
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/dvc.yaml
@@ -0,0 +1,18 @@
+stages:
+  train:
+    cmd: python src/train.py
+    deps:
+      - data
+      - src
+    params:
+      - params.yaml:
+    outs:
+      - models
+    metrics:
+      - dvclive/metrics.json:
+          cache: false
+    plots:
+      - dvclive/plots:
+          cache: false
+      - plots:
+          cache: false
diff --git a/code/perception/src/traffic_sign_detection/params.yaml b/code/perception/src/traffic_sign_detection/params.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/code/perception/src/traffic_sign_detection/src/cam.py b/code/perception/src/traffic_sign_detection/src/cam.py
new file mode 100755
index 00000000..f0d450a2
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/src/cam.py
@@ -0,0 +1,177 @@
+import numpy as np
+import cv2
+import torch
+import glob as glob
+import pandas as pd
+import os
+import albumentations as A
+import time
+
+from albumentations.pytorch import ToTensorV2
+from torch.nn import functional as F
+from torch import topk
+
+from model import build_model
+
+# Define computation device.
+device = ('cuda' if torch.cuda.is_available() else 'cpu')
+# Class names.
+sign_names_df = pd.read_csv('../data/signnames.csv')
+class_names = sign_names_df.SignName.tolist()
+
+# DataFrame for ground truth.
+gt_df = pd.read_csv(
+    '../data/test/GT-final_test.csv',
+    delimiter=';'
+)
+gt_df = gt_df.set_index('Filename', drop=True)
+
+# Initialize model, switch to eval mode, load trained weights.
+model = build_model(
+    pretrained=False,
+    fine_tune=False,
+    num_classes=43
+).to(device)
+model = model.eval()
+model.load_state_dict(
+    torch.load(
+        '../models/model.pth', map_location=device
+    )['model_state_dict']
+)
+
+
+# https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py
+def returnCAM(feature_conv, weight_softmax, class_idx):
+    # Generate the class activation maps, upsampled to 256x256.
+    size_upsample = (256, 256)
+    bz, nc, h, w = feature_conv.shape
+    output_cam = []
+    for idx in class_idx:
+        cam = weight_softmax[idx].dot(feature_conv.reshape((nc, h*w)))
+        cam = cam.reshape(h, w)
+        cam = cam - np.min(cam)
+        cam_img = cam / np.max(cam)
+        cam_img = np.uint8(255 * cam_img)
+        output_cam.append(cv2.resize(cam_img, size_upsample))
+    return output_cam
+
+
+def apply_color_map(CAMs, width, height, orig_image):
+    for i, cam in enumerate(CAMs):
+        heatmap = cv2.applyColorMap(
+            cv2.resize(cam, (width, height)),
+            cv2.COLORMAP_JET
+        )
+        result = heatmap * 0.5 + orig_image * 0.5
+        result = cv2.resize(result, (224, 224))
+    return result
+
+
+def visualize_and_save_map(
+    result, orig_image, gt_idx=None, class_idx=None, save_name=None
+):
+    # Put class label text on the result.
+    if class_idx is not None:
+        cv2.putText(
+            result,
+            f"Pred: {str(class_names[int(class_idx)])}", (5, 20),
+            cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2,
+            cv2.LINE_AA
+        )
+    if gt_idx is not None:
+        cv2.putText(
+            result,
+            f"GT: {str(class_names[int(gt_idx)])}", (5, 40),
+            cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2,
+            cv2.LINE_AA
+        )
+    orig_image = cv2.resize(orig_image, (224, 224))
+    img_concat = cv2.hconcat([
+        np.array(result, dtype=np.uint8),
+        np.array(orig_image, dtype=np.uint8)
+    ])
+    cv2.imshow('Result', img_concat)
+    cv2.waitKey(1)
+    if save_name is not None:
+        cv2.imwrite(f"../outputs/test_results/CAM_{save_name}.jpg", img_concat)
+
+
+# Hook the feature extractor.
+# https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py
+features_blobs = []
+
+
+def hook_feature(module, input, output):
+    features_blobs.append(output.data.cpu().numpy())
+
+
+model._modules.get('features').register_forward_hook(hook_feature)
+# Get the softmax weight.
+params = list(model.parameters())
+weight_softmax = np.squeeze(params[-4].data.cpu().numpy())
+
+# Define the transforms, resize => tensor => normalize.
+transform = A.Compose([
+    A.Resize(224, 224),
+    A.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    ),
+    ToTensorV2(),
+])
+
+counter = 0
+# Run for all the test images.
+all_images = glob.glob('../data/test/*.ppm')
+correct_count = 0
+frame_count = 0  # To count total frames.
+total_fps = 0  # To get the final frames per second.
+for i, image_path in enumerate(all_images):
+    # Read the image.
+    image = cv2.imread(image_path)
+    orig_image = image.copy()
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    height, width, _ = orig_image.shape
+    # Apply the image transforms.
+    image_tensor = transform(image=image)['image']
+    # Add batch dimension.
+    image_tensor = image_tensor.unsqueeze(0)
+    # Forward pass through model.
+    start_time = time.time()
+    outputs = model(image_tensor.to(device))
+    end_time = time.time()
+    # Get the softmax probabilities.
+    probs = F.softmax(outputs, dim=1).data.squeeze()
+    # Get the class indices of top k probabilities.
+    class_idx = topk(probs, 1)[1].int()
+    # Get the ground truth.
+    image_name = image_path.split(os.path.sep)[-1]
+    gt_idx = gt_df.loc[image_name].ClassId
+    # Check whether the prediction is correct or not.
+    if gt_idx == class_idx:
+        correct_count += 1
+    # Generate the class activation map for the top-1 prediction,
+    # using the features hooked during the most recent forward pass.
+    CAMs = returnCAM(features_blobs[-1], weight_softmax, class_idx)
+    # File name to save the resulting CAM image with.
+    save_name = f"{image_path.split('/')[-1].split('.')[0]}"
+    # Show and save the results.
+    result = apply_color_map(CAMs, width, height, orig_image)
+    visualize_and_save_map(result, orig_image, gt_idx, class_idx, save_name)
+    counter += 1
+    print(f"Image: {counter}")
+    # Get the current fps.
+    fps = 1 / (end_time - start_time)
+    # Add `fps` to `total_fps`.
+    total_fps += fps
+    # Increment frame count.
+    frame_count += 1
+
+print(f"Total number of test images: {len(all_images)}")
+print(f"Total correct predictions: {correct_count}")
+print(f"Accuracy: {correct_count/len(all_images)*100:.3f}")
+
+# Close all frames and video windows.
+cv2.destroyAllWindows()
+# Calculate and print the average FPS.
+avg_fps = total_fps / frame_count
+print(f"Average FPS: {avg_fps:.3f}")
diff --git a/code/perception/src/traffic_sign_detection/src/datasets.py b/code/perception/src/traffic_sign_detection/src/datasets.py
new file mode 100644
index 00000000..2bcc137b
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/src/datasets.py
@@ -0,0 +1,97 @@
+import torch
+import albumentations as A
+import numpy as np
+
+from torchvision import datasets
+from torch.utils.data import DataLoader, Subset
+from albumentations.pytorch import ToTensorV2
+
+# Required constants.
+ROOT_DIR = 'data/train/'
+VALID_SPLIT = 0.1
+RESIZE_TO = 224  # Image size to resize to when applying transforms.
+BATCH_SIZE = 32
+NUM_WORKERS = 4  # Number of parallel processes for data preparation.
+
+
+# Training transforms.
+class TrainTransforms:
+    def __init__(self, resize_to):
+        self.transforms = A.Compose([
+            A.Resize(resize_to, resize_to),
+            A.RandomBrightnessContrast(),
+            A.RandomFog(),
+            A.RandomRain(),
+            A.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            ),
+            ToTensorV2()
+        ])
+
+    def __call__(self, img):
+        return self.transforms(image=np.array(img))['image']
+
+
+# Validation transforms.
+class ValidTransforms:
+    def __init__(self, resize_to):
+        self.transforms = A.Compose([
+            A.Resize(resize_to, resize_to),
+            A.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            ),
+            ToTensorV2()
+        ])
+
+    def __call__(self, img):
+        return self.transforms(image=np.array(img))['image']
+
+
+def get_datasets():
+    """
+    Function to prepare the Datasets.
+
+    Returns the training and validation datasets along
+    with the class names.
+    """
+    dataset = datasets.ImageFolder(
+        ROOT_DIR,
+        transform=(TrainTransforms(RESIZE_TO))
+    )
+    dataset_test = datasets.ImageFolder(
+        ROOT_DIR,
+        transform=(ValidTransforms(RESIZE_TO))
+    )
+    dataset_size = len(dataset)
+
+    # Calculate the validation dataset size.
+    valid_size = int(VALID_SPLIT*dataset_size)
+    # Randomize the data indices.
+    indices = torch.randperm(len(dataset)).tolist()
+    # Training and validation sets.
+    dataset_train = Subset(dataset, indices[:-valid_size])
+    dataset_valid = Subset(dataset_test, indices[-valid_size:])
+
+    return dataset_train, dataset_valid, dataset.classes
+
+
+def get_data_loaders(dataset_train, dataset_valid):
+    """
+    Prepares the training and validation data loaders.
+
+    :param dataset_train: The training dataset.
+    :param dataset_valid: The validation dataset.
+
+    Returns the training and validation data loaders.
+    """
+    train_loader = DataLoader(
+        dataset_train, batch_size=BATCH_SIZE,
+        shuffle=True, num_workers=NUM_WORKERS
+    )
+    valid_loader = DataLoader(
+        dataset_valid, batch_size=BATCH_SIZE,
+        shuffle=False, num_workers=NUM_WORKERS
+    )
+    return train_loader, valid_loader
diff --git a/code/perception/src/traffic_sign_detection/src/model.py b/code/perception/src/traffic_sign_detection/src/model.py
new file mode 100644
index 00000000..c6285153
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/src/model.py
@@ -0,0 +1,23 @@
+import torchvision.models as models
+import torch.nn as nn
+
+
+def build_model(pretrained=True, fine_tune=False, num_classes=10):
+    if pretrained:
+        print('[INFO]: Loading pre-trained weights')
+    else:
+        print('[INFO]: Not loading pre-trained weights')
+    model = models.mobilenet_v3_large(pretrained=pretrained)
+
+    if fine_tune:
+        print('[INFO]: Fine-tuning all layers...')
+        for params in model.parameters():
+            params.requires_grad = True
+    elif not fine_tune:
+        print('[INFO]: Freezing hidden layers...')
+        for params in model.parameters():
+            params.requires_grad = False
+
+    # Change the final classification head.
+    model.classifier[3] = nn.Linear(in_features=1280, out_features=num_classes)
+    return model
diff --git a/code/perception/src/traffic_sign_detection/src/train.py b/code/perception/src/traffic_sign_detection/src/train.py
new file mode 100644
index 00000000..7379c400
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/src/train.py
@@ -0,0 +1,206 @@
+import torch
+import argparse
+import torch.nn as nn
+import torch.optim as optim
+import time
+
+from tqdm.auto import tqdm
+
+from model import build_model
+from datasets import get_datasets, get_data_loaders
+from utils import save_model, save_plots
+
+seed = 42
+torch.manual_seed(seed)
+torch.cuda.manual_seed(seed)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = True
+
+# Construct the argument parser.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    '-e', '--epochs', type=int, default=10,
+    help='Number of epochs to train our network for'
+)
+parser.add_argument(
+    '-lr', '--learning-rate', type=float,
+    dest='learning_rate', default=0.001,
+    help='Learning rate for training the model'
+)
+parser.add_argument(
+    '-pw', '--pretrained', action='store_true',
+    help='whether to use pretrained weights or not'
+)
+parser.add_argument(
+    '-ft', '--fine-tune', dest='fine_tune', action='store_true',
+    help='whether to train all layers or not'
+)
+args = vars(parser.parse_args())
+
+
+# Training function.
+def train(
+    model, trainloader, optimizer,
+    criterion, scheduler=None, epoch=None
+):
+    model.train()
+    print('Training')
+    train_running_loss = 0.0
+    train_running_correct = 0
+    counter = 0
+    iters = len(trainloader)
+    for i, data in tqdm(enumerate(trainloader), total=len(trainloader)):
+        counter += 1
+        image, labels = data
+        image = image.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()
+        # Forward pass.
+        outputs = model(image)
+        # Calculate the loss.
+        loss = criterion(outputs, labels)
+        train_running_loss += loss.item()
+        # Calculate the accuracy.
+        _, preds = torch.max(outputs.data, 1)
+        train_running_correct += (preds == labels).sum().item()
+        # Backpropagation.
+        loss.backward()
+        # Update the weights.
+        optimizer.step()
+
+        if scheduler is not None:
+            scheduler.step(epoch + i / iters)
+
+    # Loss and accuracy for the complete epoch.
+    epoch_loss = train_running_loss / counter
+    epoch_acc = 100. * (train_running_correct / len(trainloader.dataset))
+    return epoch_loss, epoch_acc
+
+
+# Validation function.
+def validate(model, testloader, criterion, class_names):
+    model.eval()
+    print('Validation')
+    valid_running_loss = 0.0
+    valid_running_correct = 0
+    counter = 0
+
+    # We need two lists to keep track of class-wise accuracy.
+    class_correct = list(0. for i in range(len(class_names)))
+    class_total = list(0. for i in range(len(class_names)))
+
+    with torch.no_grad():
+        for i, data in tqdm(enumerate(testloader), total=len(testloader)):
+            counter += 1
+
+            image, labels = data
+            image = image.to(device)
+            labels = labels.to(device)
+            # Forward pass.
+            outputs = model(image)
+            # Calculate the loss.
+            loss = criterion(outputs, labels)
+            valid_running_loss += loss.item()
+            # Calculate the accuracy.
+            _, preds = torch.max(outputs.data, 1)
+            valid_running_correct += (preds == labels).sum().item()
+
+            # Calculate the accuracy for each class.
+            correct = (preds == labels).squeeze()
+            for i in range(len(preds)):
+                label = labels[i]
+                class_correct[label] += correct[i].item()
+                class_total[label] += 1
+
+    # Loss and accuracy for the complete epoch.
+    epoch_loss = valid_running_loss / counter
+    epoch_acc = 100. * (valid_running_correct / len(testloader.dataset))
+
+    # Print the accuracy for each class after every epoch.
+    print('\n')
+    for i in range(len(class_names)):
+        print(
+            f"Accuracy of class {class_names[i]}: \
+            {100*class_correct[i]/class_total[i]}"
+        )
+    print('\n')
+    return epoch_loss, epoch_acc
+
+
+if __name__ == '__main__':
+    # Load the training and validation datasets.
+    dataset_train, dataset_valid, dataset_classes = get_datasets()
+    print(f"[INFO]: Number of training images: {len(dataset_train)}")
+    print(f"[INFO]: Number of validation images: {len(dataset_valid)}")
+    print(f"[INFO]: Class names: {dataset_classes}\n")
+    # Load the training and validation data loaders.
+    train_loader, valid_loader = get_data_loaders(dataset_train, dataset_valid)
+
+    # Learning parameters.
+    lr = args['learning_rate']
+    epochs = args['epochs']
+    device = ('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Computation device: {device}")
+    print(f"Learning rate: {lr}")
+    print(f"Epochs to train for: {epochs}\n")
+
+    # Load the model.
+    model = build_model(
+        pretrained=args['pretrained'],
+        fine_tune=args['fine_tune'],
+        num_classes=len(dataset_classes)
+    ).to(device)
+
+    # Total parameters and trainable parameters.
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f"{total_params:,} total parameters.")
+    total_trainable_params = sum(
+        p.numel() for p in model.parameters() if p.requires_grad)
+    print(f"{total_trainable_params:,} training parameters.")
+
+    # Optimizer.
+    optimizer = optim.Adam(model.parameters(), lr=lr)
+    # Loss function.
+    criterion = nn.CrossEntropyLoss()
+
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
+        optimizer,
+        T_0=10,
+        T_mult=1,
+        verbose=True
+    )
+
+    # Lists to keep track of losses and accuracies.
+    train_loss, valid_loss = [], []
+    train_acc, valid_acc = [], []
+    # Start the training.
+    for epoch in range(epochs):
+        print(f"[INFO]: Epoch {epoch+1} of {epochs}")
+        train_epoch_loss, train_epoch_acc = train(
+            model, train_loader,
+            optimizer, criterion,
+            scheduler=scheduler, epoch=epoch
+        )
+        valid_epoch_loss, valid_epoch_acc = validate(
+            model, valid_loader, criterion, dataset_classes
+        )
+        train_loss.append(train_epoch_loss)
+        valid_loss.append(valid_epoch_loss)
+        train_acc.append(train_epoch_acc)
+        valid_acc.append(valid_epoch_acc)
+        print(
+            f"Training loss: {train_epoch_loss:.3f},\
+            training acc: {train_epoch_acc:.3f}"
+        )
+        print(
+            f"Validation loss: {valid_epoch_loss:.3f},\
+            validation acc: {valid_epoch_acc:.3f}"
+        )
+        print('-'*50)
+        time.sleep(5)
+
+    # Save the trained model weights.
+    save_model(epochs, model, optimizer, criterion)
+    # Save the loss and accuracy plots.
+    save_plots(train_acc, valid_acc, train_loss, valid_loss)
+    print('TRAINING COMPLETE')
diff --git a/code/perception/src/traffic_sign_detection/src/utils.py b/code/perception/src/traffic_sign_detection/src/utils.py
new file mode 100755
index 00000000..ac5ecc61
--- /dev/null
+++ b/code/perception/src/traffic_sign_detection/src/utils.py
@@ -0,0 +1,52 @@
+import torch
+import matplotlib
+import matplotlib.pyplot as plt
+
+matplotlib.style.use('ggplot')
+
+
+def save_model(epochs, model, optimizer, criterion):
+    """
+    Function to save the trained model to disk.
+    """
+    torch.save({
+        'epoch': epochs,
+        'model_state_dict': model.state_dict(),
+        'optimizer_state_dict': optimizer.state_dict(),
+        'loss': criterion,
+    }, "../models/model.pth")
+
+
+def save_plots(train_acc, valid_acc, train_loss, valid_loss):
+    """
+    Function to save the loss and accuracy plots to disk.
+    """
+    # Accuracy plots.
+    plt.figure(figsize=(10, 7))
+    plt.plot(
+        train_acc, color='green', linestyle='-',
+        label='train accuracy'
+    )
+    plt.plot(
+        valid_acc, color='blue', linestyle='-',
+        label='validation accuracy'
+    )
+    plt.xlabel('Epochs')
+    plt.ylabel('Accuracy')
+    plt.legend()
+    plt.savefig("../outputs/accuracy.png")
+
+    # Loss plots.
+    plt.figure(figsize=(10, 7))
+    plt.plot(
+        train_loss, color='orange', linestyle='-',
+        label='train loss'
+    )
+    plt.plot(
+        valid_loss, color='red', linestyle='-',
+        label='validation loss'
+    )
+    plt.xlabel('Epochs')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.savefig("../outputs/loss.png")
diff --git a/code/requirements.txt b/code/requirements.txt
index 5c78ad34..e25feee3 100644
--- a/code/requirements.txt
+++ b/code/requirements.txt
@@ -1,19 +1,17 @@
 dvc==2.36.0
 dvc-gdrive==2.19.1
+dvclive==1.3.2
+ruamel.yaml==0.17.21
+albumentations==1.3.0
 torch==1.13.1
 torchvision==0.14.1
 torchaudio==0.13.1
 pytorch-lightning==1.8.5
-albumentations==1.3.0
 git+https://github.com/cocodataset/panopticapi.git
 git+https://github.com/facebookresearch/detectron2.git
 inplace-abn==1.1.0
 efficientnet_pytorch==0.7.1
 opencv-python==4.7.0.68
-dvclive==1.3.2
-torch==1.13.1
-torchvision==0.14.1
-ruamel.yaml==0.17.21
 scipy==1.10.0
 xmltodict==0.13.0
 py-trees==2.1.6
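Since the `dvc.yaml` added above defines a `train` stage (with `data`, `src`, and `params.yaml` as dependencies and `models`, `dvclive`, and `plots` as outputs), the training run can also be reproduced through DVC rather than by invoking the script directly. A minimal sketch using standard DVC 2.x commands; which remote `dvc pull` and `dvc push` talk to depends on the project's DVC configuration:

```shell
cd code/perception/src/traffic_sign_detection/
dvc pull    # fetch the versioned dataset tracked by data.dvc
dvc repro   # run the train stage defined in dvc.yaml
dvc push    # upload the resulting models and metrics to the remote
```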