ll7 · timdreier · Jan 24, 2023 · Jan 24, 2023 · Jan 24, 2023 · Jan 24, 2023
diff --git a/build/docker-compose.yml b/build/docker-compose.yml
@@ -69,8 +69,7 @@ services:
       # in ``build/docker/build/Dockerfile
       - ../:/workspace
       # mount git config for dvc
-      - ../.gitconfig:/home/carla/.gitconfig
-      - ../:/workspace/
+      - "~/.gitconfig:/home/carla/.gitconfig"
     networks:
       - carla
       - ros

diff --git a/build/docker/agent/Dockerfile b/build/docker/agent/Dockerfile
@@ -74,7 +74,9 @@ RUN apt-get update && apt-get install -y \
         ros-noetic-carla-msgs ros-noetic-pcl-conversions \
         ros-noetic-rviz ros-noetic-rqt ros-noetic-pcl-ros ros-noetic-rosbridge-suite ros-noetic-rosbridge-server \
         ros-noetic-robot-pose-ekf ros-noetic-ros-numpy \
-        ros-noetic-py-trees-ros ros-noetic-rqt-py-trees ros-noetic-rqt-reconfigure
+        ros-noetic-py-trees-ros ros-noetic-rqt-py-trees ros-noetic-rqt-reconfigure \
+        python3-tk
+
 
 SHELL ["/bin/bash", "-c"]
 

diff --git a/code/perception/src/traffic_sign_detection/.gitignore b/code/perception/src/traffic_sign_detection/.gitignore
@@ -0,0 +1,2 @@
+/models
+/data
diff --git a/code/perception/src/traffic_sign_detection/Readme.md b/code/perception/src/traffic_sign_detection/Readme.md
@@ -0,0 +1,26 @@
+# Traffic Sign Detection
+
+The training code is inspired by [this tutorial](https://debuggercafe.com/traffic-sign-recognition-using-pytorch-and-deep-learning/).
+
+## Run training
+
+Update training dataset (Read [DVC docs](../../../../doc/02_development/11_dvc.md) first):
+
+```shell
+cd code/perception/src/traffic_sign_detection/
+dvc pull
+```
+
+```shell
+b5 shell
+cd perception/src/traffic_sign_detection/
+python src/train.py
+```
+
+## Dataset
+
+Since no large dataset of CARLA traffic signs exists,
+the [German Traffic Sign Recognition Benchmark](https://benchmark.ini.rub.de/gtsrb_news.html)
+was used instead.
+
+Although these are real-world images, they look pretty similar to the CARLA ones.
diff --git a/code/perception/src/traffic_sign_detection/data.dvc b/code/perception/src/traffic_sign_detection/data.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 1eee983d5b361f353ec7d438da66c2a9.dir
+  size: 486648472
+  nfiles: 51885
+  path: data
diff --git a/code/perception/src/traffic_sign_detection/dvc.yaml b/code/perception/src/traffic_sign_detection/dvc.yaml
@@ -0,0 +1,18 @@
+stages:
+  train:
+    cmd: python src/train.py
+    deps:
+    - data
+    - src
+    params:
+    - params.yaml:
+    outs:
+    - models
+    metrics:
+    - dvclive/metrics.json:
+        cache: false
+    plots:
+    - dvclive/plots:
+        cache: false
+    - plots:
+        cache: false
diff --git a/code/perception/src/traffic_sign_detection/params.yaml b/code/perception/src/traffic_sign_detection/params.yaml
diff --git a/code/perception/src/traffic_sign_detection/src/cam.py b/code/perception/src/traffic_sign_detection/src/cam.py
@@ -0,0 +1,177 @@
+import numpy as np
+import cv2
+import torch
+import glob as glob
+import pandas as pd
+import os
+import albumentations as A
+import time
+
+from albumentations.pytorch import ToTensorV2
+from torch.nn import functional as F
+from torch import topk
+
+from model import build_model
+
+# Run on the GPU when torch can see one, otherwise fall back to the CPU.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# Class names, read from the SignName column of the sign-name table.
+sign_names_df = pd.read_csv('../data/signnames.csv')
+class_names = sign_names_df['SignName'].tolist()
+
+# Ground-truth table of the test images, indexed by image file name.
+gt_df = pd.read_csv('../data/test/GT-final_test.csv', delimiter=';')
+gt_df = gt_df.set_index('Filename', drop=True)
+
+# Build the network without pre-trained weights, move it to the device,
+# switch it to eval mode and restore the trained parameters.
+model = build_model(pretrained=False, fine_tune=False, num_classes=43)
+model = model.to(device)
+model = model.eval()
+checkpoint = torch.load('../models/model.pth', map_location=device)
+model.load_state_dict(checkpoint['model_state_dict'])
+
+
+# https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py
+def returnCAM(feature_conv, weight_softmax, class_idx):
+    """
+    Build one class activation map (CAM) per requested class index.
+
+    :param feature_conv: Feature maps as an array of shape
+        (batch, channels, height, width). The reshape below only works
+        for a batch of one.
+    :param weight_softmax: 2-D weight array; row ``idx`` holds the
+        per-channel weights applied for class ``idx``.
+    :param class_idx: Iterable of class indices to generate maps for.
+
+    Returns a list with one 256x256 ``uint8`` map per class index, each
+    scaled to the range [0, 255].
+    """
+    size_upsample = (256, 256)
+    _, nc, h, w = feature_conv.shape
+    # Flatten the spatial dimensions once instead of once per class.
+    flat_features = feature_conv.reshape((nc, h * w))
+    output_cam = []
+    for idx in class_idx:
+        cam = weight_softmax[idx].dot(flat_features).reshape(h, w)
+        cam = cam - np.min(cam)
+        peak = np.max(cam)
+        # A constant map has a peak of zero after the shift. Skip the
+        # division in that case so the result is all zeros, not NaN.
+        cam_img = cam / peak if peak > 0 else np.zeros_like(cam)
+        cam_img = np.uint8(255 * cam_img)
+        output_cam.append(cv2.resize(cam_img, size_upsample))
+    return output_cam
+
+
+def apply_color_map(CAMs, width, height, orig_image):
+    """
+    Overlay the first activation map in ``CAMs`` on the original image.
+
+    :param CAMs: Iterable of activation maps; only the first is used.
+    :param width: Width the map is resized to before colouring.
+    :param height: Height the map is resized to before colouring.
+    :param orig_image: Image the coloured map is blended with.
+
+    Returns the 50/50 blend resized to 224x224, or ``None`` when
+    ``CAMs`` is empty.
+    """
+    for first_cam in CAMs:
+        resized_cam = cv2.resize(first_cam, (width, height))
+        heatmap = cv2.applyColorMap(resized_cam, cv2.COLORMAP_JET)
+        blended = heatmap * 0.5 + orig_image * 0.5
+        # Only the first map is rendered; later entries are ignored.
+        return cv2.resize(blended, (224, 224))
+    return None
+
+
+def visualize_and_save_map(
+    result, orig_image, gt_idx=None, class_idx=None, save_name=None
+):
+    """
+    Label the CAM overlay, show it next to the input and optionally save it.
+
+    :param result: CAM overlay image; the label text is drawn onto it.
+    :param orig_image: Original image, resized to 224x224 for display.
+    :param gt_idx: Ground-truth class index, or ``None`` to omit the label.
+    :param class_idx: Predicted class index, or ``None`` to omit the label.
+    :param save_name: File name stem for the saved image, or ``None`` to
+        skip saving.
+
+    The side-by-side image is shown in a window named 'Result' and, when
+    ``save_name`` is given, written to ``../outputs/test_results``.
+    """
+    # Prediction on the first text line, ground truth on the second.
+    labels = (("Pred", class_idx, (5, 20)), ("GT", gt_idx, (5, 40)))
+    for prefix, idx, origin in labels:
+        if idx is None:
+            continue
+        cv2.putText(
+            result,
+            f"{prefix}: {str(class_names[int(idx)])}", origin,
+            cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2,
+            cv2.LINE_AA
+        )
+    orig_image = cv2.resize(orig_image, (224, 224))
+    img_concat = cv2.hconcat([
+        np.array(result, dtype=np.uint8),
+        np.array(orig_image, dtype=np.uint8)
+    ])
+    cv2.imshow('Result', img_concat)
+    cv2.waitKey(1)
+    if save_name is not None:
+        out_dir = "../outputs/test_results"
+        # cv2.imwrite does not create missing directories; it only
+        # reports failure. Make sure the target directory exists first.
+        os.makedirs(out_dir, exist_ok=True)
+        cv2.imwrite(f"{out_dir}/CAM_{save_name}.jpg", img_concat)
+
+
+# Storage for the feature extractor's outputs; the forward hook below
+# appends one array per forward pass.
+# https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py
+features_blobs = []
+
+
+def hook_feature(module, input, output):
+    """Forward hook: append the hooked module's output as a NumPy array."""
+    activation = output.data.cpu().numpy()
+    features_blobs.append(activation)
+
+
+model._modules.get('features').register_forward_hook(hook_feature)
+# Get the softmax weight.
+# NOTE(review): this takes the fourth-from-last parameter tensor; confirm
+# that it is the layer the CAM weighting is meant to use.
+params = list(model.parameters())
+weight_softmax = np.squeeze(params[-4].data.cpu().numpy())
+
+# Define the transforms, resize => normalize => tensor.
+transform = A.Compose([
+    A.Resize(224, 224),
+    A.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    ),
+    ToTensorV2(),
+    ])
+
+# Run for all the test images.
+all_images = glob.glob('../data/test/*.ppm')
+correct_count = 0
+frame_count = 0  # Number of frames that contributed an fps sample.
+total_fps = 0  # Sum of the per-frame fps values.
+for counter, image_path in enumerate(all_images, start=1):
+    # Read the image.
+    image = cv2.imread(image_path)
+    orig_image = image.copy()
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    height, width, _ = orig_image.shape
+    # Apply the image transforms and add the batch dimension.
+    image_tensor = transform(image=image)['image'].unsqueeze(0)
+    # Drop the activations of earlier images so that the hook output read
+    # below belongs to this forward pass and the list does not keep
+    # growing with every image.
+    features_blobs.clear()
+    # Forward pass through model; no gradients are needed for inference.
+    start_time = time.time()
+    with torch.no_grad():
+        outputs = model(image_tensor.to(device))
+    end_time = time.time()
+    # Get the softmax probabilities over the class dimension.
+    probs = F.softmax(outputs, dim=1).data.squeeze()
+    # Get the class index of the top-1 probability.
+    class_idx = topk(probs, 1)[1].int()
+    # Get the ground truth.
+    image_name = os.path.basename(image_path)
+    gt_idx = gt_df.loc[image_name].ClassId
+    # Check whether correct prediction or not.
+    if gt_idx == class_idx:
+        correct_count += 1
+    # Generate class activation mapping for the top1 prediction.
+    CAMs = returnCAM(features_blobs[-1], weight_softmax, class_idx)
+    # File name (without extension) to save the resulting CAM image with.
+    save_name = image_name.split('.')[0]
+    # Show and save the results.
+    result = apply_color_map(CAMs, width, height, orig_image)
+    visualize_and_save_map(result, orig_image, gt_idx, class_idx, save_name)
+    print(f"Image: {counter}")
+    # Add the current fps to the running total. A zero elapsed time
+    # (timer resolution) cannot be turned into an fps value, so such a
+    # frame is left out of the average.
+    elapsed = end_time - start_time
+    if elapsed > 0:
+        total_fps += 1 / elapsed
+        frame_count += 1
+
+print(f"Total number of test images: {len(all_images)}")
+print(f"Total correct predictions: {correct_count}")
+# Accuracy is undefined when the glob matched no images.
+if all_images:
+    print(f"Accuracy: {correct_count/len(all_images)*100:.3f}")
+
+# Close all frames and video windows.
+cv2.destroyAllWindows()
+# Calculate and print the average FPS when at least one frame was timed.
+if frame_count:
+    avg_fps = total_fps / frame_count
+    print(f"Average FPS: {avg_fps:.3f}")
diff --git a/code/perception/src/traffic_sign_detection/src/datasets.py b/code/perception/src/traffic_sign_detection/src/datasets.py
@@ -0,0 +1,97 @@
+import torch
+import albumentations as A
+import numpy as np
+
+from torchvision import datasets
+from torch.utils.data import DataLoader, Subset
+from albumentations.pytorch import ToTensorV2
+
+# Required constants.
+# Image folder handed to ImageFolder; relative to the working directory.
+ROOT_DIR = 'data/train/'
+# Fraction of the samples that is held out for validation.
+VALID_SPLIT = 0.1
+RESIZE_TO = 224  # Image size of resize when applying transforms.
+# Batch size used by both the training and the validation loader.
+BATCH_SIZE = 32
+NUM_WORKERS = 4  # Number of parallel processes for data preparation.
+
+
+# Training transforms.
+class TrainTransforms:
+    """Callable that resizes, augments, normalizes and tensorizes an image."""
+
+    def __init__(self, resize_to):
+        """
+        Build the training pipeline.
+
+        :param resize_to: Edge length of the square the image is resized to.
+        """
+        augmentations = [
+            A.Resize(resize_to, resize_to),
+            A.RandomBrightnessContrast(),
+            A.RandomFog(),
+            A.RandomRain(),
+        ]
+        normalize = A.Normalize(
+            mean=[0.485, 0.456, 0.406],
+            std=[0.229, 0.224, 0.225]
+        )
+        self.transforms = A.Compose(
+            [*augmentations, normalize, ToTensorV2()]
+        )
+
+    def __call__(self, img):
+        """Convert ``img`` to an array and return the transformed image."""
+        pixels = np.array(img)
+        transformed = self.transforms(image=pixels)
+        return transformed['image']
+
+
+# Validation transforms.
+class ValidTransforms:
+    """Callable that resizes, normalizes and tensorizes an image."""
+
+    def __init__(self, resize_to):
+        """
+        Build the validation pipeline (no random augmentation).
+
+        :param resize_to: Edge length of the square the image is resized to.
+        """
+        resize = A.Resize(resize_to, resize_to)
+        normalize = A.Normalize(
+            mean=[0.485, 0.456, 0.406],
+            std=[0.229, 0.224, 0.225]
+        )
+        self.transforms = A.Compose([resize, normalize, ToTensorV2()])
+
+    def __call__(self, img):
+        """Convert ``img`` to an array and return the transformed image."""
+        pixels = np.array(img)
+        transformed = self.transforms(image=pixels)
+        return transformed['image']
+
+
+def get_datasets():
+    """
+    Function to prepare the Datasets.
+
+    The image folder is opened twice, once with the training transforms
+    and once with the validation transforms, and both views are split
+    with the same random permutation so the two subsets never overlap.
+
+    Returns the training and validation datasets along
+    with the class names.
+    """
+    dataset = datasets.ImageFolder(
+        ROOT_DIR,
+        transform=TrainTransforms(RESIZE_TO)
+    )
+    dataset_test = datasets.ImageFolder(
+        ROOT_DIR,
+        transform=ValidTransforms(RESIZE_TO)
+    )
+    dataset_size = len(dataset)
+
+    # Calculate the validation dataset size and the resulting split point.
+    valid_size = int(VALID_SPLIT * dataset_size)
+    train_size = dataset_size - valid_size
+    # Randomize the data indices.
+    indices = torch.randperm(dataset_size).tolist()
+    # Split at an explicit positive index. Slicing with ``-valid_size``
+    # breaks when valid_size is 0, because ``[:-0]`` is empty and
+    # ``[-0:]`` is the whole list.
+    dataset_train = Subset(dataset, indices[:train_size])
+    dataset_valid = Subset(dataset_test, indices[train_size:])
+
+    return dataset_train, dataset_valid, dataset.classes
+
+
+def get_data_loaders(dataset_train, dataset_valid):
+    """
+    Wrap the two datasets in data loaders.
+
+    :param dataset_train: The training dataset (shuffled on iteration).
+    :param dataset_valid: The validation dataset (kept in order).
+
+    Returns the training and validation data loaders.
+    """
+    # Settings shared by both loaders.
+    shared = {'batch_size': BATCH_SIZE, 'num_workers': NUM_WORKERS}
+    train_loader = DataLoader(dataset_train, shuffle=True, **shared)
+    valid_loader = DataLoader(dataset_valid, shuffle=False, **shared)
+    return train_loader, valid_loader
diff --git a/code/perception/src/traffic_sign_detection/src/model.py b/code/perception/src/traffic_sign_detection/src/model.py
@@ -0,0 +1,23 @@
+import torchvision.models as models
+import torch.nn as nn
+
+
+def build_model(pretrained=True, fine_tune=False, num_classes=10):
+    """
+    Build a MobileNetV3-Large classifier with a new final layer.
+
+    :param pretrained: Passed on to ``mobilenet_v3_large``; selects
+        whether pre-trained weights are loaded.
+    :param fine_tune: When true all existing parameters stay trainable,
+        otherwise they are frozen.
+    :param num_classes: Number of outputs of the new final layer.
+
+    Returns the model. The replacement head is created after the
+    freeze/unfreeze step, so its parameters are trainable in both modes.
+    """
+    if pretrained:
+        print('[INFO]: Loading pre-trained weights')
+    else:
+        print('[INFO]: Not loading pre-trained weights')
+    model = models.mobilenet_v3_large(pretrained=pretrained)
+
+    if fine_tune:
+        print('[INFO]: Fine-tuning all layers...')
+        for params in model.parameters():
+            params.requires_grad = True
+    else:
+        print('[INFO]: Freezing hidden layers...')
+        for params in model.parameters():
+            params.requires_grad = False
+
+    # Change the final classification head. Read the input width from the
+    # layer being replaced instead of hard-coding it.
+    in_features = model.classifier[3].in_features
+    model.classifier[3] = nn.Linear(
+        in_features=in_features, out_features=num_classes
+    )
+    return model