diff --git a/README.md b/README.md index 4f88acd..2cb4896 100644 --- a/README.md +++ b/README.md @@ -127,3 +127,14 @@ The HDR JSON metadata file may have the following fields: ``` + + +## BOUNDING BOX STUFF + +The current bounding box implementation in this branch does not integrate well with the segmentation side. The HDR must have blobs on the intersections in the following colours: + +- L: magenta [255, 0, 255] +- T: cyan [0, 255, 255] +- X: darker orange [255, 100, 0] + +The goal posts must be solid yellow [255, 255, 0] with just the posts and not the top bar or any other part of the goals. \ No newline at end of file diff --git a/pbr/config/scene_config.py b/pbr/config/scene_config.py index e781792..5c3192d 100644 --- a/pbr/config/scene_config.py +++ b/pbr/config/scene_config.py @@ -127,7 +127,7 @@ "mesh_types": [".fbx", ".obj"], "path": path.abspath(path.join(res_path, "balls")), "mask": {"index": 1, "colour": (1, 0, 0, 1)}, - "radius": 0.045, + "radius": 0.09, "standard_deviation": 0.005, # for randomising ball position, large value means more random, zero means no random }, "environment": { @@ -152,6 +152,32 @@ "mask": {"index": 4, "colour": (0, 1, 0, 1), "line_colour": (1, 1, 1, 1)}, }, "goal": {"mask": {"index": 2, "colour": (1, 1, 0, 1)}}, + "bounding_boxes": { + "enabled": True, + "output_format": "yolo", + "mask": {"index": 6, "colour": (0.5, 0.5, 0.5, 1)}, # Dummy mask config for compatibility + "classes": { + "ball": 0, + "goal_post": 1, + "robot": 2, + "L_intersection": 3, + "T_intersection": 4, + "X_intersection": 5 + }, + "min_bbox_size": 8, # minimum bounding box size in pixels + "max_bbox_size": 800, # maximum bounding box size in pixels + "intersection_base_sizes": { + "L": 0.15, # meters - base size for L intersection + "T": 0.20, # meters - base size for T intersection + "X": 0.25 # meters - base size for X intersection + }, + "goal_post_detection": { + "enabled": True, + "width": 0.12, # meters - actual goal post width + "min_height_ratio": 
0.3, # minimum visible height ratio + "use_segmentation": True # extract from goal segmentation mask + } + }, } @@ -231,7 +257,7 @@ def configure_scene(): **random.choice( [ {"type": "EQUISOLID", "focal_length": 10.5, "fov": pi}, - {"type": "RECTILINEAR", "fov": 0.857}, + {"type": "RECTILINEAR", "fov": 1.6}, ] ), "stereo_camera_distance": 0.1, diff --git a/pbr/pbr.py b/pbr/pbr.py index 4cc7199..dc67b2f 100644 --- a/pbr/pbr.py +++ b/pbr/pbr.py @@ -366,8 +366,51 @@ def main(): os.path.join(out_cfg.depth_dir, filename) + ".exr", ) - # Check that the rotation matrix of the main camera is valid - print(f"Rotation matrix of {cam_l.obj.name}: \n", cam_l.obj.matrix_world) + ############################################## + ## BOUNDING BOX GENERATION ## + ############################################## + + annotations = [] + + # Ball annotations + ball_annotations = [util.write_annotations(ball.obj)] + annotations += [ann for ann in ball_annotations if ann is not None] + + # Goal annotations + # goal_annotations = [util.write_annotations(goal.obj, 1) for goal in goals] + # annotations += [ann for ann in goal_annotations if ann is not None] + + # Goal post annotations (from rendered segmentation mask) + rendered_mask_path = os.path.join(out_cfg.mask_dir, "{}.png".format(filename)) + goalpost_annotations = util.write_goal_post_annotations_from_mask( + rendered_mask_path, bpy.context.scene + ) + annotations += goalpost_annotations + + # Robot annotations (exclude the camera robot r0) + robot_annotations = [util.write_annotations(robot.obj, 2) for robot in robots[1:]] # Skip robots[0] which is the camera robot + annotations += [ann for ann in robot_annotations if ann is not None] + + # Misc robot annotations + misc_annotations = [util.write_annotations(misc_robot.obj, 2) for misc_robot in misc_robots] + annotations += [ann for ann in misc_annotations if ann is not None] + + # Intersection annotations (from rendered segmentation mask) + intersection_annotations = 
util.write_intersection_annotations_from_mask( + rendered_mask_path, bpy.context.scene + ) + annotations += intersection_annotations + + # Write YOLO format annotations + if annotations: + os.makedirs(out_cfg.output_dir + "/annotations", exist_ok=True) + annotation_file = os.path.join(out_cfg.output_dir + "/annotations", f"{filename}.txt") + with open(annotation_file, 'w') as f: + for ann in annotations: + f.write(f"{ann[0]} {ann[1]:.6f} {ann[2]:.6f} {ann[3]:.6f} {ann[4]:.6f}\n") + print(f"[INFO] Wrote {len(annotations)} annotations to {annotation_file}") + else: + print(f"[INFO] No annotations generated for frame {filename}") # Generate meta file with open( diff --git a/pbr/util.py b/pbr/util.py index f0afa13..2aa4b49 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -360,3 +360,506 @@ def find_forward_vector(obj): forward.normalize() # Normalize the forward vector after setting Z to 0 return forward + +def get_robot_bounding_box(robot_obj, cam, scene): + """Calculates 2D bounding box for robot objects with all their parts""" + import bpy_extras + + # Extract robot number from the object name (e.g., "r6_Torso" -> "r6") + robot_prefix = robot_obj.name.split('_')[0] # e.g., "r6" + + # Find all objects that belong to this robot + robot_parts = [] + for obj in bpy.data.objects: + if obj.name.startswith(robot_prefix + '_'): + robot_parts.append(obj) + + print(f"Found {len(robot_parts)} parts for robot {robot_prefix}") + + # Collect all bounding box corners from all robot parts + all_corners = [] + + for part in robot_parts: + # Get the 8 corners of each part's bounding box in world coordinates + for corner in part.bound_box: + world_corner = part.matrix_world @ Vector(corner) + # Project to camera view + camera_corner = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_corner) + if camera_corner.z > 0: # Only use points in front of camera + all_corners.append(camera_corner) + + if not all_corners: + print(f"No valid corners found for robot {robot_prefix}") + 
return None + + # Find the overall min/max bounds + min_x = min(corner.x for corner in all_corners) + max_x = max(corner.x for corner in all_corners) + min_y = min(corner.y for corner in all_corners) + max_y = max(corner.y for corner in all_corners) + + # Convert to pixels + min_x *= scene.render.resolution_x + max_x *= scene.render.resolution_x + min_y *= scene.render.resolution_y + max_y *= scene.render.resolution_y + + print(f"Robot {robot_prefix} combined bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + +def get_robot_bounding_box_panoramic(obj, h, w, lens, cam, scene): + + # Extract robot number from the object name (e.g., "r6_Torso" -> "r6") + robot_prefix = obj.name.split('_')[0] # e.g., "r6" + + # Find all objects that belong to this robot + robot_parts = [] + for obj in bpy.data.objects: + if obj.name.startswith(robot_prefix + '_'): + robot_parts.append(obj) + + bbox_corners = [] + screen_positions = [] + + for part in robot_parts: + + part_center = (cam.matrix_world.inverted() @ part.matrix_world @ Vector(part.location)) + part_center.normalize() + if part_center.z > 0: + continue + + for corner in part.bound_box: + + bbox_corner = (cam.matrix_world.inverted() @ part.matrix_world @ Vector(corner)) + bbox_corner.normalize() + + phi = math.atan2(bbox_corner.y, bbox_corner.x) + l = (bbox_corner.x**2 + bbox_corner.y**2)**(1/2) + l = np.clip(l, -0.999, 0.999) + theta = math.asin(l) + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + bbox_corners.append(bbox_corner) + screen_positions.append(Vector((x, y))) + + if not bbox_corners: + print("no valid corners for robot " + obj.name) + return None + + min_x = min(screen_pos.x for screen_pos in screen_positions) + max_x = max(screen_pos.x for screen_pos in screen_positions) + min_y = min(screen_pos.y for 
screen_pos in screen_positions) + max_y = max(screen_pos.y for screen_pos in screen_positions) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + + +def get_bounding_box(obj): + """Calculates 2D bounding box for YOLO format""" + import bpy_extras + cam = bpy.context.scene.camera + scene = bpy.context.scene + + # Special handling for ball objects (spheres) + if obj.name == "Ball": + return get_sphere_bounding_box(obj, cam, scene) + + # Special handling for robot objects - check if this looks like a robot part + # Robot parts follow pattern "r_" (e.g., "r6_Torso") + if '_' in obj.name and obj.name.split('_')[0].startswith('r') and obj.name.split('_')[0][1:].isdigit(): + return get_robot_bounding_box(obj, cam, scene) + + # Default bounding box calculation for other objects + bbox_corners = [bpy_extras.object_utils.world_to_camera_view(scene, cam, obj.matrix_world @ Vector(corner)) for corner in obj.bound_box] + + # Check if any corners are behind the camera + valid_corners = [corner for corner in bbox_corners if corner.z > 0] + if not valid_corners: + print(f"All corners of {obj.name} are behind camera") + return None + + min_x = min(corner.x for corner in valid_corners) + max_x = max(corner.x for corner in valid_corners) + min_y = min(corner.y for corner in valid_corners) + max_y = max(corner.y for corner in valid_corners) + + # Convert to pixels + min_x *= scene.render.resolution_x + max_x *= scene.render.resolution_x + min_y *= scene.render.resolution_y + max_y *= scene.render.resolution_y + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + +def get_bounding_box_panoramic(obj): + import bpy_extras + + cam = bpy.context.scene.camera + scene = bpy.context.scene + + lens = cam.data.cycles.fisheye_lens + + aspect_ratio = bpy.context.scene.render.resolution_x / bpy.context.scene.render.resolution_y + if cam.data.sensor_fit == 
'VERTICAL': + h = cam.data.sensor_height + w = aspect_ratio * h + else: + w = cam.data.sensor_width + h = w / aspect_ratio + + # Special handling for ball objects (spheres) + if obj.name == "Ball": + return get_sphere_bounding_box_panoramic(obj, h, w, lens, cam, scene) + + # Special handling for robot objects - check if this looks like a robot part + # Robot parts follow pattern "r_" (e.g., "r6_Torso") + if '_' in obj.name and obj.name.split('_')[0].startswith('r') and obj.name.split('_')[0][1:].isdigit(): + return get_robot_bounding_box_panoramic(obj, h, w, lens, cam, scene) + + bbox_corners = [] + screen_positions = [] + + for corner in obj.bound_box: + + bbox_corner = (cam.matrix_world.inverted() @ obj.matrix_world @ Vector(corner)) + bbox_corner.normalize() + + if bbox_corner.z > 0: + continue + + phi = math.atan2(bbox_corner.y, bbox_corner.x) + l = (bbox_corner.x**2 + bbox_corner.y**2)**(1/2) + l = np.clip(l, -0.999, 0.999) + theta = math.asin(l) + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + bbox_corners.append(bbox_corner) + screen_positions.append(Vector((x, y))) + + if not bbox_corners: + print("no valid corners for " + obj.name) + return None + + min_x = min(screen_pos.x for screen_pos in screen_positions) + max_x = max(screen_pos.x for screen_pos in screen_positions) + min_y = min(screen_pos.y for screen_pos in screen_positions) + max_y = max(screen_pos.y for screen_pos in screen_positions) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + +def get_sphere_bounding_box(obj, cam, scene): + """Calculates accurate 2D bounding box for spherical objects""" + import bpy_extras + + # Check camera type - equisolid cameras need different handling + camera_type = getattr(cam.data, 'type', 'PERSP') + + # Get the sphere center in world coordinates + 
world_center = obj.matrix_world.translation + radius = max(obj.dimensions) / 2.0 + + # For now, disable equisolid handling and use perspective projection for all cameras + # This ensures consistent, reliable bounding boxes + # TODO: Re-enable equisolid handling once perspective projection is perfected + + # Regular perspective camera handling for all camera types + # Project sphere center to camera view + center_2d = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_center) + + # Check if sphere center is behind camera + if center_2d.z <= 0: + print(f"Ball behind camera, z={center_2d.z}") + return None + + # Convert to pixel coordinates (same system as regular bbox function) + center_x_pixels = center_2d.x * scene.render.resolution_x + center_y_pixels = center_2d.y * scene.render.resolution_y + + # Calculate distance from camera to ball + camera_pos = cam.matrix_world.translation + distance = (world_center - camera_pos).length + + # Check if ball is occluded + cam_to_ball = world_center - camera_pos + ray_hit = scene.ray_cast(bpy.context.evaluated_depsgraph_get(), cam.matrix_world.translation + cam_to_ball * 0.20, cam_to_ball, distance=10) + + if ray_hit[4] != obj: + return None + + # Simple perspective projection for radius + # Use camera focal length to calculate apparent size + focal_length = cam.data.lens # in mm + sensor_width = cam.data.sensor_width # in mm + + # Calculate apparent size in pixels + # apparent_size = (object_size / distance) * focal_length * (image_width / sensor_width) + apparent_diameter = (radius * 2.0 / distance) * focal_length * (scene.render.resolution_x / sensor_width) + radius_pixels = apparent_diameter / 2.0 + + # Calculate bounding box + min_x = center_x_pixels - radius_pixels + max_x = center_x_pixels + radius_pixels + min_y = center_y_pixels - radius_pixels + max_y = center_y_pixels + radius_pixels + + print(f"Ball bbox: center=({center_2d.x:.3f}, {center_2d.y:.3f}), radius={radius:.3f}") + print(f"Ball bbox: 
distance={distance:.1f}m, apparent_diameter={apparent_diameter:.1f}px") + print(f"Ball bbox: center_pixels=({center_x_pixels:.1f}, {center_y_pixels:.1f}), radius_pixels={radius_pixels:.1f}") + print(f"Ball bbox pixels: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + + return (min_x, min_y, max_x, max_y) + +def get_sphere_bounding_box_panoramic(obj, h, w, lens, cam, scene): + + radius = max(obj.dimensions) / 2.0 + + center = (cam.matrix_world.inverted() @ Vector(obj.location)) + center.normalize() + + print(center.z) + if center.z > 0: + print("Ball " + obj.name + " behind camera") + return None + + phi = math.atan2(center.y, center.x) + l = (center.x**2 + center.y**2)**(1/2) + theta = math.asin(l) + + world_center = obj.matrix_world.translation + camera_pos = cam.matrix_world.translation + distance = (world_center - camera_pos).length + + # Check if ball is occluded + cam_to_ball = world_center - camera_pos + ray_hit = scene.ray_cast(bpy.context.evaluated_depsgraph_get(), cam.matrix_world.translation + cam_to_ball * 0.20, cam_to_ball, distance=10) + + if ray_hit[4] != obj: + return None + + apparent_diameter = (radius * 2.0 / distance) * lens * (scene.render.resolution_x / w) + radius_pixels = apparent_diameter / 2.0 + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + min_x = x - (radius_pixels) + max_x = x + (radius_pixels) + min_y = y - (radius_pixels) + max_y = y + (radius_pixels) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + +def write_annotations(obj, class_id=0): + """Writes YOLO annotations for the object""" + scene = bpy.context.scene + + cam = bpy.context.scene.camera + print(cam.data.type) + if cam.data.type == "PERSP": + bbox_result = get_bounding_box(obj) + else: + bbox_result = 
get_bounding_box_panoramic(obj) + + # Check if bounding box calculation failed + if bbox_result is None: + print(f"Failed to calculate bounding box for {obj.name}") + return None + + min_x, min_y, max_x, max_y = bbox_result + + # Clamp bounding box to image bounds + min_x = max(0, min_x) + min_y = max(0, min_y) + max_x = min(scene.render.resolution_x, max_x) + max_y = min(scene.render.resolution_y, max_y) + + # Check if there's any visible area after clamping + if min_x >= max_x or min_y >= max_y: + print(f"No visible area for {obj.name} after clamping") + return None + + # Calculate center and dimensions + x_center = (min_x + max_x) / 2 + # Use consistent Y-flip for all objects + y_center = scene.render.resolution_y - (min_y + max_y) / 2 + + width = max_x - min_x + height = max_y - min_y + + # Normalize coordinates + x_center /= scene.render.resolution_x + y_center /= scene.render.resolution_y + width /= scene.render.resolution_x + height /= scene.render.resolution_y + + # Final bounds check on normalized coordinates + if x_center < 0 or x_center > 1 or y_center < 0 or y_center > 1: + print(f"Center out of bounds for {obj.name}: ({x_center:.3f}, {y_center:.3f})") + return None + + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if (width * scene.render.resolution_x < min_size_pixels or + height * scene.render.resolution_y < min_size_pixels): + print(f"Bounding box too small for {obj.name}: {width * scene.render.resolution_x:.1f} x {height * scene.render.resolution_y:.1f}") + return None + + print(f"{obj.name} {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") + return class_id, x_center, y_center, width, height + +def write_goal_post_annotations_from_mask(mask_path, scene): + """Generate goal post annotations from segmentation mask""" + import cv2 + import numpy as np + + try: + mask_img = cv2.imread(mask_path) + except: + print(f"Cannot load mask image {mask_path}") + return [] + + 
if mask_img is None: + print(f"Failed to read mask image {mask_path}") + return [] + + annotations = [] + + # Goal posts should be yellow in the segmentation mask + # Convert BGR to RGB and look for yellow pixels + mask_rgb = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB) + + # Define yellow color range (goal posts) + # Yellow in RGB is approximately (255, 255, 0) + yellow_lower = np.array([250, 250, 0]) + yellow_upper = np.array([255, 255, 10]) + + # Create mask for yellow pixels (goal posts) + yellow_mask = cv2.inRange(mask_rgb, yellow_lower, yellow_upper) + + # Find contours in the yellow mask + contours, _ = cv2.findContours(yellow_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + goalpost_class_id = 1 # Goal posts have class 1 + + for contour in contours: + # Calculate bounding box for each goal post contour + x, y, w, h = cv2.boundingRect(contour) + + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if w < min_size_pixels or h < min_size_pixels: + print(f"Goal post contour too small: {w}x{h}") + continue + + # Convert to YOLO format (normalized center coordinates and dimensions) + img_height, img_width = mask_img.shape[:2] + + center_x = (x + w/2) / img_width + center_y = (y + h/2) / img_height + width_norm = w / img_width + height_norm = h / img_height + + # Ensure coordinates are within bounds + if 0 <= center_x <= 1 and 0 <= center_y <= 1: + print(f"Goal post from mask: {goalpost_class_id} {center_x:.6f} {center_y:.6f} {width_norm:.6f} {height_norm:.6f}") + annotations.append((goalpost_class_id, center_x, center_y, width_norm, height_norm)) + + print(f"Generated {len(annotations)} goal post annotations from mask") + return annotations + +def write_intersection_annotations_from_mask(mask_path, scene): + """Generate intersection annotations from segmentation mask""" + import cv2 + import numpy as np + + try: + mask_img = cv2.imread(mask_path) + except: + print(f"Cannot load mask image 
{mask_path}") + return [] + + if mask_img is None: + print(f"Failed to read mask image {mask_path}") + return [] + + annotations = [] + + # Convert BGR to RGB for color detection + mask_rgb = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB) + + # Define color ranges and class IDs for different intersection types + intersection_types = { + "L": { + "class_id": 3, + "color_lower": np.array([250, 0, 250]), # Magenta lower bound + "color_upper": np.array([255, 10, 255]) # Magenta upper bound + }, + "T": { + "class_id": 4, + "color_lower": np.array([0, 250, 250]), # Cyan lower bound + "color_upper": np.array([10, 255, 255]) # Cyan upper bound + }, + "X": { + "class_id": 5, + "color_lower": np.array([250, 90, 0]), # Orange lower bound + "color_upper": np.array([255, 110, 0]) # Orange upper bound + } + } + + for intersection_type, config in intersection_types.items(): + # Create mask for this intersection type's color + color_mask = cv2.inRange(mask_rgb, config["color_lower"], config["color_upper"]) + + # Find contours in the color mask + contours, _ = cv2.findContours(color_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + for contour in contours: + # Calculate bounding box for each intersection contour + x, y, w, h = cv2.boundingRect(contour) + + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if w < min_size_pixels or h < min_size_pixels: + print(f"{intersection_type}-intersection contour too small: {w}x{h}") + continue + + # Convert to YOLO format (normalized center coordinates and dimensions) + img_height, img_width = mask_img.shape[:2] + + center_x = (x + w/2) / img_width + center_y = (y + h/2) / img_height + width_norm = w / img_width + height_norm = h / img_height + + # Ensure coordinates are within bounds + if 0 <= center_x <= 1 and 0 <= center_y <= 1: + print(f"{intersection_type}-intersection from mask: {config['class_id']} {center_x:.6f} {center_y:.6f} {width_norm:.6f} {height_norm:.6f}") + 
annotations.append((config["class_id"], center_x, center_y, width_norm, height_norm)) + + print(f"Generated {len(annotations)} intersection annotations from mask") + return annotations diff --git a/scripts/image_curator.py b/scripts/image_curator.py new file mode 100755 index 0000000..e28e269 --- /dev/null +++ b/scripts/image_curator.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +""" +Image Curator Tool for NUpbr Dataset + +This tool allows you to review generated images with their annotations, +visualize bounding boxes, and accept/reject images for your final dataset. + +Usage: + python image_curator.py --input /path/to/run_X --output /path/to/curated_dataset + +Controls: + - Left/Right Arrow Keys: Navigate between images + - 'a' or Space: Accept current image + - 'r' or Delete: Reject current image + - 's': Skip current image (no decision) + - 'q' or Escape: Quit + - 't': Toggle bounding box visibility + - 'f': Toggle fullscreen +""" + +import argparse +import os +import shutil +import json +from pathlib import Path +import tkinter as tk +from tkinter import ttk, messagebox, filedialog +from PIL import Image, ImageTk, ImageDraw, ImageFont +import cv2 +import numpy as np + + +class ImageCurator: + def __init__(self, input_dir, output_dir): + self.input_dir = Path(input_dir) + self.output_dir = Path(output_dir) + self.current_index = 0 + self.show_boxes = True + + # Class names for YOLO format + self.class_names = { + 0: "Ball", + 1: "Goal_post", + 2: "Robot", + 3: "L_intersection", + 4: "T_intersection", + 5: "X_intersection" + } + + # Colors for bounding boxes (BGR format for OpenCV) + self.class_colors = { + 0: (0, 255, 0), # Ball - Green + 1: (0, 255, 255), # Goal_post - Yellow + 2: (255, 0, 0), # Robot - Blue + 3: (255, 0, 255), # L_intersection - Magenta + 4: (255, 255, 0), # T_intersection - Cyan + 5: (0, 165, 255) # X_intersection - Orange + } + + # Initialize paths + self.raw_dir = self.input_dir / "raw" + self.seg_dir = self.input_dir / "seg" + 
self.meta_dir = self.input_dir / "meta" + self.annotations_dir = self.input_dir / "annotations" + + # Output paths + self.output_raw_dir = self.output_dir / "raw" + self.output_seg_dir = self.output_dir / "seg" + self.output_meta_dir = self.output_dir / "meta" + self.output_annotations_dir = self.output_dir / "annotations" + + # Create output directories + for dir_path in [self.output_raw_dir, self.output_seg_dir, self.output_meta_dir, self.output_annotations_dir]: + dir_path.mkdir(parents=True, exist_ok=True) + + # Load image list + self.load_images() + + # Statistics + self.stats = { + 'total': len(self.images), + 'accepted': 0, + 'rejected': 0, + 'reviewed': 0 + } + + # Load existing decisions + self.decisions_file = self.output_dir / "curation_decisions.json" + self.decisions = self.load_decisions() + + # Setup GUI + self.setup_gui() + + def load_images(self): + """Load list of available images""" + if not self.raw_dir.exists(): + raise ValueError(f"Raw images directory not found: {self.raw_dir}") + + self.images = [] + for img_file in sorted(self.raw_dir.glob("*.png")): + # Check if annotation file exists + ann_file = self.annotations_dir / f"{img_file.stem}.txt" + if ann_file.exists(): + self.images.append(img_file.stem) + + print(f"Found {len(self.images)} images with annotations") + + def load_decisions(self): + """Load previous curation decisions""" + if self.decisions_file.exists(): + with open(self.decisions_file, 'r') as f: + decisions = json.load(f) + # Count existing decisions + for decision in decisions.values(): + if decision == 'accepted': + self.stats['accepted'] += 1 + elif decision == 'rejected': + self.stats['rejected'] += 1 + self.stats['reviewed'] = self.stats['accepted'] + self.stats['rejected'] + return decisions + return {} + + def save_decisions(self): + """Save curation decisions""" + with open(self.decisions_file, 'w') as f: + json.dump(self.decisions, f, indent=2) + + def setup_gui(self): + """Setup the GUI""" + self.root = tk.Tk() + 
self.root.title("NUpbr Image Curator") + self.root.geometry("1200x900") + + # Main frame + main_frame = ttk.Frame(self.root) + main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) + + # Control panel + control_frame = ttk.Frame(main_frame) + control_frame.pack(fill=tk.X, pady=(0, 10)) + + # Image info + self.info_label = ttk.Label(control_frame, text="", font=("Arial", 12)) + self.info_label.pack(side=tk.LEFT) + + # Statistics + self.stats_label = ttk.Label(control_frame, text="", font=("Arial", 10)) + self.stats_label.pack(side=tk.RIGHT) + + # Navigation frame + nav_frame = ttk.Frame(main_frame) + nav_frame.pack(fill=tk.X, pady=(0, 10)) + + ttk.Button(nav_frame, text="← Previous", command=self.prev_image).pack(side=tk.LEFT) + ttk.Button(nav_frame, text="Next →", command=self.next_image).pack(side=tk.LEFT, padx=(5, 0)) + + # Decision buttons + decision_frame = ttk.Frame(nav_frame) + decision_frame.pack(side=tk.RIGHT) + + ttk.Button(decision_frame, text="✓ Accept (A)", command=self.accept_image, + style="Accept.TButton").pack(side=tk.LEFT, padx=(0, 5)) + ttk.Button(decision_frame, text="✗ Reject (R)", command=self.reject_image, + style="Reject.TButton").pack(side=tk.LEFT, padx=(0, 5)) + ttk.Button(decision_frame, text="Skip (S)", command=self.skip_image).pack(side=tk.LEFT) + + # Toggle controls + toggle_frame = ttk.Frame(nav_frame) + toggle_frame.pack() + + ttk.Button(toggle_frame, text="Toggle Boxes (T)", command=self.toggle_boxes).pack(side=tk.LEFT, padx=(0, 5)) + + # Image display frame + self.image_frame = ttk.Frame(main_frame) + self.image_frame.pack(fill=tk.BOTH, expand=True) + + # Canvas for image display + self.canvas = tk.Canvas(self.image_frame, bg='black') + self.canvas.pack(fill=tk.BOTH, expand=True) + + # Setup key bindings + self.root.bind('<Key>', self.on_key_press) + self.root.focus_set() + + # Configure styles + style = ttk.Style() + style.configure("Accept.TButton", foreground="green") + style.configure("Reject.TButton", foreground="red") + + # 
Load first image + if self.images: + self.show_current_image() + + def load_annotations(self, image_name): + """Load YOLO format annotations for an image""" + ann_file = self.annotations_dir / f"{image_name}.txt" + annotations = [] + + if ann_file.exists(): + with open(ann_file, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) == 5: + class_id = int(parts[0]) + x_center = float(parts[1]) + y_center = float(parts[2]) + width = float(parts[3]) + height = float(parts[4]) + annotations.append((class_id, x_center, y_center, width, height)) + + return annotations + + def draw_bounding_boxes(self, image, annotations, img_width, img_height): + """Draw bounding boxes on image""" + if not self.show_boxes or not annotations: + return image + + # Convert PIL image to OpenCV format + cv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) + + for class_id, x_center, y_center, width, height in annotations: + # Convert YOLO format to pixel coordinates + x1 = int((x_center - width/2) * img_width) + y1 = int((y_center - height/2) * img_height) + x2 = int((x_center + width/2) * img_width) + y2 = int((y_center + height/2) * img_height) + + # Get color and class name + color = self.class_colors.get(class_id, (255, 255, 255)) + class_name = self.class_names.get(class_id, f"Class_{class_id}") + + # Draw bounding box + cv2.rectangle(cv_image, (x1, y1), (x2, y2), color, 2) + + # Draw label background + label = f"{class_name}" + (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(cv_image, (x1, y1 - label_height - 10), (x1 + label_width, y1), color, -1) + + # Draw label text + cv2.putText(cv_image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + + # Convert back to PIL format + return Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)) + + def show_current_image(self): + """Display the current image with annotations""" + if not self.images: + return + + image_name = 
self.images[self.current_index] + + # Load image + img_path = self.raw_dir / f"{image_name}.png" + if not img_path.exists(): + self.info_label.config(text=f"Image not found: {img_path}") + return + + image = Image.open(img_path) + original_width, original_height = image.size + + # Load annotations + annotations = self.load_annotations(image_name) + + # Draw bounding boxes + image_with_boxes = self.draw_bounding_boxes(image, annotations, original_width, original_height) + + # Resize image to fit canvas + canvas_width = self.canvas.winfo_width() + canvas_height = self.canvas.winfo_height() + + if canvas_width > 1 and canvas_height > 1: # Avoid division by zero + # Calculate scaling factor to fit image in canvas while maintaining aspect ratio + scale_x = canvas_width / original_width + scale_y = canvas_height / original_height + scale = min(scale_x, scale_y, 1.0) # Don't upscale + + new_width = int(original_width * scale) + new_height = int(original_height * scale) + + image_with_boxes = image_with_boxes.resize((new_width, new_height), Image.Resampling.LANCZOS) + + # Display image + self.photo = ImageTk.PhotoImage(image_with_boxes) + self.canvas.delete("all") + self.canvas.create_image(canvas_width//2, canvas_height//2, image=self.photo) + + # Update info + decision = self.decisions.get(image_name, "pending") + decision_text = f" [{decision.upper()}]" if decision != "pending" else "" + self.info_label.config(text=f"Image {self.current_index + 1}/{len(self.images)}: {image_name}{decision_text} | " + f"Annotations: {len(annotations)}") + + # Update statistics + self.update_stats_display() + + def update_stats_display(self): + """Update the statistics display""" + progress = (self.stats['reviewed'] / self.stats['total']) * 100 if self.stats['total'] > 0 else 0 + self.stats_label.config(text=f"Progress: {progress:.1f}% | " + f"Accepted: {self.stats['accepted']} | " + f"Rejected: {self.stats['rejected']} | " + f"Reviewed: {self.stats['reviewed']}/{self.stats['total']}") + 
+    def accept_image(self):
+        """Accept the current image and copy files"""
+        if not self.images:
+            return
+
+        image_name = self.images[self.current_index]
+
+        # Update decision (only if not already accepted, so stats can't double-count)
+        if image_name not in self.decisions or self.decisions[image_name] != 'accepted':
+            if image_name in self.decisions and self.decisions[image_name] == 'rejected':
+                self.stats['rejected'] -= 1
+            elif image_name not in self.decisions:
+                self.stats['reviewed'] += 1
+
+            self.decisions[image_name] = 'accepted'
+            self.stats['accepted'] += 1
+
+            # Copy files
+            self.copy_image_files(image_name)
+
+        self.save_decisions()
+        self.next_image()
+
+    def reject_image(self):
+        """Reject the current image"""
+        if not self.images:
+            return
+
+        image_name = self.images[self.current_index]
+
+        # Update decision (only if not already rejected, so stats can't double-count)
+        if image_name not in self.decisions or self.decisions[image_name] != 'rejected':
+            if image_name in self.decisions and self.decisions[image_name] == 'accepted':
+                self.stats['accepted'] -= 1
+                # Remove files from output if they exist
+                self.remove_image_files(image_name)
+            elif image_name not in self.decisions:
+                self.stats['reviewed'] += 1
+
+            self.decisions[image_name] = 'rejected'
+            self.stats['rejected'] += 1
+
+        self.save_decisions()
+        self.next_image()
+
+    def skip_image(self):
+        """Skip the current image without decision"""
+        self.next_image()
+
+    def copy_image_files(self, image_name):
+        """Copy image files to output directory"""
+        files_to_copy = [
+            (self.raw_dir / f"{image_name}.png", self.output_raw_dir / f"{image_name}.png"),
+            (self.annotations_dir / f"{image_name}.txt", self.output_annotations_dir / f"{image_name}.txt"),
+        ]
+
+        # Optional files
+        seg_file = self.seg_dir / f"{image_name}.png"
+        if seg_file.exists():
+            files_to_copy.append((seg_file, self.output_seg_dir / f"{image_name}.png"))
+
+        meta_file = self.meta_dir / f"{image_name}.yaml"
+        if meta_file.exists():
+            files_to_copy.append((meta_file, self.output_meta_dir / f"{image_name}.yaml"))
+
+        for src, dst in files_to_copy:
+            if src.exists():
+                shutil.copy2(src, dst)
+
+    def remove_image_files(self, image_name):
+        """Remove image files from output directory"""
+        files_to_remove = [
+            self.output_raw_dir / f"{image_name}.png",
+            self.output_annotations_dir / f"{image_name}.txt",
+            self.output_seg_dir / f"{image_name}.png",
+            self.output_meta_dir / f"{image_name}.yaml"
+        ]
+
+        for file_path in files_to_remove:
+            if file_path.exists():
+                file_path.unlink()
+
+    def toggle_boxes(self):
+        """Toggle bounding box visibility"""
+        self.show_boxes = not self.show_boxes
+        self.show_current_image()
+
+    def prev_image(self):
+        """Go to previous image"""
+        if self.images and self.current_index > 0:
+            self.current_index -= 1
+            self.show_current_image()
+
+    def next_image(self):
+        """Go to next image"""
+        if self.images and self.current_index < len(self.images) - 1:
+            self.current_index += 1
+            self.show_current_image()
+
+    def on_key_press(self, event):
+        """Handle key press events"""
+        key = event.keysym.lower()
+
+        if key in ['left', 'up']:
+            self.prev_image()
+        elif key in ['right', 'down']:
+            self.next_image()
+        elif key in ['a', 'space']:
+            self.accept_image()
+        elif key in ['r', 'delete']:
+            self.reject_image()
+        elif key == 's':
+            self.skip_image()
+        elif key == 't':
+            self.toggle_boxes()
+        elif key in ['q', 'escape']:
+            self.quit()
+
+    def quit(self):
+        """Quit the application"""
+        self.save_decisions()
+        self.root.quit()
+
+    def run(self):
+        """Run the curator"""
+        try:
+            self.root.mainloop()
+        except KeyboardInterrupt:
+            self.quit()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Image Curator for NUpbr Dataset")
+    parser.add_argument("--input", "-i", required=True, help="Input directory (e.g., outputs/run_6)")
+    parser.add_argument("--output", "-o", required=True, help="Output directory for curated dataset")
+
+    args = parser.parse_args()
+
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+
+    if not input_dir.exists():
+        print(f"Error: Input directory does not exist: {input_dir}")
+        return
+
+    if not (input_dir / "raw").exists():
+        print(f"Error: Raw images directory not found: {input_dir / 'raw'}")
+        return
+
+    if not (input_dir / "annotations").exists():
+        print(f"Error: Annotations directory not found: {input_dir / 'annotations'}")
+        return
+
+    print(f"Input directory: {input_dir}")
+    print(f"Output directory: {output_dir}")
+
+    curator = ImageCurator(input_dir, output_dir)
+    curator.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/visualise_bboxes.py b/scripts/visualise_bboxes.py
new file mode 100755
index 0000000..47519f6
--- /dev/null
+++ b/scripts/visualise_bboxes.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+"""
+Interactive Image Viewer with Bounding Box Annotations
+
+This tool displays images from a folder with their corresponding bounding box annotations.
+Supports YOLO format annotations and provides interactive navigation.
+
+Usage:
+    python visualise_bboxes.py --images <image_folder> --annotations <annotation_folder> [--config <config_yaml>]
+
+Controls:
+    - Right Arrow / Space: Next image
+    - Left Arrow: Previous image
+    - Q / Escape: Quit
+    - S: Save current image with annotations
+    - H: Show/hide help overlay
+"""
+
+import argparse
+import os
+import sys
+import cv2
+import glob
+import yaml
+from pathlib import Path
+
+class BBoxVisualiser:
+    def __init__(self, image_folder, annotation_folder, config_file=None):
+        self.image_folder = Path(image_folder)
+        self.annotation_folder = Path(annotation_folder)
+        self.config_file = config_file
+        self.current_index = 0
+        self.show_help = False
+
+        # Default class names and colors
+        self.class_names = {
+            0: "ball",
+            1: "goal_post",
+            2: "robot",
+            3: "L_intersection",
+            4: "T_intersection",
+            5: "X_intersection"
+        }
+
+        # Color palette for different classes (BGR format)
+        self.colors = [
+            (0, 255, 0),    # Green for ball
+            (255, 0, 0),    # Blue for goal_post
+            (0, 0, 255),    # Red for robot
+            (255, 255, 0),  # Cyan for L_intersection
+            (255, 0, 255),  # Magenta for T_intersection
+            (0, 255, 255),  # Yellow for X_intersection
+        ]
+
+        # Load configuration if provided
+        if config_file and os.path.exists(config_file):
+            self.load_config(config_file)
+
+        # Get list of images
+        self.image_files = self.get_image_files()
+        if not self.image_files:
+            raise ValueError(f"No images found in {self.image_folder}")
+
+        print(f"Found {len(self.image_files)} images")
+        print("Controls: Arrow keys to navigate, 'q' to quit, 's' to save, 'h' for help")
+
+    def load_config(self, config_file):
+        """Load class names from YAML config file"""
+        try:
+            with open(config_file, 'r') as f:
+                config = yaml.safe_load(f)
+                if 'names' in config:
+                    # YOLO data.yaml may store names as a list; normalise to an id -> name dict
+                    self.class_names = dict(enumerate(config['names'])) if isinstance(config['names'], list) else config['names']
+                    print(f"Loaded {len(self.class_names)} classes from config")
+        except Exception as e:
+            print(f"Warning: Could not load config file {config_file}: {e}")
+
+    def get_image_files(self):
+        """Get sorted list of image files"""
+        extensions = ['*.png', '*.jpg', '*.jpeg', '*.bmp', '*.tiff']
+        image_files = []
+        for ext in extensions:
+            image_files.extend(glob.glob(str(self.image_folder / ext)))
+        return sorted(image_files)
+
+    def load_annotations(self, image_path):
+        """Load bounding box annotations for an image"""
+        image_name = Path(image_path).stem
+        annotation_file = self.annotation_folder / f"{image_name}.txt"
+
+        annotations = []
+        if annotation_file.exists():
+            try:
+                with open(annotation_file, 'r') as f:
+                    for line in f:
+                        line = line.strip()
+                        if line:
+                            parts = line.split()
+                            if len(parts) >= 5:
+                                class_id = int(parts[0])
+                                center_x = float(parts[1])
+                                center_y = float(parts[2])
+                                width = float(parts[3])
+                                height = float(parts[4])
+                                annotations.append({
+                                    'class_id': class_id,
+                                    'center_x': center_x,
+                                    'center_y': center_y,
+                                    'width': width,
+                                    'height': height
+                                })
+            except Exception as e:
+                print(f"Error loading annotations for {image_name}: {e}")
+
+        return annotations
+
+    def yolo_to_pixel_coords(self, annotation, img_width, img_height):
+        """Convert YOLO normalised coordinates to pixel coordinates"""
+        center_x = annotation['center_x'] * img_width
+        center_y = annotation['center_y'] * img_height
+        width = annotation['width'] * img_width
+        height = annotation['height'] * img_height
+
+        x1 = int(center_x - width / 2)
+        y1 = int(center_y - height / 2)
+        x2 = int(center_x + width / 2)
+        y2 = int(center_y + height / 2)
+
+        return x1, y1, x2, y2
+
+    def draw_bounding_boxes(self, image, annotations):
+        """Draw bounding boxes and labels on the image"""
+        img_height, img_width = image.shape[:2]
+
+        for annotation in annotations:
+            class_id = annotation['class_id']
+            x1, y1, x2, y2 = self.yolo_to_pixel_coords(annotation, img_width, img_height)
+
+            # Get color for this class
+            color = self.colors[class_id % len(self.colors)]
+
+            # Draw bounding box
+            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
+
+            # Draw class label
+            class_name = self.class_names.get(class_id, f"class_{class_id}")
+            label = f"{class_name}"
+
+            # Calculate label size and position
+            (label_width, label_height), baseline = cv2.getTextSize(
+                label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
+            )
+
+            # Draw label background
+            cv2.rectangle(
+                image,
+                (x1, y1 - label_height - baseline - 5),
+                (x1 + label_width + 5, y1),
+                color,
+                -1
+            )
+
+            # Draw label text
+            cv2.putText(
+                image,
+                label,
+                (x1 + 2, y1 - baseline - 2),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (255, 255, 255),
+                2
+            )
+
+        return image
+
+    def draw_help_overlay(self, image):
+        """Draw help text overlay"""
+        help_text = [
+            "Controls:",
+            "Right Arrow / Space: Next image",
+            "Left Arrow: Previous image",
+            "S: Save current image",
+            "H: Toggle this help",
+            "Q / Escape: Quit"
+        ]
+
+        # Semi-transparent overlay
+        overlay = image.copy()
+        cv2.rectangle(overlay, (10, 10), (400, 180), (0, 0, 0), -1)
+        image = cv2.addWeighted(image, 0.7, overlay, 0.3, 0)
+
+        # Draw help text
+        for i, text in enumerate(help_text):
+            cv2.putText(
+                image,
+                text,
+                (20, 40 + i * 25),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (255, 255, 255),
+                2
+            )
+
+        return image
+
+    def draw_info_overlay(self, image, image_name, num_annotations):
+        """Draw image information overlay"""
+        info_text = [
+            f"Image: {image_name}",
+            f"Annotations: {num_annotations}",
+            f"{self.current_index + 1}/{len(self.image_files)}"
+        ]
+
+        img_height = image.shape[0]
+        for i, text in enumerate(info_text):
+            cv2.putText(
+                image,
+                text,
+                (10, img_height - 60 + i * 20),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (255, 255, 255),
+                2
+            )
+            cv2.putText(
+                image,
+                text,
+                (10, img_height - 60 + i * 20),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (0, 0, 0),
+                1
+            )
+
+        return image
+
+    def save_current_image(self, image, image_name):
+        """Save the current image with annotations"""
+        output_path = f"{image_name}_with_boxes.png"
+        cv2.imwrite(output_path, image)
+        print(f"Saved annotated image: {output_path}")
+
+    def run(self):
+        """Main visualisation loop"""
+        cv2.namedWindow('Image Viewer with Annotations', cv2.WINDOW_NORMAL)
+
+        while True:
+            # Load current image
+            current_image_path = self.image_files[self.current_index]
+            image_name = Path(current_image_path).name
+
+            try:
+                image = cv2.imread(current_image_path)
+                if image is None:
+                    print(f"Could not load image: {current_image_path}")
+                    self.current_index = (self.current_index + 1) % len(self.image_files)
+                    continue
+
+                # Load annotations
+                annotations = self.load_annotations(current_image_path)
+
+                # Draw bounding boxes
+                if annotations:
+                    image = self.draw_bounding_boxes(image, annotations)
+
+                # Draw info overlay
+                image = self.draw_info_overlay(image, image_name, len(annotations))
+
+                # Draw help overlay if enabled
+                if self.show_help:
+                    image = self.draw_help_overlay(image)
+
+                # Display image
+                cv2.imshow('Image Viewer with Annotations', image)
+
+                # Handle keyboard input
+                key = cv2.waitKey(0) & 0xFF
+
+                if key == ord('q') or key == 27:  # 'q' or Escape
+                    break
+                elif key == 83 or key == 32:  # Right arrow or Space
+                    self.current_index = (self.current_index + 1) % len(self.image_files)
+                elif key == 81:  # Left arrow
+                    self.current_index = (self.current_index - 1) % len(self.image_files)
+                elif key == ord('s'):  # Save
+                    self.save_current_image(image, Path(current_image_path).stem)
+                elif key == ord('h'):  # Help
+                    self.show_help = not self.show_help
+
+            except Exception as e:
+                print(f"Error processing image {current_image_path}: {e}")
+                self.current_index = (self.current_index + 1) % len(self.image_files)
+
+        cv2.destroyAllWindows()
+
+parser = argparse.ArgumentParser(description='Visualise images with bounding box annotations')
+parser.add_argument('--images', '-i', required=True, help='Path to images folder')
+parser.add_argument('--annotations', '-a', required=True, help='Path to annotations folder')
+parser.add_argument('--config', '-c', help='Path to YAML config file with class names')
+
+args = parser.parse_args()
+
+# Validate input paths
+if not os.path.exists(args.images):
+    print(f"Error: Images folder does not exist: {args.images}")
+    sys.exit(1)
+
+if not os.path.exists(args.annotations):
+    print(f"Error: Annotations folder does not exist: {args.annotations}")
+    sys.exit(1)
+
+try:
+    visualiser = BBoxVisualiser(args.images, args.annotations, args.config)
+    visualiser.run()
+except Exception as e:
+    print(f"Error: {e}")
+    sys.exit(1)