From f164f54dc831199f236e30e316213b63e1b01c3a Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 13:34:41 +1000 Subject: [PATCH 01/11] ball radius --- pbr/config/scene_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbr/config/scene_config.py b/pbr/config/scene_config.py index e781792..ba68565 100644 --- a/pbr/config/scene_config.py +++ b/pbr/config/scene_config.py @@ -127,7 +127,7 @@ "mesh_types": [".fbx", ".obj"], "path": path.abspath(path.join(res_path, "balls")), "mask": {"index": 1, "colour": (1, 0, 0, 1)}, - "radius": 0.045, + "radius": 0.09, "standard_deviation": 0.005, # for randomising ball position, large value means more random, zero means no random }, "environment": { From d5019da4334483d57e45b47b0d7785bdf662a9fa Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 13:35:12 +1000 Subject: [PATCH 02/11] bbox config --- pbr/config/scene_config.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pbr/config/scene_config.py b/pbr/config/scene_config.py index ba68565..fa76091 100644 --- a/pbr/config/scene_config.py +++ b/pbr/config/scene_config.py @@ -152,6 +152,32 @@ "mask": {"index": 4, "colour": (0, 1, 0, 1), "line_colour": (1, 1, 1, 1)}, }, "goal": {"mask": {"index": 2, "colour": (1, 1, 0, 1)}}, + "bounding_boxes": { + "enabled": True, + "output_format": "yolo", + "mask": {"index": 6, "colour": (0.5, 0.5, 0.5, 1)}, # Dummy mask config for compatibility + "classes": { + "ball": 0, + "goal_post": 1, + "robot": 2, + "L_intersection": 3, + "T_intersection": 4, + "X_intersection": 5 + }, + "min_bbox_size": 8, # minimum bounding box size in pixels + "max_bbox_size": 800, # maximum bounding box size in pixels + "intersection_base_sizes": { + "L": 0.15, # meters - base size for L intersection + "T": 0.20, # meters - base size for T intersection + "X": 0.25 # meters - base size for X intersection + }, + "goal_post_detection": { + "enabled": True, + "width": 0.12, # meters - actual goal post width + "min_height_ratio": 0.3, # minimum visible height ratio + "use_segmentation": True # extract from goal segmentation mask + } + }, } From 7f6299fb4c8e4d3478b8b695f2dd1241765438ac Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 13:35:20 +1000 Subject: [PATCH 03/11] vis tool --- scripts/visualise_bboxes.py | 317 ++++++++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100755 scripts/visualise_bboxes.py diff --git a/scripts/visualise_bboxes.py b/scripts/visualise_bboxes.py new file mode 100755 index 0000000..47519f6 --- /dev/null +++ b/scripts/visualise_bboxes.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +""" +Interactive Image Viewer with Bounding Box Annotations + +This tool displays images from a folder with their corresponding bounding box annotations. +Supports YOLO format annotations and provides interactive navigation. + +Usage: + python visualise_bboxes.py --images --annotations [--config ] + +Controls: + - Right Arrow / Space: Next image + - Left Arrow: Previous image + - Q / Escape: Quit + - S: Save current image with annotations + - H: Show/hide help overlay +""" + +import argparse +import os +import sys +import cv2 +import glob +import yaml +from pathlib import Path + +class BBoxVisualiser: + def __init__(self, image_folder, annotation_folder, config_file=None): + self.image_folder = Path(image_folder) + self.annotation_folder = Path(annotation_folder) + self.config_file = config_file + self.current_index = 0 + self.show_help = False + + # Default class names and colors + self.class_names = { + 0: "ball", + 1: "goal_post", + 2: "robot", + 3: "L_intersection", + 4: "T_intersection", + 5: "X_intersection" + } + + # Color palette for different classes (BGR format) + self.colors = [ + (0, 255, 0), # Green for ball + (255, 0, 0), # Blue for goal_post + (0, 0, 255), # Red for robot + (255, 255, 0), # Cyan for L_intersection + (255, 0, 255), # Magenta for T_intersection + (0, 255, 255), # Yellow for X_intersection + ] + + # Load configuration if provided + if config_file and os.path.exists(config_file): + self.load_config(config_file) + + # Get list of images + self.image_files = self.get_image_files() + if not self.image_files: + raise ValueError(f"No images found in {self.image_folder}") + + print(f"Found {len(self.image_files)} images") + print("Controls: Arrow keys to navigate, 'q' to quit, 's' to save, 'h' for help") + + def load_config(self, config_file): + """Load class names from YAML config file""" + try: + with open(config_file, 'r') as f: + config = yaml.safe_load(f) + if 'names' in config: + self.class_names = config['names'] + print(f"Loaded {len(self.class_names)} classes from config") + except Exception as e: + print(f"Warning: Could not load config file {config_file}: {e}") + + def get_image_files(self): + """Get sorted list of image files""" + extensions = ['*.png', '*.jpg', '*.jpeg', '*.bmp', '*.tiff'] + image_files = [] + for ext in extensions: + image_files.extend(glob.glob(str(self.image_folder / ext))) + return sorted(image_files) + + def load_annotations(self, image_path): + """Load bounding box annotations for an image""" + image_name = Path(image_path).stem + annotation_file = self.annotation_folder / f"{image_name}.txt" + + annotations = [] + if annotation_file.exists(): + try: + with open(annotation_file, 'r') as f: + for line in f: + line = line.strip() + if line: + parts = line.split() + if len(parts) >= 5: + class_id = int(parts[0]) + center_x = float(parts[1]) + center_y = float(parts[2]) + width = float(parts[3]) + height = float(parts[4]) + annotations.append({ + 'class_id': class_id, + 'center_x': center_x, + 'center_y': center_y, + 'width': width, + 'height': height + }) + except Exception as e: + print(f"Error loading annotations for {image_name}: {e}") + + return annotations + + def yolo_to_pixel_coords(self, annotation, img_width, img_height): + """Convert YOLO normalised coordinates to pixel coordinates""" + center_x = annotation['center_x'] * img_width + center_y = annotation['center_y'] * img_height + width = annotation['width'] * img_width + height = annotation['height'] * img_height + + x1 = int(center_x - width / 2) + y1 = int(center_y - height / 2) + x2 = int(center_x + width / 2) + y2 = int(center_y + height / 2) + + return x1, y1, x2, y2 + + def draw_bounding_boxes(self, image, annotations): + """Draw bounding boxes and labels on the image""" + img_height, img_width = image.shape[:2] + + for annotation in annotations: + class_id = annotation['class_id'] + x1, y1, x2, y2 = self.yolo_to_pixel_coords(annotation, img_width, img_height) + + # Get color for this class + color = self.colors[class_id % len(self.colors)] + + # Draw bounding box + cv2.rectangle(image, (x1, y1), (x2, y2), color, 2) + + # Draw class label + class_name = self.class_names.get(class_id, f"class_{class_id}") + label = f"{class_name}" + + # Calculate label size and position + (label_width, label_height), baseline = cv2.getTextSize( + label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2 + ) + + # Draw label background + cv2.rectangle( + image, + (x1, y1 - label_height - baseline - 5), + (x1 + label_width + 5, y1), + color, + -1 + ) + + # Draw label text + cv2.putText( + image, + label, + (x1 + 2, y1 - baseline - 2), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2 + ) + + return image + + def draw_help_overlay(self, image): + """Draw help text overlay""" + help_text = [ + "Controls:", + "Right Arrow / Space: Next image", + "Left Arrow: Previous image", + "S: Save current image", + "H: Toggle this help", + "Q / Escape: Quit" + ] + + # Semi-transparent overlay + overlay = image.copy() + cv2.rectangle(overlay, (10, 10), (400, 180), (0, 0, 0), -1) + image = cv2.addWeighted(image, 0.7, overlay, 0.3, 0) + + # Draw help text + for i, text in enumerate(help_text): + cv2.putText( + image, + text, + (20, 40 + i * 25), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2 + ) + + return image + + def draw_info_overlay(self, image, image_name, num_annotations): + """Draw image information overlay""" + info_text = [ + f"Image: {image_name}", + f"Annotations: {num_annotations}", + f"{self.current_index + 1}/{len(self.image_files)}" + ] + + img_height = image.shape[0] + for i, text in enumerate(info_text): + cv2.putText( + image, + text, + (10, img_height - 60 + i * 20), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (255, 255, 255), + 2 + ) + cv2.putText( + image, + text, + (10, img_height - 60 + i * 20), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (0, 0, 0), + 1 + ) + + return image + + def save_current_image(self, image, image_name): + """Save the current image with annotations""" + output_path = f"{image_name}_with_boxes.png" + cv2.imwrite(output_path, image) + print(f"Saved annotated image: {output_path}") + + def run(self): + """Main visualisation loop""" + cv2.namedWindow('Image Viewer with Annotations', cv2.WINDOW_NORMAL) + + while True: + # Load current image + current_image_path = self.image_files[self.current_index] + image_name = Path(current_image_path).name + + try: + image = cv2.imread(current_image_path) + if image is None: + print(f"Could not load image: {current_image_path}") + self.current_index = (self.current_index + 1) % len(self.image_files) + continue + + # Load annotations + annotations = self.load_annotations(current_image_path) + + # Draw bounding boxes + if annotations: + image = self.draw_bounding_boxes(image, annotations) + + # Draw info overlay + image = self.draw_info_overlay(image, image_name, len(annotations)) + + # Draw help overlay if enabled + if self.show_help: + image = self.draw_help_overlay(image) + + # Display image + cv2.imshow('Image Viewer with Annotations', image) + + # Handle keyboard input + key = cv2.waitKey(0) & 0xFF + + if key == ord('q') or key == 27: # 'q' or Escape + break + elif key == 83 or key == 32: # Right arrow or Space + self.current_index = (self.current_index + 1) % len(self.image_files) + elif key == 81: # Left arrow + self.current_index = (self.current_index - 1) % len(self.image_files) + elif key == ord('s'): # Save + self.save_current_image(image, Path(current_image_path).stem) + elif key == ord('h'): # Help + self.show_help = not self.show_help + + except Exception as e: + print(f"Error processing image {current_image_path}: {e}") + self.current_index = (self.current_index + 1) % len(self.image_files) + + cv2.destroyAllWindows() + +parser = argparse.ArgumentParser(description='Visualise images with bounding box annotations') +parser.add_argument('--images', '-i', required=True, help='Path to images folder') +parser.add_argument('--annotations', '-a', required=True, help='Path to annotations folder') +parser.add_argument('--config', '-c', help='Path to YAML config file with class names') + +args = parser.parse_args() + +# Validate input paths +if not os.path.exists(args.images): + print(f"Error: Images folder does not exist: {args.images}") + sys.exit(1) + +if not os.path.exists(args.annotations): + print(f"Error: Annotations folder does not exist: {args.annotations}") + sys.exit(1) + +try: + visualiser = BBoxVisualiser(args.images, args.annotations, args.config) + visualiser.run() +except Exception as e: + print(f"Error: {e}") + sys.exit(1) From e900e828fa9778554ce194e5b58f4b6c843f6830 Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 13:39:23 +1000 Subject: [PATCH 04/11] create and save annotations --- pbr/pbr.py | 39 +++++++++++++++++- pbr/util.py | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/pbr/pbr.py b/pbr/pbr.py index 4cc7199..3ab66fd 100644 --- a/pbr/pbr.py +++ b/pbr/pbr.py @@ -366,8 +366,43 @@ def main(): os.path.join(out_cfg.depth_dir, filename) + ".exr", ) - # Check that the rotation matrix of the main camera is valid - print(f"Rotation matrix of {cam_l.obj.name}: \n", cam_l.obj.matrix_world) + ############################################## + ## BOUNDING BOX GENERATION ## + ############################################## + + annotations = [] + + # Ball annotations + ball_annotations = [util.write_annotations(ball.obj)] + annotations += [ann for ann in ball_annotations if ann is not None] + + # Goal annotations + goal_annotations = [util.write_annotations(goal.obj, 1) for goal in goals] + annotations += [ann for ann in goal_annotations if ann is not None] + + # Robot annotations + robot_annotations = [util.write_annotations(robot.obj, 2) for robot in robots] + annotations += [ann for ann in robot_annotations if ann is not None] + + # Misc robot annotations + misc_annotations = [util.write_annotations(misc_robot.obj, 2) for misc_robot in misc_robots] + annotations += [ann for ann in misc_annotations if ann is not None] + + # Intersection annotations from environment data + if env_info: + intersection_annotations = util.write_intersection_annotations(env_info) + annotations += intersection_annotations + + # Write YOLO format annotations + if annotations: + os.makedirs(out_cfg.output_dir + "/annotations", exist_ok=True) + annotation_file = os.path.join(out_cfg.output_dir + "/annotations", f"{filename}.txt") + with open(annotation_file, 'w') as f: + for ann in annotations: + f.write(f"{ann[0]} {ann[1]:.6f} {ann[2]:.6f} {ann[3]:.6f} {ann[4]:.6f}\n") + print(f"[INFO] Wrote {len(annotations)} annotations to {annotation_file}") + else: + print(f"[INFO] No annotations generated for frame {filename}") # Generate meta file with open( diff --git a/pbr/util.py b/pbr/util.py index f0afa13..a09053d 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -360,3 +360,114 @@ def find_forward_vector(obj): forward.normalize() # Normalize the forward vector after setting Z to 0 return forward + +def get_bounding_box(obj): + """Calculates 2D bounding box for YOLO format""" + import bpy_extras + cam = bpy.context.scene.camera + scene = bpy.context.scene + + bbox_corners = [bpy_extras.object_utils.world_to_camera_view(scene, cam, obj.matrix_world @ Vector(corner)) for corner in obj.bound_box] + + min_x = min(corner.x for corner in bbox_corners) + max_x = max(corner.x for corner in bbox_corners) + min_y = min(corner.y for corner in bbox_corners) + max_y = max(corner.y for corner in bbox_corners) + + min_x *= scene.render.resolution_x + max_x *= scene.render.resolution_x + min_y *= scene.render.resolution_y + max_y *= scene.render.resolution_y + + print((obj.name, min_x, min_y, max_x, max_y)) + return (min_x, min_y, max_x, max_y) + +def write_annotations(obj, class_id=0): + """Writes YOLO annotations for the object""" + scene = bpy.context.scene + min_x, min_y, max_x, max_y = get_bounding_box(obj) + x_center = (min_x + max_x) / 2 + y_center = scene.render.resolution_y - (min_y + max_y) / 2 + width = max_x - min_x + height = max_y - min_y + + # Normalize coordinates + x_center /= scene.render.resolution_x + y_center /= scene.render.resolution_y + width /= scene.render.resolution_x + height /= scene.render.resolution_y + + if x_center < 0 or x_center > 1 or y_center < 0 or y_center > 1: + # Out of bounds + return None + + print(f"{obj.name} {class_id} {x_center} {y_center} {width} {height}") + return class_id, x_center, y_center, width, height + +def write_intersection_annotations(environment_data): + """Generate intersection annotations from environment data""" + import bpy_extras + + if not environment_data or "intersections" not in environment_data: + return [] + + cam = bpy.context.scene.camera + scene = bpy.context.scene + annotations = [] + + # Class mapping for intersections + intersection_classes = { + "L": 3, # L_intersection + "T": 4, # T_intersection + "X": 5 # X_intersection + } + + # Base sizes for intersection bounding boxes (in meters) + base_sizes = { + "L": 0.15, + "T": 0.20, + "X": 0.25 + } + + for intersection_type, class_id in intersection_classes.items(): + if intersection_type not in environment_data["intersections"]: + continue + + for intersection in environment_data["intersections"][intersection_type]: + world_pos = Vector(intersection["position"]) + + # Project to camera view + screen_pos = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_pos) + + # Check if in view + if screen_pos.x < 0 or screen_pos.x > 1 or screen_pos.y < 0 or screen_pos.y > 1: + continue + + # Calculate distance for size estimation + camera_pos = cam.location + distance = (camera_pos - world_pos).length + + # Calculate apparent size based on distance + base_size = base_sizes.get(intersection_type, 0.2) + focal_length_pixels = 1000 # Approximate focal length in pixels + apparent_size_pixels = (base_size * focal_length_pixels) / max(distance, 0.1) + + # Convert to normalized size + apparent_size_x = apparent_size_pixels / scene.render.resolution_x + apparent_size_y = apparent_size_pixels / scene.render.resolution_y + + # Ensure minimum size + min_size = 8.0 / min(scene.render.resolution_x, scene.render.resolution_y) + apparent_size_x = max(min_size, apparent_size_x) + apparent_size_y = max(min_size, apparent_size_y) + + # Create YOLO format annotation (center_x, center_y, width, height) + x_center = screen_pos.x + y_center = 1.0 - screen_pos.y # Flip Y coordinate + width = apparent_size_x + height = apparent_size_y + + print(f"Intersection {intersection_type} at {intersection['position']} -> {class_id} {x_center} {y_center} {width} {height}") + annotations.append((class_id, x_center, y_center, width, height)) + + return annotations From 2636a835536af80e19ba02bf18c38e811317e6ed Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 14:12:52 +1000 Subject: [PATCH 05/11] balls working? --- pbr/util.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 9 deletions(-) diff --git a/pbr/util.py b/pbr/util.py index a09053d..152fcb8 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -367,27 +367,119 @@ def get_bounding_box(obj): cam = bpy.context.scene.camera scene = bpy.context.scene + # Special handling for ball objects (spheres) + if obj.name == "Ball": + return get_sphere_bounding_box(obj, cam, scene) + + # Default bounding box calculation for other objects bbox_corners = [bpy_extras.object_utils.world_to_camera_view(scene, cam, obj.matrix_world @ Vector(corner)) for corner in obj.bound_box] - min_x = min(corner.x for corner in bbox_corners) - max_x = max(corner.x for corner in bbox_corners) - min_y = min(corner.y for corner in bbox_corners) - max_y = max(corner.y for corner in bbox_corners) + # Check if any corners are behind the camera + valid_corners = [corner for corner in bbox_corners if corner.z > 0] + if not valid_corners: + print(f"All corners of {obj.name} are behind camera") + return None + + min_x = min(corner.x for corner in valid_corners) + max_x = max(corner.x for corner in valid_corners) + min_y = min(corner.y for corner in valid_corners) + max_y = max(corner.y for corner in valid_corners) min_x *= scene.render.resolution_x max_x *= scene.render.resolution_x min_y *= scene.render.resolution_y max_y *= scene.render.resolution_y - print((obj.name, min_x, min_y, max_x, max_y)) + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + +def get_sphere_bounding_box(obj, cam, scene): + """Calculates accurate 2D bounding box for spherical objects""" + import bpy_extras + + # Get the sphere center in world coordinates + world_center = obj.matrix_world.translation + + # Get the sphere radius (assuming uniform scaling) + # For a ball, dimensions should be (diameter, diameter, diameter) + radius = max(obj.dimensions) / 2.0 + + # Project sphere center to camera view + center_2d = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_center) + + # Check if sphere center is behind camera + if center_2d.z <= 0: + print(f"Ball behind camera, z={center_2d.z}") + return None + + # Convert center to pixel coordinates + # Blender's coordinate system: Y=0 at bottom, Y=1 at top + # Image coordinate system: Y=0 at top, Y=resolution_y at bottom + # So we need to flip: image_y = resolution_y * (1 - blender_y) + center_x_pixels = center_2d.x * scene.render.resolution_x + center_y_pixels = scene.render.resolution_y * (1.0 - center_2d.y) + + # Calculate screen-space radius by projecting points offset by the radius + # Project a point that's offset by the radius in world coordinates + offset_point = world_center + Vector((radius, 0, 0)) + offset_2d = bpy_extras.object_utils.world_to_camera_view(scene, cam, offset_point) + + # Calculate the radius in pixels from the difference + radius_x_pixels = abs((offset_2d.x - center_2d.x) * scene.render.resolution_x) + + # Do the same for Y direction to handle aspect ratio + offset_point_y = world_center + Vector((0, radius, 0)) + offset_2d_y = bpy_extras.object_utils.world_to_camera_view(scene, cam, offset_point_y) + radius_y_pixels = abs((offset_2d_y.y - center_2d.y) * scene.render.resolution_y) + + # Use the larger radius to ensure we capture the full sphere + radius_pixels = max(radius_x_pixels, radius_y_pixels) + + # Calculate bounding box in image coordinates + min_x = center_x_pixels - radius_pixels + max_x = center_x_pixels + radius_pixels + min_y = center_y_pixels - radius_pixels + max_y = center_y_pixels + radius_pixels + + print(f"Ball bbox: center=({center_2d.x:.3f}, {center_2d.y:.3f}), world_radius={radius:.3f}") + print(f"Ball bbox: center_pixels=({center_x_pixels:.1f}, {center_y_pixels:.1f}), radius_pixels={radius_pixels:.1f}") + print(f"Ball bbox pixels: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) def write_annotations(obj, class_id=0): """Writes YOLO annotations for the object""" scene = bpy.context.scene - min_x, min_y, max_x, max_y = get_bounding_box(obj) + bbox_result = get_bounding_box(obj) + + # Check if bounding box calculation failed + if bbox_result is None: + print(f"Failed to calculate bounding box for {obj.name}") + return None + + min_x, min_y, max_x, max_y = bbox_result + + # Clamp bounding box to image bounds + min_x = max(0, min_x) + min_y = max(0, min_y) + max_x = min(scene.render.resolution_x, max_x) + max_y = min(scene.render.resolution_y, max_y) + + # Check if there's any visible area after clamping + if min_x >= max_x or min_y >= max_y: + print(f"No visible area for {obj.name} after clamping") + return None + + # Calculate center and dimensions x_center = (min_x + max_x) / 2 - y_center = scene.render.resolution_y - (min_y + max_y) / 2 + + # For balls, we already converted to image coordinates in get_sphere_bounding_box + # For other objects, we need to flip Y coordinate + if obj.name == "Ball": + y_center = (min_y + max_y) / 2 + else: + y_center = scene.render.resolution_y - (min_y + max_y) / 2 + width = max_x - min_x height = max_y - min_y @@ -397,11 +489,19 @@ def write_annotations(obj, class_id=0): width /= scene.render.resolution_x height /= scene.render.resolution_y + # Final bounds check on normalized coordinates if x_center < 0 or x_center > 1 or y_center < 0 or y_center > 1: - # Out of bounds + print(f"Center out of bounds for {obj.name}: ({x_center:.3f}, {y_center:.3f})") + return None + + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if (width * scene.render.resolution_x < min_size_pixels or + height * scene.render.resolution_y < min_size_pixels): + print(f"Bounding box too small for {obj.name}: {width * scene.render.resolution_x:.1f} x {height * scene.render.resolution_y:.1f}") return None - print(f"{obj.name} {class_id} {x_center} {y_center} {width} {height}") + print(f"{obj.name} {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") return class_id, x_center, y_center, width, height def write_intersection_annotations(environment_data): From e01fdefa1b8d6e8e08ae29cad70ee37d0a0ac53f Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 16:16:00 +1000 Subject: [PATCH 06/11] ball and robots good --- pbr/config/scene_config.py | 4 +- pbr/pbr.py | 21 ++- pbr/util.py | 306 +++++++++++++++++++++++++++++++------ 3 files changed, 272 insertions(+), 59 deletions(-) diff --git a/pbr/config/scene_config.py b/pbr/config/scene_config.py index fa76091..27784cf 100644 --- a/pbr/config/scene_config.py +++ b/pbr/config/scene_config.py @@ -256,8 +256,8 @@ def configure_scene(): "camera": { **random.choice( [ - {"type": "EQUISOLID", "focal_length": 10.5, "fov": pi}, - {"type": "RECTILINEAR", "fov": 0.857}, + # {"type": "EQUISOLID", "focal_length": 10.5, "fov": pi}, + {"type": "RECTILINEAR", "fov": 1.6}, ] ), "stereo_camera_distance": 0.1, diff --git a/pbr/pbr.py b/pbr/pbr.py index 3ab66fd..fa5f4a7 100644 --- a/pbr/pbr.py +++ b/pbr/pbr.py @@ -377,21 +377,26 @@ def main(): annotations += [ann for ann in ball_annotations if ann is not None] # Goal annotations - goal_annotations = [util.write_annotations(goal.obj, 1) for goal in goals] - annotations += [ann for ann in goal_annotations if ann is not None] + # goal_annotations = [util.write_annotations(goal.obj, 1) for goal in goals] + # annotations += [ann for ann in goal_annotations if ann is not None] - # Robot annotations - robot_annotations = [util.write_annotations(robot.obj, 2) for robot in robots] + # Goal post annotations (calculated from field geometry) + field_config = {**config["field"], **config["goal"]} # Combine field and goal config + goalpost_annotations = util.write_goal_post_annotations(field_config) + annotations += goalpost_annotations + + # Robot annotations (exclude the camera robot r0) + robot_annotations = [util.write_annotations(robot.obj, 2) for robot in robots[1:]] # Skip robots[0] which is the camera robot annotations += [ann for ann in robot_annotations if ann is not None] # Misc robot annotations misc_annotations = [util.write_annotations(misc_robot.obj, 2) for misc_robot in misc_robots] annotations += [ann for ann in misc_annotations if ann is not None] - # Intersection annotations from environment data - if env_info: - intersection_annotations = util.write_intersection_annotations(env_info) - annotations += intersection_annotations + # Intersection annotations (based on standard field geometry) + field_config_for_intersections = config["field"] # Use field config for intersections + intersection_annotations = util.write_intersection_annotations(field_config_for_intersections) + annotations += intersection_annotations # Write YOLO format annotations if annotations: diff --git a/pbr/util.py b/pbr/util.py index 152fcb8..5aae48b 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -361,6 +361,52 @@ def find_forward_vector(obj): return forward +def get_robot_bounding_box(robot_obj, cam, scene): + """Calculates 2D bounding box for robot objects with all their parts""" + import bpy_extras + + # Extract robot number from the object name (e.g., "r6_Torso" -> "r6") + robot_prefix = robot_obj.name.split('_')[0] # e.g., "r6" + + # Find all objects that belong to this robot + robot_parts = [] + for obj in bpy.data.objects: + if obj.name.startswith(robot_prefix + '_'): + robot_parts.append(obj) + + print(f"Found {len(robot_parts)} parts for robot {robot_prefix}") + + # Collect all bounding box corners from all robot parts + all_corners = [] + + for part in robot_parts: + # Get the 8 corners of each part's bounding box in world coordinates + for corner in part.bound_box: + world_corner = part.matrix_world @ Vector(corner) + # Project to camera view + camera_corner = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_corner) + if camera_corner.z > 0: # Only use points in front of camera + all_corners.append(camera_corner) + + if not all_corners: + print(f"No valid corners found for robot {robot_prefix}") + return None + + # Find the overall min/max bounds + min_x = min(corner.x for corner in all_corners) + max_x = max(corner.x for corner in all_corners) + min_y = min(corner.y for corner in all_corners) + max_y = max(corner.y for corner in all_corners) + + # Convert to pixels + min_x *= scene.render.resolution_x + max_x *= scene.render.resolution_x + min_y *= scene.render.resolution_y + max_y *= scene.render.resolution_y + + print(f"Robot {robot_prefix} combined bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + def get_bounding_box(obj): """Calculates 2D bounding box for YOLO format""" import bpy_extras @@ -371,6 +417,11 @@ def get_bounding_box(obj): if obj.name == "Ball": return get_sphere_bounding_box(obj, cam, scene) + # Special handling for robot objects - check if this looks like a robot part + # Robot parts follow pattern "r_" (e.g., "r6_Torso") + if '_' in obj.name and obj.name.split('_')[0].startswith('r') and obj.name.split('_')[0][1:].isdigit(): + return get_robot_bounding_box(obj, cam, scene) + # Default bounding box calculation for other objects bbox_corners = [bpy_extras.object_utils.world_to_camera_view(scene, cam, obj.matrix_world @ Vector(corner)) for corner in obj.bound_box] @@ -385,11 +436,12 @@ def get_bounding_box(obj): min_y = min(corner.y for corner in valid_corners) max_y = max(corner.y for corner in valid_corners) + # Convert to pixels min_x *= scene.render.resolution_x max_x *= scene.render.resolution_x min_y *= scene.render.resolution_y max_y *= scene.render.resolution_y - + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") return (min_x, min_y, max_x, max_y) @@ -397,13 +449,18 @@ def get_sphere_bounding_box(obj, cam, scene): """Calculates accurate 2D bounding box for spherical objects""" import bpy_extras + # Check camera type - equisolid cameras need different handling + camera_type = getattr(cam.data, 'type', 'PERSP') + # Get the sphere center in world coordinates world_center = obj.matrix_world.translation - - # Get the sphere radius (assuming uniform scaling) - # For a ball, dimensions should be (diameter, diameter, diameter) radius = max(obj.dimensions) / 2.0 + # For now, disable equisolid handling and use perspective projection for all cameras + # This ensures consistent, reliable bounding boxes + # TODO: Re-enable equisolid handling once perspective projection is perfected + + # Regular perspective camera handling for all camera types # Project sphere center to camera view center_2d = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_center) @@ -412,36 +469,32 @@ def get_sphere_bounding_box(obj, cam, scene): print(f"Ball behind camera, z={center_2d.z}") return None - # Convert center to pixel coordinates - # Blender's coordinate system: Y=0 at bottom, Y=1 at top - # Image coordinate system: Y=0 at top, Y=resolution_y at bottom - # So we need to flip: image_y = resolution_y * (1 - blender_y) + # Convert to pixel coordinates (same system as regular bbox function) center_x_pixels = center_2d.x * scene.render.resolution_x - center_y_pixels = scene.render.resolution_y * (1.0 - center_2d.y) + center_y_pixels = center_2d.y * scene.render.resolution_y - # Calculate screen-space radius by projecting points offset by the radius - # Project a point that's offset by the radius in world coordinates - offset_point = world_center + Vector((radius, 0, 0)) - offset_2d = bpy_extras.object_utils.world_to_camera_view(scene, cam, offset_point) + # Calculate distance from camera to ball + camera_pos = cam.matrix_world.translation + distance = (world_center - camera_pos).length - # Calculate the radius in pixels from the difference - radius_x_pixels = abs((offset_2d.x - center_2d.x) * scene.render.resolution_x) + # Simple perspective projection for radius + # Use camera focal length to calculate apparent size + focal_length = cam.data.lens # in mm + sensor_width = cam.data.sensor_width # in mm - # Do the same for Y direction to handle aspect ratio - offset_point_y = world_center + Vector((0, radius, 0)) - offset_2d_y = bpy_extras.object_utils.world_to_camera_view(scene, cam, offset_point_y) - radius_y_pixels = abs((offset_2d_y.y - center_2d.y) * scene.render.resolution_y) + # Calculate apparent size in pixels + # apparent_size = (object_size / distance) * focal_length * (image_width / sensor_width) + apparent_diameter = (radius * 2.0 / distance) * focal_length * (scene.render.resolution_x / sensor_width) + radius_pixels = apparent_diameter / 2.0 - # Use the larger radius to ensure we capture the full sphere - radius_pixels = max(radius_x_pixels, radius_y_pixels) - - # Calculate bounding box in image coordinates + # Calculate bounding box min_x = center_x_pixels - radius_pixels max_x = center_x_pixels + radius_pixels min_y = center_y_pixels - radius_pixels max_y = center_y_pixels + radius_pixels - print(f"Ball bbox: center=({center_2d.x:.3f}, {center_2d.y:.3f}), world_radius={radius:.3f}") + print(f"Ball bbox: center=({center_2d.x:.3f}, {center_2d.y:.3f}), radius={radius:.3f}") + print(f"Ball bbox: distance={distance:.1f}m, apparent_diameter={apparent_diameter:.1f}px") print(f"Ball bbox: center_pixels=({center_x_pixels:.1f}, {center_y_pixels:.1f}), radius_pixels={radius_pixels:.1f}") print(f"Ball bbox pixels: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") @@ -472,13 +525,8 @@ def write_annotations(obj, class_id=0): # Calculate center and dimensions x_center = (min_x + max_x) / 2 - - # For balls, we already converted to image coordinates in get_sphere_bounding_box - # For other objects, we need to flip Y coordinate - if obj.name == "Ball": - y_center = (min_y + max_y) / 2 - else: - y_center = scene.render.resolution_y - (min_y + max_y) / 2 + # Use consistent Y-flip for all objects + y_center = scene.render.resolution_y - (min_y + max_y) / 2 width = max_x - min_x height = max_y - min_y @@ -504,17 +552,131 @@ def write_annotations(obj, class_id=0): print(f"{obj.name} {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") return class_id, x_center, y_center, width, height -def write_intersection_annotations(environment_data): - """Generate intersection annotations from environment data""" +def write_goal_post_annotations(field_config): + """Generate goal post annotations based on field geometry""" import bpy_extras - if not environment_data or "intersections" not in environment_data: - return [] + cam = bpy.context.scene.camera + scene = bpy.context.scene + annotations = [] + + # Field dimensions from the field config + field_length = field_config.get("length", 9.0) # 9m field length + + # Goal dimensions from the goal config (these override field params in the combined config) + goal_width = field_config.get("width", 3.0) # Goal width (between posts) - should be 2.6m + goalpost_width = field_config.get("post_width", 0.15) # Post diameter - should be 0.12m + goalpost_width = goalpost_width + 0.2 + goalpost_height = field_config.get("height", 2.5) # Goal height - should be 1.8m + print(f"Goal config: length={field_length}, goal_width={goal_width}, post_width={goalpost_width}, post_height={goalpost_height}") + + # Calculate goal post positions + # Goals are at each end of the field (±field_length/2 from center) + # Posts are at ±goal_width/2 from center line + + goal_positions = [ + # Goal 1 (one end of field) + [ + [-field_length/2 -0.7, -goal_width/2 - 0.35, 0], # Left post + [-field_length/2 - 0.7, +goal_width/2 - 0.35, 0], # Right post + ], + # Goal 2 (other end of field) + [ + [+field_length/2 + 0.7, -goal_width/2 + 0.35, 0], # Left post + [+field_length/2 + 0.7, +goal_width/2 + 0.35, 0], # Right post + ] + ] + + goalpost_class_id = 1 # Goal posts have class 1 (not 2) + + for goal_idx, goal in enumerate(goal_positions): + for post_idx, post_pos in enumerate(goal): + world_pos = Vector(post_pos) + + # Project goal post center to camera view + screen_pos = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_pos) + + # Check if in view and in front of camera + if screen_pos.z <= 0: + print(f"Goal post {goal_idx+1}-{post_idx+1} behind camera (z={screen_pos.z:.3f})") + continue + + if screen_pos.x < 0 or screen_pos.x > 1 or screen_pos.y < 0 or screen_pos.y > 1: + print(f"Goal post {goal_idx+1}-{post_idx+1} outside view: ({screen_pos.x:.3f}, {screen_pos.y:.3f})") + continue + + # Calculate distance for size estimation + camera_pos = cam.matrix_world.translation + distance = (camera_pos - world_pos).length + + print(f"Goal post {goal_idx+1}-{post_idx+1} at world {post_pos}, distance={distance:.2f}m") + + # Project the top of the post to get height in pixels + top_pos = Vector([post_pos[0], post_pos[1], goalpost_height]) + top_screen = bpy_extras.object_utils.world_to_camera_view(scene, cam, top_pos) + + # Calculate apparent width using perspective projection (similar to ball calculation) + focal_length = cam.data.lens # in mm + sensor_width = cam.data.sensor_width # in mm + + # Calculate apparent width in pixels + apparent_width_pixels = (goalpost_width / distance) * focal_length * (scene.render.resolution_x / sensor_width) + + # Calculate height in pixels from screen space difference + if top_screen.z > 0: # Top is also in front of camera + height_pixels = abs((screen_pos.y - top_screen.y) * scene.render.resolution_y) + print(f" Height from projection: {height_pixels:.1f}px") + else: + # Estimate height if top is behind camera (post is very close) + height_pixels = apparent_width_pixels * (goalpost_height / goalpost_width) + print(f" Height estimated (top behind camera): {height_pixels:.1f}px") + + print(f" Apparent width: {apparent_width_pixels:.1f}px, height: {height_pixels:.1f}px") + + # Convert to normalized coordinates for YOLO format + center_x = screen_pos.x + center_y = 1.0 - screen_pos.y # Flip Y coordinate for YOLO + width_norm = apparent_width_pixels / scene.render.resolution_x + height_norm = height_pixels / scene.render.resolution_y + + print(f" Before clamping: center=({center_x:.3f}, {center_y:.3f}), size=({width_norm:.3f}, {height_norm:.3f})") + + # Ensure minimum size (8 pixels minimum) + min_size_x = 8.0 / scene.render.resolution_x + min_size_y = 8.0 / scene.render.resolution_y + width_norm = max(min_size_x, width_norm) + height_norm = max(min_size_y, height_norm) + + # Clamp maximum size to something reasonable (goal posts shouldn't be huge) + max_size = 0.15 # Maximum 15% of image dimension + width_norm = min(max_size, width_norm) + height_norm = min(max_size, height_norm) + + print(f" Final: center=({center_x:.6f}, {center_y:.6f}), size=({width_norm:.6f}, {height_norm:.6f})") + + annotations.append((goalpost_class_id, center_x, center_y, width_norm, height_norm)) + + print(f"Generated {len(annotations)} goal post annotations") + return annotations + +def write_intersection_annotations(field_config): + """Generate intersection annotations based on standard field geometry""" + import bpy_extras cam = bpy.context.scene.camera scene = bpy.context.scene annotations = [] + # Field dimensions from the field config + field_length = field_config.get("length", 9.0) # 9m field length + field_width = field_config.get("width", 6.0) # 6m field width + goal_area_length = field_config.get("goal_area", {}).get("length", 1.0) # 1m goal area depth + goal_area_width = field_config.get("goal_area", {}).get("width", 5.0) # 5m goal area width + penalty_mark_dist = field_config.get("penalty_mark_dist", 2.1) # 2.1m penalty mark distance + centre_circle_radius = field_config.get("centre_circle_radius", 0.75) # 0.75m centre circle radius + + print(f"Field intersection config: length={field_length}, width={field_width}") + # Class mapping for intersections intersection_classes = { "L": 3, # L_intersection @@ -529,37 +691,83 @@ def write_intersection_annotations(environment_data): "X": 0.25 } + # Define standard field intersection positions based on RoboCup field layout + intersections = { + "L": [ + # Corner intersections (field boundary + goal line/side line) + [-field_length/2, -field_width/2, 0], # Bottom-left corner + [-field_length/2, +field_width/2, 0], # Top-left corner + [+field_length/2, -field_width/2, 0], # Bottom-right corner + [+field_length/2, +field_width/2, 0], # Top-right corner + # Goal area corners + [-field_length/2 + goal_area_length, -goal_area_width/2, 0], # Left goal area bottom + [-field_length/2 + goal_area_length, +goal_area_width/2, 0], # Left goal area top + [+field_length/2 - goal_area_length, -goal_area_width/2, 0], # Right goal area bottom + [+field_length/2 - goal_area_length, +goal_area_width/2, 0], # Right goal area top + ], + "T": [ + # Goal area T-junctions (goal line meets goal area line) + [-field_length/2, -goal_area_width/2, 0], # Left goal, bottom T + [-field_length/2, +goal_area_width/2, 0], # Left goal, top T + [+field_length/2, -goal_area_width/2, 0], # Right goal, bottom T + [+field_length/2, +goal_area_width/2, 0], # Right goal, top T + # Centre line T-junctions (centre line meets side line) + [0, -field_width/2, 0], # Centre line, bottom T + [0, +field_width/2, 0], # Centre line, top T + ], + "X": [ + # Centre circle intersections with centre line (if visible) + [0, 0, 0], # Centre circle (could be X if multiple lines cross) + ] + } + for intersection_type, class_id in intersection_classes.items(): - if intersection_type not in environment_data["intersections"]: + if intersection_type not in intersections: continue - for intersection in environment_data["intersections"][intersection_type]: - world_pos = Vector(intersection["position"]) + for intersection_pos in intersections[intersection_type]: + world_pos = Vector(intersection_pos) # Project to camera view screen_pos = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_pos) - # Check if in view + # Check if in view and in front of camera + if screen_pos.z <= 0: + print(f"Intersection {intersection_type} behind camera (z={screen_pos.z:.3f})") + continue + if screen_pos.x < 0 or screen_pos.x > 1 or screen_pos.y < 0 or screen_pos.y > 1: + print(f"Intersection {intersection_type} outside view: ({screen_pos.x:.3f}, {screen_pos.y:.3f})") continue - # Calculate distance for size estimation - camera_pos = cam.location + # Calculate distance for size estimation (use same method as other objects) + camera_pos = cam.matrix_world.translation distance = (camera_pos - world_pos).length - # Calculate apparent size based on distance + # Calculate apparent size based on distance (use same method as goal posts) base_size = base_sizes.get(intersection_type, 0.2) - focal_length_pixels = 1000 # Approximate focal length in pixels - apparent_size_pixels = (base_size * focal_length_pixels) / max(distance, 0.1) + focal_length = cam.data.lens # in mm + sensor_width = cam.data.sensor_width # in mm + + # Calculate apparent size in pixels using perspective projection + apparent_size_pixels = (base_size / distance) * focal_length * (scene.render.resolution_x / sensor_width) # Convert to normalized size apparent_size_x = apparent_size_pixels / scene.render.resolution_x apparent_size_y = apparent_size_pixels / scene.render.resolution_y - # Ensure minimum size - min_size = 8.0 / min(scene.render.resolution_x, scene.render.resolution_y) - apparent_size_x = max(min_size, apparent_size_x) - apparent_size_y = max(min_size, apparent_size_y) + print(f"Intersection {intersection_type} at {intersection_pos}, distance={distance:.2f}m, size={apparent_size_pixels:.1f}px") + + # Ensure minimum size (8 pixels minimum) + min_size_x = 8.0 / scene.render.resolution_x + min_size_y = 8.0 / scene.render.resolution_y + apparent_size_x = max(min_size_x, apparent_size_x) + apparent_size_y = max(min_size_y, apparent_size_y) + + # Clamp maximum size to something reasonable + max_size = 0.10 # Maximum 10% of image dimension for intersections + apparent_size_x = min(max_size, apparent_size_x) + apparent_size_y = min(max_size, apparent_size_y) # Create YOLO format annotation (center_x, center_y, width, height) x_center = screen_pos.x @@ -567,7 +775,7 @@ def write_intersection_annotations(environment_data): width = apparent_size_x height = apparent_size_y - print(f"Intersection {intersection_type} at {intersection['position']} -> {class_id} {x_center} {y_center} {width} {height}") + print(f"Intersection {intersection_type} at {intersection['position']} -> {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") annotations.append((class_id, x_center, y_center, width, height)) return annotations From 2582a4ec32497ac72a73ea2d2c03c4bc6e2137de Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 16:42:36 +1000 Subject: [PATCH 07/11] YES --- pbr/pbr.py | 15 ++- pbr/util.py | 308 ++++++++++++++++++---------------------------------- 2 files changed, 115 insertions(+), 208 deletions(-) diff --git a/pbr/pbr.py b/pbr/pbr.py index fa5f4a7..dc67b2f 100644 --- a/pbr/pbr.py +++ b/pbr/pbr.py @@ -380,9 +380,11 @@ def main(): # goal_annotations = [util.write_annotations(goal.obj, 1) for goal in goals] # annotations += [ann for ann in goal_annotations if ann is not None] - # Goal post annotations (calculated from field geometry) - field_config = {**config["field"], **config["goal"]} # Combine field and goal config - goalpost_annotations = util.write_goal_post_annotations(field_config) + # Goal post annotations (from rendered segmentation mask) + rendered_mask_path = os.path.join(out_cfg.mask_dir, "{}.png".format(filename)) + goalpost_annotations = util.write_goal_post_annotations_from_mask( + rendered_mask_path, bpy.context.scene + ) annotations += goalpost_annotations # Robot annotations (exclude the camera robot r0) @@ -393,9 +395,10 @@ def main(): misc_annotations = [util.write_annotations(misc_robot.obj, 2) for misc_robot in misc_robots] annotations += [ann for ann in misc_annotations if ann is not None] - # Intersection annotations (based on standard field geometry) - field_config_for_intersections = config["field"] # Use field config for intersections - intersection_annotations = util.write_intersection_annotations(field_config_for_intersections) + # Intersection annotations (from rendered segmentation mask) + intersection_annotations = util.write_intersection_annotations_from_mask( + rendered_mask_path, bpy.context.scene + ) annotations += intersection_annotations # Write YOLO format annotations diff --git a/pbr/util.py b/pbr/util.py index 5aae48b..6f2f507 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -552,230 +552,134 @@ def write_annotations(obj, class_id=0): print(f"{obj.name} {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") return class_id, x_center, y_center, width, height -def write_goal_post_annotations(field_config): - """Generate goal post annotations based on field geometry""" - import bpy_extras +def write_goal_post_annotations_from_mask(mask_path, scene): + """Generate goal post annotations from segmentation mask""" + import cv2 + import numpy as np + + try: + mask_img = cv2.imread(mask_path) + except: + print(f"Cannot load mask image {mask_path}") + return [] + + if mask_img is None: + print(f"Failed to read mask image {mask_path}") + return [] - cam = bpy.context.scene.camera - scene = bpy.context.scene annotations = [] - # Field dimensions from the field config - field_length = field_config.get("length", 9.0) # 9m field length + # Goal posts should be yellow in the segmentation mask + # Convert BGR to RGB and look for yellow pixels + mask_rgb = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB) - # Goal dimensions from the goal config (these override field params in the combined config) - goal_width = field_config.get("width", 3.0) # Goal width (between posts) - should be 2.6m - goalpost_width = field_config.get("post_width", 0.15) # Post diameter - should be 0.12m - goalpost_width = goalpost_width + 0.2 - goalpost_height = field_config.get("height", 2.5) # Goal height - should be 1.8m - print(f"Goal config: length={field_length}, goal_width={goal_width}, post_width={goalpost_width}, post_height={goalpost_height}") + # Define yellow color range (goal posts) + # Yellow in RGB is approximately (255, 255, 0) + yellow_lower = np.array([250, 250, 0]) + yellow_upper = np.array([255, 255, 10]) - # Calculate goal post positions - # Goals are at each end of the field (±field_length/2 from center) - # Posts are at ±goal_width/2 from center line + # Create mask for yellow pixels (goal posts) + yellow_mask = cv2.inRange(mask_rgb, yellow_lower, yellow_upper) - goal_positions = [ - # Goal 1 (one end of field) - [ - [-field_length/2 -0.7, -goal_width/2 - 0.35, 0], # Left post - [-field_length/2 - 0.7, +goal_width/2 - 0.35, 0], # Right post - ], - # Goal 2 (other end of field) - [ - [+field_length/2 + 0.7, -goal_width/2 + 0.35, 0], # Left post - [+field_length/2 + 0.7, +goal_width/2 + 0.35, 0], # Right post - ] - ] + # Find contours in the yellow mask + contours, _ = cv2.findContours(yellow_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - goalpost_class_id = 1 # Goal posts have class 1 (not 2) + goalpost_class_id = 1 # Goal posts have class 1 - for goal_idx, goal in enumerate(goal_positions): - for post_idx, post_pos in enumerate(goal): - world_pos = Vector(post_pos) - - # Project goal post center to camera view - screen_pos = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_pos) - - # Check if in view and in front of camera - if screen_pos.z <= 0: - print(f"Goal post {goal_idx+1}-{post_idx+1} behind camera (z={screen_pos.z:.3f})") - continue - - if screen_pos.x < 0 or screen_pos.x > 1 or screen_pos.y < 0 or screen_pos.y > 1: - print(f"Goal post {goal_idx+1}-{post_idx+1} outside view: ({screen_pos.x:.3f}, {screen_pos.y:.3f})") - continue - - # Calculate distance for size estimation - camera_pos = cam.matrix_world.translation - distance = (camera_pos - world_pos).length - - print(f"Goal post {goal_idx+1}-{post_idx+1} at world {post_pos}, distance={distance:.2f}m") - - # Project the top of the post to get height in pixels - top_pos = Vector([post_pos[0], post_pos[1], goalpost_height]) - top_screen = bpy_extras.object_utils.world_to_camera_view(scene, cam, top_pos) - - # Calculate apparent width using perspective projection (similar to ball calculation) - focal_length = cam.data.lens # in mm - sensor_width = cam.data.sensor_width # in mm - - # Calculate apparent width in pixels - apparent_width_pixels = (goalpost_width / distance) * focal_length * (scene.render.resolution_x / sensor_width) - - # Calculate height in pixels from screen space difference - if top_screen.z > 0: # Top is also in front of camera - height_pixels = abs((screen_pos.y - top_screen.y) * scene.render.resolution_y) - print(f" Height from projection: {height_pixels:.1f}px") - else: - # Estimate height if top is behind camera (post is very close) - height_pixels = apparent_width_pixels * (goalpost_height / goalpost_width) - print(f" Height estimated (top behind camera): {height_pixels:.1f}px") - - print(f" Apparent width: {apparent_width_pixels:.1f}px, height: {height_pixels:.1f}px") - - # Convert to normalized coordinates for YOLO format - center_x = screen_pos.x - center_y = 1.0 - screen_pos.y # Flip Y coordinate for YOLO - width_norm = apparent_width_pixels / scene.render.resolution_x - height_norm = height_pixels / scene.render.resolution_y - - print(f" Before clamping: center=({center_x:.3f}, {center_y:.3f}), size=({width_norm:.3f}, {height_norm:.3f})") - - # Ensure minimum size (8 pixels minimum) - min_size_x = 8.0 / scene.render.resolution_x - min_size_y = 8.0 / scene.render.resolution_y - width_norm = max(min_size_x, width_norm) - height_norm = max(min_size_y, height_norm) - - # Clamp maximum size to something reasonable (goal posts shouldn't be huge) - max_size = 0.15 # Maximum 15% of image dimension - width_norm = min(max_size, width_norm) - height_norm = min(max_size, height_norm) - - print(f" Final: center=({center_x:.6f}, {center_y:.6f}), size=({width_norm:.6f}, {height_norm:.6f})") - + for contour in contours: + # Calculate bounding box for each goal post contour + x, y, w, h = cv2.boundingRect(contour) + + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if w < min_size_pixels or h < min_size_pixels: + print(f"Goal post contour too small: {w}x{h}") + continue + + # Convert to YOLO format (normalized center coordinates and dimensions) + img_height, img_width = mask_img.shape[:2] + + center_x = (x + w/2) / img_width + center_y = (y + h/2) / img_height + width_norm = w / img_width + height_norm = h / img_height + + # Ensure coordinates are within bounds + if 0 <= center_x <= 1 and 0 <= center_y <= 1: + print(f"Goal post from mask: {goalpost_class_id} {center_x:.6f} {center_y:.6f} {width_norm:.6f} {height_norm:.6f}") annotations.append((goalpost_class_id, center_x, center_y, width_norm, height_norm)) - print(f"Generated {len(annotations)} goal post annotations") + print(f"Generated {len(annotations)} goal post annotations from mask") return annotations -def write_intersection_annotations(field_config): - """Generate intersection annotations based on standard field geometry""" - import bpy_extras +def write_intersection_annotations_from_mask(mask_path, scene): + """Generate intersection annotations from segmentation mask""" + import cv2 + import numpy as np - cam = bpy.context.scene.camera - scene = bpy.context.scene - annotations = [] + try: + mask_img = cv2.imread(mask_path) + except: + print(f"Cannot load mask image {mask_path}") + return [] - # Field dimensions from the field config - field_length = field_config.get("length", 9.0) # 9m field length - field_width = field_config.get("width", 6.0) # 6m field width - goal_area_length = field_config.get("goal_area", {}).get("length", 1.0) # 1m goal area depth - goal_area_width = field_config.get("goal_area", {}).get("width", 5.0) # 5m goal area width - penalty_mark_dist = field_config.get("penalty_mark_dist", 2.1) # 2.1m penalty mark distance - centre_circle_radius = field_config.get("centre_circle_radius", 0.75) # 0.75m centre circle radius - - print(f"Field intersection config: length={field_length}, width={field_width}") - - # Class mapping for intersections - intersection_classes = { - "L": 3, # L_intersection - "T": 4, # T_intersection - "X": 5 # X_intersection - } + if mask_img is None: + print(f"Failed to read mask image {mask_path}") + return [] - # Base sizes for intersection bounding boxes (in meters) - base_sizes = { - "L": 0.15, - "T": 0.20, - "X": 0.25 - } + annotations = [] - # Define standard field intersection positions based on RoboCup field layout - intersections = { - "L": [ - # Corner intersections (field boundary + goal line/side line) - [-field_length/2, -field_width/2, 0], # Bottom-left corner - [-field_length/2, +field_width/2, 0], # Top-left corner - [+field_length/2, -field_width/2, 0], # Bottom-right corner - [+field_length/2, +field_width/2, 0], # Top-right corner - # Goal area corners - [-field_length/2 + goal_area_length, -goal_area_width/2, 0], # Left goal area bottom - [-field_length/2 + goal_area_length, +goal_area_width/2, 0], # Left goal area top - [+field_length/2 - goal_area_length, -goal_area_width/2, 0], # Right goal area bottom - [+field_length/2 - goal_area_length, +goal_area_width/2, 0], # Right goal area top - ], - "T": [ - # Goal area T-junctions (goal line meets goal area line) - [-field_length/2, -goal_area_width/2, 0], # Left goal, bottom T - [-field_length/2, +goal_area_width/2, 0], # Left goal, top T - [+field_length/2, -goal_area_width/2, 0], # Right goal, bottom T - [+field_length/2, +goal_area_width/2, 0], # Right goal, top T - # Centre line T-junctions (centre line meets side line) - [0, -field_width/2, 0], # Centre line, bottom T - [0, +field_width/2, 0], # Centre line, top T - ], - "X": [ - # Centre circle intersections with centre line (if visible) - [0, 0, 0], # Centre circle (could be X if multiple lines cross) - ] + # Convert BGR to RGB for color detection + mask_rgb = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB) + + # Define color ranges and class IDs for different intersection types + intersection_types = { + "L": { + "class_id": 3, + "color_lower": np.array([250, 0, 250]), # Magenta lower bound + "color_upper": np.array([255, 10, 255]) # Magenta upper bound + }, + "T": { + "class_id": 4, + "color_lower": np.array([0, 250, 250]), # Cyan lower bound + "color_upper": np.array([10, 255, 255]) # Cyan upper bound + }, + "X": { + "class_id": 5, + "color_lower": np.array([250, 90, 0]), # Orange lower bound + "color_upper": np.array([255, 110, 0]) # Orange upper bound + } } - for intersection_type, class_id in intersection_classes.items(): - if intersection_type not in intersections: - continue - - for intersection_pos in intersections[intersection_type]: - world_pos = Vector(intersection_pos) - - # Project to camera view - screen_pos = bpy_extras.object_utils.world_to_camera_view(scene, cam, world_pos) + for intersection_type, config in intersection_types.items(): + # Create mask for this intersection type's color + color_mask = cv2.inRange(mask_rgb, config["color_lower"], config["color_upper"]) + + # Find contours in the color mask + contours, _ = cv2.findContours(color_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + for contour in contours: + # Calculate bounding box for each intersection contour + x, y, w, h = cv2.boundingRect(contour) - # Check if in view and in front of camera - if screen_pos.z <= 0: - print(f"Intersection {intersection_type} behind camera (z={screen_pos.z:.3f})") - continue - - if screen_pos.x < 0 or screen_pos.x > 1 or screen_pos.y < 0 or screen_pos.y > 1: - print(f"Intersection {intersection_type} outside view: ({screen_pos.x:.3f}, {screen_pos.y:.3f})") + # Check minimum size requirements + min_size_pixels = scene_config.resources["bounding_boxes"]["min_bbox_size"] + if w < min_size_pixels or h < min_size_pixels: + print(f"{intersection_type}-intersection contour too small: {w}x{h}") continue - - # Calculate distance for size estimation (use same method as other objects) - camera_pos = cam.matrix_world.translation - distance = (camera_pos - world_pos).length - - # Calculate apparent size based on distance (use same method as goal posts) - base_size = base_sizes.get(intersection_type, 0.2) - focal_length = cam.data.lens # in mm - sensor_width = cam.data.sensor_width # in mm - - # Calculate apparent size in pixels using perspective projection - apparent_size_pixels = (base_size / distance) * focal_length * (scene.render.resolution_x / sensor_width) - - # Convert to normalized size - apparent_size_x = apparent_size_pixels / scene.render.resolution_x - apparent_size_y = apparent_size_pixels / scene.render.resolution_y - - print(f"Intersection {intersection_type} at {intersection_pos}, distance={distance:.2f}m, size={apparent_size_pixels:.1f}px") - - # Ensure minimum size (8 pixels minimum) - min_size_x = 8.0 / scene.render.resolution_x - min_size_y = 8.0 / scene.render.resolution_y - apparent_size_x = max(min_size_x, apparent_size_x) - apparent_size_y = max(min_size_y, apparent_size_y) - # Clamp maximum size to something reasonable - max_size = 0.10 # Maximum 10% of image dimension for intersections - apparent_size_x = min(max_size, apparent_size_x) - apparent_size_y = min(max_size, apparent_size_y) + # Convert to YOLO format (normalized center coordinates and dimensions) + img_height, img_width = mask_img.shape[:2] - # Create YOLO format annotation (center_x, center_y, width, height) - x_center = screen_pos.x - y_center = 1.0 - screen_pos.y # Flip Y coordinate - width = apparent_size_x - height = apparent_size_y + center_x = (x + w/2) / img_width + center_y = (y + h/2) / img_height + width_norm = w / img_width + height_norm = h / img_height - print(f"Intersection {intersection_type} at {intersection['position']} -> {class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") - annotations.append((class_id, x_center, y_center, width, height)) + # Ensure coordinates are within bounds + if 0 <= center_x <= 1 and 0 <= center_y <= 1: + print(f"{intersection_type}-intersection from mask: {config['class_id']} {center_x:.6f} {center_y:.6f} {width_norm:.6f} {height_norm:.6f}") + annotations.append((config["class_id"], center_x, center_y, width_norm, height_norm)) + print(f"Generated {len(annotations)} intersection annotations from mask") return annotations From 3969858f7d1b54975969bb5a56bbd82698dd0e59 Mon Sep 17 00:00:00 2001 From: Ysobel Date: Mon, 30 Jun 2025 20:18:29 +1000 Subject: [PATCH 08/11] some info --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 4f88acd..2cb4896 100644 --- a/README.md +++ b/README.md @@ -127,3 +127,14 @@ The HDR JSON metadata file may have the following fields: ``` + + +## BOUNDING BOX STUFF + +The current bounding box implementation in this branch does not integrate well with the segmentation side. The HDR must have blobs on the intersections in the following colours: + +- L: magenta [255, 0, 255] +- T: cyan [0, 255, 255] +- X: darker orange [255, 100, 0] + +The goal posts must be solid yellow [255, 255, 0] with just the posts and not the top bar or any other part of the goals. \ No newline at end of file From df0a70a92bff8c35037bcc4179aa365c38ef61d4 Mon Sep 17 00:00:00 2001 From: Ysobel Date: Fri, 4 Jul 2025 21:29:06 +1000 Subject: [PATCH 09/11] . --- scripts/image_curator.py | 466 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100755 scripts/image_curator.py diff --git a/scripts/image_curator.py b/scripts/image_curator.py new file mode 100755 index 0000000..e28e269 --- /dev/null +++ b/scripts/image_curator.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +""" +Image Curator Tool for NUpbr Dataset + +This tool allows you to review generated images with their annotations, +visualize bounding boxes, and accept/reject images for your final dataset. + +Usage: + python image_curator.py --input /path/to/run_X --output /path/to/curated_dataset + +Controls: + - Left/Right Arrow Keys: Navigate between images + - 'a' or Space: Accept current image + - 'r' or Delete: Reject current image + - 's': Skip current image (no decision) + - 'q' or Escape: Quit + - 't': Toggle bounding box visibility + - 'f': Toggle fullscreen +""" + +import argparse +import os +import shutil +import json +from pathlib import Path +import tkinter as tk +from tkinter import ttk, messagebox, filedialog +from PIL import Image, ImageTk, ImageDraw, ImageFont +import cv2 +import numpy as np + + +class ImageCurator: + def __init__(self, input_dir, output_dir): + self.input_dir = Path(input_dir) + self.output_dir = Path(output_dir) + self.current_index = 0 + self.show_boxes = True + + # Class names for YOLO format + self.class_names = { + 0: "Ball", + 1: "Goal_post", + 2: "Robot", + 3: "L_intersection", + 4: "T_intersection", + 5: "X_intersection" + } + + # Colors for bounding boxes (BGR format for OpenCV) + self.class_colors = { + 0: (0, 255, 0), # Ball - Green + 1: (0, 255, 255), # Goal_post - Yellow + 2: (255, 0, 0), # Robot - Blue + 3: (255, 0, 255), # L_intersection - Magenta + 4: (255, 255, 0), # T_intersection - Cyan + 5: (0, 165, 255) # X_intersection - Orange + } + + # Initialize paths + self.raw_dir = self.input_dir / "raw" + self.seg_dir = self.input_dir / "seg" + self.meta_dir = self.input_dir / "meta" + self.annotations_dir = self.input_dir / "annotations" + + # Output paths + self.output_raw_dir = self.output_dir / "raw" + self.output_seg_dir = self.output_dir / "seg" + self.output_meta_dir = self.output_dir / "meta" + self.output_annotations_dir = self.output_dir / "annotations" + + # Create output directories + for dir_path in [self.output_raw_dir, self.output_seg_dir, self.output_meta_dir, self.output_annotations_dir]: + dir_path.mkdir(parents=True, exist_ok=True) + + # Load image list + self.load_images() + + # Statistics + self.stats = { + 'total': len(self.images), + 'accepted': 0, + 'rejected': 0, + 'reviewed': 0 + } + + # Load existing decisions + self.decisions_file = self.output_dir / "curation_decisions.json" + self.decisions = self.load_decisions() + + # Setup GUI + self.setup_gui() + + def load_images(self): + """Load list of available images""" + if not self.raw_dir.exists(): + raise ValueError(f"Raw images directory not found: {self.raw_dir}") + + self.images = [] + for img_file in sorted(self.raw_dir.glob("*.png")): + # Check if annotation file exists + ann_file = self.annotations_dir / f"{img_file.stem}.txt" + if ann_file.exists(): + self.images.append(img_file.stem) + + print(f"Found {len(self.images)} images with annotations") + + def load_decisions(self): + """Load previous curation decisions""" + if self.decisions_file.exists(): + with open(self.decisions_file, 'r') as f: + decisions = json.load(f) + # Count existing decisions + for decision in decisions.values(): + if decision == 'accepted': + self.stats['accepted'] += 1 + elif decision == 'rejected': + self.stats['rejected'] += 1 + self.stats['reviewed'] = self.stats['accepted'] + self.stats['rejected'] + return decisions + return {} + + def save_decisions(self): + """Save curation decisions""" + with open(self.decisions_file, 'w') as f: + json.dump(self.decisions, f, indent=2) + + def setup_gui(self): + """Setup the GUI""" + self.root = tk.Tk() + self.root.title("NUpbr Image Curator") + self.root.geometry("1200x900") + + # Main frame + main_frame = ttk.Frame(self.root) + main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) + + # Control panel + control_frame = ttk.Frame(main_frame) + control_frame.pack(fill=tk.X, pady=(0, 10)) + + # Image info + self.info_label = ttk.Label(control_frame, text="", font=("Arial", 12)) + self.info_label.pack(side=tk.LEFT) + + # Statistics + self.stats_label = ttk.Label(control_frame, text="", font=("Arial", 10)) + self.stats_label.pack(side=tk.RIGHT) + + # Navigation frame + nav_frame = ttk.Frame(main_frame) + nav_frame.pack(fill=tk.X, pady=(0, 10)) + + ttk.Button(nav_frame, text="← Previous", command=self.prev_image).pack(side=tk.LEFT) + ttk.Button(nav_frame, text="Next →", command=self.next_image).pack(side=tk.LEFT, padx=(5, 0)) + + # Decision buttons + decision_frame = ttk.Frame(nav_frame) + decision_frame.pack(side=tk.RIGHT) + + ttk.Button(decision_frame, text="✓ Accept (A)", command=self.accept_image, + style="Accept.TButton").pack(side=tk.LEFT, padx=(0, 5)) + ttk.Button(decision_frame, text="✗ Reject (R)", command=self.reject_image, + style="Reject.TButton").pack(side=tk.LEFT, padx=(0, 5)) + ttk.Button(decision_frame, text="Skip (S)", command=self.skip_image).pack(side=tk.LEFT) + + # Toggle controls + toggle_frame = ttk.Frame(nav_frame) + toggle_frame.pack() + + ttk.Button(toggle_frame, text="Toggle Boxes (T)", command=self.toggle_boxes).pack(side=tk.LEFT, padx=(0, 5)) + + # Image display frame + self.image_frame = ttk.Frame(main_frame) + self.image_frame.pack(fill=tk.BOTH, expand=True) + + # Canvas for image display + self.canvas = tk.Canvas(self.image_frame, bg='black') + self.canvas.pack(fill=tk.BOTH, expand=True) + + # Setup key bindings + self.root.bind('', self.on_key_press) + self.root.focus_set() + + # Configure styles + style = ttk.Style() + style.configure("Accept.TButton", foreground="green") + style.configure("Reject.TButton", foreground="red") + + # Load first image + if self.images: + self.show_current_image() + + def load_annotations(self, image_name): + """Load YOLO format annotations for an image""" + ann_file = self.annotations_dir / f"{image_name}.txt" + annotations = [] + + if ann_file.exists(): + with open(ann_file, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) == 5: + class_id = int(parts[0]) + x_center = float(parts[1]) + y_center = float(parts[2]) + width = float(parts[3]) + height = float(parts[4]) + annotations.append((class_id, x_center, y_center, width, height)) + + return annotations + + def draw_bounding_boxes(self, image, annotations, img_width, img_height): + """Draw bounding boxes on image""" + if not self.show_boxes or not annotations: + return image + + # Convert PIL image to OpenCV format + cv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) + + for class_id, x_center, y_center, width, height in annotations: + # Convert YOLO format to pixel coordinates + x1 = int((x_center - width/2) * img_width) + y1 = int((y_center - height/2) * img_height) + x2 = int((x_center + width/2) * img_width) + y2 = int((y_center + height/2) * img_height) + + # Get color and class name + color = self.class_colors.get(class_id, (255, 255, 255)) + class_name = self.class_names.get(class_id, f"Class_{class_id}") + + # Draw bounding box + cv2.rectangle(cv_image, (x1, y1), (x2, y2), color, 2) + + # Draw label background + label = f"{class_name}" + (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(cv_image, (x1, y1 - label_height - 10), (x1 + label_width, y1), color, -1) + + # Draw label text + cv2.putText(cv_image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + + # Convert back to PIL format + return Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)) + + def show_current_image(self): + """Display the current image with annotations""" + if not self.images: + return + + image_name = self.images[self.current_index] + + # Load image + img_path = self.raw_dir / f"{image_name}.png" + if not img_path.exists(): + self.info_label.config(text=f"Image not found: {img_path}") + return + + image = Image.open(img_path) + original_width, original_height = image.size + + # Load annotations + annotations = self.load_annotations(image_name) + + # Draw bounding boxes + image_with_boxes = self.draw_bounding_boxes(image, annotations, original_width, original_height) + + # Resize image to fit canvas + canvas_width = self.canvas.winfo_width() + canvas_height = self.canvas.winfo_height() + + if canvas_width > 1 and canvas_height > 1: # Avoid division by zero + # Calculate scaling factor to fit image in canvas while maintaining aspect ratio + scale_x = canvas_width / original_width + scale_y = canvas_height / original_height + scale = min(scale_x, scale_y, 1.0) # Don't upscale + + new_width = int(original_width * scale) + new_height = int(original_height * scale) + + image_with_boxes = image_with_boxes.resize((new_width, new_height), Image.Resampling.LANCZOS) + + # Display image + self.photo = ImageTk.PhotoImage(image_with_boxes) + self.canvas.delete("all") + self.canvas.create_image(canvas_width//2, canvas_height//2, image=self.photo) + + # Update info + decision = self.decisions.get(image_name, "pending") + decision_text = f" [{decision.upper()}]" if decision != "pending" else "" + self.info_label.config(text=f"Image {self.current_index + 1}/{len(self.images)}: {image_name}{decision_text} | " + f"Annotations: {len(annotations)}") + + # Update statistics + self.update_stats_display() + + def update_stats_display(self): + """Update the statistics display""" + progress = (self.stats['reviewed'] / self.stats['total']) * 100 if self.stats['total'] > 0 else 0 + self.stats_label.config(text=f"Progress: {progress:.1f}% | " + f"Accepted: {self.stats['accepted']} | " + f"Rejected: {self.stats['rejected']} | " + f"Reviewed: {self.stats['reviewed']}/{self.stats['total']}") + + def accept_image(self): + """Accept the current image and copy files""" + if not self.images: + return + + image_name = self.images[self.current_index] + + # Update decision + if image_name not in self.decisions or self.decisions[image_name] != 'accepted': + if image_name in self.decisions and self.decisions[image_name] == 'rejected': + self.stats['rejected'] -= 1 + elif image_name not in self.decisions: + self.stats['reviewed'] += 1 + + self.decisions[image_name] = 'accepted' + self.stats['accepted'] += 1 + + # Copy files + self.copy_image_files(image_name) + + self.save_decisions() + self.next_image() + + def reject_image(self): + """Reject the current image""" + if not self.images: + return + + image_name = self.images[self.current_index] + + # Update decision + if image_name not in self.decisions or self.decisions[image_name] != 'rejected': + if image_name in self.decisions and self.decisions[image_name] == 'accepted': + self.stats['accepted'] -= 1 + # Remove files from output if they exist + self.remove_image_files(image_name) + elif image_name not in self.decisions: + self.stats['reviewed'] += 1 + + self.decisions[image_name] = 'rejected' + self.stats['rejected'] += 1 + + self.save_decisions() + self.next_image() + + def skip_image(self): + """Skip the current image without decision""" + self.next_image() + + def copy_image_files(self, image_name): + """Copy image files to output directory""" + files_to_copy = [ + (self.raw_dir / f"{image_name}.png", self.output_raw_dir / f"{image_name}.png"), + (self.annotations_dir / f"{image_name}.txt", self.output_annotations_dir / f"{image_name}.txt"), + ] + + # Optional files + seg_file = self.seg_dir / f"{image_name}.png" + if seg_file.exists(): + files_to_copy.append((seg_file, self.output_seg_dir / f"{image_name}.png")) + + meta_file = self.meta_dir / f"{image_name}.yaml" + if meta_file.exists(): + files_to_copy.append((meta_file, self.output_meta_dir / f"{image_name}.yaml")) + + for src, dst in files_to_copy: + if src.exists(): + shutil.copy2(src, dst) + + def remove_image_files(self, image_name): + """Remove image files from output directory""" + files_to_remove = [ + self.output_raw_dir / f"{image_name}.png", + self.output_annotations_dir / f"{image_name}.txt", + self.output_seg_dir / f"{image_name}.png", + self.output_meta_dir / f"{image_name}.yaml" + ] + + for file_path in files_to_remove: + if file_path.exists(): + file_path.unlink() + + def toggle_boxes(self): + """Toggle bounding box visibility""" + self.show_boxes = not self.show_boxes + self.show_current_image() + + def prev_image(self): + """Go to previous image""" + if self.images and self.current_index > 0: + self.current_index -= 1 + self.show_current_image() + + def next_image(self): + """Go to next image""" + if self.images and self.current_index < len(self.images) - 1: + self.current_index += 1 + self.show_current_image() + + def on_key_press(self, event): + """Handle key press events""" + key = event.keysym.lower() + + if key in ['left', 'up']: + self.prev_image() + elif key in ['right', 'down']: + self.next_image() + elif key in ['a', 'space']: + self.accept_image() + elif key in ['r', 'delete']: + self.reject_image() + elif key == 's': + self.skip_image() + elif key == 't': + self.toggle_boxes() + elif key in ['q', 'escape']: + self.quit() + + def quit(self): + """Quit the application""" + self.save_decisions() + self.root.quit() + + def run(self): + """Run the curator""" + try: + self.root.mainloop() + except KeyboardInterrupt: + self.quit() + + +def main(): + parser = argparse.ArgumentParser(description="Image Curator for NUpbr Dataset") + parser.add_argument("--input", "-i", required=True, help="Input directory (e.g., outputs/run_6)") + parser.add_argument("--output", "-o", required=True, help="Output directory for curated dataset") + + args = parser.parse_args() + + input_dir = Path(args.input) + output_dir = Path(args.output) + + if not input_dir.exists(): + print(f"Error: Input directory does not exist: {input_dir}") + return + + if not (input_dir / "raw").exists(): + print(f"Error: Raw images directory not found: {input_dir / 'raw'}") + return + + if not (input_dir / "annotations").exists(): + print(f"Error: Annotations directory not found: {input_dir / 'annotations'}") + return + + print(f"Input directory: {input_dir}") + print(f"Output directory: {output_dir}") + + curator = ImageCurator(input_dir, output_dir) + curator.run() + + +if __name__ == "__main__": + main() From 284cd7eb10697971cd63731ce27339dfa48b56f0 Mon Sep 17 00:00:00 2001 From: Joe Bailey Date: Fri, 16 Jan 2026 10:02:03 +1100 Subject: [PATCH 10/11] mostly equisolid projection --- pbr/config/scene_config.py | 2 +- pbr/util.py | 170 ++++++++++++++++++++++++++++++++++++- 2 files changed, 169 insertions(+), 3 deletions(-) diff --git a/pbr/config/scene_config.py b/pbr/config/scene_config.py index 27784cf..5c3192d 100644 --- a/pbr/config/scene_config.py +++ b/pbr/config/scene_config.py @@ -256,7 +256,7 @@ def configure_scene(): "camera": { **random.choice( [ - # {"type": "EQUISOLID", "focal_length": 10.5, "fov": pi}, + {"type": "EQUISOLID", "focal_length": 10.5, "fov": pi}, {"type": "RECTILINEAR", "fov": 1.6}, ] ), diff --git a/pbr/util.py b/pbr/util.py index 6f2f507..75ab58b 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -407,6 +407,62 @@ def get_robot_bounding_box(robot_obj, cam, scene): print(f"Robot {robot_prefix} combined bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") return (min_x, min_y, max_x, max_y) +def get_robot_bounding_box_panoramic(obj, h, w, lens, cam, scene): + + # Extract robot number from the object name (e.g., "r6_Torso" -> "r6") + robot_prefix = obj.name.split('_')[0] # e.g., "r6" + + # Find all objects that belong to this robot + robot_parts = [] + for obj in bpy.data.objects: + if obj.name.startswith(robot_prefix + '_'): + robot_parts.append(obj) + + bbox_corners = [] + screen_positions = [] + + for part in robot_parts: + + part_center = (cam.matrix_world.inverted() @ part.matrix_world @ Vector(part.location)) + part_center.normalize() + if part_center.z > 0: + continue + + for corner in part.bound_box: + + bbox_corner = (cam.matrix_world.inverted() @ part.matrix_world @ Vector(corner)) + bbox_corner.normalize() + + phi = math.atan2(bbox_corner.y, bbox_corner.x) + l = (bbox_corner.x**2 + bbox_corner.y**2)**(1/2) + l = np.clip(l, -0.999, 0.999) + theta = math.asin(l) + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + bbox_corners.append(bbox_corner) + screen_positions.append(Vector((x, y))) + + if not bbox_corners: + print("no valid corners for robot " + obj.name) + return None + + min_x = min(screen_pos.x for screen_pos in screen_positions) + max_x = max(screen_pos.x for screen_pos in screen_positions) + min_y = min(screen_pos.y for screen_pos in screen_positions) + max_y = max(screen_pos.y for screen_pos in screen_positions) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + + def get_bounding_box(obj): """Calculates 2D bounding box for YOLO format""" import bpy_extras @@ -445,6 +501,70 @@ def get_bounding_box(obj): print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") return (min_x, min_y, max_x, max_y) +def get_bounding_box_panoramic(obj): + import bpy_extras + + cam = bpy.context.scene.camera + scene = bpy.context.scene + + lens = cam.data.cycles.fisheye_lens + + aspect_ratio = bpy.context.scene.render.resolution_x / bpy.context.scene.render.resolution_y + if cam.data.sensor_fit == 'VERTICAL': + h = cam.data.sensor_height + w = aspect_ratio * h + else: + w = cam.data.sensor_width + h = w / aspect_ratio + + # Special handling for ball objects (spheres) + if obj.name == "Ball": + return get_sphere_bounding_box_panoramic(obj, h, w, lens, cam, scene) + + # Special handling for robot objects - check if this looks like a robot part + # Robot parts follow pattern "r_" (e.g., "r6_Torso") + if '_' in obj.name and obj.name.split('_')[0].startswith('r') and obj.name.split('_')[0][1:].isdigit(): + return get_robot_bounding_box_panoramic(obj, h, w, lens, cam, scene) + + bbox_corners = [] + screen_positions = [] + + for corner in obj.bound_box: + + bbox_corner = (cam.matrix_world.inverted() @ corner.matrix_world @ Vector(corner)) + bbox_corner.normalize() + + if bbox_corner.z > 0: + continue + + phi = math.atan2(bbox_corner.y, bbox_corner.x) + l = (bbox_corner.x**2 + bbox_corner.y**2)**(1/2) + theta = math.asin(l) + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + bbox_corners.append(bbox_corner) + screen_positions.append(Vector((x, y))) + + if not bbox_corners: + print("no valid corners for " + obj.name) + return None + + min_x = min(screen_pos.x for screen_pos in screen_positions) + max_x = max(screen_pos.x for screen_pos in screen_positions) + min_y = min(screen_pos.y for screen_pos in screen_positions) + max_y = max(screen_pos.y for screen_pos in screen_positions) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + def get_sphere_bounding_box(obj, cam, scene): """Calculates accurate 2D bounding box for spherical objects""" import bpy_extras @@ -500,11 +620,57 @@ def get_sphere_bounding_box(obj, cam, scene): return (min_x, min_y, max_x, max_y) +def get_sphere_bounding_box_panoramic(obj, h, w, lens, cam, scene): + + radius = max(obj.dimensions) / 2.0 + + center = (cam.matrix_world.inverted() @ Vector(obj.location)) + center.normalize() + + print(center.z) + if center.z > 0: + print("Ball " + obj.name + " behind camera") + return None + + phi = math.atan2(center.y, center.x) + l = (center.x**2 + center.y**2)**(1/2) + theta = math.asin(l) + + world_center = obj.matrix_world.translation + camera_pos = cam.matrix_world.translation + distance = (world_center - camera_pos).length + + apparent_diameter = (radius * 2.0 / distance) * lens * (scene.render.resolution_x / w) + radius_pixels = apparent_diameter / 2.0 + + # Equisolid projection + r = 2.0 * lens * math.sin(theta / 2) + + u = r * math.cos(phi) / w + 0.5 + v = r * math.sin(phi) / h + 0.5 + + x = u * scene.render.resolution_x + y = v * scene.render.resolution_y + + min_x = x - (radius_pixels) + max_x = x + (radius_pixels) + min_y = y - (radius_pixels) + max_y = y + (radius_pixels) + + print(f"{obj.name} bbox: ({min_x:.1f}, {min_y:.1f}, {max_x:.1f}, {max_y:.1f})") + return (min_x, min_y, max_x, max_y) + def write_annotations(obj, class_id=0): """Writes YOLO annotations for the object""" scene = bpy.context.scene - bbox_result = get_bounding_box(obj) - + + cam = bpy.context.scene.camera + print(cam.data.type) + if cam.data.type == "PERSP": + bbox_result = get_bounding_box(obj) + else: + bbox_result = get_bounding_box_panoramic(obj) + # Check if bounding box calculation failed if bbox_result is None: print(f"Failed to calculate bounding box for {obj.name}") From dfc101510485ebe550fdbb7afc457553e13ae28c Mon Sep 17 00:00:00 2001 From: Joe Bailey Date: Thu, 12 Feb 2026 19:21:50 +1100 Subject: [PATCH 11/11] added check for occluded balls --- pbr/util.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pbr/util.py b/pbr/util.py index 75ab58b..2aa4b49 100644 --- a/pbr/util.py +++ b/pbr/util.py @@ -596,6 +596,13 @@ def get_sphere_bounding_box(obj, cam, scene): # Calculate distance from camera to ball camera_pos = cam.matrix_world.translation distance = (world_center - camera_pos).length + + # Check if ball is occluded + cam_to_ball = world_center - camera_pos + ray_hit = scene.ray_cast(bpy.context.evaluated_depsgraph_get(), cam.matrix_world.translation + cam_to_ball * 0.20, cam_to_ball, distance=10) + + if ray_hit[4] != obj: + return None # Simple perspective projection for radius # Use camera focal length to calculate apparent size @@ -639,6 +646,13 @@ def get_sphere_bounding_box_panoramic(obj, h, w, lens, cam, scene): world_center = obj.matrix_world.translation camera_pos = cam.matrix_world.translation distance = (world_center - camera_pos).length + + # Check if ball is occluded + cam_to_ball = world_center - camera_pos + ray_hit = scene.ray_cast(bpy.context.evaluated_depsgraph_get(), cam.matrix_world.translation + cam_to_ball * 0.20, cam_to_ball, distance=10) + + if ray_hit[4] != obj: + return None apparent_diameter = (radius * 2.0 / distance) * lens * (scene.render.resolution_x / w) radius_pixels = apparent_diameter / 2.0