diff --git a/.gitignore b/.gitignore index 4045db012e..d526ac0f87 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,7 @@ yolo11n.pt *mobileclip* /results -**/cpp/result +**/result CLAUDE.MD /assets/teleop_certs/ diff --git a/dimos/hardware/sensors/camera/webcam.py b/dimos/hardware/sensors/camera/webcam.py index cfd1a080a0..61717b699e 100644 --- a/dimos/hardware/sensors/camera/webcam.py +++ b/dimos/hardware/sensors/camera/webcam.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, field from functools import cache import threading import time from typing import Literal import cv2 +from pydantic import Field from reactivex import create from reactivex.observable import Observable @@ -28,13 +28,12 @@ from dimos.utils.reactive import backpressure -@dataclass class WebcamConfig(CameraConfig): camera_index: int = 0 # /dev/videoN width: int = 640 height: int = 480 fps: float = 15.0 - camera_info: CameraInfo = field(default_factory=CameraInfo) + camera_info: CameraInfo = Field(default_factory=CameraInfo) frame_id_prefix: str | None = None stereo_slice: Literal["left", "right"] | None = None # For stereo cameras @@ -167,6 +166,10 @@ def _capture_loop(self) -> None: @property def camera_info(self) -> CameraInfo: - return self.config.camera_info + info = self.config.camera_info + if info.width == 0 or info.height == 0: + info.width = self.config.width + info.height = self.config.height + return info def emit(self, image: Image) -> None: ... diff --git a/dimos/perception/slam/orbslam3/README.md b/dimos/perception/slam/orbslam3/README.md new file mode 100644 index 0000000000..a38fe700e8 --- /dev/null +++ b/dimos/perception/slam/orbslam3/README.md @@ -0,0 +1,9 @@ +# ORB-SLAM3 Native Module + +Visual SLAM module wrapping [ORB-SLAM3](https://github.com/UZ-SLAMLab/ORB_SLAM3) as a native subprocess. + +The C++ binary lives in a separate GPL-3.0 repo ([dimos-orb-slam3](https://github.com/dimensionalOS/dimos-orb-slam3)) and is pulled in at build time via Nix. + +## Known Issues + +- **Transform / trajectory reconstruction mismatch**: The reconstructed trajectory does not match ground-truth poses. There is a suspected coordinate-frame or transform-composition issue causing output to diverge from base truth. Needs investigation. diff --git a/dimos/perception/slam/orbslam3/blueprints/webcam.py b/dimos/perception/slam/orbslam3/blueprints/webcam.py new file mode 100644 index 0000000000..d620780b02 --- /dev/null +++ b/dimos/perception/slam/orbslam3/blueprints/webcam.py @@ -0,0 +1,26 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dimos.core.blueprints import autoconnect +from dimos.hardware.sensors.camera.module import CameraModule +from dimos.perception.slam.orbslam3.module import OrbSlam3 +from dimos.visualization.rerun.bridge import _resolve_viewer_mode, rerun_bridge + +orbslam3_webcam = autoconnect( + rerun_bridge(viewer_mode=_resolve_viewer_mode()), + CameraModule.blueprint(), + OrbSlam3.blueprint(sensor_mode="MONOCULAR"), +).global_config(n_workers=3) + +__all__ = ["orbslam3_webcam"] diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py new file mode 100644 index 0000000000..3a2064bbbe --- /dev/null +++ b/dimos/perception/slam/orbslam3/module.py @@ -0,0 +1,118 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python NativeModule wrapper for ORB-SLAM3 visual SLAM. + +Wraps ORB-SLAM3 as a native subprocess that receives camera images and +outputs camera pose estimates (odometry). + +Usage:: + + from dimos.perception.slam.orbslam3.module import OrbSlam3 + from dimos.core.blueprints import autoconnect + + autoconnect( + OrbSlam3.blueprint(sensor_mode=SensorMode.MONOCULAR), + SomeConsumer.blueprint(), + ).build().loop() +""" + +from __future__ import annotations + +import enum +from pathlib import Path +from typing import TYPE_CHECKING + +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.nav_msgs.Path import Path as NavPath +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.spec import perception + +_MODULE_DIR = Path(__file__).parent + + +class SensorMode(enum.StrEnum): + MONOCULAR = "MONOCULAR" + STEREO = "STEREO" + RGBD = "RGBD" + IMU_MONOCULAR = "IMU_MONOCULAR" + IMU_STEREO = "IMU_STEREO" + IMU_RGBD = "IMU_RGBD" + + +class OrbSlam3Config(NativeModuleConfig): + """Config for the ORB-SLAM3 visual SLAM native module.""" + + cwd: str | None = str(_MODULE_DIR) + executable: str = "result/bin/orbslam3_native" + build_command: str | None = ( + "nix build github:dimensionalOS/dimos-orb-slam3/v0.2.0 --no-write-lock-file" + ) + + # ORB-SLAM3 sensor mode + sensor_mode: SensorMode = SensorMode.MONOCULAR + + # Pangolin viewer (disable for headless) + use_viewer: bool = False + + # Frame IDs for output messages + frame_id: str = "world" + child_frame_id: str = "camera_link" + + # Camera settings YAML (absolute path, or override with your own calibration) + settings_path: str = str( + _MODULE_DIR / "result" / "share" / "orbslam3" / "config" / "RealSense_D435i.yaml" + ) + + # Vocabulary path (None = use compiled-in default from nix build) + vocab_path: str | None = None + + # How often to publish map points and keyframe path (every N frames) + map_publish_interval: int = 10 + + +class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): + """ORB-SLAM3 visual SLAM module. + + Ports: + color_image (In[Image]): Camera frames to track. + odometry (Out[Odometry]): Camera pose as nav_msgs.Odometry. + keypoints_image (Out[Image]): Color image with tracked keypoints overlay. + map_points (Out[PointCloud2]): Sparse 3D map points. + keyframe_path (Out[NavPath]): Keyframe pose graph as a path. + """ + + default_config = OrbSlam3Config + color_image: In[Image] + odometry: Out[Odometry] + keypoints_image: Out[Image] + map_points: Out[PointCloud2] + keyframe_path: Out[NavPath] + + +orbslam3_module = OrbSlam3.blueprint + +__all__ = [ + "OrbSlam3", + "OrbSlam3Config", + "SensorMode", + "orbslam3_module", +] + +# Verify protocol port compliance (mypy will flag missing ports) +if TYPE_CHECKING: + OrbSlam3() diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 5910093d61..93af60aa27 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -56,6 +56,7 @@ "mid360-fastlio": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio", "mid360-fastlio-voxels": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels", "mid360-fastlio-voxels-native": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels_native", + "orbslam3-webcam": "dimos.perception.slam.orbslam3.blueprints.webcam:orbslam3_webcam", "teleop-phone": "dimos.teleop.phone.blueprints:teleop_phone", "teleop-phone-go2": "dimos.teleop.phone.blueprints:teleop_phone_go2", "teleop-phone-go2-fleet": "dimos.teleop.phone.blueprints:teleop_phone_go2_fleet", @@ -144,6 +145,7 @@ "object-tracker2-d": "dimos.perception.object_tracker_2d", "object-tracker3-d": "dimos.perception.object_tracker_3d", "object-tracking": "dimos.perception.object_tracker", + "orbslam3-module": "dimos.perception.slam.orbslam3.module", "osm-skill": "dimos.agents.skills.osm", "patrolling-module": "dimos.navigation.patrolling.module", "perceive-loop-skill": "dimos.perception.perceive_loop_skill",