From ea780596abb5f5ea543766ae01a14e534e40208d Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Tue, 17 Mar 2026 21:54:28 +0800 Subject: [PATCH 1/8] feat(perception): add ORB-SLAM3 native module (phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ORB-SLAM3 visual SLAM as a NativeModule under perception/slam/, following the same pattern as FastLIO2. Phase 1: nix build from source, binary initializes System and blocks — no LCM I/O yet. - Python module with OrbSlam3Config and perception.Odometry protocol - C++ wrapper using dimos::NativeModule for CLI arg parsing - Nix flake that builds ORB-SLAM3 from github:thuvasooriya/orb-slam3 - Default RealSense D435i camera settings - Vocab auto-resolved from nix store path --- .../slam/orbslam3/config/RealSense_D435i.yaml | 61 ++++++++ .../slam/orbslam3/cpp/CMakeLists.txt | 73 ++++++++++ dimos/perception/slam/orbslam3/cpp/flake.lock | 96 +++++++++++++ dimos/perception/slam/orbslam3/cpp/flake.nix | 134 ++++++++++++++++++ dimos/perception/slam/orbslam3/cpp/main.cpp | 108 ++++++++++++++ dimos/perception/slam/orbslam3/module.py | 104 ++++++++++++++ 6 files changed, 576 insertions(+) create mode 100644 dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml create mode 100644 dimos/perception/slam/orbslam3/cpp/CMakeLists.txt create mode 100644 dimos/perception/slam/orbslam3/cpp/flake.lock create mode 100644 dimos/perception/slam/orbslam3/cpp/flake.nix create mode 100644 dimos/perception/slam/orbslam3/cpp/main.cpp create mode 100644 dimos/perception/slam/orbslam3/module.py diff --git a/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml b/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml new file mode 100644 index 0000000000..bc170c6fb6 --- /dev/null +++ b/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml @@ -0,0 +1,61 @@ +#-------------------------------------------------------------------------------------------- +# Camera Parameters. Adjust them! +#-------------------------------------------------------------------------------------------- +File.version: "1.0" + +Camera.type: "PinHole" + +# Left Camera calibration and distortion parameters (OpenCV) +Camera1.fx: 382.613 +Camera1.fy: 382.613 +Camera1.cx: 320.183 +Camera1.cy: 236.455 + +# distortion parameters +Camera1.k1: 0.0 +Camera1.k2: 0.0 +Camera1.p1: 0.0 +Camera1.p2: 0.0 + +# Camera resolution +Camera.width: 640 +Camera.height: 480 + +# Camera frames per second +Camera.fps: 30 + +# Color order of the images (0: BGR, 1: RGB. It is ignored if images are grayscale) +Camera.RGB: 1 + +#-------------------------------------------------------------------------------------------- +# ORB Parameters +#-------------------------------------------------------------------------------------------- +# ORB Extractor: Number of features per image +ORBextractor.nFeatures: 1250 + +# ORB Extractor: Scale factor between levels in the scale pyramid +ORBextractor.scaleFactor: 1.2 + +# ORB Extractor: Number of levels in the scale pyramid +ORBextractor.nLevels: 8 + +# ORB Extractor: Fast threshold +# Image is divided in a grid. At each cell FAST are extracted imposing a minimum response. +# Firstly we impose iniThFAST. If no corners are detected we impose a lower value minThFAST +# You can lower these values if your images have low contrast +ORBextractor.iniThFAST: 20 +ORBextractor.minThFAST: 7 + +#-------------------------------------------------------------------------------------------- +# Viewer Parameters +#-------------------------------------------------------------------------------------------- +Viewer.KeyFrameSize: 0.05 +Viewer.KeyFrameLineWidth: 1.0 +Viewer.GraphLineWidth: 0.9 +Viewer.PointSize: 2.0 +Viewer.CameraSize: 0.08 +Viewer.CameraLineWidth: 3.0 +Viewer.ViewpointX: 0.0 +Viewer.ViewpointY: -0.7 +Viewer.ViewpointZ: -3.5 +Viewer.ViewpointF: 500.0 diff --git a/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt b/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt new file mode 100644 index 0000000000..37cc90b995 --- /dev/null +++ b/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt @@ -0,0 +1,73 @@ +cmake_minimum_required(VERSION 3.14) +project(orbslam3_native CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") + +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "${CMAKE_SOURCE_DIR}/result" CACHE PATH "" FORCE) +endif() + +# Ensure the binary can find ORB-SLAM3 libraries at runtime +if(ORBSLAM3_DIR) + set(CMAKE_INSTALL_RPATH "${ORBSLAM3_DIR}/lib") + set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) +endif() + +# ORB-SLAM3 installed prefix (pass -DORBSLAM3_DIR=) +if(NOT ORBSLAM3_DIR) + message(FATAL_ERROR "ORBSLAM3_DIR not set. Pass -DORBSLAM3_DIR=") +endif() + +# dimos-lcm C++ message headers (for dimos_native_module.hpp dependencies) +include(FetchContent) +FetchContent_Declare(dimos_lcm + GIT_REPOSITORY https://github.com/dimensionalOS/dimos-lcm.git + GIT_TAG main + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(dimos_lcm) + +# dimos_native_module.hpp location +if(NOT DIMOS_NATIVE_MODULE_DIR) + set(DIMOS_NATIVE_MODULE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../hardware/sensors/lidar/common) +endif() + +# Dependencies +find_package(OpenCV REQUIRED) +find_package(Eigen3 REQUIRED) +find_package(Pangolin REQUIRED) +find_package(PkgConfig REQUIRED) +pkg_check_modules(LCM REQUIRED lcm) + +# Compile-time default vocab path (set by nix flake) +if(ORBSLAM3_DEFAULT_VOCAB) + add_compile_definitions(ORBSLAM3_DEFAULT_VOCAB="${ORBSLAM3_DEFAULT_VOCAB}") +endif() + +add_executable(orbslam3_native main.cpp) + +target_include_directories(orbslam3_native PRIVATE + ${ORBSLAM3_DIR} # for deps/DBoW2/... deps/g2o/... + ${ORBSLAM3_DIR}/include # for System.h etc. + ${ORBSLAM3_DIR}/include/CameraModels + ${ORBSLAM3_DIR}/deps/Sophus # for sophus/se3.hpp etc. + ${dimos_lcm_SOURCE_DIR}/generated/cpp_lcm_msgs + ${DIMOS_NATIVE_MODULE_DIR} + ${LCM_INCLUDE_DIRS} + ${EIGEN3_INCLUDE_DIR} + ${OpenCV_INCLUDE_DIRS} +) + +target_link_libraries(orbslam3_native PRIVATE + ${ORBSLAM3_DIR}/lib/liborb-slam3.so + ${ORBSLAM3_DIR}/lib/libDBoW2.so + ${ORBSLAM3_DIR}/lib/libg2o.so + ${OpenCV_LIBS} + ${Pangolin_LIBRARIES} + -lboost_serialization + -lcrypto +) + +install(TARGETS orbslam3_native DESTINATION bin) diff --git a/dimos/perception/slam/orbslam3/cpp/flake.lock b/dimos/perception/slam/orbslam3/cpp/flake.lock new file mode 100644 index 0000000000..531f35f890 --- /dev/null +++ b/dimos/perception/slam/orbslam3/cpp/flake.lock @@ -0,0 +1,96 @@ +{ + "nodes": { + "dimos-lcm": { + "flake": false, + "locked": { + "lastModified": 1769774949, + "narHash": "sha256-icRK7jerqNlwK1WZBrnIP04I2WozzFqTD7qsmnPxQuo=", + "owner": "dimensionalOS", + "repo": "dimos-lcm", + "rev": "0aa72b7b1bd3a65f50f5c03485ee9b728df56afe", + "type": "github" + }, + "original": { + "owner": "dimensionalOS", + "ref": "main", + "repo": "dimos-lcm", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1736012469, + "narHash": "sha256-/qlNWm/IEVVH7GfgAIyP6EsVZI6zjAx1cV5zNyrs+rI=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d", + "type": "github" + }, + "original": { + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d", + "type": "github" + } + }, + "orb-slam3-src": { + "flake": false, + "locked": { + "lastModified": 1736936489, + "narHash": "sha256-as8Wk9/6E+1bN0xJIfTn4SaCbz2fSXz2iJEuqIJsi88=", + "owner": "thuvasooriya", + "repo": "orb-slam3", + "rev": "a48413f8fdc47f7ca8354e83dbcfb85c6f8da7ae", + "type": "github" + }, + "original": { + "owner": "thuvasooriya", + "repo": "orb-slam3", + "type": "github" + } + }, + "root": { + "inputs": { + "dimos-lcm": "dimos-lcm", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "orb-slam3-src": "orb-slam3-src" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/dimos/perception/slam/orbslam3/cpp/flake.nix b/dimos/perception/slam/orbslam3/cpp/flake.nix new file mode 100644 index 0000000000..a36d62e99a --- /dev/null +++ b/dimos/perception/slam/orbslam3/cpp/flake.nix @@ -0,0 +1,134 @@ +{ + description = "ORB-SLAM3 native module for dimos"; + + inputs = { + # Pin to a nixpkgs that still has pangolin (removed in later revisions) + nixpkgs.url = "github:NixOS/nixpkgs/8f3e1f807051e32d8c95cd12b9b421623850a34d"; + flake-utils.url = "github:numtide/flake-utils"; + dimos-lcm = { + url = "github:dimensionalOS/dimos-lcm/main"; + flake = false; + }; + orb-slam3-src = { + url = "github:thuvasooriya/orb-slam3"; + flake = false; + }; + }; + + outputs = { self, nixpkgs, flake-utils, dimos-lcm, orb-slam3-src, ... }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + + # Shared dimos native module header (dimos_native_module.hpp) + dimos-native-module-dir = ../../../../hardware/sensors/lidar/common; + + # Build ORB-SLAM3 library from source + orb-slam3 = pkgs.stdenv.mkDerivation { + pname = "orb-slam3"; + version = "1.0.0"; + src = orb-slam3-src; + + nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; + buildInputs = [ + pkgs.opencv + pkgs.eigen + pkgs.pangolin + pkgs.boost + pkgs.openssl + pkgs.glew + pkgs.llvmPackages.openmp + ]; + + # Skip default cmake configure (we need to build DBoW2 first) + dontConfigure = true; + + # Strip -march=native for nix reproducibility + postPatch = '' + sed -i 's/-march=native//g' CMakeLists.txt deps/g2o/CMakeLists.txt + ''; + + buildPhase = '' + # Build DBoW2 first (main CMakeLists expects it pre-built) + cd deps/DBoW2 + mkdir -p build && cd build + cmake .. -DCMAKE_BUILD_TYPE=Release + make -j$NIX_BUILD_CORES + cd ../../.. + + # Build main ORB-SLAM3 (g2o is built via add_subdirectory) + # Only build the library target, skip example binaries + mkdir -p build && cd build + cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_RPATH=$out/lib \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON + make -j$NIX_BUILD_CORES orb-slam3 g2o + ''; + + installPhase = '' + cd .. # back to source root from build/ + + mkdir -p $out/lib + mkdir -p $out/include/CameraModels + mkdir -p $out/share/orb-slam3/Vocabulary + + # Libraries (keep original name to match SONAME) + cp lib/liborb-slam3.so $out/lib/ + cp deps/DBoW2/lib/libDBoW2.so $out/lib/ + find . -name 'libg2o.so' -exec cp {} $out/lib/ \; + + # Headers + cp include/*.h $out/include/ + cp include/CameraModels/*.h $out/include/CameraModels/ + + # Full deps/ tree for transitive header includes + mkdir -p $out/deps + cp -r deps/DBoW2 $out/deps/DBoW2 + cp -r deps/g2o $out/deps/g2o + cp -r deps/Sophus $out/deps/Sophus + # Remove build artifacts from deps + rm -rf $out/deps/*/build $out/deps/*/lib + + # Vocabulary (only .tar.gz is tracked in git) + if [ ! -f Vocabulary/ORBvoc.txt ]; then + tar -xzf Vocabulary/ORBvoc.txt.tar.gz -C Vocabulary/ + fi + cp Vocabulary/ORBvoc.txt $out/share/orb-slam3/Vocabulary/ + ''; + }; + + # Build our thin wrapper binary + orbslam3_native = pkgs.stdenv.mkDerivation { + pname = "orbslam3_native"; + version = "0.1.0"; + src = ./.; + + nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; + buildInputs = [ + orb-slam3 + pkgs.opencv + pkgs.eigen + pkgs.pangolin + pkgs.boost + pkgs.openssl + pkgs.lcm + pkgs.glib + pkgs.glew + ]; + + cmakeFlags = [ + "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" + "-DFETCHCONTENT_SOURCE_DIR_DIMOS_LCM=${dimos-lcm}" + "-DORBSLAM3_DIR=${orb-slam3}" + "-DDIMOS_NATIVE_MODULE_DIR=${dimos-native-module-dir}" + "-DORBSLAM3_DEFAULT_VOCAB=${orb-slam3}/share/orb-slam3/Vocabulary/ORBvoc.txt" + ]; + }; + in { + packages = { + default = orbslam3_native; + inherit orbslam3_native orb-slam3; + }; + }); +} diff --git a/dimos/perception/slam/orbslam3/cpp/main.cpp b/dimos/perception/slam/orbslam3/cpp/main.cpp new file mode 100644 index 0000000000..8b9d702a4e --- /dev/null +++ b/dimos/perception/slam/orbslam3/cpp/main.cpp @@ -0,0 +1,108 @@ +// Copyright 2026 Dimensional Inc. +// SPDX-License-Identifier: Apache-2.0 +// +// ORB-SLAM3 native module for dimos NativeModule framework. +// +// Phase 1: Initializes ORB_SLAM3::System, loads vocabulary, and blocks +// until SIGTERM. No LCM pub/sub yet — topics come in phase 2. +// +// Usage: +// ./orbslam3_native \ +// --settings_path /path/to/RealSense_D435i.yaml \ +// --sensor_mode MONOCULAR \ +// --use_viewer false + +#include +#include +#include +#include +#include +#include + +#include "dimos_native_module.hpp" + +// ORB-SLAM3 +#include "System.h" + +// --------------------------------------------------------------------------- +// Signal handling +// --------------------------------------------------------------------------- + +static std::atomic g_running{true}; + +static void signal_handler(int /*sig*/) { + g_running.store(false); +} + +// --------------------------------------------------------------------------- +// Sensor mode parsing +// --------------------------------------------------------------------------- + +static ORB_SLAM3::System::eSensor parse_sensor_mode(const std::string& mode) { + if (mode == "MONOCULAR") return ORB_SLAM3::System::MONOCULAR; + if (mode == "STEREO") return ORB_SLAM3::System::STEREO; + if (mode == "RGBD") return ORB_SLAM3::System::RGBD; + if (mode == "IMU_MONOCULAR") return ORB_SLAM3::System::IMU_MONOCULAR; + if (mode == "IMU_STEREO") return ORB_SLAM3::System::IMU_STEREO; + if (mode == "IMU_RGBD") return ORB_SLAM3::System::IMU_RGBD; + fprintf(stderr, "[orbslam3] Unknown sensor mode: %s, defaulting to MONOCULAR\n", + mode.c_str()); + return ORB_SLAM3::System::MONOCULAR; +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +int main(int argc, char** argv) { + dimos::NativeModule mod(argc, argv); + + // Required: camera settings YAML + std::string settings_path = mod.arg("settings_path", ""); + if (settings_path.empty()) { + fprintf(stderr, "[orbslam3] Error: --settings_path is required\n"); + return 1; + } + + // Vocabulary (compile-time default from nix build, or override via CLI) +#ifdef ORBSLAM3_DEFAULT_VOCAB + std::string vocab_path = mod.arg("vocab_path", ORBSLAM3_DEFAULT_VOCAB); +#else + std::string vocab_path = mod.arg("vocab_path", ""); +#endif + if (vocab_path.empty()) { + fprintf(stderr, "[orbslam3] Error: --vocab_path is required " + "(no compiled-in default available)\n"); + return 1; + } + + std::string sensor_str = mod.arg("sensor_mode", "MONOCULAR"); + bool use_viewer = mod.arg("use_viewer", "false") == "true"; + auto sensor_mode = parse_sensor_mode(sensor_str); + + printf("[orbslam3] Initializing ORB-SLAM3\n"); + printf("[orbslam3] vocab: %s\n", vocab_path.c_str()); + printf("[orbslam3] settings: %s\n", settings_path.c_str()); + printf("[orbslam3] sensor: %s\n", sensor_str.c_str()); + printf("[orbslam3] viewer: %s\n", use_viewer ? "true" : "false"); + + // Signal handlers + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); + + // Initialize ORB-SLAM3 (loads vocabulary, starts internal threads) + ORB_SLAM3::System slam(vocab_path, settings_path, sensor_mode, use_viewer); + + printf("[orbslam3] System initialized, ready.\n"); + + // Block until shutdown signal + while (g_running.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + printf("[orbslam3] Shutting down...\n"); + slam.Shutdown(); + printf("[orbslam3] Done.\n"); + + return 0; +} diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py new file mode 100644 index 0000000000..94a033efcd --- /dev/null +++ b/dimos/perception/slam/orbslam3/module.py @@ -0,0 +1,104 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python NativeModule wrapper for ORB-SLAM3 visual SLAM. + +Wraps ORB-SLAM3 as a native subprocess that receives camera images and +outputs camera pose estimates (odometry). + +Usage:: + + from dimos.perception.slam.orbslam3.module import OrbSlam3 + from dimos.core.blueprints import autoconnect + + autoconnect( + OrbSlam3.blueprint(sensor_mode="MONOCULAR"), + SomeConsumer.blueprint(), + ).build().loop() +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Annotated + +from pydantic.experimental.pipeline import validate_as + +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.core.stream import Out +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.spec import perception + +_CONFIG_DIR = Path(__file__).parent / "config" + + +class OrbSlam3Config(NativeModuleConfig): + """Config for the ORB-SLAM3 visual SLAM native module.""" + + cwd: str | None = "cpp" + executable: str = "result/bin/orbslam3_native" + build_command: str | None = "nix build .#orbslam3_native" + + # ORB-SLAM3 sensor mode + sensor_mode: str = "MONOCULAR" # MONOCULAR, STEREO, RGBD, IMU_MONOCULAR, IMU_STEREO, IMU_RGBD + + # Pangolin viewer (disable for headless) + use_viewer: bool = False + + # Frame IDs for output messages + frame_id: str = "map" + child_frame_id: str = "camera" + + # Camera settings YAML (relative to config/ dir, or absolute path) + settings: Annotated[ + Path, validate_as(...).transform(lambda p: p if p.is_absolute() else _CONFIG_DIR / p) + ] = Path("RealSense_D435i.yaml") + + # Resolved from settings, passed as --settings_path to the binary + settings_path: str | None = None + + # Vocabulary path (None = use compiled-in default from nix build) + vocab_path: str | None = None + + # settings is not a CLI arg (settings_path is) + cli_exclude: frozenset[str] = frozenset({"settings"}) + + def model_post_init(self, __context: object) -> None: + super().model_post_init(__context) + if self.settings_path is None: + self.settings_path = str(self.settings) + + +class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): + """ORB-SLAM3 visual SLAM module. + + Ports: + odometry (Out[Odometry]): Camera pose as nav_msgs.Odometry. + """ + + default_config = OrbSlam3Config + odometry: Out[Odometry] + + +orbslam3_module = OrbSlam3.blueprint + +__all__ = [ + "OrbSlam3", + "OrbSlam3Config", + "orbslam3_module", +] + +# Verify protocol port compliance (mypy will flag missing ports) +if TYPE_CHECKING: + OrbSlam3() From ba2124f8d24ce354385ca82ff26bcac51865ba58 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Tue, 17 Mar 2026 22:32:50 +0800 Subject: [PATCH 2/8] orbclam3 module working --- dimos/hardware/sensors/camera/webcam.py | 5 +- .../slam/orbslam3/blueprints/webcam.py | 24 ++ ...D435i.yaml => RealSense_D435i.yaml.opencv} | 2 + .../slam/orbslam3/cpp/CMakeLists.txt | 5 + dimos/perception/slam/orbslam3/cpp/main.cpp | 227 +++++++++++++++++- dimos/perception/slam/orbslam3/module.py | 10 +- dimos/robot/all_blueprints.py | 2 + 7 files changed, 261 insertions(+), 14 deletions(-) create mode 100644 dimos/perception/slam/orbslam3/blueprints/webcam.py rename dimos/perception/slam/orbslam3/config/{RealSense_D435i.yaml => RealSense_D435i.yaml.opencv} (99%) diff --git a/dimos/hardware/sensors/camera/webcam.py b/dimos/hardware/sensors/camera/webcam.py index cfd1a080a0..3f3b047ba0 100644 --- a/dimos/hardware/sensors/camera/webcam.py +++ b/dimos/hardware/sensors/camera/webcam.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, field from functools import cache import threading import time from typing import Literal import cv2 +from pydantic import Field from reactivex import create from reactivex.observable import Observable @@ -28,13 +28,12 @@ from dimos.utils.reactive import backpressure -@dataclass class WebcamConfig(CameraConfig): camera_index: int = 0 # /dev/videoN width: int = 640 height: int = 480 fps: float = 15.0 - camera_info: CameraInfo = field(default_factory=CameraInfo) + camera_info: CameraInfo = Field(default_factory=CameraInfo) frame_id_prefix: str | None = None stereo_slice: Literal["left", "right"] | None = None # For stereo cameras diff --git a/dimos/perception/slam/orbslam3/blueprints/webcam.py b/dimos/perception/slam/orbslam3/blueprints/webcam.py new file mode 100644 index 0000000000..1b37480a1f --- /dev/null +++ b/dimos/perception/slam/orbslam3/blueprints/webcam.py @@ -0,0 +1,24 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dimos.core.blueprints import autoconnect +from dimos.hardware.sensors.camera.module import CameraModule +from dimos.perception.slam.orbslam3.module import OrbSlam3 + +orbslam3_webcam = autoconnect( + CameraModule.blueprint(), + OrbSlam3.blueprint(sensor_mode="MONOCULAR"), +) + +__all__ = ["orbslam3_webcam"] diff --git a/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml b/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv similarity index 99% rename from dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml rename to dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv index bc170c6fb6..e7d341122f 100644 --- a/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml +++ b/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv @@ -1,3 +1,5 @@ +%YAML:1.0 + #-------------------------------------------------------------------------------------------- # Camera Parameters. Adjust them! #-------------------------------------------------------------------------------------------- diff --git a/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt b/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt index 37cc90b995..727b5f8539 100644 --- a/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt +++ b/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt @@ -66,8 +66,13 @@ target_link_libraries(orbslam3_native PRIVATE ${ORBSLAM3_DIR}/lib/libg2o.so ${OpenCV_LIBS} ${Pangolin_LIBRARIES} + ${LCM_LIBRARIES} -lboost_serialization -lcrypto ) +target_link_directories(orbslam3_native PRIVATE + ${LCM_LIBRARY_DIRS} +) + install(TARGETS orbslam3_native DESTINATION bin) diff --git a/dimos/perception/slam/orbslam3/cpp/main.cpp b/dimos/perception/slam/orbslam3/cpp/main.cpp index 8b9d702a4e..b1d0f0a9f1 100644 --- a/dimos/perception/slam/orbslam3/cpp/main.cpp +++ b/dimos/perception/slam/orbslam3/cpp/main.cpp @@ -3,32 +3,60 @@ // // ORB-SLAM3 native module for dimos NativeModule framework. // -// Phase 1: Initializes ORB_SLAM3::System, loads vocabulary, and blocks -// until SIGTERM. No LCM pub/sub yet — topics come in phase 2. +// Subscribes to camera images on LCM, feeds them to ORB_SLAM3::TrackMonocular(), +// and publishes camera pose estimates as nav_msgs::Odometry. // // Usage: // ./orbslam3_native \ +// --color_image '/color_image#sensor_msgs.Image' \ +// --odometry '/odometry#nav_msgs.Odometry' \ // --settings_path /path/to/RealSense_D435i.yaml \ // --sensor_mode MONOCULAR \ // --use_viewer false +#include + #include #include +#include #include #include +#include #include #include +#include +#include + #include "dimos_native_module.hpp" +// dimos LCM message headers +#include "geometry_msgs/Point.hpp" +#include "geometry_msgs/Quaternion.hpp" +#include "nav_msgs/Odometry.hpp" +#include "sensor_msgs/Image.hpp" + // ORB-SLAM3 #include "System.h" // --------------------------------------------------------------------------- -// Signal handling +// Global state // --------------------------------------------------------------------------- static std::atomic g_running{true}; +static lcm::LCM* g_lcm = nullptr; +static ORB_SLAM3::System* g_slam = nullptr; + +static std::string g_image_topic; +static std::string g_odometry_topic; +static std::string g_frame_id = "map"; +static std::string g_child_frame_id = "camera"; + +static int g_frame_count = 0; + +// --------------------------------------------------------------------------- +// Signal handling +// --------------------------------------------------------------------------- static void signal_handler(int /*sig*/) { g_running.store(false); @@ -50,6 +78,155 @@ static ORB_SLAM3::System::eSensor parse_sensor_mode(const std::string& mode) { return ORB_SLAM3::System::MONOCULAR; } +// --------------------------------------------------------------------------- +// Image decoding +// --------------------------------------------------------------------------- + +using dimos::time_from_seconds; +using dimos::make_header; + +static cv::Mat decode_image(const sensor_msgs::Image& msg) { + int cv_type; + int channels; + + if (msg.encoding == "mono8") { + cv_type = CV_8UC1; + channels = 1; + } else if (msg.encoding == "rgb8") { + cv_type = CV_8UC3; + channels = 3; + } else if (msg.encoding == "bgr8") { + cv_type = CV_8UC3; + channels = 3; + } else if (msg.encoding == "rgba8") { + cv_type = CV_8UC4; + channels = 4; + } else if (msg.encoding == "bgra8") { + cv_type = CV_8UC4; + channels = 4; + } else { + // Fallback: treat as mono8 + fprintf(stderr, "[orbslam3] Unknown encoding '%s', treating as mono8\n", + msg.encoding.c_str()); + cv_type = CV_8UC1; + channels = 1; + } + + // Wrap raw data in cv::Mat (zero-copy from LCM buffer) + cv::Mat raw(msg.height, msg.width, cv_type, + const_cast(msg.data.data()), msg.step); + + // Convert to grayscale + cv::Mat gray; + if (channels == 1) { + gray = raw.clone(); + } else if (msg.encoding == "rgb8") { + cv::cvtColor(raw, gray, cv::COLOR_RGB2GRAY); + } else if (msg.encoding == "bgr8") { + cv::cvtColor(raw, gray, cv::COLOR_BGR2GRAY); + } else if (msg.encoding == "rgba8") { + cv::cvtColor(raw, gray, cv::COLOR_RGBA2GRAY); + } else if (msg.encoding == "bgra8") { + cv::cvtColor(raw, gray, cv::COLOR_BGRA2GRAY); + } else { + gray = raw.clone(); + } + + return gray; +} + +static double image_timestamp(const sensor_msgs::Image& msg) { + return static_cast(msg.header.stamp.sec) + + static_cast(msg.header.stamp.nsec) / 1e9; +} + +// --------------------------------------------------------------------------- +// Publish odometry +// --------------------------------------------------------------------------- + +static void publish_odometry(const Sophus::SE3f& pose, double timestamp) { + if (!g_lcm || g_odometry_topic.empty()) return; + + // Extract translation and quaternion + Eigen::Vector3f t = pose.translation(); + Eigen::Quaternionf q = pose.unit_quaternion(); + + nav_msgs::Odometry msg; + msg.header = make_header(g_frame_id, timestamp); + msg.child_frame_id = g_child_frame_id; + + // Position + msg.pose.pose.position.x = static_cast(t.x()); + msg.pose.pose.position.y = static_cast(t.y()); + msg.pose.pose.position.z = static_cast(t.z()); + + // Orientation (Eigen quaternion: x,y,z,w) + msg.pose.pose.orientation.x = static_cast(q.x()); + msg.pose.pose.orientation.y = static_cast(q.y()); + msg.pose.pose.orientation.z = static_cast(q.z()); + msg.pose.pose.orientation.w = static_cast(q.w()); + + // Diagonal covariance (ORB-SLAM3 doesn't provide covariance) + std::memset(msg.pose.covariance, 0, sizeof(msg.pose.covariance)); + for (int i = 0; i < 6; ++i) { + msg.pose.covariance[i * 6 + i] = 0.01; + } + + // Zero twist + msg.twist.twist.linear.x = 0; + msg.twist.twist.linear.y = 0; + msg.twist.twist.linear.z = 0; + msg.twist.twist.angular.x = 0; + msg.twist.twist.angular.y = 0; + msg.twist.twist.angular.z = 0; + std::memset(msg.twist.covariance, 0, sizeof(msg.twist.covariance)); + + g_lcm->publish(g_odometry_topic, &msg); +} + +// --------------------------------------------------------------------------- +// Image handler +// --------------------------------------------------------------------------- + +class ImageHandler { +public: + void on_image(const lcm::ReceiveBuffer* /*rbuf*/, + const std::string& /*channel*/, + const sensor_msgs::Image* msg) { + if (!g_slam || !g_running.load()) return; + + // Decode image to grayscale + cv::Mat gray = decode_image(*msg); + if (gray.empty()) return; + + double ts = image_timestamp(*msg); + + // Track + Sophus::SE3f Tcw = g_slam->TrackMonocular(gray, ts); + + // Only publish when actively tracking (state 2 = eTracking) + int state = g_slam->GetTrackingState(); + if (state == 2) { + // Invert: TrackMonocular returns Tcw (world-to-camera), + // we want Twc (camera pose in world frame) + Sophus::SE3f Twc = Tcw.inverse(); + publish_odometry(Twc, ts); + } + + g_frame_count++; + if (g_frame_count % 100 == 0) { + const char* state_str = "unknown"; + switch (state) { + case 0: state_str = "not_initialized"; break; + case 1: state_str = "initializing"; break; + case 2: state_str = "tracking"; break; + case 3: state_str = "lost"; break; + } + printf("[orbslam3] frame=%d state=%s\n", g_frame_count, state_str); + } + } +}; + // --------------------------------------------------------------------------- // Main // --------------------------------------------------------------------------- @@ -76,33 +253,65 @@ int main(int argc, char** argv) { return 1; } + // Topics + g_image_topic = mod.has("color_image") ? mod.topic("color_image") : ""; + g_odometry_topic = mod.has("odometry") ? mod.topic("odometry") : ""; + + if (g_image_topic.empty()) { + fprintf(stderr, "[orbslam3] Error: --color_image topic is required\n"); + return 1; + } + + // Config std::string sensor_str = mod.arg("sensor_mode", "MONOCULAR"); bool use_viewer = mod.arg("use_viewer", "false") == "true"; + g_frame_id = mod.arg("frame_id", "map"); + g_child_frame_id = mod.arg("child_frame_id", "camera"); auto sensor_mode = parse_sensor_mode(sensor_str); - printf("[orbslam3] Initializing ORB-SLAM3\n"); + printf("[orbslam3] Starting ORB-SLAM3 native module\n"); printf("[orbslam3] vocab: %s\n", vocab_path.c_str()); printf("[orbslam3] settings: %s\n", settings_path.c_str()); printf("[orbslam3] sensor: %s\n", sensor_str.c_str()); printf("[orbslam3] viewer: %s\n", use_viewer ? "true" : "false"); + printf("[orbslam3] image topic: %s\n", g_image_topic.c_str()); + printf("[orbslam3] odometry topic: %s\n", + g_odometry_topic.empty() ? "(disabled)" : g_odometry_topic.c_str()); // Signal handlers signal(SIGTERM, signal_handler); signal(SIGINT, signal_handler); + // Init LCM + lcm::LCM lcm; + if (!lcm.good()) { + fprintf(stderr, "[orbslam3] Error: LCM init failed\n"); + return 1; + } + g_lcm = &lcm; + + // Subscribe to image topic + ImageHandler handler; + lcm.subscribe(g_image_topic, &ImageHandler::on_image, &handler); + // Initialize ORB-SLAM3 (loads vocabulary, starts internal threads) + printf("[orbslam3] Loading vocabulary...\n"); ORB_SLAM3::System slam(vocab_path, settings_path, sensor_mode, use_viewer); + g_slam = &slam; - printf("[orbslam3] System initialized, ready.\n"); + printf("[orbslam3] System initialized, processing frames.\n"); - // Block until shutdown signal + // Main loop: dispatch LCM messages while (g_running.load()) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + lcm.handleTimeout(100); } - printf("[orbslam3] Shutting down...\n"); + // Cleanup + printf("[orbslam3] Shutting down... (%d frames processed)\n", g_frame_count); + g_slam = nullptr; slam.Shutdown(); - printf("[orbslam3] Done.\n"); + g_lcm = nullptr; + printf("[orbslam3] Done.\n"); return 0; } diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index 94a033efcd..0e7b2ccd98 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -36,8 +36,9 @@ from pydantic.experimental.pipeline import validate_as from dimos.core.native_module import NativeModule, NativeModuleConfig -from dimos.core.stream import Out +from dimos.core.stream import In, Out from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.Image import Image from dimos.spec import perception _CONFIG_DIR = Path(__file__).parent / "config" @@ -63,7 +64,7 @@ class OrbSlam3Config(NativeModuleConfig): # Camera settings YAML (relative to config/ dir, or absolute path) settings: Annotated[ Path, validate_as(...).transform(lambda p: p if p.is_absolute() else _CONFIG_DIR / p) - ] = Path("RealSense_D435i.yaml") + ] = Path("RealSense_D435i.yaml.opencv") # Resolved from settings, passed as --settings_path to the binary settings_path: str | None = None @@ -76,6 +77,9 @@ class OrbSlam3Config(NativeModuleConfig): def model_post_init(self, __context: object) -> None: super().model_post_init(__context) + # Resolve relative settings path against config/ dir + if not self.settings.is_absolute(): + self.settings = _CONFIG_DIR / self.settings if self.settings_path is None: self.settings_path = str(self.settings) @@ -84,10 +88,12 @@ class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): """ORB-SLAM3 visual SLAM module. Ports: + color_image (In[Image]): Camera frames to track. odometry (Out[Odometry]): Camera pose as nav_msgs.Odometry. """ default_config = OrbSlam3Config + color_image: In[Image] odometry: Out[Odometry] diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index e82cb656ce..f70d85e136 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -60,6 +60,7 @@ "mid360-fastlio": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio", "mid360-fastlio-voxels": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels", "mid360-fastlio-voxels-native": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels_native", + "orbslam3-webcam": "dimos.perception.slam.orbslam3.blueprints.webcam:orbslam3_webcam", "phone-go2-fleet-teleop": "dimos.teleop.phone.blueprints:phone_go2_fleet_teleop", "phone-go2-teleop": "dimos.teleop.phone.blueprints:phone_go2_teleop", "simple-phone-teleop": "dimos.teleop.phone.blueprints:simple_phone_teleop", @@ -126,6 +127,7 @@ "navigation-skill": "dimos.agents.skills.navigation", "object-scene-registration-module": "dimos.perception.object_scene_registration", "object-tracking": "dimos.perception.object_tracker", + "orbslam3-module": "dimos.perception.slam.orbslam3.module", "osm-skill": "dimos.agents.skills.osm", "person-follow-skill": "dimos.agents.skills.person_follow", "person-tracker-module": "dimos.perception.detection.person_tracker", From 8afecb80efe1e081a13b577b688536fdec6749f6 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Wed, 18 Mar 2026 16:25:54 +0800 Subject: [PATCH 3/8] refactor(orbslam3): move GPL-3.0 C++ code to external repo ORB-SLAM3 is GPL-3.0, incompatible with our Apache 2.0 license. Moved C++ native module (main.cpp, CMakeLists, flake.nix) and ORB-SLAM3 configs to dimensionalOS/dimos-orb-slam3. The Python wrapper now builds from the external flake via `nix build github:dimensionalOS/dimos-orb-slam3` into a local cache directory. IPC boundary via LCM is unchanged. --- .../config/RealSense_D435i.yaml.opencv | 63 ---- .../slam/orbslam3/cpp/CMakeLists.txt | 78 ----- dimos/perception/slam/orbslam3/cpp/flake.lock | 96 ------ dimos/perception/slam/orbslam3/cpp/flake.nix | 134 -------- dimos/perception/slam/orbslam3/cpp/main.cpp | 317 ------------------ dimos/perception/slam/orbslam3/module.py | 30 +- 6 files changed, 16 insertions(+), 702 deletions(-) delete mode 100644 dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv delete mode 100644 dimos/perception/slam/orbslam3/cpp/CMakeLists.txt delete mode 100644 dimos/perception/slam/orbslam3/cpp/flake.lock delete mode 100644 dimos/perception/slam/orbslam3/cpp/flake.nix delete mode 100644 dimos/perception/slam/orbslam3/cpp/main.cpp diff --git a/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv b/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv deleted file mode 100644 index e7d341122f..0000000000 --- a/dimos/perception/slam/orbslam3/config/RealSense_D435i.yaml.opencv +++ /dev/null @@ -1,63 +0,0 @@ -%YAML:1.0 - -#-------------------------------------------------------------------------------------------- -# Camera Parameters. Adjust them! -#-------------------------------------------------------------------------------------------- -File.version: "1.0" - -Camera.type: "PinHole" - -# Left Camera calibration and distortion parameters (OpenCV) -Camera1.fx: 382.613 -Camera1.fy: 382.613 -Camera1.cx: 320.183 -Camera1.cy: 236.455 - -# distortion parameters -Camera1.k1: 0.0 -Camera1.k2: 0.0 -Camera1.p1: 0.0 -Camera1.p2: 0.0 - -# Camera resolution -Camera.width: 640 -Camera.height: 480 - -# Camera frames per second -Camera.fps: 30 - -# Color order of the images (0: BGR, 1: RGB. It is ignored if images are grayscale) -Camera.RGB: 1 - -#-------------------------------------------------------------------------------------------- -# ORB Parameters -#-------------------------------------------------------------------------------------------- -# ORB Extractor: Number of features per image -ORBextractor.nFeatures: 1250 - -# ORB Extractor: Scale factor between levels in the scale pyramid -ORBextractor.scaleFactor: 1.2 - -# ORB Extractor: Number of levels in the scale pyramid -ORBextractor.nLevels: 8 - -# ORB Extractor: Fast threshold -# Image is divided in a grid. At each cell FAST are extracted imposing a minimum response. -# Firstly we impose iniThFAST. If no corners are detected we impose a lower value minThFAST -# You can lower these values if your images have low contrast -ORBextractor.iniThFAST: 20 -ORBextractor.minThFAST: 7 - -#-------------------------------------------------------------------------------------------- -# Viewer Parameters -#-------------------------------------------------------------------------------------------- -Viewer.KeyFrameSize: 0.05 -Viewer.KeyFrameLineWidth: 1.0 -Viewer.GraphLineWidth: 0.9 -Viewer.PointSize: 2.0 -Viewer.CameraSize: 0.08 -Viewer.CameraLineWidth: 3.0 -Viewer.ViewpointX: 0.0 -Viewer.ViewpointY: -0.7 -Viewer.ViewpointZ: -3.5 -Viewer.ViewpointF: 500.0 diff --git a/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt b/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt deleted file mode 100644 index 727b5f8539..0000000000 --- a/dimos/perception/slam/orbslam3/cpp/CMakeLists.txt +++ /dev/null @@ -1,78 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(orbslam3_native CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") - -if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(CMAKE_INSTALL_PREFIX "${CMAKE_SOURCE_DIR}/result" CACHE PATH "" FORCE) -endif() - -# Ensure the binary can find ORB-SLAM3 libraries at runtime -if(ORBSLAM3_DIR) - set(CMAKE_INSTALL_RPATH "${ORBSLAM3_DIR}/lib") - set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) -endif() - -# ORB-SLAM3 installed prefix (pass -DORBSLAM3_DIR=) -if(NOT ORBSLAM3_DIR) - message(FATAL_ERROR "ORBSLAM3_DIR not set. Pass -DORBSLAM3_DIR=") -endif() - -# dimos-lcm C++ message headers (for dimos_native_module.hpp dependencies) -include(FetchContent) -FetchContent_Declare(dimos_lcm - GIT_REPOSITORY https://github.com/dimensionalOS/dimos-lcm.git - GIT_TAG main - GIT_SHALLOW TRUE -) -FetchContent_MakeAvailable(dimos_lcm) - -# dimos_native_module.hpp location -if(NOT DIMOS_NATIVE_MODULE_DIR) - set(DIMOS_NATIVE_MODULE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../hardware/sensors/lidar/common) -endif() - -# Dependencies -find_package(OpenCV REQUIRED) -find_package(Eigen3 REQUIRED) -find_package(Pangolin REQUIRED) -find_package(PkgConfig REQUIRED) -pkg_check_modules(LCM REQUIRED lcm) - -# Compile-time default vocab path (set by nix flake) -if(ORBSLAM3_DEFAULT_VOCAB) - add_compile_definitions(ORBSLAM3_DEFAULT_VOCAB="${ORBSLAM3_DEFAULT_VOCAB}") -endif() - -add_executable(orbslam3_native main.cpp) - -target_include_directories(orbslam3_native PRIVATE - ${ORBSLAM3_DIR} # for deps/DBoW2/... deps/g2o/... - ${ORBSLAM3_DIR}/include # for System.h etc. - ${ORBSLAM3_DIR}/include/CameraModels - ${ORBSLAM3_DIR}/deps/Sophus # for sophus/se3.hpp etc. - ${dimos_lcm_SOURCE_DIR}/generated/cpp_lcm_msgs - ${DIMOS_NATIVE_MODULE_DIR} - ${LCM_INCLUDE_DIRS} - ${EIGEN3_INCLUDE_DIR} - ${OpenCV_INCLUDE_DIRS} -) - -target_link_libraries(orbslam3_native PRIVATE - ${ORBSLAM3_DIR}/lib/liborb-slam3.so - ${ORBSLAM3_DIR}/lib/libDBoW2.so - ${ORBSLAM3_DIR}/lib/libg2o.so - ${OpenCV_LIBS} - ${Pangolin_LIBRARIES} - ${LCM_LIBRARIES} - -lboost_serialization - -lcrypto -) - -target_link_directories(orbslam3_native PRIVATE - ${LCM_LIBRARY_DIRS} -) - -install(TARGETS orbslam3_native DESTINATION bin) diff --git a/dimos/perception/slam/orbslam3/cpp/flake.lock b/dimos/perception/slam/orbslam3/cpp/flake.lock deleted file mode 100644 index 531f35f890..0000000000 --- a/dimos/perception/slam/orbslam3/cpp/flake.lock +++ /dev/null @@ -1,96 +0,0 @@ -{ - "nodes": { - "dimos-lcm": { - "flake": false, - "locked": { - "lastModified": 1769774949, - "narHash": "sha256-icRK7jerqNlwK1WZBrnIP04I2WozzFqTD7qsmnPxQuo=", - "owner": "dimensionalOS", - "repo": "dimos-lcm", - "rev": "0aa72b7b1bd3a65f50f5c03485ee9b728df56afe", - "type": "github" - }, - "original": { - "owner": "dimensionalOS", - "ref": "main", - "repo": "dimos-lcm", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1736012469, - "narHash": "sha256-/qlNWm/IEVVH7GfgAIyP6EsVZI6zjAx1cV5zNyrs+rI=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d", - "type": "github" - }, - "original": { - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d", - "type": "github" - } - }, - "orb-slam3-src": { - "flake": false, - "locked": { - "lastModified": 1736936489, - "narHash": "sha256-as8Wk9/6E+1bN0xJIfTn4SaCbz2fSXz2iJEuqIJsi88=", - "owner": "thuvasooriya", - "repo": "orb-slam3", - "rev": "a48413f8fdc47f7ca8354e83dbcfb85c6f8da7ae", - "type": "github" - }, - "original": { - "owner": "thuvasooriya", - "repo": "orb-slam3", - "type": "github" - } - }, - "root": { - "inputs": { - "dimos-lcm": "dimos-lcm", - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs", - "orb-slam3-src": "orb-slam3-src" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/dimos/perception/slam/orbslam3/cpp/flake.nix b/dimos/perception/slam/orbslam3/cpp/flake.nix deleted file mode 100644 index a36d62e99a..0000000000 --- a/dimos/perception/slam/orbslam3/cpp/flake.nix +++ /dev/null @@ -1,134 +0,0 @@ -{ - description = "ORB-SLAM3 native module for dimos"; - - inputs = { - # Pin to a nixpkgs that still has pangolin (removed in later revisions) - nixpkgs.url = "github:NixOS/nixpkgs/8f3e1f807051e32d8c95cd12b9b421623850a34d"; - flake-utils.url = "github:numtide/flake-utils"; - dimos-lcm = { - url = "github:dimensionalOS/dimos-lcm/main"; - flake = false; - }; - orb-slam3-src = { - url = "github:thuvasooriya/orb-slam3"; - flake = false; - }; - }; - - outputs = { self, nixpkgs, flake-utils, dimos-lcm, orb-slam3-src, ... }: - flake-utils.lib.eachDefaultSystem (system: - let - pkgs = import nixpkgs { inherit system; }; - - # Shared dimos native module header (dimos_native_module.hpp) - dimos-native-module-dir = ../../../../hardware/sensors/lidar/common; - - # Build ORB-SLAM3 library from source - orb-slam3 = pkgs.stdenv.mkDerivation { - pname = "orb-slam3"; - version = "1.0.0"; - src = orb-slam3-src; - - nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; - buildInputs = [ - pkgs.opencv - pkgs.eigen - pkgs.pangolin - pkgs.boost - pkgs.openssl - pkgs.glew - pkgs.llvmPackages.openmp - ]; - - # Skip default cmake configure (we need to build DBoW2 first) - dontConfigure = true; - - # Strip -march=native for nix reproducibility - postPatch = '' - sed -i 's/-march=native//g' CMakeLists.txt deps/g2o/CMakeLists.txt - ''; - - buildPhase = '' - # Build DBoW2 first (main CMakeLists expects it pre-built) - cd deps/DBoW2 - mkdir -p build && cd build - cmake .. -DCMAKE_BUILD_TYPE=Release - make -j$NIX_BUILD_CORES - cd ../../.. - - # Build main ORB-SLAM3 (g2o is built via add_subdirectory) - # Only build the library target, skip example binaries - mkdir -p build && cd build - cmake .. \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_RPATH=$out/lib \ - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON - make -j$NIX_BUILD_CORES orb-slam3 g2o - ''; - - installPhase = '' - cd .. # back to source root from build/ - - mkdir -p $out/lib - mkdir -p $out/include/CameraModels - mkdir -p $out/share/orb-slam3/Vocabulary - - # Libraries (keep original name to match SONAME) - cp lib/liborb-slam3.so $out/lib/ - cp deps/DBoW2/lib/libDBoW2.so $out/lib/ - find . -name 'libg2o.so' -exec cp {} $out/lib/ \; - - # Headers - cp include/*.h $out/include/ - cp include/CameraModels/*.h $out/include/CameraModels/ - - # Full deps/ tree for transitive header includes - mkdir -p $out/deps - cp -r deps/DBoW2 $out/deps/DBoW2 - cp -r deps/g2o $out/deps/g2o - cp -r deps/Sophus $out/deps/Sophus - # Remove build artifacts from deps - rm -rf $out/deps/*/build $out/deps/*/lib - - # Vocabulary (only .tar.gz is tracked in git) - if [ ! -f Vocabulary/ORBvoc.txt ]; then - tar -xzf Vocabulary/ORBvoc.txt.tar.gz -C Vocabulary/ - fi - cp Vocabulary/ORBvoc.txt $out/share/orb-slam3/Vocabulary/ - ''; - }; - - # Build our thin wrapper binary - orbslam3_native = pkgs.stdenv.mkDerivation { - pname = "orbslam3_native"; - version = "0.1.0"; - src = ./.; - - nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; - buildInputs = [ - orb-slam3 - pkgs.opencv - pkgs.eigen - pkgs.pangolin - pkgs.boost - pkgs.openssl - pkgs.lcm - pkgs.glib - pkgs.glew - ]; - - cmakeFlags = [ - "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" - "-DFETCHCONTENT_SOURCE_DIR_DIMOS_LCM=${dimos-lcm}" - "-DORBSLAM3_DIR=${orb-slam3}" - "-DDIMOS_NATIVE_MODULE_DIR=${dimos-native-module-dir}" - "-DORBSLAM3_DEFAULT_VOCAB=${orb-slam3}/share/orb-slam3/Vocabulary/ORBvoc.txt" - ]; - }; - in { - packages = { - default = orbslam3_native; - inherit orbslam3_native orb-slam3; - }; - }); -} diff --git a/dimos/perception/slam/orbslam3/cpp/main.cpp b/dimos/perception/slam/orbslam3/cpp/main.cpp deleted file mode 100644 index b1d0f0a9f1..0000000000 --- a/dimos/perception/slam/orbslam3/cpp/main.cpp +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2026 Dimensional Inc. -// SPDX-License-Identifier: Apache-2.0 -// -// ORB-SLAM3 native module for dimos NativeModule framework. -// -// Subscribes to camera images on LCM, feeds them to ORB_SLAM3::TrackMonocular(), -// and publishes camera pose estimates as nav_msgs::Odometry. -// -// Usage: -// ./orbslam3_native \ -// --color_image '/color_image#sensor_msgs.Image' \ -// --odometry '/odometry#nav_msgs.Odometry' \ -// --settings_path /path/to/RealSense_D435i.yaml \ -// --sensor_mode MONOCULAR \ -// --use_viewer false - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "dimos_native_module.hpp" - -// dimos LCM message headers -#include "geometry_msgs/Point.hpp" -#include "geometry_msgs/Quaternion.hpp" -#include "nav_msgs/Odometry.hpp" -#include "sensor_msgs/Image.hpp" - -// ORB-SLAM3 -#include "System.h" - -// --------------------------------------------------------------------------- -// Global state -// --------------------------------------------------------------------------- - -static std::atomic g_running{true}; -static lcm::LCM* g_lcm = nullptr; -static ORB_SLAM3::System* g_slam = nullptr; - -static std::string g_image_topic; -static std::string g_odometry_topic; -static std::string g_frame_id = "map"; -static std::string g_child_frame_id = "camera"; - -static int g_frame_count = 0; - -// --------------------------------------------------------------------------- -// Signal handling -// --------------------------------------------------------------------------- - -static void signal_handler(int /*sig*/) { - g_running.store(false); -} - -// --------------------------------------------------------------------------- -// Sensor mode parsing -// --------------------------------------------------------------------------- - -static ORB_SLAM3::System::eSensor parse_sensor_mode(const std::string& mode) { - if (mode == "MONOCULAR") return ORB_SLAM3::System::MONOCULAR; - if (mode == "STEREO") return ORB_SLAM3::System::STEREO; - if (mode == "RGBD") return ORB_SLAM3::System::RGBD; - if (mode == "IMU_MONOCULAR") return ORB_SLAM3::System::IMU_MONOCULAR; - if (mode == "IMU_STEREO") return ORB_SLAM3::System::IMU_STEREO; - if (mode == "IMU_RGBD") return ORB_SLAM3::System::IMU_RGBD; - fprintf(stderr, "[orbslam3] Unknown sensor mode: %s, defaulting to MONOCULAR\n", - mode.c_str()); - return ORB_SLAM3::System::MONOCULAR; -} - -// --------------------------------------------------------------------------- -// Image decoding -// --------------------------------------------------------------------------- - -using dimos::time_from_seconds; -using dimos::make_header; - -static cv::Mat decode_image(const sensor_msgs::Image& msg) { - int cv_type; - int channels; - - if (msg.encoding == "mono8") { - cv_type = CV_8UC1; - channels = 1; - } else if (msg.encoding == "rgb8") { - cv_type = CV_8UC3; - channels = 3; - } else if (msg.encoding == "bgr8") { - cv_type = CV_8UC3; - channels = 3; - } else if (msg.encoding == "rgba8") { - cv_type = CV_8UC4; - channels = 4; - } else if (msg.encoding == "bgra8") { - cv_type = CV_8UC4; - channels = 4; - } else { - // Fallback: treat as mono8 - fprintf(stderr, "[orbslam3] Unknown encoding '%s', treating as mono8\n", - msg.encoding.c_str()); - cv_type = CV_8UC1; - channels = 1; - } - - // Wrap raw data in cv::Mat (zero-copy from LCM buffer) - cv::Mat raw(msg.height, msg.width, cv_type, - const_cast(msg.data.data()), msg.step); - - // Convert to grayscale - cv::Mat gray; - if (channels == 1) { - gray = raw.clone(); - } else if (msg.encoding == "rgb8") { - cv::cvtColor(raw, gray, cv::COLOR_RGB2GRAY); - } else if (msg.encoding == "bgr8") { - cv::cvtColor(raw, gray, cv::COLOR_BGR2GRAY); - } else if (msg.encoding == "rgba8") { - cv::cvtColor(raw, gray, cv::COLOR_RGBA2GRAY); - } else if (msg.encoding == "bgra8") { - cv::cvtColor(raw, gray, cv::COLOR_BGRA2GRAY); - } else { - gray = raw.clone(); - } - - return gray; -} - -static double image_timestamp(const sensor_msgs::Image& msg) { - return static_cast(msg.header.stamp.sec) + - static_cast(msg.header.stamp.nsec) / 1e9; -} - -// --------------------------------------------------------------------------- -// Publish odometry -// --------------------------------------------------------------------------- - -static void publish_odometry(const Sophus::SE3f& pose, double timestamp) { - if (!g_lcm || g_odometry_topic.empty()) return; - - // Extract translation and quaternion - Eigen::Vector3f t = pose.translation(); - Eigen::Quaternionf q = pose.unit_quaternion(); - - nav_msgs::Odometry msg; - msg.header = make_header(g_frame_id, timestamp); - msg.child_frame_id = g_child_frame_id; - - // Position - msg.pose.pose.position.x = static_cast(t.x()); - msg.pose.pose.position.y = static_cast(t.y()); - msg.pose.pose.position.z = static_cast(t.z()); - - // Orientation (Eigen quaternion: x,y,z,w) - msg.pose.pose.orientation.x = static_cast(q.x()); - msg.pose.pose.orientation.y = static_cast(q.y()); - msg.pose.pose.orientation.z = static_cast(q.z()); - msg.pose.pose.orientation.w = static_cast(q.w()); - - // Diagonal covariance (ORB-SLAM3 doesn't provide covariance) - std::memset(msg.pose.covariance, 0, sizeof(msg.pose.covariance)); - for (int i = 0; i < 6; ++i) { - msg.pose.covariance[i * 6 + i] = 0.01; - } - - // Zero twist - msg.twist.twist.linear.x = 0; - msg.twist.twist.linear.y = 0; - msg.twist.twist.linear.z = 0; - msg.twist.twist.angular.x = 0; - msg.twist.twist.angular.y = 0; - msg.twist.twist.angular.z = 0; - std::memset(msg.twist.covariance, 0, sizeof(msg.twist.covariance)); - - g_lcm->publish(g_odometry_topic, &msg); -} - -// --------------------------------------------------------------------------- -// Image handler -// --------------------------------------------------------------------------- - -class ImageHandler { -public: - void on_image(const lcm::ReceiveBuffer* /*rbuf*/, - const std::string& /*channel*/, - const sensor_msgs::Image* msg) { - if (!g_slam || !g_running.load()) return; - - // Decode image to grayscale - cv::Mat gray = decode_image(*msg); - if (gray.empty()) return; - - double ts = image_timestamp(*msg); - - // Track - Sophus::SE3f Tcw = g_slam->TrackMonocular(gray, ts); - - // Only publish when actively tracking (state 2 = eTracking) - int state = g_slam->GetTrackingState(); - if (state == 2) { - // Invert: TrackMonocular returns Tcw (world-to-camera), - // we want Twc (camera pose in world frame) - Sophus::SE3f Twc = Tcw.inverse(); - publish_odometry(Twc, ts); - } - - g_frame_count++; - if (g_frame_count % 100 == 0) { - const char* state_str = "unknown"; - switch (state) { - case 0: state_str = "not_initialized"; break; - case 1: state_str = "initializing"; break; - case 2: state_str = "tracking"; break; - case 3: state_str = "lost"; break; - } - printf("[orbslam3] frame=%d state=%s\n", g_frame_count, state_str); - } - } -}; - -// --------------------------------------------------------------------------- -// Main -// --------------------------------------------------------------------------- - -int main(int argc, char** argv) { - dimos::NativeModule mod(argc, argv); - - // Required: camera settings YAML - std::string settings_path = mod.arg("settings_path", ""); - if (settings_path.empty()) { - fprintf(stderr, "[orbslam3] Error: --settings_path is required\n"); - return 1; - } - - // Vocabulary (compile-time default from nix build, or override via CLI) -#ifdef ORBSLAM3_DEFAULT_VOCAB - std::string vocab_path = mod.arg("vocab_path", ORBSLAM3_DEFAULT_VOCAB); -#else - std::string vocab_path = mod.arg("vocab_path", ""); -#endif - if (vocab_path.empty()) { - fprintf(stderr, "[orbslam3] Error: --vocab_path is required " - "(no compiled-in default available)\n"); - return 1; - } - - // Topics - g_image_topic = mod.has("color_image") ? mod.topic("color_image") : ""; - g_odometry_topic = mod.has("odometry") ? mod.topic("odometry") : ""; - - if (g_image_topic.empty()) { - fprintf(stderr, "[orbslam3] Error: --color_image topic is required\n"); - return 1; - } - - // Config - std::string sensor_str = mod.arg("sensor_mode", "MONOCULAR"); - bool use_viewer = mod.arg("use_viewer", "false") == "true"; - g_frame_id = mod.arg("frame_id", "map"); - g_child_frame_id = mod.arg("child_frame_id", "camera"); - auto sensor_mode = parse_sensor_mode(sensor_str); - - printf("[orbslam3] Starting ORB-SLAM3 native module\n"); - printf("[orbslam3] vocab: %s\n", vocab_path.c_str()); - printf("[orbslam3] settings: %s\n", settings_path.c_str()); - printf("[orbslam3] sensor: %s\n", sensor_str.c_str()); - printf("[orbslam3] viewer: %s\n", use_viewer ? "true" : "false"); - printf("[orbslam3] image topic: %s\n", g_image_topic.c_str()); - printf("[orbslam3] odometry topic: %s\n", - g_odometry_topic.empty() ? "(disabled)" : g_odometry_topic.c_str()); - - // Signal handlers - signal(SIGTERM, signal_handler); - signal(SIGINT, signal_handler); - - // Init LCM - lcm::LCM lcm; - if (!lcm.good()) { - fprintf(stderr, "[orbslam3] Error: LCM init failed\n"); - return 1; - } - g_lcm = &lcm; - - // Subscribe to image topic - ImageHandler handler; - lcm.subscribe(g_image_topic, &ImageHandler::on_image, &handler); - - // Initialize ORB-SLAM3 (loads vocabulary, starts internal threads) - printf("[orbslam3] Loading vocabulary...\n"); - ORB_SLAM3::System slam(vocab_path, settings_path, sensor_mode, use_viewer); - g_slam = &slam; - - printf("[orbslam3] System initialized, processing frames.\n"); - - // Main loop: dispatch LCM messages - while (g_running.load()) { - lcm.handleTimeout(100); - } - - // Cleanup - printf("[orbslam3] Shutting down... (%d frames processed)\n", g_frame_count); - g_slam = nullptr; - slam.Shutdown(); - g_lcm = nullptr; - - printf("[orbslam3] Done.\n"); - return 0; -} diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index 0e7b2ccd98..ed642075fb 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -30,10 +30,9 @@ from __future__ import annotations +import os from pathlib import Path -from typing import TYPE_CHECKING, Annotated - -from pydantic.experimental.pipeline import validate_as +from typing import TYPE_CHECKING from dimos.core.native_module import NativeModule, NativeModuleConfig from dimos.core.stream import In, Out @@ -41,15 +40,15 @@ from dimos.msgs.sensor_msgs.Image import Image from dimos.spec import perception -_CONFIG_DIR = Path(__file__).parent / "config" +_CACHE_DIR = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) / "dimos" / "orbslam3" class OrbSlam3Config(NativeModuleConfig): """Config for the ORB-SLAM3 visual SLAM native module.""" - cwd: str | None = "cpp" + cwd: str | None = None executable: str = "result/bin/orbslam3_native" - build_command: str | None = "nix build .#orbslam3_native" + build_command: str | None = "nix build github:dimensionalOS/dimos-orb-slam3" # ORB-SLAM3 sensor mode sensor_mode: str = "MONOCULAR" # MONOCULAR, STEREO, RGBD, IMU_MONOCULAR, IMU_STEREO, IMU_RGBD @@ -61,10 +60,9 @@ class OrbSlam3Config(NativeModuleConfig): frame_id: str = "map" child_frame_id: str = "camera" - # Camera settings YAML (relative to config/ dir, or absolute path) - settings: Annotated[ - Path, validate_as(...).transform(lambda p: p if p.is_absolute() else _CONFIG_DIR / p) - ] = Path("RealSense_D435i.yaml.opencv") + # Camera settings YAML (relative name resolves against nix package's bundled configs, + # or pass an absolute path for custom configs) + settings: Path = Path("RealSense_D435i.yaml.opencv") # Resolved from settings, passed as --settings_path to the binary settings_path: str | None = None @@ -76,12 +74,16 @@ class OrbSlam3Config(NativeModuleConfig): cli_exclude: frozenset[str] = frozenset({"settings"}) def model_post_init(self, __context: object) -> None: + _CACHE_DIR.mkdir(parents=True, exist_ok=True) + if self.cwd is None: + self.cwd = str(_CACHE_DIR) super().model_post_init(__context) - # Resolve relative settings path against config/ dir - if not self.settings.is_absolute(): - self.settings = _CONFIG_DIR / self.settings if self.settings_path is None: - self.settings_path = str(self.settings) + if self.settings.is_absolute(): + self.settings_path = str(self.settings) + else: + nix_config = _CACHE_DIR / "result" / "share" / "orbslam3" / "config" + self.settings_path = str(nix_config / self.settings) class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): From bb3d0075d9f66883058a2c776de8465eb6fc1c4a Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Wed, 18 Mar 2026 16:34:35 +0800 Subject: [PATCH 4/8] working example with vis and remote orbslam3 repo --- dimos/perception/slam/orbslam3/blueprints/webcam.py | 4 +++- dimos/perception/slam/orbslam3/module.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dimos/perception/slam/orbslam3/blueprints/webcam.py b/dimos/perception/slam/orbslam3/blueprints/webcam.py index 1b37480a1f..d620780b02 100644 --- a/dimos/perception/slam/orbslam3/blueprints/webcam.py +++ b/dimos/perception/slam/orbslam3/blueprints/webcam.py @@ -15,10 +15,12 @@ from dimos.core.blueprints import autoconnect from dimos.hardware.sensors.camera.module import CameraModule from dimos.perception.slam.orbslam3.module import OrbSlam3 +from dimos.visualization.rerun.bridge import _resolve_viewer_mode, rerun_bridge orbslam3_webcam = autoconnect( + rerun_bridge(viewer_mode=_resolve_viewer_mode()), CameraModule.blueprint(), OrbSlam3.blueprint(sensor_mode="MONOCULAR"), -) +).global_config(n_workers=3) __all__ = ["orbslam3_webcam"] diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index ed642075fb..0544f5efab 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -48,7 +48,9 @@ class OrbSlam3Config(NativeModuleConfig): cwd: str | None = None executable: str = "result/bin/orbslam3_native" - build_command: str | None = "nix build github:dimensionalOS/dimos-orb-slam3" + build_command: str | None = ( + "nix build github:dimensionalOS/dimos-orb-slam3 --no-write-lock-file" + ) # ORB-SLAM3 sensor mode sensor_mode: str = "MONOCULAR" # MONOCULAR, STEREO, RGBD, IMU_MONOCULAR, IMU_STEREO, IMU_RGBD From 4f22d5d3c3d5ed78925124eda5a6bd208abc8ea9 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Wed, 18 Mar 2026 16:47:36 +0800 Subject: [PATCH 5/8] refactor(orbslam3): simplify module config, pin external flake to v0.1.0 Add SensorMode enum, remove model_post_init, use module directory for cwd, pin build to dimos-orb-slam3/v0.1.0. Gitignore **/result. --- .gitignore | 2 +- dimos/perception/slam/orbslam3/module.py | 47 ++++++++++-------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 4045db012e..d526ac0f87 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,7 @@ yolo11n.pt *mobileclip* /results -**/cpp/result +**/result CLAUDE.MD /assets/teleop_certs/ diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index 0544f5efab..34246514b9 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -23,14 +23,14 @@ from dimos.core.blueprints import autoconnect autoconnect( - OrbSlam3.blueprint(sensor_mode="MONOCULAR"), + OrbSlam3.blueprint(sensor_mode=SensorMode.MONOCULAR), SomeConsumer.blueprint(), ).build().loop() """ from __future__ import annotations -import os +import enum from pathlib import Path from typing import TYPE_CHECKING @@ -40,20 +40,29 @@ from dimos.msgs.sensor_msgs.Image import Image from dimos.spec import perception -_CACHE_DIR = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) / "dimos" / "orbslam3" +_MODULE_DIR = Path(__file__).parent + + +class SensorMode(enum.StrEnum): + MONOCULAR = "MONOCULAR" + STEREO = "STEREO" + RGBD = "RGBD" + IMU_MONOCULAR = "IMU_MONOCULAR" + IMU_STEREO = "IMU_STEREO" + IMU_RGBD = "IMU_RGBD" class OrbSlam3Config(NativeModuleConfig): """Config for the ORB-SLAM3 visual SLAM native module.""" - cwd: str | None = None + cwd: str | None = str(_MODULE_DIR) executable: str = "result/bin/orbslam3_native" build_command: str | None = ( - "nix build github:dimensionalOS/dimos-orb-slam3 --no-write-lock-file" + "nix build github:dimensionalOS/dimos-orb-slam3/v0.1.0 --no-write-lock-file" ) # ORB-SLAM3 sensor mode - sensor_mode: str = "MONOCULAR" # MONOCULAR, STEREO, RGBD, IMU_MONOCULAR, IMU_STEREO, IMU_RGBD + sensor_mode: SensorMode = SensorMode.MONOCULAR # Pangolin viewer (disable for headless) use_viewer: bool = False @@ -62,31 +71,14 @@ class OrbSlam3Config(NativeModuleConfig): frame_id: str = "map" child_frame_id: str = "camera" - # Camera settings YAML (relative name resolves against nix package's bundled configs, - # or pass an absolute path for custom configs) - settings: Path = Path("RealSense_D435i.yaml.opencv") - - # Resolved from settings, passed as --settings_path to the binary - settings_path: str | None = None + # Camera settings YAML (absolute path, or override with your own calibration) + settings_path: str = str( + _MODULE_DIR / "result" / "share" / "orbslam3" / "config" / "RealSense_D435i.yaml" + ) # Vocabulary path (None = use compiled-in default from nix build) vocab_path: str | None = None - # settings is not a CLI arg (settings_path is) - cli_exclude: frozenset[str] = frozenset({"settings"}) - - def model_post_init(self, __context: object) -> None: - _CACHE_DIR.mkdir(parents=True, exist_ok=True) - if self.cwd is None: - self.cwd = str(_CACHE_DIR) - super().model_post_init(__context) - if self.settings_path is None: - if self.settings.is_absolute(): - self.settings_path = str(self.settings) - else: - nix_config = _CACHE_DIR / "result" / "share" / "orbslam3" / "config" - self.settings_path = str(nix_config / self.settings) - class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): """ORB-SLAM3 visual SLAM module. @@ -106,6 +98,7 @@ class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): __all__ = [ "OrbSlam3", "OrbSlam3Config", + "SensorMode", "orbslam3_module", ] From 64081da85d9eec016b6305705c3fe4e25b0a119d Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Wed, 18 Mar 2026 17:35:49 +0800 Subject: [PATCH 6/8] wrap --- dimos/hardware/sensors/camera/webcam.py | 6 +++++- dimos/perception/slam/orbslam3/module.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dimos/hardware/sensors/camera/webcam.py b/dimos/hardware/sensors/camera/webcam.py index 3f3b047ba0..61717b699e 100644 --- a/dimos/hardware/sensors/camera/webcam.py +++ b/dimos/hardware/sensors/camera/webcam.py @@ -166,6 +166,10 @@ def _capture_loop(self) -> None: @property def camera_info(self) -> CameraInfo: - return self.config.camera_info + info = self.config.camera_info + if info.width == 0 or info.height == 0: + info.width = self.config.width + info.height = self.config.height + return info def emit(self, image: Image) -> None: ... diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index 34246514b9..a18c665804 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -68,8 +68,8 @@ class OrbSlam3Config(NativeModuleConfig): use_viewer: bool = False # Frame IDs for output messages - frame_id: str = "map" - child_frame_id: str = "camera" + frame_id: str = "world" + child_frame_id: str = "camera_link" # Camera settings YAML (absolute path, or override with your own calibration) settings_path: str = str( From b6a0279c2abe2be5e9e07ae6e329fd4dc13fee8a Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Wed, 18 Mar 2026 22:44:13 +0800 Subject: [PATCH 7/8] exposing extra odbslam3 topics --- dimos/perception/slam/orbslam3/module.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dimos/perception/slam/orbslam3/module.py b/dimos/perception/slam/orbslam3/module.py index a18c665804..3a2064bbbe 100644 --- a/dimos/perception/slam/orbslam3/module.py +++ b/dimos/perception/slam/orbslam3/module.py @@ -37,7 +37,9 @@ from dimos.core.native_module import NativeModule, NativeModuleConfig from dimos.core.stream import In, Out from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.nav_msgs.Path import Path as NavPath from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.spec import perception _MODULE_DIR = Path(__file__).parent @@ -58,7 +60,7 @@ class OrbSlam3Config(NativeModuleConfig): cwd: str | None = str(_MODULE_DIR) executable: str = "result/bin/orbslam3_native" build_command: str | None = ( - "nix build github:dimensionalOS/dimos-orb-slam3/v0.1.0 --no-write-lock-file" + "nix build github:dimensionalOS/dimos-orb-slam3/v0.2.0 --no-write-lock-file" ) # ORB-SLAM3 sensor mode @@ -79,6 +81,9 @@ class OrbSlam3Config(NativeModuleConfig): # Vocabulary path (None = use compiled-in default from nix build) vocab_path: str | None = None + # How often to publish map points and keyframe path (every N frames) + map_publish_interval: int = 10 + class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): """ORB-SLAM3 visual SLAM module. @@ -86,11 +91,17 @@ class OrbSlam3(NativeModule[OrbSlam3Config], perception.Odometry): Ports: color_image (In[Image]): Camera frames to track. odometry (Out[Odometry]): Camera pose as nav_msgs.Odometry. + keypoints_image (Out[Image]): Color image with tracked keypoints overlay. + map_points (Out[PointCloud2]): Sparse 3D map points. + keyframe_path (Out[NavPath]): Keyframe pose graph as a path. """ default_config = OrbSlam3Config color_image: In[Image] odometry: Out[Odometry] + keypoints_image: Out[Image] + map_points: Out[PointCloud2] + keyframe_path: Out[NavPath] orbslam3_module = OrbSlam3.blueprint From b61ae938fffbdefbeb9918b42d26938305851c8e Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 26 Mar 2026 22:11:36 +0800 Subject: [PATCH 8/8] docs(orbslam3): add README with known tf/reconstruction issue Documents the known transform mismatch where reconstructed trajectory diverges from ground-truth poses. Needs investigation. Co-Authored-By: Claude Opus 4.6 (1M context) --- dimos/perception/slam/orbslam3/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 dimos/perception/slam/orbslam3/README.md diff --git a/dimos/perception/slam/orbslam3/README.md b/dimos/perception/slam/orbslam3/README.md new file mode 100644 index 0000000000..a38fe700e8 --- /dev/null +++ b/dimos/perception/slam/orbslam3/README.md @@ -0,0 +1,9 @@ +# ORB-SLAM3 Native Module + +Visual SLAM module wrapping [ORB-SLAM3](https://github.com/UZ-SLAMLab/ORB_SLAM3) as a native subprocess. + +The C++ binary lives in a separate GPL-3.0 repo ([dimos-orb-slam3](https://github.com/dimensionalOS/dimos-orb-slam3)) and is pulled in at build time via Nix. + +## Known Issues + +- **Transform / trajectory reconstruction mismatch**: The reconstructed trajectory does not match ground-truth poses. There is a suspected coordinate-frame or transform-composition issue causing output to diverge from base truth. Needs investigation.