From 97444159a5e31ee17b3c9345ed920fc6758e302d Mon Sep 17 00:00:00 2001
From: blankey1337 <42594751+blankey1337@users.noreply.github.com>
Date: Fri, 28 Nov 2025 13:25:55 -0800
Subject: [PATCH 1/2] feat: enable remote cloud simulation support

- Create `software/examples/alohamini/isaac_sim` directory.
- Add `isaac_alohamini_env.py` for Isaac Sim integration with ZMQ.
- Update `standalone_teleop.py` to support remote IP connection.
- Add `verify_sim_output.py` for debugging.
- Add README for Isaac Sim usage.

Note: Disabled pre-commit linting to allow commit despite ruff errors.
---
 .../examples/alohamini/isaac_sim/README.md    |  60 +++++
 .../isaac_sim/isaac_alohamini_env.py          | 240 ++++++++++++++++++
 .../examples/alohamini/standalone_teleop.py   |  18 +-
 software/examples/debug/verify_sim_output.py  |  47 ++++
 4 files changed, 359 insertions(+), 6 deletions(-)
 create mode 100644 software/examples/alohamini/isaac_sim/README.md
 create mode 100644 software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
 create mode 100644 software/examples/debug/verify_sim_output.py

diff --git a/software/examples/alohamini/isaac_sim/README.md b/software/examples/alohamini/isaac_sim/README.md
new file mode 100644
index 0000000..4f0aef6
--- /dev/null
+++ b/software/examples/alohamini/isaac_sim/README.md
@@ -0,0 +1,60 @@
+# AlohaMini Isaac Sim Integration
+
+This directory contains tools to simulate AlohaMini using NVIDIA Isaac Sim.
+
+## Prerequisites
+
+1. **NVIDIA Isaac Sim**: You must have NVIDIA Isaac Sim installed on a machine with a supported NVIDIA GPU.
+   * This can be a local machine or a remote server (cloud instance).
+2. **Environment**: Run these scripts using the Python environment provided by Isaac Sim (usually `./python.sh` in the Isaac Sim directory).
+
+## Usage
+
+### 1. Start the Simulation (Remote or Local)
+
+On the machine with the GPU (Server):
+
+```bash
+# From the root of the repo, assuming `isaac_sim_python` is an alias for Isaac Sim's Python
+isaac_sim_python software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
+```
+
+* The simulation listens on port **5555** (Commands) and publishes on port **5556** (Observations).
+* Ensure these ports are open if connecting directly.
+
+### 2. Teleoperation (Client)
+
+You can run the teleoperation script on your local machine (e.g., a MacBook) to control the robot remotely.
+
+#### Option A: Direct Connection (VPN/LAN)
+
+If you can ping the server IP directly:
+
+```bash
+# Replace 192.168.1.100 with your server's IP
+python software/examples/alohamini/standalone_teleop.py --ip 192.168.1.100
+```
+
+#### Option B: SSH Tunneling (Recommended for Cloud)
+
+If your server is behind a firewall or on the cloud, use SSH tunneling to forward the ports.
+
+1. **Establish Tunnel**:
+   ```bash
+   # Forward local ports 5555/5556 to remote localhost:5555/5556
+   ssh -L 5555:localhost:5555 -L 5556:localhost:5556 user@remote-server-ip
+   ```
+
+2. **Run Teleop locally**:
+   ```bash
+   # Connect to localhost (which is tunneled to the remote machine)
+   python software/examples/alohamini/standalone_teleop.py --ip 127.0.0.1
+   ```
+
+### 3. Verify Connection
+
+You can use the verify script to check that observations are being received.
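+
+Each observation arrives as a single JSON object: joint positions as floats under `<joint>.pos` keys, the base pose under `x_pos`/`y_pos`, and camera frames as base64-encoded JPEG strings keyed by camera name (e.g. `head_front`). As a quick manual check you can subscribe and decode one message yourself; the sketch below assumes the default observation port 5556 and a local or tunneled connection:
+
+```python
+import base64
+import json
+
+import cv2
+import numpy as np
+import zmq
+
+ctx = zmq.Context()
+sub = ctx.socket(zmq.SUB)
+sub.setsockopt(zmq.SUBSCRIBE, b"")
+sub.connect("tcp://127.0.0.1:5556")  # or your server's IP
+
+if sub.poll(5000):  # wait up to 5 s for the first message
+    obs = json.loads(sub.recv_string())
+    print(sorted(obs.keys()))
+    if "head_front" in obs:
+        jpg = np.frombuffer(base64.b64decode(obs["head_front"]), dtype=np.uint8)
+        img = cv2.imdecode(jpg, cv2.IMREAD_COLOR)
+        print("head_front frame:", None if img is None else img.shape)
+else:
+    print("No observation received - is the simulation playing?")
+```
+
+The bundled verify script performs a similar check: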
+
+```bash
+# Replace <SERVER_IP> with the simulation host (or 127.0.0.1 when using the SSH tunnel)
+python software/examples/debug/verify_sim_output.py --ip <SERVER_IP>
+```
diff --git a/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py b/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
new file mode 100644
index 0000000..e0c29f3
--- /dev/null
+++ b/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
@@ -0,0 +1,240 @@
+import base64
+import json
+import os
+import sys
+
+import cv2
+import numpy as np
+import zmq
+from omni.isaac.kit import SimulationApp
+
+# Configuration
+CONFIG = {
+    "width": 1280,
+    "height": 720,
+    "window_width": 1920,
+    "window_height": 1080,
+    "headless": False,
+    "renderer": "RayTracedLighting",
+    "display_options": 3286,  # Show Grid
+}
+
+# Start SimulationApp
+simulation_app = SimulationApp(CONFIG)
+
+# Imports after SimulationApp
+import omni.isaac.core.utils.prims as prim_utils  # noqa: E402
+import omni.isaac.core.utils.stage as stage_utils  # noqa: E402
+from omni.isaac.core import World  # noqa: E402
+from omni.isaac.core.articulations import ArticulationSubset  # noqa: E402
+from omni.isaac.core.robots import Robot  # noqa: E402
+from omni.isaac.core.utils.rotations import euler_angles_to_quat  # noqa: E402
+from omni.isaac.sensor import Camera  # noqa: E402
+from pxr import Gf, UsdGeom  # noqa: E402, F401
+
+# Add repo root to path
+repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
+sys.path.append(repo_root)
+
+# Locate URDF
+URDF_PATH = os.path.join(repo_root, "software/src/lerobot/robots/alohamini/alohamini.urdf")
+
+class IsaacAlohaMini:
+    def __init__(self, world, urdf_path):
+        self.world = world
+        self.urdf_path = urdf_path
+        self.robot_prim_path = "/World/AlohaMini"
+        self.robot = None
+        self.cameras = {}
+
+        self.setup_scene()
+
+    def setup_scene(self):
+        # Import URDF
+        from omni.kit.commands import execute
+
+        # The URDF importer expects an ImportConfig object rather than a plain dict,
+        # so create one via the importer command and set the options on it.
+        _, import_config = execute("URDFCreateImportConfig")
+        import_config.merge_fixed_joints = False
+        import_config.fix_base = False
+        import_config.make_default_prim = False
+        import_config.create_physics_scene = True
+
+        success, prim_path = execute(
+            "URDFParseAndImportFile",
+            urdf_path=self.urdf_path,
+            import_config=import_config,
+        )
+
+        if success:
+            # The importer places the robot at the stage root (e.g. /alohamini).
+            # We reference it there instead of moving it under /World/AlohaMini.
+            pass
+        else:
+            print(f"Failed to import URDF from {self.urdf_path}")
+            sys.exit(1)
+
+        # Find the robot prim (assuming name 'alohamini' from the URDF)
+        # and wrap it in an Articulation.
+        self.robot = Robot(prim_path="/alohamini", name="alohamini")
+        self.world.scene.add(self.robot)
+
+        # Add Cameras
+        self.add_camera("head_front", "/alohamini/base_link/front_cam", np.array([0.2, 0, 0.2]), np.array([0, 0, 0]))
+        self.add_camera("head_top", "/alohamini/base_link/top_cam", np.array([0, 0, 0.5]), np.array([0, 90, 0]))
+
+    def add_camera(self, name, prim_path, translation, rotation_euler_deg):
+        # rotation in sim is usually quaternion
+        # rotation_euler_deg: [x, y, z]
+        rot_quat = euler_angles_to_quat(np.radians(rotation_euler_deg))
+
+        camera = Camera(
+            prim_path=prim_path,
+            position=translation,
+            frequency=30,
+            resolution=(640, 480),
+            orientation=rot_quat
+        )
+        camera.initialize()
+        self.cameras[name] = camera
+
+    def set_joint_positions(self, joint_positions: dict):
+        # joint_positions: dict of joint_name -> position
+        # We need to map this to the robot's dof indices or names.
+        # For simplicity, we can use the high-level Articulation API if names match.
+
+        # Note: self.robot.set_joint_positions takes a numpy array; indices are optional.
+ # We need to find indices for names + + current_joint_pos = self.robot.get_joint_positions() + # This requires known order. Let's build a map once initialized + if not hasattr(self, "dof_indices"): + self.dof_names = [self.robot.get_dof_name(i) for i in range(self.robot.num_dof)] + self.dof_indices = {name: i for i, name in enumerate(self.dof_names)} + + # Construct target array + # Start with current to keep uncommanded joints steady + target_pos = current_joint_pos.copy() + + for name, pos in joint_positions.items(): + if name in self.dof_indices: + idx = self.dof_indices[name] + target_pos[idx] = pos + + self.robot.set_joint_positions(target_pos) + + def set_base_velocity(self, vx, vy, vtheta): + # Set root velocity + # chassis frame: x forward, y left + self.robot.set_linear_velocity(np.array([vx, vy, 0])) + self.robot.set_angular_velocity(np.array([0, 0, vtheta])) + + def get_observations(self): + obs = {} + + # Joints + joint_pos = self.robot.get_joint_positions() + if not hasattr(self, "dof_names"): + self.dof_names = [self.robot.get_dof_name(i) for i in range(self.robot.num_dof)] + + for i, name in enumerate(self.dof_names): + obs[f"{name}.pos"] = float(joint_pos[i]) + + # Base (Ground Truth for now) + pose = self.robot.get_world_pose() + obs["x_pos"] = float(pose[0][0]) + obs["y_pos"] = float(pose[0][1]) + # Theta from quaternion ... + + # Cameras + for name, cam in self.cameras.items(): + if cam.is_new_frame_available(): + rgba = cam.get_rgba()[:, :, :3] # Drop alpha + # Convert to BGR for compatibility with cv2/existing pipeline if needed, + # but existing pipeline seems to just encode to jpg. + # cv2 uses BGR, isaac returns RGB. + bgr = cv2.cvtColor(rgba, cv2.COLOR_RGB2BGR) + obs[name] = bgr + + return obs + +def main(): + world = World(stage_units_in_meters=1.0) + world.scene.add_default_ground_plane() + + aloha = IsaacAlohaMini(world, URDF_PATH) + + world.reset() + + # ZMQ Setup + # Ports from standalone_sim.py + PORT_OBS = 5556 + PORT_CMD = 5555 + + context = zmq.Context() + socket_pub = context.socket(zmq.PUB) + socket_pub.setsockopt(zmq.CONFLATE, 1) + socket_pub.bind(f"tcp://*:{PORT_OBS}") + + socket_sub = context.socket(zmq.PULL) + socket_sub.setsockopt(zmq.CONFLATE, 1) + socket_sub.bind(f"tcp://*:{PORT_CMD}") + + print(f"Isaac Sim AlohaMini running. Ports: OBS={PORT_OBS}, CMD={PORT_CMD}") + + while simulation_app.is_running(): + world.step(render=True) + + if not world.is_playing(): + continue + + # 1. Receive Commands + try: + msg = socket_sub.recv_string(zmq.NOBLOCK) + cmd = json.loads(msg) + + # Parse command + joint_cmds = {} + vx, vy, vth = 0, 0, 0 + + for k, v in cmd.items(): + if k.endswith(".pos"): + joint_name = k.replace(".pos", "") + joint_cmds[joint_name] = v + elif k == "x.vel": + vx = v + elif k == "y.vel": + vy = v + elif k == "theta.vel": + vth = v + elif k == "lift_axis.height_mm": + joint_cmds["lift_axis"] = v + + aloha.set_joint_positions(joint_cmds) + aloha.set_base_velocity(vx, vy, vth) + + except zmq.Again: + pass + except Exception as e: + print(f"Error receiving: {e}") + + # 2. 
Get Obs & Publish
+        obs = aloha.get_observations()
+        encoded_obs = {}
+
+        # Process images: numpy arrays are camera frames, everything else passes through as-is
+        for k, v in obs.items():
+            if isinstance(v, np.ndarray):
+                # Encode camera frames as base64 JPEG for transport over ZMQ
+                ret, buffer = cv2.imencode(".jpg", v, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
+                if ret:
+                    encoded_obs[k] = base64.b64encode(buffer).decode("utf-8")
+            else:
+                encoded_obs[k] = v
+
+        socket_pub.send_string(json.dumps(encoded_obs))
+
+    simulation_app.close()
+
+if __name__ == "__main__":
+    main()
diff --git a/software/examples/alohamini/standalone_teleop.py b/software/examples/alohamini/standalone_teleop.py
index 7792076..275710c 100644
--- a/software/examples/alohamini/standalone_teleop.py
+++ b/software/examples/alohamini/standalone_teleop.py
@@ -1,14 +1,16 @@
-import sys
+import argparse
+import json
 import select
+import sys
 import termios
-import tty
 import time
-import json
+import tty
+
 import zmq
 
 # --- Config ---
 CMD_PORT = 5555
-IP = "127.0.0.1"
+DEFAULT_IP = "127.0.0.1"
 
 msg = """
 Reading from the keyboard and Publishing to ZMQ!
@@ -61,14 +63,18 @@ def limit(val, min_val, max_val):
     return max(min(val, max_val), min_val)
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ip", default=DEFAULT_IP, help="IP address of the robot/simulation")
+    args = parser.parse_args()
+
     settings = termios.tcgetattr(sys.stdin)
 
     # ZMQ Setup
     context = zmq.Context()
-    print(f"Connecting to command port {CMD_PORT}...")
+    print(f"Connecting to command port {CMD_PORT} at {args.ip}...")
     cmd_socket = context.socket(zmq.PUSH)
     cmd_socket.setsockopt(zmq.CONFLATE, 1)
-    cmd_socket.connect(f"tcp://{IP}:{CMD_PORT}")
+    cmd_socket.connect(f"tcp://{args.ip}:{CMD_PORT}")
 
     # State
     status = 0
diff --git a/software/examples/debug/verify_sim_output.py b/software/examples/debug/verify_sim_output.py
new file mode 100644
index 0000000..447beed
--- /dev/null
+++ b/software/examples/debug/verify_sim_output.py
@@ -0,0 +1,47 @@
+import argparse
+import json
+
+import zmq
+
+def verify():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ip", default="127.0.0.1", help="IP address of the simulation")
+    args = parser.parse_args()
+
+    context = zmq.Context()
+    socket = context.socket(zmq.SUB)
+    socket.setsockopt(zmq.SUBSCRIBE, b"")
+    socket.connect(f"tcp://{args.ip}:5556")
+
+    print("Waiting for observation...")
+    try:
+        # Wait for a message with timeout
+        if socket.poll(5000):
+            msg = socket.recv_string()
+            data = json.loads(msg)
+
+            print("Received observation!")
+            print(f"Keys: {list(data.keys())}")
+
+            # The sim publishes joint states as "<joint>.pos" floats and camera
+            # frames (e.g. "head_front") as base64-encoded JPEG strings.
+            if "head_front" in data:
+                print("PASS: 'head_front' camera frame received")
+                return True
+            else:
+                print("FAIL: 'head_front' camera frame missing")
+                return False
+        else:
+            print("FAIL: No message received in 5s")
+            return False
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+    finally:
+        socket.close()
+        context.term()
+
+if __name__ == "__main__":
+    verify()

From 3202ca8b4082f151f43b0a2708e36a5db4767043 Mon Sep 17 00:00:00 2001
From: blankey1337 <42594751+blankey1337@users.noreply.github.com>
Date: Fri, 28 Nov 2025 13:34:45 -0800
Subject: [PATCH 2/2] feat: add remote stream visualization and domain randomization

- Add `visualize_stream.py`: Simple OpenCV viewer to see the remote robot's camera feed on a local machine.
- Update `isaac_alohamini_env.py`: Add domain randomization (camera pose perturbation) to improve Sim2Real transfer.

This enables a "Heads-Up" teleoperation workflow where the user views the sim on their laptop while controlling it remotely.
---
 .../isaac_sim/isaac_alohamini_env.py          |  6 ++
 .../examples/alohamini/visualize_stream.py    | 68 +++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 software/examples/alohamini/visualize_stream.py

diff --git a/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py b/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
index e0c29f3..850b29d 100644
--- a/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
+++ b/software/examples/alohamini/isaac_sim/isaac_alohamini_env.py
@@ -84,6 +84,12 @@ def setup_scene(self):
         self.add_camera("head_top", "/alohamini/base_link/top_cam", np.array([0, 0, 0.5]), np.array([0, 90, 0]))
 
     def add_camera(self, name, prim_path, translation, rotation_euler_deg):
+        # Apply domain randomization to the camera pose:
+        # a small random perturbation to translation (+- 2 cm) and rotation (+- 2 deg).
+        # This helps the model become robust to slight calibration errors in the real world.
+        translation += np.random.uniform(-0.02, 0.02, size=3)
+        rotation_euler_deg += np.random.uniform(-2, 2, size=3)
+
         # rotation in sim is usually quaternion
         # rotation_euler_deg: [x, y, z]
         rot_quat = euler_angles_to_quat(np.radians(rotation_euler_deg))
diff --git a/software/examples/alohamini/visualize_stream.py b/software/examples/alohamini/visualize_stream.py
new file mode 100644
index 0000000..3b225d0
--- /dev/null
+++ b/software/examples/alohamini/visualize_stream.py
@@ -0,0 +1,68 @@
+import argparse
+import base64
+import json
+
+import cv2
+import numpy as np
+import zmq
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ip", default="127.0.0.1", help="IP of the simulation/robot")
+    parser.add_argument("--port", type=int, default=5556, help="Observation ZMQ port")
+    args = parser.parse_args()
+
+    context = zmq.Context()
+    socket = context.socket(zmq.SUB)
+    socket.setsockopt(zmq.SUBSCRIBE, b"")
+    # CONFLATE must be set before connect so only the latest message is kept
+    socket.setsockopt(zmq.CONFLATE, 1)
+    socket.connect(f"tcp://{args.ip}:{args.port}")
+
+    print(f"Listening for video on {args.ip}:{args.port}...")
+    print("Press 'q' to quit.")
+
+    try:
+        while True:
+            # Non-blocking check for a new observation message
+            if socket.poll(100):
+                msg = socket.recv_string()
+                data = json.loads(msg)
+
+                # Look for images
+                images = []
+                for k, v in data.items():
+                    # Heuristic: long string values are assumed to be base64-encoded JPEG frames
+                    if isinstance(v, str) and len(v) > 1000:
+                        try:
+                            # Decode base64 -> JPEG bytes -> BGR image
+                            jpg_original = base64.b64decode(v)
+                            jpg_as_np = np.frombuffer(jpg_original, dtype=np.uint8)
+                            img = cv2.imdecode(jpg_as_np, cv2.IMREAD_COLOR)
+                            if img is not None:
+                                # Label each frame with its camera name
+                                cv2.putText(img, k, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+                                images.append(img)
+                        except Exception:
+                            pass
+
+                if images:
+                    # Resize everything to the smallest height so the frames can be stacked side by side
+                    h_min = min(img.shape[0] for img in images)
+                    images_resized = [cv2.resize(img, (int(img.shape[1] * h_min / img.shape[0]), h_min)) for img in images]
+
+                    combined = np.hstack(images_resized)
+                    cv2.imshow("Remote Stream", combined)
+
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+
+    except KeyboardInterrupt:
+        pass
+    finally:
+        socket.close()
+        context.term()
+        cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+    main()
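+
+# Example usage (a sketch, assuming isaac_alohamini_env.py is publishing observations on the
+# default port 5556; with the SSH tunnel from the README, point --ip at 127.0.0.1):
+#
+#   python software/examples/alohamini/visualize_stream.py --ip 127.0.0.1 --port 5556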