diff --git a/README.md b/README.md
index 13b508a..fe1612a 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,9 @@ Below is a list of all tutorials available in this repository:
 - 🔗 [Related Blog Post](https://foxglove.dev/blog/converting-the-lafan1-retargeting-dataset-to-mcap)
 - 🎥 [Video](https://youtu.be/YlAblmWLVqs)
 - 📊 [Visualize](https://app.foxglove.dev/~/view?ds=foxglove-sample-stream&ds.recordingId=rec_0dVfPhEze7PkjHHi&layoutId=lay_0dVfPwEqAQ5JMmle)
+### [Visualize Open X-Embodiment dataset](datasets/open_x_embodiment/README.md)
+- 📝 Use the Foxglove SDK to visualize the berkeley_autolab_ur5 dataset
+- 🔗 [Related Blog Post](https://foxglove.dev/blog/foxglove-open-x-embodiment-visualization)
 ### [SubPipe Dataset to MCAP](datasets/subpipe_mcap/README.md)
 - 📝 The underwater dataset, containing SLAM, object detection and image segmentation
 - 🎥 [Video](https://youtu.be/jJej6aT1jKg)
diff --git a/datasets/open_x_embodiment/README.md b/datasets/open_x_embodiment/README.md
new file mode 100644
index 0000000..a25317a
--- /dev/null
+++ b/datasets/open_x_embodiment/README.md
@@ -0,0 +1,25 @@
+---
+title: "Visualize Open X-Embodiment dataset"
+blog_post_url: "https://foxglove.dev/blog/foxglove-open-x-embodiment-visualization"
+short_description: "Use the Foxglove SDK to visualize the berkeley_autolab_ur5 dataset"
+---
+# Visualizing Open X-Embodiment datasets in Foxglove
+
+These tutorial files show how to visualize an Open X-Embodiment dataset in Foxglove.
+
+To run the script, install the following Python dependencies:
+
+```
+tensorflow
+gcsfs
+foxglove-sdk
+tensorflow_datasets
+```
+
+We include a layout, `datasets/open_x_embodiment/layouts/berkeley_autloab_ur5_layout.json`, that you can use to view all the data covered in this tutorial.
+
+## Running the script
+
+1. Run the Python script: `python3 visualize_berkeley_autolab_ur5.py`
+2. In Foxglove, open a remote connection to `ws://localhost:8765`
+3. 
Arrange panels to visualize the data, or use the layout included in this tutorial: `open_x_embodiment/layouts/berkeley_autloab_ur5_layout.json` diff --git a/datasets/open_x_embodiment/layouts/berkeley_autloab_ur5_layout.json b/datasets/open_x_embodiment/layouts/berkeley_autloab_ur5_layout.json new file mode 100644 index 0000000..287fba2 --- /dev/null +++ b/datasets/open_x_embodiment/layouts/berkeley_autloab_ur5_layout.json @@ -0,0 +1,307 @@ +{ + "configById": { + "3D!1dyg4vd": { + "cameraState": { + "perspective": true, + "distance": 2.4417309747357656, + "phi": 58.152958152968374, + "thetaOffset": 125.34632034632459, + "targetOffset": [ + 0.10276355966644965, + 0.23322023388093666, + 9.096916080494186e-17 + ], + "target": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": { + "grid": { + "visible": true, + "frameLocked": true, + "label": "Grid", + "instanceId": "5516aa3b-d32f-499c-a57c-d6ea3ba1930c", + "layerId": "foxglove.Grid", + "size": 10, + "divisions": 10, + "lineWidth": 1, + "color": "#248eff", + "position": [ + 0, + 0, + 0 + ], + "rotation": [ + 0, + 0, + 0 + ] + } + }, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": {} + }, + "Image!3fv1ay8": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/image" + } + }, + "Image!37c1240": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/image_with_depth" + } + }, + "Image!3lzq9ud": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + 
"poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/hand_image" + } + }, + "Plot!3o8fstj": { + "paths": [ + { + "value": "/gripper_state.value", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#f5774d" + } + ], + "showXAxisLabels": true, + "showYAxisLabels": true, + "showLegend": true, + "legendDisplay": "floating", + "showPlotValuesInLegend": false, + "isSynced": true, + "xAxisVal": "timestamp", + "sidebarDimension": 240 + }, + "Plot!3ues8i": { + "paths": [ + { + "value": "/joint_state.joint5", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#4e98e2" + }, + { + "value": "/joint_state.joint0", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#f5774d" + }, + { + "value": "/joint_state.joint1", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#f7df71" + }, + { + "value": "/joint_state.joint2", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#5cd6a9" + }, + { + "value": "/joint_state.joint3", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#61cbff" + }, + { + "value": "/joint_state.joint4", + "enabled": true, + "timestampMethod": "receiveTime", + "color": "#a395e2" + } + ], + "showXAxisLabels": true, + "showYAxisLabels": true, + "showLegend": true, + "legendDisplay": "floating", + "showPlotValuesInLegend": false, + "isSynced": true, + "xAxisVal": "timestamp", + "sidebarDimension": 240 + }, + "RawMessages!26x5ilc": { + "diffEnabled": false, + "diffMethod": "custom", + "diffTopicPath": "", + "showFullMessageForDiff": false, + "topicPath": "/natural_language_instruction.text", + "fontSize": 30 + } + }, + "globalVariables": {}, + "userNodes": {}, + "playbackConfig": { + "speed": 1 + }, + "layout": { + "first": { + "first": "3D!1dyg4vd", + "second": { + "first": "Image!3fv1ay8", + "second": "Image!37c1240", + "direction": "row" + }, + "direction": "row" + }, + "second": { + "first": { + "first": "Image!3lzq9ud", + "second": "Plot!3o8fstj", + "direction": "column", + "splitPercentage": 66.08187134502924 + }, + "second": { + "first": "Plot!3ues8i", + "second": "RawMessages!26x5ilc", + "direction": "column", + "splitPercentage": 82.62322472848788 + }, + "direction": "row" + }, + "direction": "column", + "splitPercentage": 41.10275689223057 + } +} \ No newline at end of file diff --git a/datasets/open_x_embodiment/visualize_berkeley_autolab_ur5.py b/datasets/open_x_embodiment/visualize_berkeley_autolab_ur5.py new file mode 100644 index 0000000..8b77c11 --- /dev/null +++ b/datasets/open_x_embodiment/visualize_berkeley_autolab_ur5.py @@ -0,0 +1,191 @@ +import tensorflow_datasets as tfds +import foxglove +from foxglove import Channel +from foxglove.schemas import ( + RawImage, + FrameTransform, + Vector3, + Quaternion, +) +from foxglove.channels import ( + RawImageChannel, + FrameTransformChannel, +) +import time + +DATASET = "berkeley_autolab_ur5" +TARGET_EPISODE = 341 +CONTROL_RATE_HZ = 5 # Depends on the dataset! 
+
+
+def dataset2path(dataset_name):
+    if dataset_name == "robo_net":
+        version = "1.0.0"
+    elif dataset_name == "language_table":
+        version = "0.0.1"
+    else:
+        version = "0.1.0"
+    return f"gs://gresearch/robotics/{dataset_name}/{version}"
+
+
+def print_step_info(step, i):
+    print(f"Step {i}:")
+    print(f"  image shape: {step['observation']['image'].shape}")
+    print(f"  hand_image shape: {step['observation']['hand_image'].shape}")
+    print(f"  image_with_depth shape: {step['observation']['image_with_depth'].shape}")
+    print(
+        f"  natural language instruction: {step['observation']['natural_language_instruction']}"
+    )
+    print(f"  Action rotation delta: {step['action']['rotation_delta']}")
+    print(f"  Action world vector: {step['action']['world_vector']}")
+    print(f"  Robot state: {step['observation']['robot_state']}")
+
+filename = f"{DATASET}_episode_{TARGET_EPISODE}.mcap"
+# writer = foxglove.open_mcap(filename)  # Uncomment to also record the stream to an MCAP file
+server = foxglove.start_server()
+
+
+b = tfds.builder_from_directory(builder_dir=dataset2path(DATASET))
+ds = b.as_dataset(split="train[{}:{}]".format(TARGET_EPISODE, TARGET_EPISODE + 1))
+episode = next(iter(ds))
+
+print("Successfully loaded the dataset: ", DATASET)
+
+assert "steps" in episode, "The episode does not contain a 'steps' key."
+print(f"Number of steps in the episode: {len(episode['steps'])}")
+
+
+language_instruction_schema = {
+    "type": "object",
+    "properties": {
+        "text": {
+            "type": "string",
+        },
+    },
+}
+float_schema = {
+    "type": "object",
+    "properties": {
+        "value": {
+            "type": "number",
+            "format": "float",
+        },
+    },
+}
+joint_state_schema = {
+    "type": "object",
+    "properties": {
+        "joint0": {"type": "number", "format": "float"},
+        "joint1": {"type": "number", "format": "float"},
+        "joint2": {"type": "number", "format": "float"},
+        "joint3": {"type": "number", "format": "float"},
+        "joint4": {"type": "number", "format": "float"},
+        "joint5": {"type": "number", "format": "float"},
+    },
+}
+
+language_instruction_chan = Channel(
+    topic="/natural_language_instruction", schema=language_instruction_schema
+)
+
+image_chan = RawImageChannel(topic="/image")
+hand_image_chan = RawImageChannel(topic="/hand_image")
+image_with_depth_chan = RawImageChannel(topic="/image_with_depth")
+transform_chan = FrameTransformChannel(topic="/tf")
+gripper_chan = Channel(
+    topic="/gripper_state",
+    schema=float_schema,
+)
+joint_state_chan = Channel(
+    topic="/joint_state",
+    schema=joint_state_schema,
+)
+
+try:
+    while True:  # Loop forever so the episode keeps replaying; remove this loop to play it once
+        for i, step in enumerate(episode["steps"]):
+            print_step_info(step, i)
+
+            # Publish the natural language instruction
+            instruction_str = (
+                step["observation"]["natural_language_instruction"]
+                .numpy()
+                .decode("utf-8")
+            )
+            instruction_msg = {"text": instruction_str}
+            language_instruction_chan.log(instruction_msg)
+
+            # Publish the image
+            image_msg = RawImage(
+                data=step["observation"]["image"].numpy().tobytes(),
+                width=step["observation"]["image"].shape[1],
+                height=step["observation"]["image"].shape[0],
+                step=step["observation"]["image"].shape[1] * 3,  # Assuming RGB image
+                encoding="rgb8",
+            )
+            image_chan.log(image_msg)
+
+            # Publish the hand image
+            hand_image_msg = RawImage(
+                data=step["observation"]["hand_image"].numpy().tobytes(),
+                width=step["observation"]["hand_image"].shape[1],
+                height=step["observation"]["hand_image"].shape[0],
+                step=step["observation"]["hand_image"].shape[1] * 3,  # Assuming RGB image
+                encoding="rgb8",
+            )
+            hand_image_chan.log(hand_image_msg)
+
+            # Publish the image with depth
+            image_with_depth_msg = RawImage(
+                data=step["observation"]["image_with_depth"].numpy().tobytes(),
+                width=step["observation"]["image_with_depth"].shape[1],
+                height=step["observation"]["image_with_depth"].shape[0],
+                step=step["observation"]["image_with_depth"].shape[1] * 4,  # Assuming 32FC1 image
+                encoding="32FC1",
+            )
+            image_with_depth_chan.log(image_with_depth_msg)
+
+            # Publish the end-effector transform
+            robot_state = step["observation"]["robot_state"].numpy()
+            transform_msg = FrameTransform(
+                parent_frame_id="robot_base",
+                child_frame_id="end_effector",
+                translation=Vector3(
+                    x=float(robot_state[6]),
+                    y=float(robot_state[7]),
+                    z=float(robot_state[8]),
+                ),
+                rotation=Quaternion(
+                    x=float(robot_state[9]),
+                    y=float(robot_state[10]),
+                    z=float(robot_state[11]),
+                    w=float(robot_state[12]),
+                ),
+            )
+            transform_chan.log(transform_msg)
+
+            # Publish the gripper state
+            gripper_msg = {"value": float(robot_state[13])}
+            gripper_chan.log(gripper_msg)
+
+            # Publish the joint state
+            joint_state_msg = {
+                "joint0": float(robot_state[0]),
+                "joint1": float(robot_state[1]),
+                "joint2": float(robot_state[2]),
+                "joint3": float(robot_state[3]),
+                "joint4": float(robot_state[4]),
+                "joint5": float(robot_state[5]),
+            }
+            joint_state_chan.log(joint_state_msg)
+
+            time.sleep(1 / CONTROL_RATE_HZ)
+
+except KeyboardInterrupt:
+    print("Keyboard interrupt received. Stopping the server.")
+finally:
+    server.stop()
+    print("Server stopped.")
+    # writer.close()
+    # print("MCAP file closed.")