From 5d1a85b0fabefd22ea58262c883c3b9fb61b4380 Mon Sep 17 00:00:00 2001
From: Ryan Co <copierceryan@gmail.com>
Date: Mon, 9 Mar 2026 23:15:46 -0400
Subject: [PATCH] pi zarr training configs

---
 egomimic/algo/pi.py                           |  22 ++--
 .../hydra/launcher/submitit_pace.yaml         |   2 +-
 .../model/{pi0.5.yaml => pi0.5_bc_aria.yaml}  |   2 +-
 .../hydra_configs/model/pi0.5_bc_eva.yaml     |  56 +++++++++
 .../hydra_configs/model/pi0.5_bc_mecka.yaml   |  56 +++++++++
 .../hydra_configs/model/pi0.5_bc_scale.yaml   |  56 +++++++++
 .../model/pi0.5_cotrain_eva_aria.yaml         |  64 ++++++++++
 egomimic/hydra_configs/train_zarr_pi.yaml     | 114 ++++++++++++++++++
 egomimic/utils/aws/aws_data_utils.py          |  15 ++-
 9 files changed, 374 insertions(+), 13 deletions(-)
 rename egomimic/hydra_configs/model/{pi0.5.yaml => pi0.5_bc_aria.yaml} (90%)
 create mode 100644 egomimic/hydra_configs/model/pi0.5_bc_eva.yaml
 create mode 100644 egomimic/hydra_configs/model/pi0.5_bc_mecka.yaml
 create mode 100644 egomimic/hydra_configs/model/pi0.5_bc_scale.yaml
 create mode 100644 egomimic/hydra_configs/model/pi0.5_cotrain_eva_aria.yaml
 create mode 100644 egomimic/hydra_configs/train_zarr_pi.yaml

diff --git a/egomimic/algo/pi.py b/egomimic/algo/pi.py
index f28158bb..27fef0d7 100644
--- a/egomimic/algo/pi.py
+++ b/egomimic/algo/pi.py
@@ -86,19 +86,19 @@ def __init__(
             self.camera_keys[embodiment_id] = []
             self.proprio_keys[embodiment_id] = []
             self.lang_keys[embodiment_id] = []
-            for key in data_schematic.keys_of_type("action_keys"):
+            for key in data_schematic.keys_of_type("action_keys", embodiment_id):
                 if (
                     data_schematic.is_key_with_embodiment(key, embodiment_id)
                     and key == self.ac_keys[embodiment]
                 ):
                     self.ac_keys[embodiment_id] = key
-            for key in data_schematic.keys_of_type("camera_keys"):
+            for key in data_schematic.keys_of_type("camera_keys", embodiment_id):
                 if data_schematic.is_key_with_embodiment(key, embodiment_id):
                     self.camera_keys[embodiment_id].append(key)
-            for key in data_schematic.keys_of_type("proprio_keys"):
+            for key in data_schematic.keys_of_type("proprio_keys", embodiment_id):
                 if data_schematic.is_key_with_embodiment(key, embodiment_id):
                     self.proprio_keys[embodiment_id].append(key)
-            for key in data_schematic.keys_of_type("lang_keys"):
+            for key in data_schematic.keys_of_type("lang_keys", embodiment_id):
                 if data_schematic.is_key_with_embodiment(key, embodiment_id):
                     self.lang_keys[embodiment_id].append(key)
 
@@ -165,12 +165,11 @@ def process_batch_for_training(self, batch):
         """
         processed_batch = {}
 
-        for embodiment_id, _batch in batch.items():
+        for embodiment_name, _batch in batch.items():
+            embodiment_id = get_embodiment_id(embodiment_name)
             processed_batch[embodiment_id] = {}
             for key, value in _batch.items():
-                key_name = self.data_schematic.lerobot_key_to_keyname(
-                    key, embodiment_id
-                )
+                key_name = self.data_schematic.zarr_key_to_keyname(key, embodiment_id)
                 if key_name is not None:
                     processed_batch[embodiment_id][key_name] = value
 
@@ -196,6 +195,13 @@ def process_batch_for_training(self, batch):
             processed_batch[embodiment_id] = self.data_schematic.normalize_data(
                 processed_batch[embodiment_id], embodiment_id
             )
+            processed_batch[embodiment_id]["embodiment"] = torch.tensor(
+                [embodiment_id], device=device, dtype=torch.int64
+            )
+
+            for key, value in processed_batch[embodiment_id].items():
+                if isinstance(value, torch.Tensor) and value.dtype == torch.float64:
+                    processed_batch[embodiment_id][key] = value.float()
 
         if not processed_batch:
             raise ValueError(
diff --git a/egomimic/hydra_configs/hydra/launcher/submitit_pace.yaml b/egomimic/hydra_configs/hydra/launcher/submitit_pace.yaml
index 2d9cd957..34a1294b 100644
--- a/egomimic/hydra_configs/hydra/launcher/submitit_pace.yaml
+++ b/egomimic/hydra_configs/hydra/launcher/submitit_pace.yaml
@@ -11,7 +11,7 @@ cpus_per_task: 8                                   # Number of CPUs per task (ma
 nodes: ${launch_params.nodes}                       # Number of nodes
 tasks_per_node: ${launch_params.gpus_per_node}      # Use variable for tasks per node
 gres: "gpu:h200:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count (h100 for H100 GPUs)
-qos: "short"                                        # Slurm QoS
+qos: "inferno"                                        # Slurm QoS
 mem_per_gpu: 250G
 timeout_min: 2880                                   # Timeout in minutes (48 hours)
 # exclude: "protocol, puma"     # Nodes to exclude
diff --git a/egomimic/hydra_configs/model/pi0.5.yaml b/egomimic/hydra_configs/model/pi0.5_bc_aria.yaml
similarity index 90%
rename from egomimic/hydra_configs/model/pi0.5.yaml
rename to egomimic/hydra_configs/model/pi0.5_bc_aria.yaml
index c1af0cc4..0e9a5966 100644
--- a/egomimic/hydra_configs/model/pi0.5.yaml
+++ b/egomimic/hydra_configs/model/pi0.5_bc_aria.yaml
@@ -20,7 +20,7 @@ robomimic_model:
       
   config:      
     pytorch_training_precision: bfloat16
-    pytorch_weight_path: /storage/home/hcoda1/5/rpunamiya6/cedar-dx/rpunamiya6/Projects/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
+    pytorch_weight_path: /storage/project/r-dxu345-0/rco3/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
     model:
       pi05: true
       action_dim: 32
diff --git a/egomimic/hydra_configs/model/pi0.5_bc_eva.yaml b/egomimic/hydra_configs/model/pi0.5_bc_eva.yaml
new file mode 100644
index 00000000..d1886d53
--- /dev/null
+++ b/egomimic/hydra_configs/model/pi0.5_bc_eva.yaml
@@ -0,0 +1,56 @@
+_target_: egomimic.pl_utils.pl_model.ModelWrapper
+robomimic_model:
+  _target_: egomimic.algo.pi.PI
+  data_schematic: _${data.dataset.data_schematic}
+  camera_transforms:
+    _target_: egomimic.utils.egomimicUtils.CameraTransforms
+    intrinsics_key: "base" # use "base_half" instead when training on half-resolution images
+    extrinsics_key: "x5Dec13_2"
+  ac_keys:
+    eva_bimanual: "actions_cartesian"
+  domains: ["eva_bimanual"]
+
+  action_converters:
+    rules:
+      EVA_BIMANUAL:
+        _target_: egomimic.utils.action_utils.RobotBimanualCartesianEuler
+    # optional fallback if no match is found
+    fallback:
+      _target_: egomimic.utils.action_utils.BaseActionConverter
+      
+  config:      
+    pytorch_training_precision: bfloat16
+    pytorch_weight_path: /storage/project/r-dxu345-0/rco3/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
+    model:
+      pi05: true
+      action_dim: 32
+      action_horizon: 100
+      max_token_len: 180
+
+  train_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+  eval_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+
+optimizer:
+  _target_: torch.optim.AdamW
+  _partial_: true
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-8
+  weight_decay: 0.0
+
+scheduler:
+  _target_: transformers.get_cosine_schedule_with_warmup
+  _partial_: true
+  num_warmup_steps: 1000
+  num_training_steps: 100000
+  num_cycles: 0.5
\ No newline at end of file
diff --git a/egomimic/hydra_configs/model/pi0.5_bc_mecka.yaml b/egomimic/hydra_configs/model/pi0.5_bc_mecka.yaml
new file mode 100644
index 00000000..e7d2117c
--- /dev/null
+++ b/egomimic/hydra_configs/model/pi0.5_bc_mecka.yaml
@@ -0,0 +1,56 @@
+_target_: egomimic.pl_utils.pl_model.ModelWrapper
+robomimic_model:
+  _target_: egomimic.algo.pi.PI
+  data_schematic: _${data.dataset.data_schematic}
+  camera_transforms:
+    _target_: egomimic.utils.egomimicUtils.CameraTransforms
+    intrinsics_key: "mecka" # for half res the "base" configs switch to base_half; TODO confirm the equivalent key for mecka
+    extrinsics_key: "mecka"
+  ac_keys:
+    mecka_bimanual: "actions_cartesian"
+  domains: ["mecka_bimanual"]
+
+  action_converters:
+    rules:
+      MECKA_BIMANUAL:
+        _target_: egomimic.utils.action_utils.HumanBimanualCartesianEuler
+    # optional fallback if no match is found
+    fallback:
+      _target_: egomimic.utils.action_utils.BaseActionConverter
+      
+  config:      
+    pytorch_training_precision: bfloat16
+    pytorch_weight_path: /storage/project/r-dxu345-0/rco3/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
+    model:
+      pi05: true
+      action_dim: 32
+      action_horizon: 100
+      max_token_len: 180
+
+  train_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+  eval_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+
+optimizer:
+  _target_: torch.optim.AdamW
+  _partial_: true
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-8
+  weight_decay: 0.0
+
+scheduler:
+  _target_: transformers.get_cosine_schedule_with_warmup
+  _partial_: true
+  num_warmup_steps: 1000
+  num_training_steps: 100000
+  num_cycles: 0.5
\ No newline at end of file
diff --git a/egomimic/hydra_configs/model/pi0.5_bc_scale.yaml b/egomimic/hydra_configs/model/pi0.5_bc_scale.yaml
new file mode 100644
index 00000000..521abbaf
--- /dev/null
+++ b/egomimic/hydra_configs/model/pi0.5_bc_scale.yaml
@@ -0,0 +1,56 @@
+_target_: egomimic.pl_utils.pl_model.ModelWrapper
+robomimic_model:
+  _target_: egomimic.algo.pi.PI
+  data_schematic: _${data.dataset.data_schematic}
+  camera_transforms:
+    _target_: egomimic.utils.egomimicUtils.CameraTransforms
+    intrinsics_key: "scale" # for half res the "base" configs switch to base_half; TODO confirm the equivalent key for scale
+    extrinsics_key: "scale"
+  ac_keys:
+    scale_bimanual: "actions_cartesian"
+  domains: ["scale_bimanual"]
+
+  action_converters:
+    rules:
+      SCALE_BIMANUAL:
+        _target_: egomimic.utils.action_utils.HumanBimanualCartesianEuler
+    # optional fallback if no match is found
+    fallback:
+      _target_: egomimic.utils.action_utils.BaseActionConverter
+      
+  config:      
+    pytorch_training_precision: bfloat16
+    pytorch_weight_path: /storage/project/r-dxu345-0/rco3/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
+    model:
+      pi05: true
+      action_dim: 32
+      action_horizon: 100
+      max_token_len: 180
+
+  train_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+  eval_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+
+optimizer:
+  _target_: torch.optim.AdamW
+  _partial_: true
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-8
+  weight_decay: 0.0
+
+scheduler:
+  _target_: transformers.get_cosine_schedule_with_warmup
+  _partial_: true
+  num_warmup_steps: 1000
+  num_training_steps: 100000
+  num_cycles: 0.5
\ No newline at end of file
diff --git a/egomimic/hydra_configs/model/pi0.5_cotrain_eva_aria.yaml b/egomimic/hydra_configs/model/pi0.5_cotrain_eva_aria.yaml
new file mode 100644
index 00000000..c0521f56
--- /dev/null
+++ b/egomimic/hydra_configs/model/pi0.5_cotrain_eva_aria.yaml
@@ -0,0 +1,64 @@
+_target_: egomimic.pl_utils.pl_model.ModelWrapper
+robomimic_model:
+  _target_: egomimic.algo.pi.PI
+  data_schematic: _${data.dataset.data_schematic}
+  camera_transforms:
+    eva_bimanual:
+      _target_: egomimic.utils.egomimicUtils.CameraTransforms
+      intrinsics_key: "base" # use "base_half" instead when training on half-resolution images
+      extrinsics_key: "x5Dec13_2"
+    aria_bimanual:
+      _target_: egomimic.utils.egomimicUtils.CameraTransforms
+      intrinsics_key: "base" # use "base_half" instead when training on half-resolution images
+      extrinsics_key: "ariaJun7"
+  ac_keys:
+    eva_bimanual: "actions_cartesian"
+    aria_bimanual: "actions_cartesian"
+  domains: ["eva_bimanual", "aria_bimanual"]
+
+  action_converters:
+    rules:
+      EVA_BIMANUAL:
+        _target_: egomimic.utils.action_utils.RobotBimanualCartesianEuler
+      ARIA_BIMANUAL:
+        _target_: egomimic.utils.action_utils.HumanBimanualCartesianEuler
+    # optional fallback if no match is found
+    fallback:
+      _target_: egomimic.utils.action_utils.BaseActionConverter
+      
+  config:      
+    pytorch_training_precision: bfloat16
+    pytorch_weight_path: /storage/project/r-dxu345-0/rco3/EgoVerse/egomimic/algo/pi_checkpoints/pi05_base_pytorch
+    model:
+      pi05: true
+      action_dim: 32
+      action_horizon: 100
+      max_token_len: 180
+
+  train_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+  eval_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Resize
+        size: 224
+        interpolation: 3
+
+optimizer:
+  _target_: torch.optim.AdamW
+  _partial_: true
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-8
+  weight_decay: 0.0
+
+scheduler:
+  _target_: transformers.get_cosine_schedule_with_warmup
+  _partial_: true
+  num_warmup_steps: 1000
+  num_training_steps: 100000
+  num_cycles: 0.5
\ No newline at end of file
diff --git a/egomimic/hydra_configs/train_zarr_pi.yaml b/egomimic/hydra_configs/train_zarr_pi.yaml
new file mode 100644
index 00000000..c7d0e0b1
--- /dev/null
+++ b/egomimic/hydra_configs/train_zarr_pi.yaml
@@ -0,0 +1,114 @@
+defaults:
+  - model: pi0.5_bc_eva
+  - paths: default
+  - trainer: debug
+  - debug: null
+  - logger: debug
+  - data: eva
+  - callbacks: checkpoints
+  - override hydra/launcher: submitit
+  - _self_
+
+name: test
+description: test
+ckpt_path: null
+train: true
+eval: false
+
+eval_class:
+  _target_: egomimic.scripts.evaluation.Eve
+  mode: real
+  arm: both
+  eval_path: "./logs/eval/${name}_${now:%Y-%m-%d_%H-%M-%S}"
+
+hydra:
+  run:
+    # Dir should be experiment_name/description_{timestamp}
+    dir: ./logs/${name}/${description}_${now:%Y-%m-%d_%H-%M-%S}
+  sweep:
+    dir: ./logs/${name}/${description}_${now:%Y-%m-%d_%H-%M-%S}
+
+launch_params:
+  gpus_per_node: 1
+  nodes: 1
+
+
+data_schematic: # Dynamically fill in these shapes from the dataset
+  _target_: egomimic.rldb.zarr.utils.DataSchematic
+  norm_mode: quantile
+  schematic_dict:
+    eva_bimanual:
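+      base_0_rgb: # batch key: name this entry has in the processed batch
+        key_type: camera_keys # key category (e.g. camera_keys, proprio_keys, action_keys, metadata_keys)
+        zarr_key: observations.images.front_img_1 # dataset key: name of this entry in the zarr data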
+      right_wrist_0_rgb:
+        key_type: camera_keys
+        zarr_key: observations.images.right_wrist_img
+      left_wrist_0_rgb:
+        key_type: camera_keys
+        zarr_key: observations.images.left_wrist_img
+      ee_pose:
+        key_type: proprio_keys
+        zarr_key: observations.state.ee_pose
+      joint_positions:
+        key_type: proprio_keys
+        zarr_key: observations.state.joint_positions
+      actions_joints:
+        key_type: action_keys
+        zarr_key: actions_joints
+      actions_cartesian:
+        key_type: action_keys
+        zarr_key: actions_cartesian
+      embodiment:
+        key_type: metadata_keys
+        zarr_key: metadata.embodiment
+    aria_bimanual:
+      base_0_rgb:
+        key_type: camera_keys
+        zarr_key: observations.images.front_img_1
+      ee_pose:
+        key_type: proprio_keys
+        zarr_key: observations.state.ee_pose
+      actions_cartesian:
+        key_type: action_keys
+        zarr_key: actions_cartesian
+      embodiment:
+        key_type: metadata_keys
+        zarr_key: metadata.embodiment
+    mecka_bimanual:
+      base_0_rgb:
+        key_type: camera_keys
+        zarr_key: observations.images.front_img_1
+      ee_pose:
+        key_type: proprio_keys
+        zarr_key: observations.state.ee_pose
+      actions_cartesian:
+        key_type: action_keys
+        zarr_key: actions_cartesian
+      embodiment:
+        key_type: metadata_keys
+        zarr_key: metadata.embodiment
+    scale_bimanual:
+      base_0_rgb:
+        key_type: camera_keys
+        zarr_key: observations.images.front_img_1
+      ee_pose:
+        key_type: proprio_keys
+        zarr_key: observations.state.ee_pose
+      actions_cartesian:
+        key_type: action_keys
+        zarr_key: actions_cartesian
+      embodiment:
+        key_type: metadata_keys
+        zarr_key: metadata.embodiment
+  viz_img_key:
+    eva_bimanual:
+      base_0_rgb
+    aria_bimanual:
+      base_0_rgb
+    mecka_bimanual:
+      base_0_rgb
+    scale_bimanual:
+      base_0_rgb
+
+seed: 42
\ No newline at end of file
diff --git a/egomimic/utils/aws/aws_data_utils.py b/egomimic/utils/aws/aws_data_utils.py
index 2cc7f631..ba0602a2 100644
--- a/egomimic/utils/aws/aws_data_utils.py
+++ b/egomimic/utils/aws/aws_data_utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import warnings
 from pathlib import Path
 
 import boto3
@@ -13,12 +14,20 @@ def _uses_r2_endpoint(endpoint_url: str | None) -> bool:
     return bool(endpoint_url and "r2.cloudflarestorage.com" in endpoint_url)
 
 
-def load_env(path="~/.egoverse_env"):
+def load_env(path="~/.egoverse_env", required: bool = False):
     p = Path(path).expanduser()
     if not p.exists():
-        raise ValueError(
-            f"Env file {p} does not exist, run ./egomimic/utils/aws/setup_secret.sh"
+        if required:
+            raise ValueError(
+                f"Env file {p} does not exist, run ./egomimic/utils/aws/setup_secret.sh"
+            )
+        warnings.warn(
+            f"Env file {p} does not exist; AWS/R2 env vars not set. "
+            "Run ./egomimic/utils/aws/setup_secret.sh if you need S3/R2.",
+            UserWarning,
+            stacklevel=2,
         )
+        return
     for line in p.read_text().splitlines():
         line = line.strip()
         if not line or line.startswith("#") or "=" not in line: