diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py index d6d174c..d4583bc 100644 --- a/strands_robots_sim/envs/env_libero.py +++ b/strands_robots_sim/envs/env_libero.py @@ -198,7 +198,8 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]: init_states = self.task_suite_instance.get_task_init_states(task_id) if init_states: self.env.set_init_state(init_states[0]) - obs = self.env.reset() # Reset again with initial state + # NOTE: do NOT call reset() again after set_init_state — it re-terminates the + # episode and causes "executing action in terminated episode" errors downstream. except Exception as init_error: # Handle PyTorch loading issues with initial states logger.warning(f"⚠️ Could not load initial states: {init_error}") @@ -209,6 +210,9 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]: self.current_task_name = task.language print(f"🔄 Libero environment reset to task: {task.name}") + # NOTE: obs is captured before set_init_state() is applied above. + # Callers should not rely on this obs for policy execution; instead, + # re-fetch the observation after the physics warm-up steps. return self._process_observation(obs) except Exception as e: diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py index dfbb9a7..02cdfe6 100644 --- a/strands_robots_sim/sim_env.py +++ b/strands_robots_sim/sim_env.py @@ -82,6 +82,8 @@ def _suppress_egl_excepthook(exc_type, exc_value, exc_traceback): logger = logging.getLogger(__name__) +NUM_PHYSICS_WARMUP_STEPS = 10 + # Monkey-patch sys.stderr to suppress EGL errors during cleanup class EGLErrorFilter: @@ -367,6 +369,17 @@ async def _execute_task_async( # Reset environment for new episode observation = await self.sim_env.reset(task_name) + + # Wait for physics to settle before running policy. 
+ # Gripper -1 (open per robosuite's convention, +1 = closed) matches LIBERO task initial states and mirrors + # Isaac-GR00T/examples/Libero/eval/run_libero_eval.py warm-up convention. + # Note: action[6] is a delta command, not gripper_qpos — do not substitute + # observation state here as the units differ. + for _ in range(NUM_PHYSICS_WARMUP_STEPS): + observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]}) + if done: + break + episode_reward = 0.0 episode_steps = 0 episode_done = False # Track episode termination