strands-labs · yinsong1986 · May 2, 2026 · Apr 5, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py
@@ -198,7 +198,8 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]:
                 init_states = self.task_suite_instance.get_task_init_states(task_id)
                 if init_states:
                     self.env.set_init_state(init_states[0])
-                    obs = self.env.reset()  # Reset again with initial state
+                    # NOTE: do NOT call reset() again after set_init_state — it re-terminates the
+                    # episode and causes "executing action in terminated episode" errors downstream.
             except Exception as init_error:
                 # Handle PyTorch loading issues with initial states
                 logger.warning(f"⚠️ Could not load initial states: {init_error}")
@@ -209,6 +210,9 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]:
             self.current_task_name = task.language
 
             print(f"🔄 Libero environment reset to task: {task.name}")
+            # NOTE: obs is captured before set_init_state() is applied above.
+            # Callers should not rely on this obs for policy execution; instead,
+            # re-fetch the observation after the physics warm-up steps.
             return self._process_observation(obs)
 
         except Exception as e:

diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py
@@ -82,6 +82,8 @@ def _suppress_egl_excepthook(exc_type, exc_value, exc_traceback):
 
 logger = logging.getLogger(__name__)
 
+NUM_PHYSICS_WARMUP_STEPS = 10
+
 
 # Monkey-patch sys.stderr to suppress EGL errors during cleanup
 class EGLErrorFilter:
@@ -367,6 +369,17 @@ async def _execute_task_async(
 
                 # Reset environment for new episode
                 observation = await self.sim_env.reset(task_name)
+
+                # Wait for physics to settle before running policy.
+                # Gripper -1 (open) matches LIBERO task initial states and mirrors
+                # Isaac-GR00T/examples/Libero/eval/run_libero_eval.py warm-up convention.
+                # Note: action[6] is a delta command, not gripper_qpos — do not substitute
+                # observation state here as the units differ.
+                for _ in range(NUM_PHYSICS_WARMUP_STEPS):
+                    observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]})
+                    if done:
+                        break
+
                 episode_reward = 0.0
                 episode_steps = 0
                 episode_done = False  # Track episode termination