From d4abd2bc3f44ee8f093a8341775c6ca8a4b6864f Mon Sep 17 00:00:00 2001 From: Byong-Wu Chong Date: Sun, 5 Apr 2026 19:44:39 +0900 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20libero=20eval=2090%=20success=20rate?= =?UTF-8?q?=20=EB=8B=AC=EC=84=B1=ED=95=9C=20=EC=88=98=EC=A0=95=EC=82=AC?= =?UTF-8?q?=ED=95=AD=20=EB=B0=98=EC=98=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - env_libero.py: set_init_state 후 reset() 재호출 제거 → "executing action in terminated episode" 버그 방지 - sim_env.py: reset 후 num_steps_wait=10 dummy action 추가 → 물리 시뮬레이션 안정화 대기 (핵심 수정, 0%→100% success) Ref: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py Validated: libero_spatial 10 tasks × 3 episodes = 90% (27/30) on g6.12xlarge --- strands_robots_sim/envs/env_libero.py | 3 ++- strands_robots_sim/sim_env.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py index d6d174c..067e54c 100644 --- a/strands_robots_sim/envs/env_libero.py +++ b/strands_robots_sim/envs/env_libero.py @@ -198,7 +198,8 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]: init_states = self.task_suite_instance.get_task_init_states(task_id) if init_states: self.env.set_init_state(init_states[0]) - obs = self.env.reset() # Reset again with initial state + # NOTE: do NOT call reset() again after set_init_state — it re-terminates the + # episode and causes "executing action in terminated episode" errors downstream. 
except Exception as init_error: # Handle PyTorch loading issues with initial states logger.warning(f"⚠️ Could not load initial states: {init_error}") diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py index dfbb9a7..f7c93cd 100644 --- a/strands_robots_sim/sim_env.py +++ b/strands_robots_sim/sim_env.py @@ -367,6 +367,12 @@ async def _execute_task_async( # Reset environment for new episode observation = await self.sim_env.reset(task_name) + + # Wait for physics to settle before running policy + # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10 + for _ in range(10): + observation, _, _, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]}) + episode_reward = 0.0 episode_steps = 0 episode_done = False # Track episode termination From 9ee80c4af017ebbbf836b7c2e7195262186ea4f0 Mon Sep 17 00:00:00 2001 From: Byong-Wu Chong Date: Thu, 30 Apr 2026 21:43:20 +0900 Subject: [PATCH 2/3] fix: address review feedback on libero eval warm-up loop - Add NUM_PHYSICS_WARMUP_STEPS=10 module-level constant (no magic number) - Guard warm-up loop with done check to prevent stepping in terminated episode - Add comment in env_libero.reset() clarifying obs is pre-set_init_state Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- strands_robots_sim/envs/env_libero.py | 3 +++ strands_robots_sim/sim_env.py | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py index 067e54c..d4583bc 100644 --- a/strands_robots_sim/envs/env_libero.py +++ b/strands_robots_sim/envs/env_libero.py @@ -210,6 +210,9 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]: self.current_task_name = task.language print(f"🔄 Libero environment reset to task: {task.name}") + # NOTE: obs is captured before set_init_state() is applied above. 
+ # Callers should not rely on this obs for policy execution; instead, + # re-fetch the observation after the physics warm-up steps. return self._process_observation(obs) except Exception as e: diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py index f7c93cd..94d03f7 100644 --- a/strands_robots_sim/sim_env.py +++ b/strands_robots_sim/sim_env.py @@ -82,6 +82,8 @@ def _suppress_egl_excepthook(exc_type, exc_value, exc_traceback): logger = logging.getLogger(__name__) +NUM_PHYSICS_WARMUP_STEPS = 10 + # Monkey-patch sys.stderr to suppress EGL errors during cleanup class EGLErrorFilter: @@ -370,8 +372,10 @@ async def _execute_task_async( # Wait for physics to settle before running policy # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10 - for _ in range(10): - observation, _, _, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]}) + for _ in range(NUM_PHYSICS_WARMUP_STEPS): + observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]}) + if done: + break episode_reward = 0.0 episode_steps = 0 From ba5f4968ce3d5d481ed9fcbc412238c97931cd93 Mon Sep 17 00:00:00 2001 From: Byong-Wu Chong Date: Thu, 30 Apr 2026 22:57:51 +0900 Subject: [PATCH 3/3] docs: explain gripper -1 warm-up action and delta-vs-qpos distinction Gripper open (-1) matches LIBERO task initial states and the Isaac-GR00T eval convention. Add note clarifying action[6] is a delta command, not gripper_qpos, so observation state cannot be substituted directly. 
Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- strands_robots_sim/sim_env.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py index 94d03f7..02cdfe6 100644 --- a/strands_robots_sim/sim_env.py +++ b/strands_robots_sim/sim_env.py @@ -370,8 +370,11 @@ async def _execute_task_async( # Reset environment for new episode observation = await self.sim_env.reset(task_name) - # Wait for physics to settle before running policy - # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10 + # Wait for physics to settle before running policy. + # Gripper -1 (open) matches LIBERO task initial states and mirrors + # Isaac-GR00T/examples/Libero/eval/run_libero_eval.py warm-up convention. + # Note: action[6] is a delta command, not gripper_qpos — do not substitute + # observation state here as the units differ. for _ in range(NUM_PHYSICS_WARMUP_STEPS): observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]}) if done: