From d4abd2bc3f44ee8f093a8341775c6ca8a4b6864f Mon Sep 17 00:00:00 2001
From: Byong-Wu Chong <byochong@amazon.com>
Date: Sun, 5 Apr 2026 19:44:39 +0900
Subject: [PATCH 1/3] =?UTF-8?q?fix:=20libero=20eval=2090%=20success=20rate?=
 =?UTF-8?q?=20=EB=8B=AC=EC=84=B1=ED=95=9C=20=EC=88=98=EC=A0=95=EC=82=AC?=
 =?UTF-8?q?=ED=95=AD=20=EB=B0=98=EC=98=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- env_libero.py: set_init_state 후 reset() 재호출 제거
  → "executing action in terminated episode" 버그 방지
- sim_env.py: reset 후 num_steps_wait=10 dummy action 추가
  → 물리 시뮬레이션 안정화 대기 (핵심 수정, 0%→100% success)

Ref: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py
Validated: libero_spatial 10 tasks × 3 episodes = 90% (27/30) on g6.12xlarge
---
 strands_robots_sim/envs/env_libero.py | 3 ++-
 strands_robots_sim/sim_env.py         | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py
index d6d174c..067e54c 100644
--- a/strands_robots_sim/envs/env_libero.py
+++ b/strands_robots_sim/envs/env_libero.py
@@ -198,7 +198,8 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]:
                 init_states = self.task_suite_instance.get_task_init_states(task_id)
                 if init_states:
                     self.env.set_init_state(init_states[0])
-                    obs = self.env.reset()  # Reset again with initial state
+                    # NOTE: do NOT call reset() again after set_init_state — it re-terminates the
+                    # episode and causes "executing action in terminated episode" errors downstream.
             except Exception as init_error:
                 # Handle PyTorch loading issues with initial states
                 logger.warning(f"⚠️ Could not load initial states: {init_error}")
diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py
index dfbb9a7..f7c93cd 100644
--- a/strands_robots_sim/sim_env.py
+++ b/strands_robots_sim/sim_env.py
@@ -367,6 +367,12 @@ async def _execute_task_async(
 
                 # Reset environment for new episode
                 observation = await self.sim_env.reset(task_name)
+
+                # Wait for physics to settle before running policy
+                # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10
+                for _ in range(10):
+                    observation, _, _, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]})
+
                 episode_reward = 0.0
                 episode_steps = 0
                 episode_done = False  # Track episode termination

From 9ee80c4af017ebbbf836b7c2e7195262186ea4f0 Mon Sep 17 00:00:00 2001
From: Byong-Wu Chong <byochong@amazon.com>
Date: Thu, 30 Apr 2026 21:43:20 +0900
Subject: [PATCH 2/3] fix: address review feedback on libero eval warm-up loop

- Add NUM_PHYSICS_WARMUP_STEPS=10 module-level constant (no magic number)
- Guard warm-up loop with done check to prevent stepping in terminated episode
- Add comment in env_libero.reset() clarifying obs is pre-set_init_state

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
---
 strands_robots_sim/envs/env_libero.py | 3 +++
 strands_robots_sim/sim_env.py         | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/strands_robots_sim/envs/env_libero.py b/strands_robots_sim/envs/env_libero.py
index 067e54c..d4583bc 100644
--- a/strands_robots_sim/envs/env_libero.py
+++ b/strands_robots_sim/envs/env_libero.py
@@ -210,6 +210,9 @@ async def reset(self, task_name: Optional[str] = None) -> Dict[str, Any]:
             self.current_task_name = task.language
 
             print(f"🔄 Libero environment reset to task: {task.name}")
+            # NOTE: obs is captured before set_init_state() is applied above.
+            # Callers should not rely on this obs for policy execution; instead,
+            # re-fetch the observation after the physics warm-up steps.
             return self._process_observation(obs)
 
         except Exception as e:
diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py
index f7c93cd..94d03f7 100644
--- a/strands_robots_sim/sim_env.py
+++ b/strands_robots_sim/sim_env.py
@@ -82,6 +82,8 @@ def _suppress_egl_excepthook(exc_type, exc_value, exc_traceback):
 
 logger = logging.getLogger(__name__)
 
+NUM_PHYSICS_WARMUP_STEPS = 10
+
 
 # Monkey-patch sys.stderr to suppress EGL errors during cleanup
 class EGLErrorFilter:
@@ -370,8 +372,10 @@ async def _execute_task_async(
 
                 # Wait for physics to settle before running policy
                 # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10
-                for _ in range(10):
-                    observation, _, _, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]})
+                for _ in range(NUM_PHYSICS_WARMUP_STEPS):
+                    observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]})
+                    if done:
+                        break
 
                 episode_reward = 0.0
                 episode_steps = 0

From ba5f4968ce3d5d481ed9fcbc412238c97931cd93 Mon Sep 17 00:00:00 2001
From: Byong-Wu Chong <byochong@amazon.com>
Date: Thu, 30 Apr 2026 22:57:51 +0900
Subject: [PATCH 3/3] docs: explain gripper -1 warm-up action and delta-vs-qpos
 distinction

Gripper action -1 (open in robosuite/LIBERO, where -1 => open and +1 =>
closed) matches LIBERO task initial states and the Isaac-GR00T eval
convention. Add note clarifying action[6] is a delta command, not
gripper_qpos, so observation state cannot be substituted directly.

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
---
 strands_robots_sim/sim_env.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/strands_robots_sim/sim_env.py b/strands_robots_sim/sim_env.py
index 94d03f7..02cdfe6 100644
--- a/strands_robots_sim/sim_env.py
+++ b/strands_robots_sim/sim_env.py
@@ -370,8 +370,11 @@ async def _execute_task_async(
                 # Reset environment for new episode
                 observation = await self.sim_env.reset(task_name)
 
-                # Wait for physics to settle before running policy
-                # Reference: Isaac-GR00T/examples/Libero/eval/run_libero_eval.py num_steps_wait=10
+                # Wait for physics to settle before running policy.
+                # Gripper -1 (open; robosuite: -1 => open, +1 => closed) matches LIBERO task initial states
+                # and mirrors the Isaac-GR00T/examples/Libero/eval/run_libero_eval.py warm-up convention.
+                # Note: action[6] is a delta command, not gripper_qpos — do not substitute
+                # observation state here as the units differ.
                 for _ in range(NUM_PHYSICS_WARMUP_STEPS):
                     observation, _, done, _ = await self.sim_env.step({"action": [0, 0, 0, 0, 0, 0, -1]})
                     if done: