diff --git a/.gitignore b/.gitignore index a5173684..af617ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,6 @@ frontend/ reddit_python_posts.json -test/ \ No newline at end of file +test/ +venvrun.sh +run.sh diff --git a/droidrun/agent/codeact/codeact_agent.py b/droidrun/agent/codeact/codeact_agent.py index 9a00a9a0..59a71714 100644 --- a/droidrun/agent/codeact/codeact_agent.py +++ b/droidrun/agent/codeact/codeact_agent.py @@ -343,6 +343,9 @@ async def handle_llm_input( activity_name=phone_state.get("currentApp", "Unknown"), ) + # Store ui_state so it's available during code execution + await ctx.store.set("ui_state", a11y_tree) + # Stream formatted state for trajectory ctx.write_event_to_stream(RecordUIStateEvent(ui_state=a11y_tree)) diff --git a/droidrun/config/prompts/executor/system.jinja2 b/droidrun/config/prompts/executor/system.jinja2 index d8d41d2e..ff357e9d 100644 --- a/droidrun/config/prompts/executor/system.jinja2 +++ b/droidrun/config/prompts/executor/system.jinja2 @@ -93,8 +93,10 @@ No actions have been taken yet. Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action. IMPORTANT: -1. Do NOT repeat previously failed actions multiple times. Try changing to another action. +1. Do NOT repeat previously failed actions multiple times. If an action failed, try a DIFFERENT action or approach. 2. Must do the current subgoal. +3. If you have tried the same action 2+ times and it keeps failing, try a completely different approach. If truly stuck with no viable action, use `{"action": "wait", "duration": 1.0}` as a fallback and explain why in the Description (e.g., "No actionable element found for subgoal"). +4. ALWAYS output a valid action. There is no "skip" or "do nothing" option — use `wait` with duration 1.0 if uncertain. Provide your output in the following format, which contains three parts: