From 268956c18368ec3f42684bec6c7991f906bea622 Mon Sep 17 00:00:00 2001 From: Samir mlika <105347215+mlikasam-askui@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:03:33 +0200 Subject: [PATCH 1/3] Add Android Agent Template --- src/basic-android-agent/.vscode/settings.json | 41 ++++ src/basic-android-agent/README.md | 196 ++++++++++++++++++ src/basic-android-agent/agent.yml | 5 + src/basic-android-agent/main.py | 107 ++++++++++ src/basic-android-agent/requirements.txt | 1 + 5 files changed, 350 insertions(+) create mode 100644 src/basic-android-agent/.vscode/settings.json create mode 100644 src/basic-android-agent/README.md create mode 100644 src/basic-android-agent/agent.yml create mode 100644 src/basic-android-agent/main.py create mode 100644 src/basic-android-agent/requirements.txt diff --git a/src/basic-android-agent/.vscode/settings.json b/src/basic-android-agent/.vscode/settings.json new file mode 100644 index 0000000..c845225 --- /dev/null +++ b/src/basic-android-agent/.vscode/settings.json @@ -0,0 +1,41 @@ +{ + "terminal.integrated.profiles.windows": { + "askui-shell": { + "path": [ + "${env:ASKUI_INSTALLATION_DIRECTORY}\\Tools\\askui-shell.cmd" + ], + "icon": "robot", + "overrideName": true, + "color": "terminal.ansiMagenta", + } + }, + "terminal.integrated.profiles.osx": { + "askui-shell": { + "path": "/bin/zsh", + "args": [ + "-l", + "-c", + "$ASKUI_INSTALLATION_DIRECTORY/Tools/askui-shell" + ], + "icon": "robot", + "overrideName": true, + "color": "terminal.ansiMagenta" + } + }, + "terminal.integrated.profiles.linux": { + "askui-shell": { + "path": "/bin/bash", + "args": [ + "-l", + "-c", + "$ASKUI_INSTALLATION_DIRECTORY/Tools/askui-shell" + ], + "icon": "robot", + "overrideName": true, + "color": "terminal.ansiMagenta" + } + }, + "terminal.integrated.defaultProfile.windows": "askui-shell", + "terminal.integrated.defaultProfile.linux": "askui-shell", + "terminal.integrated.defaultProfile.osx": "askui-shell" +} diff --git a/src/basic-android-agent/README.md b/src/basic-android-agent/README.md new file mode 100644 index 0000000..e6f31f7 --- /dev/null +++ b/src/basic-android-agent/README.md @@ -0,0 +1,196 @@ +# AskUI Android Agent Demo + +A comprehensive demonstration of the AskUI Android Vision Agent library, showcasing how to automate Android devices using natural language commands and programmatic controls. + +## 🚀 What is AskUI? + +AskUI is a powerful Android automation library that allows you to interact with Android devices using: +- **Natural language commands** - Tell the agent what you want to do in plain English +- **Vision-based interaction** - The agent "sees" what's on screen and can interact accordingly +- **Programmatic controls** - Direct API calls for precise automation +- **Shell command execution** - Run system commands on the device + +## ✨ Key Features Demonstrated + +### 1. Screen Analysis +- **Natural Language Queries**: Ask what's visible on screen +- **Boolean Assertions**: Check if specific elements exist +- **Contextual Understanding**: The agent understands screen content + +### 2. Touch Interactions +- **Tap**: Click on specific elements or text +- **Drag & Drop**: Move elements around the screen +- **Swipe**: Navigate through content +- **Key Combinations**: Execute complex key sequences + +### 3. Text Input +- **Direct Typing**: Type text directly into input fields +- **Natural Language**: Describe what you want to type + +### 4. Device Navigation +- **Home Button**: Return to home screen +- **Back Button**: Navigate back +- **Key Combinations**: Execute multiple key presses + +### 5. Shell Commands +- **System Commands**: Execute shell commands on the device +- **File Operations**: List files, check system status + +### 6. Agentic Behavior +- **Natural Language Instructions**: Give high-level commands +- **Autonomous Execution**: Agent figures out how to accomplish tasks +- **Context Awareness**: Understands current screen state + +## ��️ Installation + +### Prerequisites +- AskUI Shell installed +- ADB (Android Debug Bridge) installed and configured + +### Setup +1. **Clone or download this demo** + ```bash + git clone + cd basic-android-agent + ``` + +2. **Install dependencies** + ```bash + pip install -r requirements.txt + ``` + +3. **Connect your Android device** + - Enable Developer Options and USB Debugging + - Connect via USB or start an emulator + - Verify connection: `adb devices` + +## 🎯 Usage + +### Basic Usage +```bash +python main.py +``` + +### Customization +Edit `main.py` to: +- Change device selection (uncomment and modify the serial number line) +- Add your own automation scenarios +- Modify timing and delays +- Add error handling for your specific use cases + +## 📱 Demo Scenarios + +The demo includes several scenarios that showcase different capabilities: + +1. **Screen Analysis**: Ask what's visible on the current screen +2. **App Interaction**: Find and tap on the Gmail app +3. **Navigation**: Use device buttons and gestures +4. **Text Input**: Type text into input fields +5. **Shell Commands**: Execute system commands +6. **Agentic Behavior**: Give natural language instructions + +## �� Configuration + +### Device Selection +If you have multiple devices connected, uncomment and modify this line in `main.py`: +```python +agent.set_device_by_serial_number("your-device-serial") +``` + +### Timing Adjustments +Modify the `time.sleep()` calls to adjust delays based on your device's performance. + +## 🎨 Customization Examples + +### Adding Your Own Scenarios +```python +# Example: Open a specific app +agent.act("Open the Settings app") + +# Example: Fill a form +agent.type("your-email@example.com") +agent.tap(loc.Text("Submit")) + +# Example: Navigate through menus +agent.tap(loc.Text("Menu")) +agent.tap(loc.Text("Settings")) +``` + +### Error Handling +```python +try: + agent.tap(loc.Text("Some Button")) +except Exception as e: + print(f"Button not found: {e}") + # Fallback action +``` + +## 🐛 Troubleshooting + +### Common Issues + +1. **Device Not Found** + - Ensure ADB is properly installed + - Check device connection: `adb devices` + - Enable USB debugging on device + +2. **Permission Errors** + - Grant necessary permissions on the Android device + - Check if the device is authorized for debugging + +3. **Element Not Found** + - Verify the element text/description is correct + - Check if the element is visible on screen + - Try using different locator strategies + +4. **Timing Issues** + - Increase delays for slower devices + - Add wait conditions for dynamic content + +### Debug Mode +Enable verbose logging by modifying the agent initialization: +```python +with AndroidVisionAgent( + log_level=logging.DEBUG +) as agent: +``` + +## �� API Reference + +### Core Methods + +- `agent.get(question, response_schema=bool)` - Ask questions about screen content +- `agent.tap(locator)` - Tap on elements +- `agent.type(text)` - Type text +- `agent.key_tap(key)` - Press device keys +- `agent.swipe(x1, y1, x2, y2)` - Swipe gesture +- `agent.drag_and_drop(x1, y1, x2, y2)` - Drag and drop +- `agent.shell(command)` - Execute shell commands +- `agent.act(instruction)` - Natural language instructions + +### Locators +- `loc.Text("text")` - Find by text content +- `loc.Id("id")` - Find by element ID +- `loc.Class("class")` - Find by CSS class + +## �� Contributing + +Feel free to: +- Add new demo scenarios +- Improve error handling +- Add more documentation +- Report issues and suggest features + +## �� License + +This demo is provided as-is for educational and demonstration purposes. + +## 🔗 Resources + +- [AskUI Documentation](https://docs.askui.com) +- [Android Developer Guide](https://developer.android.com) +- [ADB Documentation](https://developer.android.com/studio/command-line/adb) + +--- + +**Happy Automating! 🚀** \ No newline at end of file diff --git a/src/basic-android-agent/agent.yml b/src/basic-android-agent/agent.yml new file mode 100644 index 0000000..cb5023a --- /dev/null +++ b/src/basic-android-agent/agent.yml @@ -0,0 +1,5 @@ +template: + name: "Basic Android Agent" + description: "This agent contains a basic Python Android Agent." + +entrypoint: python main.py diff --git a/src/basic-android-agent/main.py b/src/basic-android-agent/main.py new file mode 100644 index 0000000..fb2cd3c --- /dev/null +++ b/src/basic-android-agent/main.py @@ -0,0 +1,107 @@ +""" +AskUI Android Agent Demo - Interactive Android Device Automation + +This script demonstrates the core capabilities of the AskUI Android Vision Agent, +showing how to interact with Android devices using natural language commands +and programmatic controls. +""" + +from askui import AndroidVisionAgent +from askui import locators as loc +import time + +def main(): + """ + Main demonstration function showcasing AskUI Android agent capabilities. + """ + print("🤖 Starting AskUI Android Agent Demo...") + print("=" * 50) + + # Initialize your agent context manager + with AndroidVisionAgent() as agent: + print("✅ Agent initialized successfully!") + + # Select device by serial number in case of multiple devices + # Uncomment and modify if you have multiple devices connected + # agent.set_device_by_serial_number("emulator-5554") + + # Demo 1: Natural Language Screen Analysis + print("\n📱 Demo 1: Screen Analysis") + print("-" * 30) + screen_description = agent.get('What can you see on the screen?') + print(f"Screen contains: {screen_description}") + + # Demo 2: Assertion-based Testing + print("\n✅ Demo 2: Assertion Testing") + print("-" * 30) + try: + has_gmail = agent.get('Does the screen contain the text "Gmail"?', response_schema=bool) + print(f"Gmail text found: {has_gmail}") + + if has_gmail: + print("🎯 Tapping on Gmail app...") + agent.tap(loc.Text("Gmail")) + time.sleep(2) # Wait for app to open + except Exception as e: + print(f"⚠️ Gmail not found or error: {e}") + + # Demo 3: Device Navigation + print("\n🏠 Demo 3: Device Navigation") + print("-" * 30) + + # Go to home screen + print("Pressing HOME button...") + agent.key_tap('HOME') + time.sleep(1) + + # Demonstrate key combinations + print("Pressing HOME + BACK combination...") + agent.key_combination(['HOME', 'BACK'], duration_in_ms=1000) + time.sleep(1) + + # Demo 4: Touch Gestures + print("\n Demo 4: Touch Gestures") + print("-" * 30) + + # Drag and drop demonstration + print("Performing drag and drop gesture...") + agent.drag_and_drop(x1=100, y1=100, x2=200, y2=200, duration_in_ms=1000) + time.sleep(1) + + # Swipe demonstration + print("Performing swipe gesture...") + agent.swipe(x1=100, y1=100, x2=200, y2=200, duration_in_ms=1000) + time.sleep(1) + + # Demo 5: Text Input + print("\n⌨️ Demo 5: Text Input") + print("-" * 30) + print("Typing 'Hello AskUI World'...") + agent.type('Hello AskUI World') + time.sleep(1) + + # Demo 6: Shell Commands + print("\n Demo 6: Shell Commands") + print("-" * 30) + try: + print("Executing shell command: ls -l") + shell_response = agent.shell('ls -l') + print(f"Shell output: {shell_response}") + except Exception as e: + print(f"⚠️ Shell command failed: {e}") + + # Demo 7: Agentic Behavior (Natural Language Commands) + print("\n🧠 Demo 7: Agentic Behavior") + print("-" * 30) + print("Instructing agent to search for AskUI company...") + try: + agent.act("Search for the company AskUI in the browser, and open the first result.") + print("✅ Agent successfully executed the search command!") + except Exception as e: + print(f"⚠️ Agentic command failed: {e}") + + print("\n🎉 Demo completed successfully!") + print("=" * 50) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/basic-android-agent/requirements.txt b/src/basic-android-agent/requirements.txt new file mode 100644 index 0000000..628d95d --- /dev/null +++ b/src/basic-android-agent/requirements.txt @@ -0,0 +1 @@ +askui[all]===0.9.3 From 9f80ed979b2825bd9c002b768b90b24b60b70be5 Mon Sep 17 00:00:00 2001 From: Samir mlika <105347215+mlikasam-askui@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:09:41 +0200 Subject: [PATCH 2/3] update Read me --- src/basic-android-agent/README.md | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/basic-android-agent/README.md b/src/basic-android-agent/README.md index e6f31f7..1c004b2 100644 --- a/src/basic-android-agent/README.md +++ b/src/basic-android-agent/README.md @@ -155,6 +155,38 @@ with AndroidVisionAgent( ) as agent: ``` +## AskUI Chat + +You can test the agentic behavior directly through AskUI's web-based chat interface without writing any code. This allows you to interact with your Android device using natural language commands through a user-friendly chat interface. + +### Getting Started with AskUI Chat + +1. **Start the Chat Server** + ```bash + python -m askui.chat + ``` + +2. **Access the Web Interface** + - Open your web browser and navigate to [hub.askui.com](https://hub.askui.com) + - Use the chat interface to interact with your connected Android device + +### What You Can Do + +- **Natural Language Commands**: "Open Gmail and check for new messages" +- **Screen Analysis**: "What apps are currently visible on my home screen?" +- **Device Control**: "Go back to the home screen and open Settings" +- **Interactive Testing**: Experiment with different commands and see how the agent responds + +### Benefits + +- **No Coding Required**: Test automation ideas quickly without writing Python code +- **Real-time Interaction**: See immediate responses and device actions +- **Learning Tool**: Understand how the agent interprets and executes commands +- **Rapid Prototyping**: Validate automation workflows before implementing them in code + +This is perfect for exploring AskUI's capabilities or demonstrating the technology to others! + + ## �� API Reference ### Core Methods @@ -173,18 +205,6 @@ with AndroidVisionAgent( - `loc.Id("id")` - Find by element ID - `loc.Class("class")` - Find by CSS class -## �� Contributing - -Feel free to: -- Add new demo scenarios -- Improve error handling -- Add more documentation -- Report issues and suggest features - -## �� License - -This demo is provided as-is for educational and demonstration purposes. - ## 🔗 Resources - [AskUI Documentation](https://docs.askui.com) From e5639ce9467e403d3575c770689cba606aa573c7 Mon Sep 17 00:00:00 2001 From: Samir Mlika Date: Mon, 4 Aug 2025 10:02:41 +0200 Subject: [PATCH 3/3] implement review remarks --- src/basic-android-agent/README.md | 17 +++----- src/basic-android-agent/main.py | 52 +++++++++++++----------- src/basic-android-agent/requirements.txt | 2 +- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/basic-android-agent/README.md b/src/basic-android-agent/README.md index 1c004b2..f795383 100644 --- a/src/basic-android-agent/README.md +++ b/src/basic-android-agent/README.md @@ -41,28 +41,23 @@ AskUI is a powerful Android automation library that allows you to interact with - **Autonomous Execution**: Agent figures out how to accomplish tasks - **Context Awareness**: Understands current screen state -## ��️ Installation +## Installation ### Prerequisites - AskUI Shell installed - ADB (Android Debug Bridge) installed and configured ### Setup -1. **Clone or download this demo** - ```bash - git clone - cd basic-android-agent - ``` -2. **Install dependencies** +1. **Install dependencies** ```bash pip install -r requirements.txt ``` -3. **Connect your Android device** +2. **Connect your Android device** - Enable Developer Options and USB Debugging - Connect via USB or start an emulator - - Verify connection: `adb devices` + - Verify connection: `adb devices` (it should show a list of connected devices) ## 🎯 Usage @@ -89,7 +84,7 @@ The demo includes several scenarios that showcase different capabilities: 5. **Shell Commands**: Execute system commands 6. **Agentic Behavior**: Give natural language instructions -## �� Configuration +## Configuration ### Device Selection If you have multiple devices connected, uncomment and modify this line in `main.py`: @@ -213,4 +208,4 @@ This is perfect for exploring AskUI's capabilities or demonstrating the technolo --- -**Happy Automating! 🚀** \ No newline at end of file +**Happy Automating! 🚀** diff --git a/src/basic-android-agent/main.py b/src/basic-android-agent/main.py index fb2cd3c..31f1532 100644 --- a/src/basic-android-agent/main.py +++ b/src/basic-android-agent/main.py @@ -10,98 +10,104 @@ from askui import locators as loc import time + def main(): """ Main demonstration function showcasing AskUI Android agent capabilities. """ print("🤖 Starting AskUI Android Agent Demo...") print("=" * 50) - + # Initialize your agent context manager with AndroidVisionAgent() as agent: print("✅ Agent initialized successfully!") - + # Select device by serial number in case of multiple devices # Uncomment and modify if you have multiple devices connected # agent.set_device_by_serial_number("emulator-5554") - + # Demo 1: Natural Language Screen Analysis print("\n📱 Demo 1: Screen Analysis") print("-" * 30) - screen_description = agent.get('What can you see on the screen?') + screen_description = agent.get("What can you see on the screen?") print(f"Screen contains: {screen_description}") - + # Demo 2: Assertion-based Testing print("\n✅ Demo 2: Assertion Testing") print("-" * 30) try: - has_gmail = agent.get('Does the screen contain the text "Gmail"?', response_schema=bool) + has_gmail = agent.get( + 'Does the screen contain the text "Gmail"?', response_schema=bool + ) print(f"Gmail text found: {has_gmail}") - + if has_gmail: print("🎯 Tapping on Gmail app...") agent.tap(loc.Text("Gmail")) time.sleep(2) # Wait for app to open except Exception as e: print(f"⚠️ Gmail not found or error: {e}") - + # Demo 3: Device Navigation print("\n🏠 Demo 3: Device Navigation") print("-" * 30) - + # Go to home screen print("Pressing HOME button...") - agent.key_tap('HOME') + agent.key_tap("HOME") time.sleep(1) - + # Demonstrate key combinations print("Pressing HOME + BACK combination...") - agent.key_combination(['HOME', 'BACK'], duration_in_ms=1000) + agent.key_combination(["HOME", "BACK"], duration_in_ms=1000) time.sleep(1) - + # Demo 4: Touch Gestures print("\n Demo 4: Touch Gestures") print("-" * 30) - + # Drag and drop demonstration print("Performing drag and drop gesture...") agent.drag_and_drop(x1=100, y1=100, x2=200, y2=200, duration_in_ms=1000) time.sleep(1) - + # Swipe demonstration print("Performing swipe gesture...") agent.swipe(x1=100, y1=100, x2=200, y2=200, duration_in_ms=1000) time.sleep(1) - + # Demo 5: Text Input print("\n⌨️ Demo 5: Text Input") print("-" * 30) print("Typing 'Hello AskUI World'...") - agent.type('Hello AskUI World') + agent.type("Hello AskUI World") time.sleep(1) - + # Demo 6: Shell Commands print("\n Demo 6: Shell Commands") print("-" * 30) try: print("Executing shell command: ls -l") - shell_response = agent.shell('ls -l') + shell_response = agent.shell("ls -l") print(f"Shell output: {shell_response}") except Exception as e: print(f"⚠️ Shell command failed: {e}") - + # Demo 7: Agentic Behavior (Natural Language Commands) print("\n🧠 Demo 7: Agentic Behavior") print("-" * 30) print("Instructing agent to search for AskUI company...") try: - agent.act("Search for the company AskUI in the browser, and open the first result.") + agent.act( + "Search for the company AskUI in the browser, and open the first result." + ) print("✅ Agent successfully executed the search command!") except Exception as e: print(f"⚠️ Agentic command failed: {e}") - + print("\n🎉 Demo completed successfully!") print("=" * 50) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/basic-android-agent/requirements.txt b/src/basic-android-agent/requirements.txt index 628d95d..c83af6d 100644 --- a/src/basic-android-agent/requirements.txt +++ b/src/basic-android-agent/requirements.txt @@ -1 +1 @@ -askui[all]===0.9.3 +askui[all]