diff --git a/.gitignore b/.gitignore
index 29ead01a..c561ab5e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@ coverage.xml
.env
.venv
venv/
+myenv/
ENV/
env.bak/
venv.bak/
diff --git a/examples/browsergym_custom_example.py b/examples/browsergym_custom_example.py
new file mode 100644
index 00000000..c7a7a189
--- /dev/null
+++ b/examples/browsergym_custom_example.py
@@ -0,0 +1,134 @@
+"""Example usage of custom BrowserGym tasks.
+
+This script demonstrates how to create and use custom tasks with the
+BrowserGym environment wrapper in OpenEnv.
+"""
+
+import sys
+import os
+import time
+
+# Add the repository's src/ directory to sys.path so that `envs` is importable
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'src'))
+
+from envs.browsergym_env.server.browsergym_environment import BrowserGymEnvironment
+from envs.browsergym_env.models import BrowserGymAction
+
+
+def multi_tab_copy_paste_example():
+    """Run the scripted multi-tab copy-paste demo in a visible browser.
+
+    The environment is closed even if resetting or stepping raises.
+    """
+    print("Multi-Tab Copy-Paste Task Example")
+    print("-" * 80)
+
+    # headless=False so the scripted actions can be watched as they run
+    env = BrowserGymEnvironment(
+        benchmark="custom",
+        task_name="copy-paste-multitab",
+        headless=False,
+        viewport_width=1280,
+        viewport_height=720,
+        timeout=10000.0,
+    )
+
+    # (description, action string) pairs that simulate the user's actions
+    steps = [
+        ("Select source text", "click('#source-text')"),
+        ("Select all text", "press('Control+A')"),
+        ("Copy text", "press('Control+C')"),
+        ("Navigate to target page", "click('#open-target-btn')"),
+        ("Click target input field", "click('#target-text')"),
+        ("Paste text", "press('Control+V')"),
+        ("Submit form", "click('#submit-btn')"),
+    ]
+
+    try:
+        obs = env.reset()
+        print(f"Goal: {obs.goal}\n")
+
+        for i, (description, action_str) in enumerate(steps, 1):
+            print(f"Step {i}: {description}")
+            obs = env.step(BrowserGymAction(action_str=action_str))
+
+            # Show which page we're on
+            current_page = "unknown"
+            if obs.metadata and 'custom_data' in obs.metadata:
+                current_page = obs.metadata['custom_data'].get('current_page', 'unknown')
+            print(f" Reward: {obs.reward}, Done: {obs.done}, Page: {current_page}")
+
+            # Pause so the browser actions are visible
+            time.sleep(1)
+
+            if obs.done:
+                print(f"\n✓ Task completed! Total reward: {env.state.cum_reward}")
+                break
+    finally:
+        env.close()
+    print("-" * 80)
+
+def single_tab_copy_paste_example():
+    """Run the scripted single-tab copy-paste demo in a visible browser.
+
+    The environment is closed even if resetting or stepping raises.
+    """
+    print("Custom BrowserGym Task Example: Copy-Paste")
+    print("-" * 80)
+
+    # headless=False so the scripted actions can be watched as they run
+    env = BrowserGymEnvironment(
+        benchmark="custom",
+        task_name="copy-paste",
+        headless=False,
+        viewport_width=1280,
+        viewport_height=720,
+        timeout=10000.0,
+    )
+
+    # (description, action string) pairs that solve the task
+    steps = [
+        ("Click source text field", "click('#source-text')"),
+        ("Select all text", "press('Control+A')"),
+        ("Copy text", "press('Control+C')"),
+        ("Click target field", "click('#target-text')"),
+        ("Paste text", "press('Control+V')"),
+        ("Click submit button", "click('#submit-btn')"),
+    ]
+
+    try:
+        obs = env.reset()
+        print(f"Goal: {obs.goal}\n")
+
+        for i, (description, action_str) in enumerate(steps, 1):
+            print(f"Step {i}: {description}")
+            obs = env.step(BrowserGymAction(action_str=action_str))
+            print(f" Reward: {obs.reward}, Done: {obs.done}")
+
+            # Pause so the browser actions are visible
+            time.sleep(1)
+            if obs.done:
+                print(f"\n✓ Task completed! Total reward: {env.state.cum_reward}")
+                break
+    finally:
+        env.close()
+    print("-" * 80)
+
+def main():
+    """Run the single-tab demo, pause three seconds, then run the multi-tab demo."""
+    demos = (
+        ("Single-Tab Copy-Paste", single_tab_copy_paste_example),
+        ("\nMulti-Tab Copy-Paste", multi_tab_copy_paste_example),
+    )
+
+    for index, (banner, run_demo) in enumerate(demos):
+        # Pause between demos (not before the first one)
+        if index > 0:
+            time.sleep(3)
+        print(banner)
+        run_demo()
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/src/envs/browsergym_env/README.md b/src/envs/browsergym_env/README.md
index d730ce37..ca3a531b 100644
--- a/src/envs/browsergym_env/README.md
+++ b/src/envs/browsergym_env/README.md
@@ -25,7 +25,7 @@ BrowserGym provides a complete pipeline for developing web agents: train on simp
## Quick Start - Training (MiniWoB)
-### No Setup Required! 🎉
+### No Setup Required!
```python
from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
@@ -58,7 +58,350 @@ for episode in range(1000):
env.close()
```
-### Available Tasks by Benchmark
+## Custom Tasks - Create Your Own Benchmarks
+
+In addition to official BrowserGym benchmarks (MiniWoB, WebArena, etc.), you can create **custom tasks** for domain-specific training or prototyping.
+
+### Why Custom Tasks?
+
+**Official Benchmarks** (miniwob, webarena):
+- Established, well-tested tasks
+- Standardized evaluation
+- Community benchmarks
+- Fixed task set - can't add your own
+- Requires BrowserGym package installation
+- Must integrate with BrowserGym's registration system
+
+**Custom Tasks**:
+- Create unlimited domain-specific tasks
+- No BrowserGym package needed
+- No registration complexity
+- Full control over HTML, rewards, termination
+- Rapid prototyping and iteration
+- Not standardized (for research/training only)
+
+### Quick Start - Custom Tasks
+
+```python
+from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+
+# Use a custom task (no BrowserGym installation needed!)
+env = BrowserGymEnv.from_docker_image(
+ "ghcr.io/openenv/browsergym-env:latest",
+ environment={
+ "BROWSERGYM_BENCHMARK": "custom",
+ "BROWSERGYM_TASK_NAME": "copy-paste", # or "copy-paste-multitab"
+ }
+)
+
+# Train on your custom task
+result = env.reset()
+print(f"Goal: {result.observation.goal}")
+
+action = BrowserGymAction(action_str="click('#source-text')")
+result = env.step(action)
+print(f"Reward: {result.reward}")
+
+env.close()
+```
+
+### Available Custom Tasks
+
+| Task Name | Description | Difficulty | Multi-Page |
+|-----------|-------------|------------|------------|
+| `copy-paste` | Copy text from one field to another | Easy | No |
+| `copy-paste-multitab` | Copy text across two pages | Medium | Yes |
+
+### Action Format Reference
+
+Custom tasks support BrowserGym-style action strings:
+
+- **Click**: `click('button')` or `click('#submit')` or `click('.classname')`
+- **Fill**: `fill('input[name="username"]', 'john@example.com')`
+- **Navigate**: `goto('https://example.com')` or `goto('file:///path/to/page.html')`
+- **Press key**: `press('Enter')` or `press('Control+C')`
+- **Scroll**: `scroll('down')` or `scroll('up')`
+- **Custom JavaScript**: Any other string is executed as JavaScript in the browser context
+
+**Examples:**
+```python
+# Click actions
+BrowserGymAction(action_str="click('#submit-btn')")
+BrowserGymAction(action_str="click('button.primary')")
+
+# Fill forms
+BrowserGymAction(action_str="fill('#email', 'user@example.com')")
+BrowserGymAction(action_str="fill('input[name=\"password\"]', 'secret123')")
+
+# Keyboard
+BrowserGymAction(action_str="press('Tab')")
+BrowserGymAction(action_str="press('Control+A')")
+
+# Navigation
+BrowserGymAction(action_str="goto('https://example.com')")
+
+# JavaScript (for complex interactions)
+BrowserGymAction(action_str="document.querySelector('#dropdown').value = 'option2'")
+```
+
+### Creating Custom Tasks
+
+Custom tasks are defined in `server/custom/custom_tasks.py`. Each task needs:
+
+1. **Task HTML** - Minimal HTML page(s) with your UI
+2. **Python Task Class** - Defines behavior, rewards, termination
+3. **Registration** - Add to task registry
+
+**File Structure:**
+```
+server/custom/
+ custom_models.py # CustomGymAction, CustomGymObservation, CustomGymState
+ custom_base.py # Base class for custom environments
+ custom_tasks.py # Task registry and implementations
+ tasks/ # HTML files for tasks
+ copy-paste.html
+ copy-paste-source.html
+ copy-paste-target.html
+```
+
+**Design Philosophy** (Following Official Benchmarks):
+
+ **DO:**
+- Keep HTML minimal and functional (like MiniWoB)
+- Let agents figure out what to do from task description
+- Use simple, clean styling
+- Focus on task logic, not visual appeal
+
+ **DON'T:**
+- Add step-by-step instructions in HTML
+- Use fancy animations or gradients
+- Add visual hints or progress indicators
+- Use emojis or decorative elements
+
+**Example: Single-Page Task**
+
+```python
+# In server/custom/custom_tasks.py
+from custom_base import CustomBrowserGymEnvironment
+
+class MyCustomTask(CustomBrowserGymEnvironment):
+ def _get_task_url(self) -> str:
+ """Return path to your HTML file."""
+ import os
+ task_html = os.path.join(
+ os.path.dirname(__file__),
+ "tasks",
+ "my-task.html"
+ )
+ return f"file://{task_html}"
+
+ def _get_goal_description(self) -> str:
+ """Return task instruction for the agent."""
+ return "Click the submit button after filling the form"
+
+ async def _extract_observation(self, page) -> dict:
+ """Extract state from the page."""
+ content = await page.content()
+ form_valid = await page.evaluate(
+ "document.querySelector('form')?.checkValidity() || false"
+ )
+
+ return {
+ "text": content, # Full HTML for agent
+ "pruned_html": content[:1000], # Truncated version
+ "custom_data": {
+ "form_valid": form_valid,
+ }
+ }
+
+ def _calculate_reward(self, page_data, action, error=None) -> float:
+ """Calculate reward based on page state."""
+ if error:
+ return -0.1 # Small penalty for errors
+
+ custom_data = page_data.get("custom_data", {})
+ if custom_data.get("form_valid"):
+ return 1.0 # Success!
+
+ return 0.0 # No progress
+
+ def _check_done(self, page_data) -> bool:
+ """Check if task is complete."""
+ custom_data = page_data.get("custom_data", {})
+ return custom_data.get("form_valid", False)
+
+# Register your task in server/custom/custom_tasks.py
+register_custom_task("my-task", MyCustomTask)
+```
+
+**Step-by-Step Registration:**
+1. Create your task class in `server/custom/custom_tasks.py` (or import it)
+2. Call `register_custom_task("task-name", YourTaskClass)` at the bottom of the file
+3. Create HTML file(s) in `server/custom/tasks/` directory if needed
+4. Use with `BROWSERGYM_TASK_NAME="task-name"`
+
+**Example: Multi-Page Task**
+
+```python
+class MyMultiPageTask(CustomBrowserGymEnvironment):
+ async def _extract_observation(self, page) -> dict:
+ content = await page.content()
+ current_url = page.url
+
+ # Determine which page we're on
+ if "page1" in current_url:
+ data = await page.evaluate("getPage1Data()")
+ return {
+ "text": content,
+ "custom_data": {"current_page": "page1", **data}
+ }
+ elif "page2" in current_url:
+ data = await page.evaluate("getPage2Data()")
+ return {
+ "text": content,
+ "custom_data": {"current_page": "page2", **data}
+ }
+
+ return {"text": content, "custom_data": {}}
+
+ def _calculate_reward(self, page_data, action, error=None) -> float:
+ """Reward for navigation and completion."""
+ custom_data = page_data.get("custom_data", {})
+ current_page = custom_data.get("current_page")
+
+ # Reward for successfully navigating to page2
+ if current_page == "page2" and "goto" in action.lower():
+ return 0.3
+
+ # Reward for task completion on page2
+ if current_page == "page2" and custom_data.get("task_complete"):
+ return 1.0
+
+ return 0.0
+
+register_custom_task("my-multitab-task", MyMultiPageTask)
+```
+
+### Custom Task HTML Guidelines
+
+Follow official benchmark style (MiniWoB, WebArena):
+
+```html
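+<!DOCTYPE html>
+<html>
+<head>
+    <title>My Task</title>
+</head>
+<body>
+    <form id="task-form">
+        <input type="text" id="name" name="name" required>
+        <input type="email" id="email" name="email" required>
+        <input type="submit" id="submit-btn" value="Submit">
+    </form>
+
+    <div id="success" hidden>Success!</div>
+    <div id="error" hidden>Error: Please fill all fields.</div>
+
+    <!-- Task logic (event handlers, state getters) goes in a script block here -->
+</body>
+</html>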
+```
+
+**Key Principles:**
+- No visual hints or progress bars
+- No step-by-step instructions in HTML
+- No emojis or decorative elements
+- Simple, clean, functional UI
+- Agent figures out task from goal description
+
+### Custom vs Official Benchmarks
+
+| Aspect | Official (miniwob, webarena) | Custom |
+|--------|----------------------------|--------|
+| **Installation** | Requires browsergym-{benchmark} | No packages needed |
+| **Task Creation** | Fixed task set | Unlimited custom tasks |
+| **Registration** | gym.make() system | Simple Python registry |
+| **Browser Control** | BrowserGym internals | Playwright directly |
+| **HTML Location** | BrowserGym package | Local server/custom/ directory |
+| **Use Case** | Standardized evaluation | Rapid prototyping, domain-specific training |
+| **Community** | Established benchmarks | Your own tasks |
+
+### When to Use Custom Tasks
+
+ **Use Custom Tasks For:**
+- Rapid prototyping of new task ideas
+- Domain-specific training (e.g., corporate workflows, specialized forms)
+- Testing new agent architectures quickly
+- Educational purposes
+- Tasks not covered by official benchmarks
+
+ **Use Official Benchmarks For:**
+- Publishing research results
+- Comparing with other papers
+- Standardized evaluation
+- Established task benchmarks
+
+### Advanced: Custom Task Features
+
+**Dynamic Task Generation:**
+```python
+class DynamicFormTask(CustomBrowserGymEnvironment):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.num_fields = self.custom_params.get("num_fields", 3)
+
+ def _get_task_url(self) -> str:
+ # Generate HTML dynamically
+ html = self._generate_form_html(self.num_fields)
+ # Use data: URL
+ return f"data:text/html,{html}"
+
+# Use with custom parameters
+env = BrowserGymEnv(environment={
+ "BROWSERGYM_BENCHMARK": "custom",
+ "BROWSERGYM_TASK_NAME": "dynamic-form",
+ "num_fields": "5" # Custom parameter
+})
+```
+
+**State Persistence Across Pages:**
+```python
+class MultiPageWithState(CustomBrowserGymEnvironment):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.task_state = {} # Persistent state
+
+ def _calculate_reward(self, page_data, action, error=None) -> float:
+ # Access state from previous pages
+ if self.task_state.get("collected_item"):
+ return 1.0
+ return 0.0
+```
+
+**See also:**
+- `server/custom/README.md` - Detailed custom task documentation
+- `server/custom/custom_tasks.py` - Example implementations
+- `examples/browsergym_custom_example.py` - Usage examples
+
+---
+
+### Available Tasks by Benchmark
#### MiniWoB++ Tasks (Training - 100+ tasks)
@@ -67,72 +410,72 @@ MiniWoB tasks are organized by difficulty and type. Here are the main categories
**Click Tasks** (Basic interaction)
| Task Name | Description | Difficulty |
|-----------|-------------|------------|
-| `click-test` | Click a single button | ⭐ Easy |
-| `click-button` | Click button with specific text | ⭐ Easy |
-| `click-button-sequence` | Click buttons in order | ⭐⭐ Medium |
-| `click-checkboxes` | Select specific checkboxes | ⭐⭐ Medium |
-| `click-checkboxes-soft` | Select checkboxes (multiple valid) | ⭐⭐ Medium |
-| `click-checkboxes-large` | Many checkboxes to select from | ⭐⭐ Medium |
-| `click-checkboxes-transfer` | Transfer learning variation | ⭐⭐ Medium |
-| `click-dialog` | Click correct button in dialog | ⭐ Easy |
-| `click-dialog-2` | More complex dialog | ⭐⭐ Medium |
-| `click-link` | Click on a link | ⭐ Easy |
-| `click-option` | Select from dropdown | ⭐⭐ Medium |
-| `click-pie` | Click on pie chart slice | ⭐⭐ Medium |
-| `click-scroll-list` | Click item in scrollable list | ⭐⭐⭐ Hard |
-| `click-shades` | Click on specific color shade | ⭐⭐ Medium |
-| `click-shape` | Click on specific shape | ⭐⭐ Medium |
-| `click-tab` | Switch between tabs | ⭐⭐ Medium |
-| `click-tab-2` | More complex tab switching | ⭐⭐⭐ Hard |
-| `click-widget` | Click on UI widget | ⭐⭐ Medium |
+| `click-test` | Click a single button | Easy |
+| `click-button` | Click button with specific text | Easy |
+| `click-button-sequence` | Click buttons in order | Medium |
+| `click-checkboxes` | Select specific checkboxes | Medium |
+| `click-checkboxes-soft` | Select checkboxes (multiple valid) | Medium |
+| `click-checkboxes-large` | Many checkboxes to select from | Medium |
+| `click-checkboxes-transfer` | Transfer learning variation | Medium |
+| `click-dialog` | Click correct button in dialog | Easy |
+| `click-dialog-2` | More complex dialog | Medium |
+| `click-link` | Click on a link | Easy |
+| `click-option` | Select from dropdown | Medium |
+| `click-pie` | Click on pie chart slice | Medium |
+| `click-scroll-list` | Click item in scrollable list | Hard |
+| `click-shades` | Click on specific color shade | Medium |
+| `click-shape` | Click on specific shape | Medium |
+| `click-tab` | Switch between tabs | Medium |
+| `click-tab-2` | More complex tab switching | Hard |
+| `click-widget` | Click on UI widget | Medium |
**Text Entry Tasks** (Typing and forms)
| Task Name | Description | Difficulty |
|-----------|-------------|------------|
-| `enter-text` | Type text into input field | ⭐ Easy |
-| `enter-text-dynamic` | Dynamic text entry | ⭐⭐ Medium |
-| `enter-text-2` | Multiple text fields | ⭐⭐ Medium |
-| `enter-password` | Fill password field | ⭐ Easy |
-| `enter-date` | Enter a date | ⭐⭐ Medium |
-| `enter-time` | Enter a time | ⭐⭐ Medium |
-| `login-user` | Complete login form | ⭐⭐ Medium |
-| `login-user-popup` | Login via popup | ⭐⭐⭐ Hard |
+| `enter-text` | Type text into input field | Easy |
+| `enter-text-dynamic` | Dynamic text entry | Medium |
+| `enter-text-2` | Multiple text fields | Medium |
+| `enter-password` | Fill password field | Easy |
+| `enter-date` | Enter a date | Medium |
+| `enter-time` | Enter a time | Medium |
+| `login-user` | Complete login form | Medium |
+| `login-user-popup` | Login via popup | Hard |
**Navigation Tasks** (Multi-step interaction)
| Task Name | Description | Difficulty |
|-----------|-------------|------------|
-| `navigate-tree` | Navigate through tree structure | ⭐⭐⭐ Hard |
-| `search-engine` | Use search interface | ⭐⭐ Medium |
-| `use-autocomplete` | Interact with autocomplete | ⭐⭐⭐ Hard |
-| `book-flight` | Book a flight (complex form) | ⭐⭐⭐⭐ Very Hard |
-| `choose-date` | Pick date from calendar | ⭐⭐⭐ Hard |
-| `choose-date-easy` | Simplified date picker | ⭐⭐ Medium |
-| `choose-date-medium` | Medium difficulty date picker | ⭐⭐⭐ Hard |
-| `choose-list` | Select from long list | ⭐⭐ Medium |
+| `navigate-tree` | Navigate through tree structure | Hard |
+| `search-engine` | Use search interface | Medium |
+| `use-autocomplete` | Interact with autocomplete | Hard |
+| `book-flight` | Book a flight (complex form) | Very Hard |
+| `choose-date` | Pick date from calendar | Hard |
+| `choose-date-easy` | Simplified date picker | Medium |
+| `choose-date-medium` | Medium difficulty date picker | Hard |
+| `choose-list` | Select from long list | Medium |
**Visual/Spatial Tasks** (Requires visual understanding)
| Task Name | Description | Difficulty |
|-----------|-------------|------------|
-| `count-sides` | Count sides of shape | ⭐⭐ Medium |
-| `count-shape` | Count specific shapes | ⭐⭐ Medium |
-| `find-word` | Find word in text | ⭐⭐ Medium |
-| `focus-text` | Focus on text element | ⭐ Easy |
-| `focus-text-2` | More complex focus task | ⭐⭐ Medium |
-| `grid-coordinate` | Click grid coordinate | ⭐⭐ Medium |
-| `guess-number` | Guess a number game | ⭐⭐⭐ Hard |
-| `identify-shape` | Identify shape type | ⭐⭐ Medium |
-| `read-table` | Extract info from table | ⭐⭐⭐ Hard |
-| `read-table-2` | More complex table reading | ⭐⭐⭐ Hard |
+| `count-sides` | Count sides of shape | Medium |
+| `count-shape` | Count specific shapes | Medium |
+| `find-word` | Find word in text | Medium |
+| `focus-text` | Focus on text element | Easy |
+| `focus-text-2` | More complex focus task | Medium |
+| `grid-coordinate` | Click grid coordinate | Medium |
+| `guess-number` | Guess a number game | Hard |
+| `identify-shape` | Identify shape type | Medium |
+| `read-table` | Extract info from table | Hard |
+| `read-table-2` | More complex table reading | Hard |
**Email/Social Tasks** (Realistic scenarios)
| Task Name | Description | Difficulty |
|-----------|-------------|------------|
-| `email-inbox` | Manage email inbox | ⭐⭐⭐⭐ Very Hard |
-| `email-inbox-forward` | Forward emails | ⭐⭐⭐⭐ Very Hard |
-| `email-inbox-nl` | Natural language email task | ⭐⭐⭐⭐ Very Hard |
-| `email-inbox-star-reply` | Star and reply to emails | ⭐⭐⭐⭐ Very Hard |
-| `social-media` | Social media interaction | ⭐⭐⭐⭐ Very Hard |
-| `social-media-some` | Partial social media task | ⭐⭐⭐ Hard |
+| `email-inbox` | Manage email inbox | Very Hard |
+| `email-inbox-forward` | Forward emails | Very Hard |
+| `email-inbox-nl` | Natural language email task | Very Hard |
+| `email-inbox-star-reply` | Star and reply to emails | Very Hard |
+| `social-media` | Social media interaction | Very Hard |
+| `social-media-some` | Partial social media task | Hard |
**Total:** 100+ tasks across all categories
@@ -416,7 +759,26 @@ Environment variables:
## Supported Benchmarks
-### 1. MiniWoB++ (Training) ✅ Recommended for Training
+### 1. Custom Tasks (Rapid Prototyping) - For Development
+
+- **Unlimited tasks**: Create domain-specific tasks
+- **No dependencies**: No BrowserGym package needed
+- **Instant iteration**: Modify HTML and logic quickly
+- **Full control**: Define rewards, termination, UI
+- **Fast setup**: Just add Python class and HTML file
+
+**Use Case**: Rapid prototyping, domain-specific training, testing new ideas
+
+**Tasks**: `copy-paste`, `copy-paste-multitab`, *[your tasks here]*
+
+```python
+env = BrowserGymEnv(environment={
+ "BROWSERGYM_BENCHMARK": "custom",
+ "BROWSERGYM_TASK_NAME": "copy-paste"
+})
+```
+
+### 2. MiniWoB++ (Training) - Recommended for Training
- **100+ tasks** ranging from simple (click buttons) to complex (form filling, navigation)
- **Fast**: Instant resets, quick episodes
@@ -426,7 +788,7 @@ Environment variables:
**Use Case**: Train agents on fundamental web navigation skills
-### 2. WebArena (Evaluation) 📊 Benchmark
+### 3. WebArena (Evaluation) - Benchmark
- **812 realistic tasks** across 6 websites
- **Complex**: Multi-step reasoning, real web interfaces
@@ -436,7 +798,7 @@ Environment variables:
**Use Case**: Evaluate agents on realistic web tasks
-### 3. VisualWebArena (Evaluation) 👁️ Visual Benchmark
+### 4. VisualWebArena (Evaluation) - Visual Benchmark
- **910 tasks** requiring visual understanding
- **Multimodal**: Both text and visual observations
@@ -445,7 +807,7 @@ Environment variables:
**Use Case**: Test visual web navigation capabilities
-### 4. WorkArena (Evaluation) 💼 Enterprise Benchmark
+### 5. WorkArena (Evaluation) - Enterprise Benchmark
- **Enterprise tasks**: CRM, project management, etc.
- **Realistic workflows**: Real enterprise software
@@ -517,16 +879,16 @@ python app.py
```
browsergym_env/
-├── __init__.py                    # Module exports
-├── models.py                      # Action, Observation, State dataclasses
-├── client.py                      # HTTPEnvClient implementation
-├── README.md                      # This file
-└── server/
-    ├── __init__.py
-    ├── app.py                     # FastAPI application
-    ├── browsergym_environment.py  # Environment implementation
-    ├── Dockerfile                 # Container specification
-    └── requirements.txt           # Python dependencies
+ __init__.py # Module exports
+ models.py # Action, Observation, State dataclasses
+ client.py # HTTPEnvClient implementation
+ README.md # This file
+ server/
+ __init__.py
+ app.py # FastAPI application
+ browsergym_environment.py # Environment implementation
+ Dockerfile # Container specification
+ requirements.txt # Python dependencies
```
## References
diff --git a/src/envs/browsergym_env/server/browsergym_environment.py b/src/envs/browsergym_env/server/browsergym_environment.py
index 42f30a6f..da132bcf 100644
--- a/src/envs/browsergym_env/server/browsergym_environment.py
+++ b/src/envs/browsergym_env/server/browsergym_environment.py
@@ -9,7 +9,9 @@
"""
import importlib
-from typing import Any, Dict, Optional
+import os
+import sys
+from typing import Any, Dict, Optional, TYPE_CHECKING
from uuid import uuid4
import gymnasium as gym
@@ -21,6 +23,39 @@
BrowserGymState,
)
+# Add the server directory to sys.path to allow custom module imports
+_SERVER_DIR = os.path.dirname(os.path.abspath(__file__))
+if _SERVER_DIR not in sys.path:
+ sys.path.insert(0, _SERVER_DIR)
+
+# Import custom models for custom benchmark
+# Use TYPE_CHECKING to avoid runtime import issues with type hints
+if TYPE_CHECKING:
+ from custom.custom_models import (
+ CustomGymAction,
+ CustomGymObservation,
+ CustomGymState,
+ )
+
+try:
+ from custom.custom_models import (
+ CustomGymAction as _CustomGymAction,
+ CustomGymObservation as _CustomGymObservation,
+ CustomGymState as _CustomGymState,
+ )
+ CUSTOM_AVAILABLE = True
+ CustomGymAction = _CustomGymAction
+ CustomGymObservation = _CustomGymObservation
+ CustomGymState = _CustomGymState
+ _CUSTOM_IMPORT_ERROR = None
+except ImportError as e:
+ CUSTOM_AVAILABLE = False
+ CustomGymAction = None # type: ignore
+ CustomGymObservation = None # type: ignore
+ CustomGymState = None # type: ignore
+ _CUSTOM_IMPORT_ERROR = str(e)
+
+
class BrowserGymEnvironment(Environment):
"""BrowserGym environment wrapper for OpenEnv.
@@ -61,53 +96,99 @@ def __init__(
self.timeout = timeout
self.gym_kwargs = gym_kwargs
- # Build environment ID
- if task_name:
- self.env_id = f"browsergym/{benchmark}.{task_name}"
+ # Check if this is a custom benchmark
+ self.is_custom = benchmark == "custom"
+
+ if self.is_custom:
+ # Handle custom benchmark differently
+ if not CUSTOM_AVAILABLE:
+ raise ValueError(
+ f"Custom benchmark requested but custom models not available.\n"
+ f"Import error: {_CUSTOM_IMPORT_ERROR}\n"
+ f"Make sure custom/custom_models.py exists in {_SERVER_DIR}/custom/"
+ )
+
+ if not task_name:
+ raise ValueError("task_name is required for custom benchmark")
+
+ # Import and instantiate custom environment
+ try:
+ from custom.custom_tasks import get_custom_task
+ self.custom_env = get_custom_task(
+ task_name=task_name,
+ headless=headless,
+ viewport_width=viewport_width,
+ viewport_height=viewport_height,
+ timeout=timeout,
+ **gym_kwargs
+ )
+ except ImportError as e:
+ raise ValueError(
+ f"Failed to import custom task '{task_name}': {e}\n"
+ f"Make sure the task is registered in custom/custom_tasks.py"
+ ) from e
+
+ self.gym_env = None
+ self.env_id = f"custom/{task_name}"
+
+ # Use CustomGymState for custom benchmarks
+ self._state = CustomGymState(
+ episode_id=str(uuid4()),
+ step_count=0,
+ benchmark="custom",
+ task_name=task_name,
+ )
else:
- self.env_id = f"browsergym/{benchmark}"
-
- # force import the benchmark module
- benchmark_modules = {
- "miniwob": "browsergym.envs.miniwob",
- "webarena": "browsergym.envs.webarena",
- "visualwebarena": "browsergym.envs.visualwebarena",
- "workarena": "browsergym.envs.workarena",
- }
- module_path = benchmark_modules.get(benchmark)
- try:
- if module_path:
- importlib.import_module(module_path)
+ # Original BrowserGym benchmark handling
+ # Build environment ID
+ if task_name:
+ self.env_id = f"browsergym/{benchmark}.{task_name}"
else:
- importlib.import_module("browsergym")
- except ModuleNotFoundError as import_error:
- raise ValueError(
- f"Failed to import BrowserGym benchmark '{benchmark}': {import_error}\n"
- f"Make sure the package browsergym-{benchmark} is installed."
- ) from import_error
-
- # Create the BrowserGym environment
- try:
- self.gym_env = gym.make(
- self.env_id,
- headless=headless,
- viewport={"width": viewport_width, "height": viewport_height},
- timeout=timeout,
- **gym_kwargs,
+ self.env_id = f"browsergym/{benchmark}"
+
+ # force import the benchmark module
+ benchmark_modules = {
+ "miniwob": "browsergym.envs.miniwob",
+ "webarena": "browsergym.envs.webarena",
+ "visualwebarena": "browsergym.envs.visualwebarena",
+ "workarena": "browsergym.envs.workarena",
+ }
+ module_path = benchmark_modules.get(benchmark)
+ try:
+ if module_path:
+ importlib.import_module(module_path)
+ else:
+ importlib.import_module("browsergym")
+ except ModuleNotFoundError as import_error:
+ raise ValueError(
+ f"Failed to import BrowserGym benchmark '{benchmark}': {import_error}\n"
+ f"Make sure the package browsergym-{benchmark} is installed."
+ ) from import_error
+
+ # Create the BrowserGym environment
+ try:
+ self.gym_env = gym.make(
+ self.env_id,
+ headless=headless,
+ viewport={"width": viewport_width, "height": viewport_height},
+ timeout=timeout,
+ **gym_kwargs,
+ )
+ except Exception as e:
+ raise ValueError(
+ f"Failed to create BrowserGym environment '{self.env_id}': {e}\n"
+ f"Make sure the benchmark is installed (e.g., pip install browsergym-{benchmark})"
+ ) from e
+
+ # State tracking for standard benchmarks
+ self._state = BrowserGymState(
+ episode_id=str(uuid4()),
+ step_count=0,
+ benchmark=benchmark,
+ task_name=task_name or "",
)
- except Exception as e:
- raise ValueError(
- f"Failed to create BrowserGym environment '{self.env_id}': {e}\n"
- f"Make sure the benchmark is installed (e.g., pip install browsergym-{benchmark})"
- )
-
- # State tracking
- self._state = BrowserGymState(
- episode_id=str(uuid4()),
- step_count=0,
- benchmark=benchmark,
- task_name=task_name or "",
- )
+
+ self.custom_env = None
self._last_obs: Optional[Dict[str, Any]] = None
self._last_info: Optional[Dict[str, Any]] = None
@@ -126,6 +207,14 @@ def reset(
Returns:
Initial observation for the task
"""
+ if self.is_custom:
+ # Handle custom environment reset
+ obs = self.custom_env.reset(seed=seed)
+ self._state = self.custom_env.state
+ # Convert CustomGymObservation to BrowserGymObservation
+ return self._convert_custom_observation(obs)
+
+ # Original BrowserGym handling
# Generate new episode ID
self._state = BrowserGymState(
episode_id=str(uuid4()),
@@ -157,6 +246,15 @@ def step(self, action: BrowserGymAction) -> BrowserGymObservation:
Returns:
Observation after executing the action
"""
+ if self.is_custom:
+ # Convert BrowserGymAction to CustomGymAction
+ custom_action = CustomGymAction(action_str=action.action_str)
+ obs = self.custom_env.step(custom_action)
+ self._state = self.custom_env.state
+ # Convert CustomGymObservation to BrowserGymObservation
+ return self._convert_custom_observation(obs)
+
+ # Original BrowserGym handling
self._state.step_count += 1
# Execute action in gym environment
@@ -260,6 +358,34 @@ def _create_observation(
metadata=browsergym_metadata,
)
+ def _convert_custom_observation(
+        self, custom_obs: "CustomGymObservation" # type: ignore
+    ) -> BrowserGymObservation:
+        """Translate a custom-task observation into a BrowserGymObservation.
+
+        Args:
+            custom_obs: Observation produced by the custom environment.
+
+        Returns:
+            A BrowserGymObservation carrying the same field values; metadata
+            holds ``custom_data`` first, then the entries of custom_obs.metadata.
+        """
+        merged_metadata = {"custom_data": custom_obs.custom_data}
+        merged_metadata.update(custom_obs.metadata or {})
+        return BrowserGymObservation(
+            text=custom_obs.text,
+            url=custom_obs.url,
+            screenshot=custom_obs.screenshot,
+            goal=custom_obs.goal,
+            axtree_txt=custom_obs.axtree_txt,
+            pruned_html=custom_obs.pruned_html,
+            error=custom_obs.error,
+            last_action_error=custom_obs.last_action_error,
+            done=custom_obs.done,
+            reward=custom_obs.reward,
+            metadata=merged_metadata,
+        )
+
@property
def state(self) -> BrowserGymState:
"""Get the current environment state."""
@@ -267,5 +393,9 @@ def state(self) -> BrowserGymState:
def close(self) -> None:
"""Clean up environment resources."""
- if hasattr(self, "gym_env"):
- self.gym_env.close()
+ if self.is_custom:
+ if self.custom_env:
+ self.custom_env.close()
+ else:
+ if hasattr(self, "gym_env"):
+ self.gym_env.close()
diff --git a/src/envs/browsergym_env/server/custom/__init__.py b/src/envs/browsergym_env/server/custom/__init__.py
new file mode 100644
index 00000000..885973c4
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/__init__.py
@@ -0,0 +1,9 @@
+"""Custom BrowserGym tasks module.
+
+This module provides custom task functionality for BrowserGym environments.
+Custom tasks are registered in custom_tasks.py and can be used by setting
+benchmark="custom" in BrowserGymEnvironment.
+"""
+
+# The custom tasks are registered in custom_tasks.py
+# No need to import anything here - imports happen when needed
diff --git a/src/envs/browsergym_env/server/custom/custom_base.py b/src/envs/browsergym_env/server/custom/custom_base.py
new file mode 100644
index 00000000..043ffccd
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/custom_base.py
@@ -0,0 +1,346 @@
+"""Base custom environment for BrowserGym custom tasks.
+
+This module provides a base class for creating custom BrowserGym tasks that
+are not part of the official benchmarks. It simulates the BrowserGym gym
+environment interface using Playwright directly.
+"""
+
+import ast
+import asyncio
+import os
+import sys
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+from uuid import uuid4
+
+from playwright.async_api import async_playwright, Browser, Page, Playwright
+
+# Add current directory to path for relative imports
+_CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+if _CURRENT_DIR not in sys.path:
+ sys.path.insert(0, _CURRENT_DIR)
+
+from custom_models import (
+ CustomGymAction,
+ CustomGymObservation,
+ CustomGymState,
+)
+
+
+class CustomBrowserGymEnvironment(ABC):
+    """Base class for custom BrowserGym environments.
+
+    Provides a synchronous Gym-like interface (``reset``, ``step``, ``close``)
+    that drives Playwright directly instead of going through BrowserGym's
+    registration system. All Playwright coroutines are run on one event loop
+    that is cached on the instance.
+
+    To create a custom task, subclass this class and implement:
+        1. ``_get_task_url()`` to return the starting URL
+        2. ``_get_goal_description()`` to return the goal shown to the agent
+        3. ``_extract_observation()`` to parse page state
+        4. ``_calculate_reward()`` to compute rewards
+        5. ``_check_done()`` to determine episode termination
+    """
+
+    # Action names parsed by _execute_action; any other string is handed to
+    # the page as JavaScript.
+    _ACTION_NAMES = ("click", "fill", "goto", "press", "scroll")
+
+    # Vertical wheel delta (in pixels) for each supported scroll direction.
+    _SCROLL_DELTAS = {"down": 500, "up": -500}
+
+    def __init__(
+        self,
+        task_name: str,
+        headless: bool = True,
+        viewport_width: int = 1280,
+        viewport_height: int = 720,
+        timeout: float = 10000.0,
+        max_steps: int = 50,
+        **kwargs: Any,
+    ):
+        """Initialize the custom environment.
+
+        No browser is started here; Playwright is launched by reset().
+
+        Args:
+            task_name: Name of your custom task
+            headless: Whether to run browser in headless mode
+            viewport_width: Browser viewport width
+            viewport_height: Browser viewport height
+            timeout: Action timeout in milliseconds
+            max_steps: Maximum steps per episode
+            **kwargs: Additional custom parameters (kept in ``custom_params``)
+        """
+        self.task_name = task_name
+        self.headless = headless
+        self.viewport_width = viewport_width
+        self.viewport_height = viewport_height
+        self.timeout = timeout
+        self.max_steps = max_steps
+        self.custom_params = kwargs
+
+        # Playwright objects (initialized in reset)
+        self._playwright: Optional[Playwright] = None
+        self._browser: Optional[Browser] = None
+        self._page: Optional[Page] = None
+        self._event_loop: Optional[asyncio.AbstractEventLoop] = None
+
+        # State tracking
+        self._state = self._new_state()
+
+    def _new_state(self) -> CustomGymState:
+        """Build a fresh episode state with a new episode ID."""
+        return CustomGymState(
+            episode_id=str(uuid4()),
+            step_count=0,
+            benchmark="custom",
+            task_name=self.task_name,
+            max_steps=self.max_steps,
+        )
+
+    @abstractmethod
+    def _get_task_url(self) -> str:
+        """Get the starting URL for this task.
+
+        Returns:
+            URL to navigate to when resetting the environment
+        """
+
+    @abstractmethod
+    def _get_goal_description(self) -> str:
+        """Get the goal/instruction for this task.
+
+        Returns:
+            Human-readable description of the task goal
+        """
+
+    @abstractmethod
+    async def _extract_observation(self, page: Page) -> Dict[str, Any]:
+        """Extract observation data from the current page state.
+
+        The keys read by this base class are ``text``, ``screenshot``,
+        ``axtree_txt``, ``pruned_html`` and ``custom_data``; all are optional.
+
+        Args:
+            page: Playwright Page object
+
+        Returns:
+            Dictionary with observation data (text, axtree_txt, etc.)
+        """
+
+    @abstractmethod
+    def _calculate_reward(
+        self,
+        page_data: Dict[str, Any],
+        action: str,
+        error: Optional[str] = None,
+    ) -> float:
+        """Calculate reward for the current step.
+
+        Args:
+            page_data: Data extracted from _extract_observation
+            action: Action that was executed
+            error: Error message if action failed (empty string when the
+                action succeeded)
+
+        Returns:
+            Reward value
+        """
+
+    @abstractmethod
+    def _check_done(self, page_data: Dict[str, Any]) -> bool:
+        """Check if the episode should terminate.
+
+        Args:
+            page_data: Data extracted from _extract_observation
+
+        Returns:
+            True if episode should end, False otherwise
+        """
+
+    def _get_or_create_event_loop(self) -> asyncio.AbstractEventLoop:
+        """Return the event loop used for every Playwright call.
+
+        The loop is cached on the instance so that the Playwright objects
+        created in reset() are always driven by the loop they were created on.
+        A new loop is created (and installed as the current loop) only when no
+        usable loop exists yet.
+        """
+        cached = self._event_loop
+        if cached is not None and not cached.is_closed():
+            return cached
+
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_closed():
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+        except RuntimeError:
+            # No current event loop in this thread.
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
+        self._event_loop = loop
+        return loop
+
+    def _build_observation(
+        self,
+        page_data: Dict[str, Any],
+        *,
+        goal: str,
+        error: str,
+        last_action_error: bool,
+        done: bool,
+        reward: float,
+    ) -> CustomGymObservation:
+        """Assemble a CustomGymObservation from extracted page data."""
+        return CustomGymObservation(
+            text=page_data.get("text", ""),
+            url=self._page.url,
+            screenshot=page_data.get("screenshot"),
+            goal=goal,
+            axtree_txt=page_data.get("axtree_txt", ""),
+            pruned_html=page_data.get("pruned_html", ""),
+            error=error,
+            last_action_error=last_action_error,
+            done=done,
+            reward=reward,
+            custom_data=page_data.get("custom_data"),
+        )
+
+    async def _async_reset(self, seed: Optional[int] = None) -> CustomGymObservation:
+        """Async implementation of reset.
+
+        Args:
+            seed: Accepted for interface compatibility; currently unused
+
+        Returns:
+            Initial observation of the freshly loaded task page
+        """
+        # Generate new episode ID
+        self._state = self._new_state()
+
+        # Start Playwright and the browser independently, so a failed browser
+        # launch is retried on the next reset instead of leaving
+        # self._browser unset while self._playwright is already populated.
+        if self._playwright is None:
+            self._playwright = await async_playwright().start()
+        if self._browser is None:
+            self._browser = await self._playwright.chromium.launch(
+                headless=self.headless
+            )
+
+        # Replace the previous episode's page with a new one
+        if self._page:
+            await self._page.close()
+            self._page = None
+
+        self._page = await self._browser.new_page(
+            viewport={
+                "width": self.viewport_width,
+                "height": self.viewport_height,
+            }
+        )
+
+        # Set timeout
+        self._page.set_default_timeout(self.timeout)
+
+        # Navigate to task URL
+        await self._page.goto(self._get_task_url())
+
+        # Extract initial observation
+        page_data = await self._extract_observation(self._page)
+        goal = self._get_goal_description()
+
+        self._state.current_url = self._page.url
+        self._state.goal = goal
+
+        return self._build_observation(
+            page_data,
+            goal=goal,
+            error="",
+            last_action_error=False,
+            done=False,
+            reward=0.0,
+        )
+
+    async def _async_step(self, action_str: str) -> CustomGymObservation:
+        """Async implementation of step.
+
+        Action failures are not raised; they are reported through the
+        observation's ``error`` and ``last_action_error`` fields.
+
+        Raises:
+            RuntimeError: If called before reset() has created a page
+        """
+        if self._page is None:
+            raise RuntimeError("Environment has no page; call reset() before step().")
+
+        self._state.step_count += 1
+
+        error_msg = ""
+        last_action_error = False
+
+        try:
+            # BrowserGym actions are Python-like function calls that are
+            # parsed and executed against the page.
+            await self._execute_action(action_str)
+        except Exception as e:
+            # Deliberately broad: any action failure becomes part of the
+            # observation so the agent can react to it.
+            error_msg = str(e)
+            last_action_error = True
+
+        # Extract observation
+        page_data = await self._extract_observation(self._page)
+
+        # Calculate reward
+        reward = self._calculate_reward(page_data, action_str, error_msg)
+        self._state.cum_reward += reward
+
+        # Check if done
+        done = self._check_done(page_data) or self._state.step_count >= self.max_steps
+
+        # Update state
+        self._state.current_url = self._page.url
+
+        return self._build_observation(
+            page_data,
+            goal=self._state.goal,
+            error=error_msg,
+            last_action_error=last_action_error,
+            done=done,
+            reward=reward,
+        )
+
+    @staticmethod
+    def _parse_action_args(raw_args: str, arity: int) -> list:
+        """Split the text between an action's parentheses into string arguments.
+
+        Arguments are parsed as Python literals, so quotes, commas and escape
+        sequences inside a quoted argument are handled correctly. Input that is
+        not a valid literal list (for example the unquoted ``scroll(down)``)
+        falls back to splitting on commas and trimming surrounding quotes.
+
+        Args:
+            raw_args: Text between the opening and closing parenthesis
+            arity: Number of arguments the action expects; bounds the number
+                of comma splits in the fallback path
+
+        Returns:
+            List of argument strings (empty when no arguments were given)
+        """
+        raw_args = raw_args.strip()
+        if not raw_args:
+            return []
+        try:
+            parsed = ast.literal_eval(f"({raw_args},)")
+        except (ValueError, SyntaxError, TypeError):
+            pieces = raw_args.split(",", max(arity - 1, 0))
+            return [piece.strip().strip("'\"") for piece in pieces]
+        return [str(item) for item in parsed]
+
+    async def _execute_action(self, action_str: str) -> None:
+        """Execute a BrowserGym-style action string.
+
+        Supported calls are ``click(selector)``, ``fill(selector, text)``,
+        ``goto(url)``, ``press(key)`` and ``scroll('up' | 'down')``. Any other
+        string is evaluated as JavaScript in the page.
+
+        Args:
+            action_str: Action string like "click('button')" or "fill('input', 'text')"
+
+        Raises:
+            ValueError: If a supported action lacks its closing parenthesis or
+                scroll() is given an unsupported direction
+        """
+        action_str = action_str.strip()
+        name, paren, remainder = action_str.partition("(")
+
+        if not paren or name not in self._ACTION_NAMES:
+            # SECURITY: unrecognized actions are evaluated as JavaScript in
+            # the page. Only pass action strings from a trusted source.
+            await self._page.evaluate(action_str)
+            return
+
+        if not remainder.endswith(")"):
+            raise ValueError(
+                f"Malformed action, expected a closing ')': {action_str!r}"
+            )
+
+        args = self._parse_action_args(remainder[:-1], 2 if name == "fill" else 1)
+        first = args[0] if args else ""
+
+        if name == "click":
+            await self._page.click(first)
+        elif name == "fill":
+            await self._page.fill(first, args[1] if len(args) > 1 else "")
+        elif name == "goto":
+            await self._page.goto(first)
+        elif name == "press":
+            await self._page.keyboard.press(first)
+        else:  # scroll
+            delta = self._SCROLL_DELTAS.get(first)
+            if delta is None:
+                raise ValueError(
+                    f"Unsupported scroll direction {first!r}; expected 'up' or 'down'"
+                )
+            await self._page.mouse.wheel(0, delta)
+
+    def reset(self, seed: Optional[int] = None) -> CustomGymObservation:
+        """Reset the environment.
+
+        Args:
+            seed: Random seed for reproducibility
+
+        Returns:
+            Initial observation
+        """
+        loop = self._get_or_create_event_loop()
+        return loop.run_until_complete(self._async_reset(seed))
+
+    def step(self, action: CustomGymAction) -> CustomGymObservation:
+        """Execute an action.
+
+        Args:
+            action: Action to execute
+
+        Returns:
+            Observation after executing the action
+        """
+        loop = self._get_or_create_event_loop()
+        return loop.run_until_complete(self._async_step(action.action_str))
+
+    @property
+    def state(self) -> CustomGymState:
+        """Get the current environment state."""
+        return self._state
+
+    def close(self) -> None:
+        """Clean up environment resources.
+
+        Safe to call more than once. The page, browser and Playwright driver
+        are each shut down even if an earlier shutdown step fails, and the
+        handles are cleared so a later reset() starts a fresh browser.
+        """
+        async def _async_close() -> None:
+            try:
+                if self._page:
+                    await self._page.close()
+            finally:
+                try:
+                    if self._browser:
+                        await self._browser.close()
+                finally:
+                    if self._playwright:
+                        await self._playwright.stop()
+
+        if not self._playwright:
+            return
+
+        loop = self._get_or_create_event_loop()
+        try:
+            loop.run_until_complete(_async_close())
+        finally:
+            self._page = None
+            self._browser = None
+            self._playwright = None
diff --git a/src/envs/browsergym_env/server/custom/custom_models.py b/src/envs/browsergym_env/server/custom/custom_models.py
new file mode 100644
index 00000000..3356b5a0
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/custom_models.py
@@ -0,0 +1,107 @@
+"""Data models for custom BrowserGym tasks.
+
+These models are used specifically for custom tasks that are not part of the
+official BrowserGym benchmarks (miniwob, webarena, visualwebarena, workarena).
+"""
+
+import sys
+import os
+from dataclasses import dataclass
+from typing import List, Optional, Dict, Any
+
+# Add src directory to path for core imports
+_SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', '..'))
+if _SRC_DIR not in sys.path:
+ sys.path.insert(0, _SRC_DIR)
+
+from core.env_server.types import Action, Observation, State
+
+
+@dataclass(kw_only=True)
+class CustomGymAction(Action):
+    """Action to be executed in a custom BrowserGym environment.
+
+    Custom actions use the BrowserGym call-string format and may carry
+    additional task-specific data in ``metadata``.
+
+    Example actions (the forms parsed by the custom base environment):
+    - "click('Submit button')"
+    - "fill('username', 'john@example.com')"
+    - "goto('https://example.com')"
+    - "scroll('down')"
+    - "press('Enter')"
+    """
+
+    action_str: str
+    """Natural language action string (e.g., "click('Submit')")"""
+
+    metadata: Optional[Dict[str, Any]] = None
+    """Optional metadata for custom task-specific data"""
+
+
+@dataclass(kw_only=True)
+class CustomGymObservation(Observation):
+    """Observation returned from a custom BrowserGym environment.
+
+    Contains multiple observation modalities including text (accessibility tree
+    or DOM), visual (screenshot), and page metadata, plus a free-form
+    ``custom_data`` dictionary for task-specific values.
+
+    All fields declared here are keyword-only and have defaults.
+    """
+    # NOTE(review): done, reward and metadata are not declared here; they are
+    # presumably inherited from the Observation base class - confirm.
+
+    text: str = ""
+    """Text representation of the page (accessibility tree or DOM)"""
+
+    url: str = ""
+    """Current URL of the page"""
+
+    screenshot: Optional[List[List[List[int]]]] = None
+    """Screenshot as numpy array [height, width, channels] (if visual observation enabled)"""
+
+    goal: str = ""
+    """Task goal/instruction for the current episode"""
+
+    axtree_txt: str = ""
+    """Full accessibility tree as text"""
+
+    pruned_html: str = ""
+    """Pruned HTML content (interactive elements only)"""
+
+    error: str = ""
+    """Error message if action execution failed"""
+
+    last_action_error: bool = False
+    """Whether the last action resulted in an error"""
+
+    custom_data: Optional[Dict[str, Any]] = None
+    """Optional custom data specific to your task"""
+
+
+# NOTE(review): unlike the action and observation models in this module, this
+# dataclass is not declared with kw_only=True - confirm this is intentional.
+@dataclass
+class CustomGymState(State):
+    """State of a custom BrowserGym environment.
+
+    Tracks the current task and the progress through an episode, plus an
+    optional ``custom_state`` dictionary for task-specific values.
+    """
+    # NOTE(review): episode_id and step_count are not declared here; they are
+    # presumably inherited from the State base class - confirm.
+
+    benchmark: str = "custom"
+    """Benchmark name (always 'custom' for custom tasks)"""
+
+    task_name: str = ""
+    """Specific custom task name (e.g., 'copy-paste', 'data-entry')"""
+
+    task_id: Optional[str] = None
+    """Task ID for custom task tracking"""
+
+    goal: str = ""
+    """Task goal/instruction"""
+
+    current_url: str = ""
+    """Current URL of the active page"""
+
+    max_steps: Optional[int] = None
+    """Maximum steps allowed for this task"""
+
+    cum_reward: float = 0.0
+    """Cumulative reward for the current episode"""
+
+    custom_state: Optional[Dict[str, Any]] = None
+    """Optional custom state data specific to your task"""
diff --git a/src/envs/browsergym_env/server/custom/custom_tasks.py b/src/envs/browsergym_env/server/custom/custom_tasks.py
new file mode 100644
index 00000000..59a9798c
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/custom_tasks.py
@@ -0,0 +1,320 @@
+"""Registry for custom BrowserGym tasks.
+
+This module provides a central place to register and retrieve custom tasks.
+Add your custom tasks here to make them available through the BrowserGym environment.
+"""
+
+import sys
+import os
+from typing import Any, Dict
+
+# Add current directory to path for relative imports
+_CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+if _CURRENT_DIR not in sys.path:
+ sys.path.insert(0, _CURRENT_DIR)
+
+from custom_base import CustomBrowserGymEnvironment
+
+
+# Registry of custom tasks
+_CUSTOM_TASKS: Dict[str, type] = {}
+
+
+def register_custom_task(name: str, task_class: type) -> None:
+    """Register a custom task.
+
+    Registering a name that already exists replaces the previous entry.
+
+    Args:
+        name: Task name (e.g., 'copy-paste', 'data-entry')
+        task_class: Class that extends CustomBrowserGymEnvironment
+
+    Raises:
+        ValueError: If task_class is not a class extending
+            CustomBrowserGymEnvironment
+    """
+    # issubclass() raises TypeError for non-class arguments, so check
+    # isinstance first: every invalid argument then yields ValueError.
+    is_valid = isinstance(task_class, type) and issubclass(
+        task_class, CustomBrowserGymEnvironment
+    )
+    if not is_valid:
+        raise ValueError(
+            f"Task class must extend CustomBrowserGymEnvironment, got {task_class}"
+        )
+    _CUSTOM_TASKS[name] = task_class
+
+
+def get_custom_task(task_name: str, **kwargs: Any) -> CustomBrowserGymEnvironment:
+    """Instantiate a registered custom task.
+
+    Args:
+        task_name: Name under which the task was registered
+        **kwargs: Forwarded to the task constructor together with task_name
+
+    Returns:
+        A new instance of the registered task class
+
+    Raises:
+        ValueError: If no task is registered under task_name
+    """
+    if task_name in _CUSTOM_TASKS:
+        factory = _CUSTOM_TASKS[task_name]
+        return factory(task_name=task_name, **kwargs)
+
+    # Unknown task: list what is registered to help the caller.
+    registered = ", ".join(_CUSTOM_TASKS.keys())
+    if not registered:
+        registered = "none"
+    raise ValueError(
+        f"Custom task '{task_name}' not found. "
+        f"Available tasks: {registered}. "
+        f"Register your task using register_custom_task()."
+    )
+
+
+def list_custom_tasks() -> list[str]:
+    """Return the names of all registered custom tasks.
+
+    Returns:
+        A new list of task names, in registration order
+    """
+    return [task_name for task_name in _CUSTOM_TASKS]
+
+
+# ============================================================================
+# Copy-Paste in a single page HTML task
+# ============================================================================
+
+class CopyPasteTask(CustomBrowserGymEnvironment):
+    """Copy text from one field and paste into another."""
+
+    def _get_task_url(self) -> str:
+        """Get the file:// URL of the bundled copy-paste task page."""
+        from pathlib import Path
+
+        # as_uri() needs an absolute path and takes care of percent-encoding
+        # and platform-specific path syntax.
+        task_html = Path(__file__).resolve().parent / "tasks" / "copy-paste.html"
+        return task_html.as_uri()
+
+    def _get_goal_description(self) -> str:
+        """Get the goal description."""
+        return "Copy the text from the source field and paste it into the target field, then click Submit."
+
+    async def _extract_observation(self, page) -> dict:
+        """Extract observation from the page.
+
+        Returns the page HTML together with the current source/target field
+        values and the success message text. Extraction failures are reported
+        in the returned dictionary rather than raised.
+        """
+        try:
+            content = await page.content()
+
+            # Get the current values of source and target fields
+            source_value = await page.evaluate(
+                "document.querySelector('#source-text')?.value || ''"
+            )
+            target_value = await page.evaluate(
+                "document.querySelector('#target-text')?.value || ''"
+            )
+
+            # Get success message if visible
+            success_msg = await page.evaluate(
+                "document.querySelector('#success-message')?.textContent || ''"
+            )
+
+            return {
+                "text": content,
+                "pruned_html": content[:1000],  # Truncate for observation
+                "custom_data": {
+                    "source_value": source_value,
+                    "target_value": target_value,
+                    "success_message": success_msg,
+                },
+            }
+        except Exception as e:
+            # Best effort: keep the episode running and surface the failure
+            # through the observation instead.
+            return {
+                "text": f"Error extracting observation: {e}",
+                "custom_data": {"error": str(e)},
+            }
+
+    @staticmethod
+    def _is_success(page_data: dict) -> bool:
+        """Return True when the page shows the success message."""
+        # "or {}" also covers an explicit custom_data of None.
+        custom_data = page_data.get("custom_data") or {}
+        return "Success!" in custom_data.get("success_message", "")
+
+    def _calculate_reward(
+        self,
+        page_data: dict,
+        action: str,
+        error: str | None = None
+    ) -> float:
+        """Calculate reward based on page state.
+
+        Returns -0.1 when the action failed, 1.0 once the success message is
+        shown, 0.5 while the target field matches a non-empty source field,
+        and 0.0 otherwise.
+        """
+        if error:
+            return -0.1  # Small penalty for errors
+
+        # Check if task is completed successfully
+        if self._is_success(page_data):
+            return 1.0
+
+        # Partial reward if text is copied correctly
+        custom_data = page_data.get("custom_data") or {}
+        source = custom_data.get("source_value", "")
+        target = custom_data.get("target_value", "")
+
+        if source and target and source == target:
+            return 0.5
+
+        return 0.0
+
+    def _check_done(self, page_data: dict) -> bool:
+        """Check if the task is complete (success message is shown)."""
+        return self._is_success(page_data)
+
+
+# Register the example task
+register_custom_task("copy-paste", CopyPasteTask)
+
+
+# ============================================================================
+# Multi-Tab Copy-Paste Task
+# ============================================================================
+
+class CopyPasteMultiTabTask(CustomBrowserGymEnvironment):
+    """Copy text from one tab and paste it into another tab.
+
+    This task demonstrates handling multiple browser tabs/pages.
+    The agent needs to:
+    1. Copy text from the source page (tab 1)
+    2. Navigate/switch to the target page (tab 2)
+    3. Paste the text into the target field
+    4. Submit the form
+    """
+
+    # Text that must end up in the target field for the partial reward.
+    EXPECTED_TEXT = "Hello from the source page!"
+
+    def _get_task_url(self) -> str:
+        """Get the file:// URL for the first tab (source page)."""
+        from pathlib import Path
+
+        # as_uri() needs an absolute path and takes care of percent-encoding
+        # and platform-specific path syntax.
+        task_html = (
+            Path(__file__).resolve().parent / "tasks" / "copy-paste-source.html"
+        )
+        return task_html.as_uri()
+
+    def _get_goal_description(self) -> str:
+        """Get the goal description."""
+        return (
+            "Copy the text from the source page, then navigate to the target page "
+            "(click 'Open Target Page' button), paste the text into the input field, "
+            "and click Submit."
+        )
+
+    @staticmethod
+    def _page_file_name(url: str) -> str:
+        """Return the last path segment of url, without query or fragment."""
+        path = url.split("#", 1)[0].split("?", 1)[0]
+        return path.rstrip("/").rsplit("/", 1)[-1]
+
+    async def _extract_observation(self, page) -> dict:
+        """Extract observation from the current page.
+
+        The page is classified as "source", "target" or "unknown" from the
+        file name in its URL. Extraction failures are reported in the returned
+        dictionary rather than raised.
+        """
+        try:
+            content = await page.content()
+
+            # Classify by file name only, so a parent directory that happens
+            # to contain "source" or "target" cannot cause a misclassification.
+            page_name = self._page_file_name(page.url)
+
+            if "source" in page_name:
+                # On source page
+                source_value = await page.evaluate(
+                    "document.querySelector('#source-text')?.textContent || ''"
+                )
+
+                return {
+                    "text": content,
+                    "pruned_html": content[:1000],
+                    "custom_data": {
+                        "current_page": "source",
+                        "source_value": source_value,
+                        "task_step": "copy_from_source",
+                    },
+                }
+
+            if "target" in page_name:
+                # On target page
+                target_value = await page.evaluate(
+                    "document.querySelector('#target-text')?.value || ''"
+                )
+                success_msg = await page.evaluate(
+                    "document.querySelector('#success-message')?.textContent || ''"
+                )
+
+                return {
+                    "text": content,
+                    "pruned_html": content[:1000],
+                    "custom_data": {
+                        "current_page": "target",
+                        "target_value": target_value,
+                        "success_message": success_msg,
+                        "task_step": "paste_to_target",
+                    },
+                }
+
+            # Unknown page
+            return {
+                "text": content,
+                "custom_data": {
+                    "current_page": "unknown",
+                    "error": "Not on source or target page",
+                },
+            }
+
+        except Exception as e:
+            # Best effort: keep the episode running and surface the failure
+            # through the observation instead.
+            return {
+                "text": f"Error extracting observation: {e}",
+                "custom_data": {"error": str(e)},
+            }
+
+    def _calculate_reward(
+        self,
+        page_data: dict,
+        action: str,
+        error: str | None = None
+    ) -> float:
+        """Calculate reward based on page state and action.
+
+        Returns -0.1 when the action failed, 1.0 once the success message is
+        shown, 0.3 for a goto action that ends on the target page, 0.6 when
+        the target field contains the expected text, and 0.0 otherwise.
+        """
+        if error:
+            return -0.1
+
+        # "or {}" also covers an explicit custom_data of None.
+        custom_data = page_data.get("custom_data") or {}
+
+        # Big reward for completing the task
+        if "Success!" in custom_data.get("success_message", ""):
+            return 1.0
+
+        if custom_data.get("current_page", "") != "target":
+            return 0.0
+
+        # Small reward for successfully navigating to target page
+        if "goto" in action.lower():
+            return 0.3
+
+        # Medium reward if text is pasted correctly in target
+        target_value = custom_data.get("target_value", "")
+        if target_value and self.EXPECTED_TEXT in target_value:
+            return 0.6
+
+        return 0.0
+
+    def _check_done(self, page_data: dict) -> bool:
+        """Check if the task is complete (success message is shown)."""
+        custom_data = page_data.get("custom_data") or {}
+        return "Success!" in custom_data.get("success_message", "")
+
+
+# Register the multi-tab task
+register_custom_task("copy-paste-multitab", CopyPasteMultiTabTask)
+
+
+# ============================================================================
+# Add your own custom tasks below by:
+# 1. Creating a class that extends CustomBrowserGymEnvironment
+# 2. Implementing the required methods
+# 3. Registering it with register_custom_task()
+# ============================================================================
+
+# Example:
+# class MyCustomTask(CustomBrowserGymEnvironment):
+# def _get_task_url(self) -> str:
+# return "https://my-task-url.com"
+#
+# def _get_goal_description(self) -> str:
+# return "Do something amazing"
+#
+# async def _extract_observation(self, page) -> dict:
+# return {"text": await page.content()}
+#
+# def _calculate_reward(self, page_data, action, error=None) -> float:
+# return 1.0 if some_condition else 0.0
+#
+# def _check_done(self, page_data) -> bool:
+# return some_completion_check
+#
+# register_custom_task("my-task", MyCustomTask)
diff --git a/src/envs/browsergym_env/server/custom/tasks/copy-paste-source.html b/src/envs/browsergym_env/server/custom/tasks/copy-paste-source.html
new file mode 100644
index 00000000..137b65fe
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/tasks/copy-paste-source.html
@@ -0,0 +1,37 @@
+
+
+
+
+
+ Source Page
+
+
+
+ Source Page
+ Text to copy:
+ Hello from the source page!
+
+
+
+
+
diff --git a/src/envs/browsergym_env/server/custom/tasks/copy-paste-target.html b/src/envs/browsergym_env/server/custom/tasks/copy-paste-target.html
new file mode 100644
index 00000000..a20c5267
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/tasks/copy-paste-target.html
@@ -0,0 +1,94 @@
+
+
+
+
+
+ Target Page
+
+
+
+ Back to Source Page
+
+ Target Page
+
+
+
+
+
+
+
+
+ Success! You've completed the multi-tab copy-paste task correctly!
+ Error: The text doesn't match. Please copy the correct text from the source page.
+
+
+
+
diff --git a/src/envs/browsergym_env/server/custom/tasks/copy-paste.html b/src/envs/browsergym_env/server/custom/tasks/copy-paste.html
new file mode 100644
index 00000000..18eb2c3b
--- /dev/null
+++ b/src/envs/browsergym_env/server/custom/tasks/copy-paste.html
@@ -0,0 +1,93 @@
+
+
+
+
+
+ Copy-Paste Task
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Success! You've completed the task correctly.
+ Error: The text doesn't match. Please try again.
+
+
+
+
\ No newline at end of file