82 changes: 82 additions & 0 deletions python/job-application/README.md
@@ -0,0 +1,82 @@
# Stagehand + Browserbase: Automated Job Application Agent

## AT A GLANCE
- Goal: Automate job applications by discovering job listings and submitting applications with unique agent identifiers.
- Concurrent Processing: applies to multiple jobs in parallel with configurable concurrency limits based on Browserbase project settings.
- Dynamic Data Generation: generates unique agent IDs and email addresses for each application.
- File Upload Support: automatically uploads resume PDF from a remote URL during the application process.
- Docs → https://docs.stagehand.dev/basics/agent

## GLOSSARY
- agent: create an autonomous AI agent that can execute complex multi-step tasks
  Docs → https://docs.stagehand.dev/basics/agent#what-is-agent
- act: perform UI actions from a prompt (click, type, fill forms)
  Docs → https://docs.stagehand.dev/basics/act
- extract: extract structured data from web pages using natural language instructions
  Docs → https://docs.stagehand.dev/basics/extract
- observe: analyze a page and return selectors or action plans before executing
  Docs → https://docs.stagehand.dev/basics/observe
- asyncio.Semaphore: concurrency control mechanism that limits parallel job applications to the project's session limit (see the sketch below)
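
A minimal, stdlib-only sketch of the semaphore pattern used here (job names and timings are illustrative, not part of main.py):

```python
import asyncio

async def apply_to_job(job: str, semaphore: asyncio.Semaphore):
    # Only as many coroutines as the semaphore allows run inside this block at once
    async with semaphore:
        print(f"Applying to {job}...")
        await asyncio.sleep(1)  # stand-in for real browser work

async def main():
    semaphore = asyncio.Semaphore(2)  # e.g. a project concurrency limit of 2
    jobs = ["Job A", "Job B", "Job C", "Job D"]
    await asyncio.gather(*(apply_to_job(job, semaphore) for job in jobs))

asyncio.run(main())
```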

## QUICKSTART
1) python -m venv venv
2) source venv/bin/activate # On Windows: venv\Scripts\activate
3) pip install stagehand browserbase pydantic python-dotenv httpx
4) cp .env.example .env
5) Add required API keys/IDs to .env (BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, GOOGLE_GENERATIVE_AI_API_KEY); see the check below
6) python main.py
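
To verify step 5, a quick check (assuming you run it from the project directory so `.env` is found):

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current directory
for key in ("BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "GOOGLE_GENERATIVE_AI_API_KEY"):
    if not os.environ.get(key):
        raise SystemExit(f"Missing required env var: {key}")
print("All required keys are set")
```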

## EXPECTED OUTPUT
- Fetches the project concurrency limit from Browserbase (capped at 5)
- Initializes main Stagehand session with Browserbase
- Displays live session link for monitoring
- Navigates to agent job board
- Clicks "View Jobs" button
- Extracts all job listings with titles and URLs using Pydantic schema validation
- Closes main session
- Creates asyncio.Semaphore for concurrency control
- Applies to all jobs in parallel (respecting concurrency limit)
- For each job application:
- Generates unique agent ID and email
- Navigates to job page
- Clicks on specific job
- Fills agent identifier field
- Fills contact endpoint (email) field
- Fills deployment region field
- Uploads resume PDF from remote URL using httpx
- Selects multi-region deployment option
- Submits application
- Displays completion message when all applications are finished

## COMMON PITFALLS
- "ModuleNotFoundError": ensure all dependencies are installed via uvx install
- Missing credentials: verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and GOOGLE_GENERATIVE_AI_API_KEY
- Google API access: ensure you have access to Google's gemini-2.5-flash model
- Concurrency limits: script automatically respects Browserbase project concurrency (capped at 5)
- Resume URL: ensure the resume URL (https://agent-job-board.vercel.app/Agent%20Resume.pdf) is accessible; see the check after this list
- Job detection: verify that job listings are visible on the page and match expected structure
- Network issues: check internet connection and website accessibility
- Import errors: activate your virtual environment if you created one
- Find more information on your Browserbase dashboard → https://www.browserbase.com/sign-in
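
A quick way to confirm the resume URL is reachable, using httpx (already a dependency); this is a standalone check, not part of main.py:

```python
import httpx

resume_url = "https://agent-job-board.vercel.app/Agent%20Resume.pdf"
response = httpx.get(resume_url, follow_redirects=True)
print(response.status_code, response.headers.get("content-type"))
response.raise_for_status()  # raises if the file is not accessible
```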

## USE CASES
• Bulk job applications: Automate applying to multiple job postings simultaneously with unique credentials for each application.
• Agent deployment automation: Streamline the process of deploying multiple AI agents by automating the application and registration workflow.
• Testing & QA: Validate job application forms and workflows across multiple listings to ensure consistent functionality.
• Recruitment automation: Scale agent recruitment processes by programmatically submitting applications with generated identifiers.

## NEXT STEPS
• Add filtering: Implement job filtering by title keywords, location, or other criteria before applying.
• Error handling: Add retry logic for failed applications and better error reporting with job-specific logs (a retry sketch follows this list).
• Resume customization: Support multiple resume versions or dynamic resume generation based on job requirements.
• Application tracking: Store application status, timestamps, and results in a database for tracking and follow-up.
• Rate limiting: Add delays between applications to avoid overwhelming the target system.
• Multi-site support: Extend to support multiple job boards with site-specific form field mappings.
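
One possible shape for the retry idea, wrapping apply_to_job from main.py (a sketch only; attempt counts and delays are arbitrary):

```python
import asyncio

from main import apply_to_job  # the function defined in main.py


async def apply_with_retry(job_info, semaphore, attempts=3, delay=5.0):
    """Retry apply_to_job with a simple linear backoff."""
    for attempt in range(1, attempts + 1):
        try:
            return await apply_to_job(job_info, semaphore)
        except Exception as error:
            print(f"[{job_info.title}] attempt {attempt}/{attempts} failed: {error}")
            if attempt == attempts:
                raise
            await asyncio.sleep(delay * attempt)  # back off a little more each time
```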

## HELPFUL RESOURCES
📚 Stagehand Docs: https://docs.stagehand.dev/v2/first-steps/introduction
🎮 Browserbase: https://www.browserbase.com
💡 Try it out: https://www.browserbase.com/playground
🔧 Templates: https://www.browserbase.com/templates
📧 Need help? support@browserbase.com

243 changes: 243 additions & 0 deletions python/job-application/main.py
@@ -0,0 +1,243 @@
# Stagehand + Browserbase: Job Application Automation - See README.md for full documentation

import os
import asyncio
import time
import random
from typing import List
from dotenv import load_dotenv
from stagehand import Stagehand, StagehandConfig
from browserbase import Browserbase
from pydantic import BaseModel, Field, HttpUrl
import httpx

# Load environment variables
load_dotenv()


# Define Pydantic schemas for structured data extraction
# Using schemas ensures consistent data extraction even if page layout changes
class JobInfo(BaseModel):
    url: HttpUrl = Field(..., description="Job URL")
    title: str = Field(..., description="Job title")


class JobsData(BaseModel):
    jobs: List[JobInfo]


async def get_project_concurrency() -> int:
    """
    Fetch the project concurrency limit from the Browserbase SDK.

    Retrieves the maximum concurrent sessions allowed for the project,
    capped at 5.
    """
    bb = Browserbase(api_key=os.environ.get("BROWSERBASE_API_KEY"))

    # Use asyncio.to_thread to run the synchronous SDK call in a thread pool
    project = await asyncio.to_thread(
        bb.projects.retrieve,
        os.environ.get("BROWSERBASE_PROJECT_ID")
    )
    return min(project.concurrency, 5)


def generate_random_email() -> str:
    """Generate a random email address for form submission."""
    random_string = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=8))
    return f"agent-{random_string}@example.com"


def generate_agent_id() -> str:
    """
    Generate a unique agent identifier for job applications.

    Combines a timestamp and a random string to ensure uniqueness across
    multiple job applications and sessions.
    """
    timestamp = int(time.time() * 1000)
    random_string = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=7))
    return f"agent-{timestamp}-{random_string}"


async def apply_to_job(job_info: JobInfo, semaphore: asyncio.Semaphore):
    """
    Apply to a single job posting with automated form filling.

    Uses Stagehand to navigate to the job page, fill out the application form,
    upload the resume, and submit the application.
    """
    # Semaphore ensures we don't exceed project concurrency limits
    async with semaphore:
        # Initialize Stagehand with Browserbase for cloud-based browser automation
        config = StagehandConfig(
            env="BROWSERBASE",
            api_key=os.environ.get("BROWSERBASE_API_KEY"),
            project_id=os.environ.get("BROWSERBASE_PROJECT_ID"),
            model_name="google/gemini-2.5-flash",
            model_api_key=os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY")
        )

        try:
            # Use async context manager for automatic resource management
            async with Stagehand(config) as stagehand:
                print(f"[{job_info.title}] Session Started")

                # Get session ID for live viewing/debugging
                session_id = None
                if hasattr(stagehand, 'session_id'):
                    session_id = stagehand.session_id
                elif hasattr(stagehand, 'browserbase_session_id'):
                    session_id = stagehand.browserbase_session_id

                if session_id:
                    print(f"[{job_info.title}] Watch live: https://browserbase.com/sessions/{session_id}")

                page = stagehand.page

                # Navigate to the job URL
                await page.goto(str(job_info.url))
                print(f"[{job_info.title}] Navigated to job page")

                # Click on the specific job listing to open the application form
                await page.act(f"click on {job_info.title}")
                print(f"[{job_info.title}] Clicked on job")

                # Generate unique identifiers for this application
                agent_id = generate_agent_id()
                email = generate_random_email()

                print(f"[{job_info.title}] Agent ID: {agent_id}")
                print(f"[{job_info.title}] Email: {email}")

                # Fill out application form fields using natural language actions
                # Stagehand's act() method understands natural language instructions
                await page.act(f"type '{agent_id}' into the agent identifier field")

                await page.act(f"type '{email}' into the contact endpoint field")

                await page.act("type 'us-west-2' into the deployment region field")

                # Upload agent profile/resume file
                # Using observe() to find the upload button, then setting files programmatically
                upload_actions = await page.observe("find the file upload button for agent profile")
                if upload_actions:
                    upload_action = upload_actions[0]
                    upload_selector = str(upload_action.selector)
                    if upload_selector:
                        file_input = page.locator(upload_selector)

                        # Fetch the resume PDF from the remote URL
                        # Using httpx to download the file before uploading
                        resume_url = "https://agent-job-board.vercel.app/Agent%20Resume.pdf"
                        async with httpx.AsyncClient() as client:
                            response = await client.get(resume_url)
                            if response.status_code != 200:
                                raise Exception(f"Failed to fetch resume: {response.status_code}")
                            resume_buffer = response.content

                        # Upload the file using Playwright's set_input_files with a buffer
                        await file_input.set_input_files({
                            "name": "Agent Resume.pdf",
                            "mimeType": "application/pdf",
                            "buffer": resume_buffer,
                        })
                        print(f"[{job_info.title}] Uploaded resume from {resume_url}")

                # Select the multi-region deployment option
                await page.act("select 'Yes' for multi region deployment")

                # Submit the application form
                await page.act("click deploy agent button")

                print(f"[{job_info.title}] Application submitted successfully!")

        except Exception as error:
            print(f"[{job_info.title}] Error: {error}")
            raise


async def main():
    """
    Main application entry point.

    Orchestrates the job application process:
    1. Fetches project concurrency limits
    2. Scrapes job listings from the job board
    3. Applies to all jobs in parallel with concurrency control
    """
    print("Starting Job Application Automation...")

    # Get the project concurrency limit to control parallel execution
    max_concurrency = await get_project_concurrency()
    print(f"Executing with concurrency limit: {max_concurrency}")

    # Initialize Stagehand with Browserbase for cloud-based browser automation
    config = StagehandConfig(
        env="BROWSERBASE",
        api_key=os.environ.get("BROWSERBASE_API_KEY"),
        project_id=os.environ.get("BROWSERBASE_PROJECT_ID"),
        model_name="google/gemini-2.5-flash",
        model_api_key=os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY")
    )

    # Use async context manager for automatic resource management
    async with Stagehand(config) as stagehand:
        print("Main Stagehand Session Started")

        # Get session ID for live viewing/debugging
        session_id = None
        if hasattr(stagehand, 'session_id'):
            session_id = stagehand.session_id
        elif hasattr(stagehand, 'browserbase_session_id'):
            session_id = stagehand.browserbase_session_id

        if session_id:
            print(f"Watch live: https://browserbase.com/sessions/{session_id}")

        page = stagehand.page

        # Navigate to the agent job board homepage
        await page.goto("https://agent-job-board.vercel.app/")
        print("Navigated to agent-job-board.vercel.app")

        # Click on the "View Jobs" button to access job listings
        await page.act("click on the view jobs button")
        print("Clicked on view jobs button")

        # Extract all job listings with titles and URLs using a structured schema
        # Using extract() with a Pydantic schema ensures consistent data extraction
        jobs_result = await page.extract(
            "extract all job listings with their titles and URLs",
            schema=JobsData
        )

        jobs_data = jobs_result.jobs
        print(f"Found {len(jobs_data)} jobs")

        # Create a semaphore with the concurrency limit to bound parallel job applications
        # The semaphore ensures we don't exceed Browserbase project limits
        semaphore = asyncio.Semaphore(max_concurrency)

        # Apply to all jobs in parallel with concurrency control
        # Using asyncio.gather() to run all applications concurrently
        print(f"Starting to apply to {len(jobs_data)} jobs with max concurrency of {max_concurrency}")

        application_tasks = [apply_to_job(job, semaphore) for job in jobs_data]

        # Wait for all applications to complete
        await asyncio.gather(*application_tasks)

        print("All applications completed!")


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except Exception as err:
        print(f"Error: {err}")
        exit(1)