Tutorials › AI Agents and Automation › Browser Automation Agents

Browser Automation Agents

6 min read

Browser agents use Playwright and vision models to navigate, click, and scrape websites autonomously.

Browser Automation Agents

from playwright.async_api import async_playwright
import anthropic, base64

# Module-level Anthropic client, constructed with no explicit arguments.
# browser_agent() uses it to send each screenshot to the model.
client = anthropic.Anthropic()

# Verbs the model is asked to reply with (see the prompt in browser_agent).
_ACTION_VERBS = ("DONE", "CLICK", "TYPE", "NAVIGATE")


def _parse_action(reply: str):
    """Return ``(verb, argument)`` for the first command line in *reply*.

    A command line is one whose first whitespace-separated token is one of
    ``_ACTION_VERBS``. Matching on the leading token (rather than searching
    the whole reply for a substring) keeps a selector, URL, or typed text that
    happens to contain e.g. "DONE" from being mistaken for the verb.

    Returns ``(None, "")`` when no line starts with a known verb.
    """
    for line in reply.splitlines():
        parts = line.split(None, 1)
        if not parts:
            continue
        # Tolerate trailing punctuation such as "DONE." or "CLICK:".
        verb = parts[0].rstrip(".:,!")
        if verb in _ACTION_VERBS:
            argument = parts[1].strip() if len(parts) > 1 else ""
            return verb, argument
    return None, ""


async def browser_agent(task: str):
    """Drive a headless Chromium page with actions chosen by a vision model.

    On each of at most 10 steps the current page is screenshotted, the image
    is sent to the model together with *task*, and the model's reply is
    executed as one of ``CLICK selector``, ``TYPE selector text``,
    ``NAVIGATE url`` or ``DONE`` (which ends the loop). Replies that contain
    no recognised command, or a command with a missing argument, are skipped
    and the loop proceeds to the next step.

    Args:
        task: Natural-language description of what the agent should achieve.

    Returns:
        None. The browser is closed before returning, including when a
        Playwright or API call raises.
    """
    # Imported locally so this block does not depend on a file-level import.
    import asyncio

    loop = asyncio.get_running_loop()
    messages = [{"role": "user", "content": task}]

    def ask_model(image_b64: str):
        """Blocking request for the next action given a base64 PNG screenshot."""
        return client.messages.create(
            model="claude-opus-4-5", max_tokens=1024,
            messages=messages + [{"role": "user", "content": [
                {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": image_b64}},
                {"type": "text", "text": "What action should I take next? Respond with: CLICK selector | TYPE selector text | NAVIGATE url | DONE"},
            ]}],
        )

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            for _ in range(10):
                # Take screenshot for vision
                screenshot = await page.screenshot()
                b64 = base64.b64encode(screenshot).decode()

                # `client` is synchronous; run it in the default executor so
                # the event loop is not blocked for the duration of the call.
                resp = await loop.run_in_executor(None, ask_model, b64)

                # Use the first text block; an empty or non-text response
                # yields "" and is treated as "no recognised command".
                reply = next(
                    (block.text for block in resp.content
                     if getattr(block, "type", None) == "text"),
                    "",
                )
                verb, argument = _parse_action(reply)

                if verb == "DONE":
                    break
                if not argument:
                    # Unrecognised reply or verb without its argument.
                    continue

                if verb == "CLICK":
                    # The whole remainder is the selector, so selectors that
                    # contain spaces (e.g. "div.menu > a") stay intact.
                    await page.click(argument)
                elif verb == "TYPE":
                    # First token is the selector; the rest is typed verbatim.
                    selector, _, text = argument.partition(" ")
                    await page.fill(selector, text.strip())
                elif verb == "NAVIGATE":
                    await page.goto(argument.split()[0])
        finally:
            # Close explicitly even if a step raised.
            await browser.close()

← Previous: Code Execution Agents | Next: File System Agents →

Quick Access

Browser Automation Agents

Browser Automation Agents