Browser agents use Playwright and vision models to navigate, click, and scrape websites autonomously.
Browser Automation Agents
from playwright.async_api import async_playwright
import anthropic, base64
# Shared synchronous Anthropic client, constructed once at import time with
# no explicit arguments; browser_agent uses it for every model request.
client = anthropic.Anthropic()
_ACTION_VERBS = ("CLICK", "TYPE", "NAVIGATE", "DONE")
_ACTION_PROMPT = (
    "What action should I take next? Respond with: "
    "CLICK selector | TYPE selector text | NAVIGATE url | DONE"
)


def _parse_action(reply: str) -> tuple:
    """Extract ``(verb, argument)`` from a model reply.

    The reply is scanned line by line; the first line whose leading token
    (ignoring trailing punctuation such as ``DONE.``) is one of
    ``_ACTION_VERBS`` wins. ``argument`` is the rest of that line, stripped.
    Returns ``("", "")`` when no line starts with a known verb.
    """
    for line in reply.splitlines():
        parts = line.split(None, 1)
        if not parts:
            continue
        verb = parts[0].rstrip(".:,;!")
        if verb in _ACTION_VERBS:
            argument = parts[1].strip() if len(parts) > 1 else ""
            return verb, argument
    return "", ""


async def browser_agent(task: str, *, max_steps: int = 10) -> None:
    """Drive a headless Chromium page with actions chosen by a vision model.

    Each step screenshots the page, sends it together with the task and the
    actions taken so far to the module-level ``client``, and executes the
    action named in the reply. The loop ends when the model answers ``DONE``
    or after ``max_steps`` steps.

    Args:
        task: Natural-language description of what to accomplish; sent as
            the first user message.
        max_steps: Maximum number of screenshot/decide/act iterations.

    Returns:
        None. The function is run for its effect on the browser page.

    Raises:
        Errors raised by the Playwright calls or the model request are not
        caught here; the browser is closed before they propagate.
    """
    import asyncio
    import functools

    loop = asyncio.get_running_loop()
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            messages = [{"role": "user", "content": task}]
            for _ in range(max_steps):
                # Take screenshot for vision
                screenshot = await page.screenshot()
                b64 = base64.b64encode(screenshot).decode()
                request = functools.partial(
                    client.messages.create,
                    model="claude-opus-4-5",
                    max_tokens=1024,
                    messages=messages + [{"role": "user", "content": [
                        {"type": "image", "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": b64,
                        }},
                        {"type": "text", "text": _ACTION_PROMPT},
                    ]}],
                )
                # `client` is synchronous: run the request in the default
                # executor so the event loop is not blocked while waiting.
                resp = await loop.run_in_executor(None, request)

                # The response may be empty or start with a non-text block,
                # so collect text blocks instead of indexing content[0].
                reply = "".join(
                    block.text
                    for block in resp.content
                    if getattr(block, "type", None) == "text"
                )
                verb, argument = _parse_action(reply)
                if verb == "DONE":
                    break
                if not argument:
                    # Unrecognized reply or a verb without its argument:
                    # nothing can be executed, so ask again next step.
                    continue

                if verb == "CLICK":
                    # Whole remainder is the selector, so selectors that
                    # contain spaces (e.g. "text=Sign in") stay intact.
                    await page.click(argument)
                elif verb == "TYPE":
                    # First token is the selector; the rest is typed as-is.
                    fields = argument.split(None, 1)
                    text = fields[1] if len(fields) > 1 else ""
                    await page.fill(fields[0], text)
                elif verb == "NAVIGATE":
                    await page.goto(argument)

                # Record the executed action so later requests carry the
                # history of what has already been done.
                messages.append(
                    {"role": "assistant", "content": f"{verb} {argument}"}
                )
        finally:
            # Close the browser even when a step raises.
            await browser.close()