LLM-powered RPA uses vision and reasoning to automate browser tasks without hard-coded selectors.
LLM-Powered Robotic Process Automation
# Traditional RPA: brittle, breaks on UI changes
# LLM RPA: understands intent, adapts to changes
import anthropic
from playwright.async_api import async_playwright
import base64
# Shared module-level Anthropic client; llm_rpa_agent sends its per-step
# requests through it using plain (non-awaited) calls.
# NOTE(review): presumably picks up API credentials from the environment — confirm.
client = anthropic.Anthropic()
async def llm_rpa_agent(task: str, url: str, *, max_steps: int = 20) -> list[str]:
    """Drive a headless Chromium page toward ``task`` using LLM-chosen actions.

    On every step the current page is captured (PNG screenshot plus the first
    1000 characters of ``document.body.innerText``) and sent to the model
    together with the task and the actions taken so far. The model's reply is
    recorded and, unless it starts with ``DONE``, handed to
    ``execute_browser_action`` to be carried out on the page.

    Args:
        task: Natural-language description of what should be accomplished.
        url: Page to open before the first step.
        max_steps: Upper bound on model round-trips before giving up.

    Returns:
        The raw action strings returned by the model, in order. The last
        entry starts with ``DONE`` only if the model signalled completion
        before the step budget ran out or the model stopped returning text.
    """
    import asyncio  # local import so the block does not depend on file-level imports

    history: list[str] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            for _ in range(max_steps):
                # Capture current state.
                screenshot = base64.b64encode(await page.screenshot()).decode()
                dom_text = await page.evaluate("document.body.innerText")

                prompt = (
                    f"Task: {task}\n"
                    f"History: {history}\n"
                    f"Dom snippet: {dom_text[:1000]}\n"
                    "Next action (CLICK css_selector | TYPE css_selector text "
                    "| SCROLL | DONE):"
                )

                # Ask the LLM what to do. The module-level client is called
                # synchronously, so run the request in a worker thread to keep
                # the event loop (and Playwright's connection) responsive.
                resp = await asyncio.to_thread(
                    client.messages.create,
                    model="claude-opus-4-5",
                    max_tokens=500,
                    messages=[{
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": "image/png",
                                    "data": screenshot,
                                },
                            },
                            {"type": "text", "text": prompt},
                        ],
                    }],
                )

                # Use the first content block that carries text; an empty or
                # text-less reply gives nothing to execute, so stop instead of
                # raising IndexError/AttributeError.
                action = next(
                    (block.text for block in resp.content
                     if getattr(block, "text", None)),
                    "",
                ).strip()
                if not action:
                    break

                history.append(action)
                if action.startswith("DONE"):
                    break
                await execute_browser_action(page, action)
        finally:
            # Always release the browser, even when a step raises.
            await browser.close()
    return history