Streaming delivers tokens to the caller as they are generated, instead of waiting for the complete response; this is essential for a responsive user experience.
# Anthropic streaming
import anthropic
client = anthropic.Anthropic()

# Single-turn conversation sent to the model.
poem_request = [{'role': 'user', 'content': 'Write a poem about Python'}]

# The stream is used as a context manager so it is closed when the block
# exits; each text fragment is echoed immediately (no newline, flushed) so
# the output appears incrementally rather than all at once.
with client.messages.stream(
    model='claude-opus-4-5',
    max_tokens=1024,
    messages=poem_request,
) as stream:
    for fragment in stream.text_stream:
        print(fragment, end='', flush=True)
# OpenAI streaming
from openai import OpenAI
# NOTE: this rebinds the module-level ``client`` name to the OpenAI client.
client = OpenAI()

# ``stream=True`` makes ``create`` return an iterable of incremental chunks
# instead of a single completed response.
stream = client.chat.completions.create(
    model='gpt-4o',
    messages=[{'role': 'user', 'content': 'Write a poem'}],
    stream=True,
)

for chunk in stream:
    # A chunk can arrive with an empty ``choices`` list (for example a
    # usage-only final chunk, or provider-specific metadata chunks), so
    # indexing ``choices[0]`` unconditionally would raise IndexError.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # ``content`` is None/empty on chunks that carry no text; skip those.
    if delta_text:
        print(delta_text, end='', flush=True)
# FastAPI streaming endpoint
from fastapi.responses import StreamingResponse
# Dedicated async Anthropic client for the endpoint below. The module-level
# ``client`` cannot be used there: it is a synchronous client (and is rebound
# to the OpenAI client earlier in this file), while the handler relies on
# ``async with`` / ``async for``, which require the async SDK client.
async_client = anthropic.AsyncAnthropic()


# NOTE(review): ``app`` is assumed to be a FastAPI application instance
# defined elsewhere — it is not created in this part of the file.
@app.post('/chat')
async def chat(msg: str):
    """Stream the model's reply to ``msg`` back to the caller as plain text.

    Args:
        msg: The user message forwarded to the model as a single-turn
            conversation. NOTE(review): as a bare ``str`` parameter FastAPI
            presumably reads this from the query string, not the request
            body — confirm that is intended for a POST route.

    Returns:
        A ``StreamingResponse`` with media type ``text/plain`` whose body is
        the sequence of text fragments yielded by the model stream.
    """

    async def generate():
        # The stream context stays open for as long as the response body is
        # being consumed, and is closed when the generator finishes.
        async with async_client.messages.stream(
            model='claude-opus-4-5',
            max_tokens=500,
            messages=[{'role': 'user', 'content': msg}],
        ) as stream:
            async for text in stream.text_stream:
                yield text

    return StreamingResponse(generate(), media_type='text/plain')