Monitor LLM apps with Langfuse or LangSmith — trace every request with latency, tokens, and quality scores.
LLM Observability and Monitoring
# Key metrics to track
# Latency: time-to-first-token, total response time
# Cost: input/output tokens per request, daily spend
# Quality: user ratings, LLM-judge scores
# Error rate: API failures, timeouts, rate limit hits
# Hallucination rate: percentage of responses flagged by an NLI (natural language inference) check
# Langfuse (open-source tracing)
from langfuse.decorators import observe, langfuse_context
import time
# Declared as a generation: Langfuse treats `model` and `usage` as
# generation-specific fields, so the default span type would not record them
# as token usage.
@observe(as_type='generation')
def my_llm_call(question: str) -> str:
    """Send ``question`` to the model and attach tracing data to the call.

    The current Langfuse observation is updated with the prompt, the reply
    text, the model name, the input/output token counts reported by the
    response, and the measured request latency in seconds.

    Args:
        question: Prompt text, sent as a single ``user`` message.

    Returns:
        The ``text`` of the first content block in the response.

    Note:
        Relies on a module-level ``client`` exposing ``messages.create``;
        it is not defined in this snippet (presumably an Anthropic client --
        confirm against the surrounding file).
    """
    model_name = 'claude-opus-4-5'

    # perf_counter is monotonic, so the latency cannot be skewed (or turn
    # negative) if the system clock is adjusted while the request is in flight.
    started = time.perf_counter()
    response = client.messages.create(
        model=model_name,
        max_tokens=500,
        messages=[{'role': 'user', 'content': question}],
    )
    latency = time.perf_counter() - started

    text = response.content[0].text
    langfuse_context.update_current_observation(
        input=question,
        output=text,
        # Recording the model alongside usage lets the trace attribute the
        # token counts to a specific model.
        model=model_name,
        usage={
            'input': response.usage.input_tokens,
            'output': response.usage.output_tokens,
        },
        metadata={'latency': latency},
    )
    return text
# LangSmith (if using LangChain)
import os
# Turn on LangSmith tracing for LangChain calls made by this process.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
# 'your-key' is only a placeholder. setdefault keeps a real key that is
# already exported in the environment instead of overwriting it.
os.environ.setdefault('LANGCHAIN_API_KEY', 'your-key')
# All LangChain calls are now traced automatically