Retrieval-Augmented Generation (RAG) combines semantic retrieval with LLM generation to answer questions grounded in your own documents.
# Complete RAG implementation
import anthropic
from sentence_transformers import SentenceTransformer
import chromadb
# Shared, module-level handles used by index_docs() and ask().

# Sentence-embedding model used for both documents and questions, so the
# two are encoded by the same model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Chroma client and the collection that stores the indexed chunks.
chroma = chromadb.Client()
# get_or_create_collection instead of create_collection: re-running this
# setup in the same process (notebook cell, module reload) would otherwise
# fail because 'knowledge' already exists.
collection = chroma.get_or_create_collection('knowledge')

# Anthropic API client, constructed with no explicit arguments.
client = anthropic.Anthropic()
# --- INDEXING (run once) ---
def index_docs(docs: list[dict]) -> None:
    """Embed documents and add them to the module-level ``collection``.

    Args:
        docs: Documents to index. Each dict must provide a ``'text'`` entry
            (the content that is embedded and stored) and an ``'id'`` entry
            (the identifier it is stored under).

    Raises:
        KeyError: If a document is missing ``'text'`` or ``'id'``.
    """
    if not docs:
        # Nothing to embed or store; avoid calling the encoder and the
        # vector store with empty lists.
        return

    texts = [d['text'] for d in docs]
    ids = [d['id'] for d in docs]
    # encode() returns an array; the store is given plain nested lists.
    embeds = embedder.encode(texts).tolist()
    collection.add(documents=texts, embeddings=embeds, ids=ids)
# --- QUERY (on each user question) ---
def ask(question: str) -> str:
    """Answer *question* using only text retrieved from ``collection``.

    The question is embedded with ``embedder``, the three nearest chunks are
    fetched from ``collection``, and those chunks plus the question are sent
    to the Anthropic Messages API with a system prompt that restricts the
    answer to the supplied context.

    Args:
        question: The user's natural-language question.

    Returns:
        The text of the model's reply (all text blocks concatenated), or an
        empty string if the reply contains no text block.
    """
    # 1. Embed the question. encode() is given a one-item batch, so take
    #    the single resulting row.
    q_embed = embedder.encode([question]).tolist()[0]

    # 2. Retrieve the top-3 relevant chunks. One query embedding was sent,
    #    so the documents for it are at index 0.
    results = collection.query(query_embeddings=[q_embed], n_results=3)
    context = '\n---\n'.join(results['documents'][0])

    # 3. Generate a grounded answer.
    prompt = 'Context:\n' + context + '\nQuestion: ' + question
    response = client.messages.create(
        model='claude-opus-4-5',
        max_tokens=500,
        system='Answer ONLY from context. Say you do not know if absent.',
        messages=[{'role': 'user', 'content': prompt}],
    )
    # Join every text block rather than indexing content[0], so an empty
    # content list yields '' instead of raising IndexError.
    return ''.join(
        block.text
        for block in response.content
        if getattr(block, 'type', None) == 'text'
    )