Vector databases store embeddings and enable fast approximate nearest-neighbour search to find similar documents.
# Chroma (local dev — no API key needed)
import chromadb

# Client() is called with no arguments; no key or credentials are passed.
client = chromadb.Client()

# get_or_create_collection (instead of create_collection) so that running
# this snippet again against a client that already holds a 'docs' collection
# reuses it; create_collection raises when the name is already taken.
coll = client.get_or_create_collection('docs')

# documents / embeddings / ids / metadatas are given as parallel lists:
# position i in each list belongs to the same record.
coll.add(
    documents=[
        'Docker containers are isolated environments',
        'Kubernetes orchestrates container workloads',
    ],
    # NOTE: the trailing `...` stands in for the remaining vector components;
    # replace each row with a complete embedding before running this.
    embeddings=[[0.1, 0.2, ...], [0.15, 0.25, ...]],
    ids=['doc1', 'doc2'],
    metadatas=[{'source': 'docker.md'}, {'source': 'k8s.md'}],
)

# Ask for the 2 stored records closest to a single query embedding
# (same `...` placeholder convention as above).
results = coll.query(
    query_embeddings=[[0.12, 0.22, ...]],
    n_results=2,
)
# Pinecone (production managed)
import os

from pinecone import Pinecone

# Read the API key from the environment rather than hard-coding it, so a real
# credential never ends up committed with the source. A missing
# PINECONE_API_KEY fails immediately with KeyError.
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
index = pc.Index('my-index')

# Upsert one record made of an id, its vector values, and a metadata dict.
# `embedding` is not defined in this snippet; it must already be in scope.
index.upsert(vectors=[{
    'id': 'doc1',
    'values': embedding,
    'metadata': {'text': '...', 'source': 'docker.md'},
}])

# Query with top_k=5 and include_metadata=True.
# `q_embed` is likewise expected to be defined before this line runs.
results = index.query(vector=q_embed, top_k=5, include_metadata=True)
# Options: Chroma (dev) | Pinecone | Qdrant | Weaviate | pgvector