Embeddings convert text into dense numeric vectors that capture semantic meaning. Texts with similar meaning map to vectors that lie close together (for example, with high cosine similarity).
# OpenAI embeddings
from openai import OpenAI
import numpy as np
# Module-level OpenAI client; embed() below sends its requests through it.
client = OpenAI()
def embed(text: str, *, model_name: str = 'text-embedding-3-small') -> list[float]:
    """Return the embedding vector for *text* from the OpenAI embeddings API.

    Args:
        text: The string to embed.
        model_name: Embedding model to request. Defaults to
            'text-embedding-3-small' (1536 dimensions).

    Returns:
        The embedding of the first item in the API response.
    """
    response = client.embeddings.create(
        model=model_name,
        input=text,
    )
    # Only one input string is sent, so the embedding is read from data[0].
    return response.data[0].embedding
# Local embeddings (free, no API)
from sentence_transformers import SentenceTransformer
# Load the sentence-transformers model named 'all-MiniLM-L6-v2'.
model = SentenceTransformer('all-MiniLM-L6-v2') # 384 dims
# Encode two sample sentences; presumably yields one vector per sentence — confirm against the library docs.
vectors = model.encode(['Hello world', 'Hi there'])
# Cosine similarity
def cosine_sim(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Computed as dot(a, b) / (||a|| * ||b||).

    Args:
        a: First vector (any sequence NumPy can treat as a 1-D array).
        b: Second vector, the same length as *a*.

    Returns:
        The similarity, nominally in [-1, 1]. Returns 0.0 when either
        vector has zero norm (including empty vectors), because the
        direction of a zero vector is undefined.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0, which would yield nan plus a RuntimeWarning.
        return 0.0
    return np.dot(a, b) / denom
# Embed three sample sentences: two paraphrases and one unrelated headline.
e1, e2, e3 = (
    embed(sentence)
    for sentence in (
        'The cat sat on the mat',
        'A feline rested on the rug',
        'Stock market closes up 2%',
    )
)
# Compare the first sentence against its paraphrase, then against the headline.
print(cosine_sim(e1, e2))  # ~0.92 (semantically similar)
print(cosine_sim(e1, e3))  # ~0.18 (unrelated)