Force structured JSON outputs with JSON mode or Instructor+Pydantic for type-safe LLM data extraction.
Structured JSON Output from LLMs
# Method 1: Prompt-based JSON
system = 'Respond with valid JSON only. No markdown, no preamble.'
prompt = 'Extract: name, email, phone from: John Smith, j@test.com, 555-1234'
response = client.messages.create(
model='claude-opus-4-5', max_tokens=200,
system=system,
messages=[{'role':'user','content':prompt}]
)
import json
data = json.loads(response.content[0].text)
# Method 2: OpenAI JSON mode
response = client.chat.completions.create(
model='gpt-4o',
response_format={'type':'json_object'},
messages=[{'role':'user','content':'Extract as JSON: John, john@test.com'}]
)
# Method 3: Instructor + Pydantic (most robust)
import instructor
from pydantic import BaseModel
from openai import OpenAI
class Contact(BaseModel):
name: str
email: str
phone: str | None = None
client_i = instructor.from_openai(OpenAI())
contact = client_i.chat.completions.create(
model='gpt-4o',
response_model=Contact,
messages=[{'role':'user','content':'John Smith, john@test.com, 555-1234'}]
)
print(contact.name) # John Smith (typed Python object!)