Fine-tuning adapts a pre-trained LLM to a task by training it further on task-specific examples, which can improve performance and reduce prompt length.
# When to fine-tune vs prompt engineer:
# Fine-tune: consistent style/format, domain knowledge, fewer prompt tokens
# Prompt engineering: faster, cheaper, more flexible, no training data needed
# Training data format (JSONL): one JSON object per line in the real file; the example below is wrapped for readability
# fine_tune_data.jsonl
{"messages":[{"role":"system","content":"You are a SQL expert."},
{"role":"user","content":"Get all users over 30"},
{"role":"assistant","content":"SELECT * FROM users WHERE age > 30;"}]}
# OpenAI fine-tuning
from openai import OpenAI
# Build the API client with default settings (no credentials are passed
# explicitly here).
client = OpenAI()
# Upload the training examples. The context manager closes the file handle as
# soon as the upload call returns instead of leaving it open. The path matches
# the fine_tune_data.jsonl file named in the training-data section above.
with open('fine_tune_data.jsonl', 'rb') as training_data:
    file = client.files.create(file=training_data, purpose='fine-tune')
# Start a fine-tuning job that trains gpt-4o-mini on the uploaded file.
job = client.fine_tuning.jobs.create(training_file=file.id, model='gpt-4o-mini')
# Re-fetch the job by id and print its status at this moment.
print(client.fine_tuning.jobs.retrieve(job.id).status)
# LoRA (Parameter-Efficient Fine-Tuning)
# pip install peft transformers
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM
# Load the base causal-LM checkpoint that the LoRA adapters will be attached to.
model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf')
# LoRA settings: rank-8 adapters (scaling alpha 32) on the q_proj and v_proj
# modules. task_type tells PEFT the base model is a causal LM, so
# get_peft_model wraps it with the causal-LM-specific PEFT class rather than
# the generic one.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=['q_proj', 'v_proj'],
    task_type='CAUSAL_LM',
)
# Rebind `model` to the PEFT-wrapped model built from the base model and config.
model = get_peft_model(model, config)
# Trains only ~0.1% of the parameters (the LoRA adapter weights) -- much faster and cheaper than full fine-tuning