LLMs are stateless per API call — applications must manage conversation history explicitly.
# 1. In-memory sliding window
class ConversationManager:
    """Keep a sliding window of the most recent chat messages in memory.

    Each message is stored as ``{'role': ..., 'content': ...}``. The window
    holds at most ``max_turns * 2`` messages (a turn is counted as two
    messages) and discards the oldest messages first.
    """

    def __init__(self, max_turns: int = 10):
        """Create an empty history.

        Args:
            max_turns: Number of turns to retain; the history is capped at
                twice this many messages. ``0`` retains nothing.

        Raises:
            ValueError: If ``max_turns`` is negative.
        """
        if max_turns < 0:
            raise ValueError('max_turns must be non-negative')
        self.history = []
        self.max_turns = max_turns

    def add(self, role: str, content: str):
        """Append a message, then drop the oldest ones beyond the window.

        Args:
            role: Value stored under the ``'role'`` key.
            content: Value stored under the ``'content'`` key.
        """
        self.history.append({'role': role, 'content': content})
        # Trim by the number of surplus messages instead of slicing with
        # ``[-limit:]``: when the limit is 0 that slice is ``[-0:]``, i.e. the
        # whole list, so nothing would ever be dropped.
        surplus = len(self.history) - self.max_turns * 2
        if surplus > 0:
            # Delete in place so a list previously obtained from get() keeps
            # referring to the current history instead of a stale one.
            del self.history[:surplus]

    def get(self) -> list:
        """Return the retained messages, oldest first.

        The returned object is the manager's own list, not a copy.
        """
        return self.history
# 2. Redis-backed persistent memory
import redis, json
# Shared module-level Redis client used by load_history() and save_history().
# decode_responses=True asks the client to return str values instead of bytes;
# no host/port is given here, so the client library's defaults apply.
r = redis.Redis(decode_responses=True)
def load_history(session_id: str) -> list:
    """Fetch the stored conversation history for a session.

    Reads the value under the key ``'chat:' + session_id`` from the
    module-level Redis client ``r`` and decodes it as JSON.

    Args:
        session_id: Identifier appended to the ``chat:`` key prefix.

    Returns:
        The decoded history, or an empty list when nothing (or an empty
        value) is stored under the key.
    """
    key = 'chat:' + session_id
    stored = r.get(key)
    if not stored:
        # Same result as decoding the '[]' fallback document.
        return []
    return json.loads(stored)
def save_history(session_id: str, history: list, ttl_seconds: int = 3600):
    """Persist a conversation history to Redis as JSON with an expiry.

    The value is written under the key ``'chat:' + session_id`` through the
    module-level client ``r``; every save sets the expiry afresh.

    Args:
        session_id: Identifier appended to the ``chat:`` key prefix.
        history: JSON-serialisable list of messages.
        ttl_seconds: Lifetime of the stored key in seconds. Defaults to
            3600 (one hour), the value that was previously hard-coded, so
            existing two-argument calls behave as before.
    """
    r.setex('chat:' + session_id, ttl_seconds, json.dumps(history))
# 3. Summarisation memory (for very long conversations)
def summarise_history(history: list) -> str:
    """Ask the LLM for a three-sentence summary of a conversation.

    The prompt is a fixed instruction followed by ``str(history)``, and it
    is sent through ``llm.complete``.

    Args:
        history: The conversation messages to summarise.

    Returns:
        Whatever ``llm.complete`` returns for the prompt.
    """
    instruction = 'Summarise this conversation in 3 sentences: '
    return llm.complete(instruction + str(history))
# When history gets too long:
# summary = summarise_history(old_history)
# new_history = [{'role':'system','content':'Previous: ' + summary}]