For speed and lower memory use: pick compact dtypes (e.g. category), prefer vectorised ops over loops, use eval/query, read only the columns you need, and process large files in chunks.
Pandas Performance Tips
import pandas as pd
import numpy as np
# --- Use appropriate dtypes ---
# Narrower dtypes shrink the frame, but every value must fit the target type.
df['age'] = df['age'].astype(np.int8)  # 1 byte per value; int8 holds -128..127
df['cat'] = df['cat'].astype('category')  # saves memory for low-cardinality columns

# --- Check memory usage ---
# 'deep' / deep=True also measures the contents of object-dtype columns.
df.info(memory_usage='deep')
df.memory_usage(deep=True).sum() / 1e6  # total size in MB

# --- Use vectorised ops, not loops ---
# SLOW: iterrows() yields an (index, Series) pair per row; the add runs in Python.
result = [row['a'] + row['b'] for _, row in df.iterrows()]
# FAST: a single column-wise addition.
result = df['a'] + df['b']

# --- eval and query for large DataFrames ---
df.eval('c = a + b * 2', inplace=True)  # adds column 'c' to df in place
df.query('a > 100 and b < 50')  # returns the matching rows; df itself is not filtered

# --- Read only needed columns ---
df = pd.read_csv('big.csv', usecols=['id', 'name', 'amount'])

# --- Chunk large files ---
# With chunksize set, read_csv yields DataFrames of up to 10000 rows each
# instead of loading the whole file at once.
for chunk in pd.read_csv('huge.csv', chunksize=10000):
    # NOTE(review): `process` is not defined in this snippet; supply your own handler.
    process(chunk)