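GroupBy splits a DataFrame into groups by one or more key columns and computes results per group — use agg to summarize each group, transform to broadcast group values back onto the original rows, and filter to keep or drop whole groups.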
GroupBy Operations
import pandas as pd
df = pd.read_csv('data.csv')

# Basic groupby: select the salary column of each dept group once,
# then apply the reductions to that grouped selection.
salary_by_dept = df.groupby('dept')['salary']
salary_by_dept.mean()  # average salary per dept
salary_by_dept.sum()   # total salary per dept
# A list of function names produces one output column per statistic.
salary_by_dept.agg(['mean', 'min', 'max', 'count'])
# Multiple columns: a list of keys forms one group per (dept, level) pair.
group_keys = ['dept', 'level']
df.groupby(group_keys)['salary'].mean()
# Named aggregation: each keyword names an output column and states
# which source column to reduce and which function to reduce it with.
df.groupby('dept').agg(
    avg_salary=pd.NamedAgg(column='salary', aggfunc='mean'),
    max_age=pd.NamedAgg(column='age', aggfunc='max'),
    headcount=pd.NamedAgg(column='id', aggfunc='count'),  # non-null ids only
)
# Transform: unlike agg, the result keeps the original row index, so each
# row receives its own group's value and can be assigned straight back.
df['dept_avg'] = (
    df.groupby('dept')['salary']
    .transform('mean')
)
# Flag rows whose salary is strictly above their dept's average.
df['above_avg'] = df['salary'].gt(df['dept_avg'])
# Filter groups: the predicate receives each group's sub-DataFrame and
# returns a single bool; all rows of a group are kept or dropped together.
def _mean_salary_over_50k(group):
    """Return True when the group's mean salary is above 50000."""
    return group['salary'].mean() > 50000


df.groupby('dept').filter(_mean_salary_over_50k)