Kafka Streams is a Java library for processing and transforming Kafka data in real time, without requiring a separate processing cluster.
# Key operations:
# filter — keep matching records
# map — transform each record
# groupBy — group for aggregation
# count — count per key
# reduce — combine values
# join — join two streams
# Python alternative: Faust
# Install (shell): pip install faust-streaming
import faust
# Faust application named 'order-processor', pointed at a local Kafka broker.
app = faust.App(
    'order-processor',
    broker='kafka://localhost:9092',
)

# Input topic: incoming orders, with values declared as plain dicts.
orders_topic = app.topic(
    'orders',
    value_type=dict,
)

# Output topic: one result record is published here per processed order.
results_topic = app.topic(
    'order-results',
    value_type=dict,
)
@app.agent(orders_topic)
async def process_orders(orders):
    """Consume the orders stream and publish a result for every order.

    Registered as a Faust agent on ``orders_topic``. For each order received,
    a result dict carrying the order's id and a ``'processed'`` status is sent
    to ``results_topic``.

    Args:
        orders: Async-iterable stream of order values supplied by the agent
            decorator. Each value is indexed with ``'id'``, so it is expected
            to be a mapping containing that key.

    Raises:
        KeyError: If an order mapping has no ``'id'`` key.
    """
    async for order in orders:
        # Build the outgoing record; only the id is carried over from the order.
        result = {'order_id': order['id'], 'status': 'processed'}
        # Await the send so each result is handed off before the next order.
        await results_topic.send(value=result)
# Run the app's command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    app.main()