Great Expectations defines and validates business rules — not_null, unique, set membership, range, regex.
Data Quality with Great Expectations
pip install great_expectations
import great_expectations as gx
# Validate orders.csv against a set of business rules and publish the results.
#
# NOTE(review): get_batch(batch_kwargs, suite) and run_validation_operator
# look like the legacy (pre-1.0) Great Expectations API -- confirm that the
# installed great_expectations version still provides them.
context = gx.get_context()

# Create expectation suite for orders data
suite = context.add_expectation_suite("orders.critical")

# Define expectations (business rules) on a batch read from orders.csv
batch = context.get_batch({"path": "orders.csv"}, suite)

# order_id: must be present, populated and unique.
batch.expect_column_to_exist("order_id")
batch.expect_column_values_to_not_be_null("order_id")
batch.expect_column_values_to_be_unique("order_id")

# customer_id: every order must reference a customer.
batch.expect_column_values_to_not_be_null("customer_id")

# status: closed set of allowed values. A list (rather than a set literal)
# keeps the element order stable from run to run.
batch.expect_column_values_to_be_in_set(
    "status",
    ["pending", "completed", "cancelled", "refunded"],
)

# amount: non-negative and capped at one million.
batch.expect_column_values_to_be_between(
    "amount",
    min_value=0,
    max_value=1000000,
)

# email: basic shape check. The dot before the top-level domain is escaped so
# that it matches a literal "."; unescaped it matches any character and would
# accept values such as "user@hostXcom".
batch.expect_column_values_to_match_regex(
    "email",
    r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
)

# Table-level sanity check on the number of rows.
batch.expect_table_row_count_to_be_between(
    min_value=1000,
    max_value=10000000,
)

# NOTE(review): the expectations above are only attached to the in-memory
# batch; if they should be persisted to the suite store, a
# batch.save_expectation_suite() call is presumably needed here -- confirm.

# Validate
results = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
)
print(results.success)  # True/False

# Generate HTML data docs
context.build_data_docs()