BigQuery partitioning, clustering, slot management, materialised views, and cost monitoring queries.
BigQuery Advanced Features
-- BigQuery: serverless, columnar, petabyte scale
-- Partitioned tables (reduce scan cost)
CREATE TABLE myproject.myds.orders
PARTITION BY DATE(created_at)
CLUSTER BY customer_id, status -- further sort within partition
AS SELECT * FROM staging.orders;
-- Partition pruning in queries
SELECT * FROM orders
WHERE created_at BETWEEN '2025-01-01' AND '2025-01-31' -- only reads jan partition
AND customer_id = 'C001'; -- cluster key reduces rows scanned
-- Slots (compute units): flex, on-demand, reservations
-- Cost: $5 per TB scanned (on-demand)
-- INFORMATION_SCHEMA for cost monitoring
SELECT
job_id, total_bytes_processed/1e9 AS gb_scanned,
total_bytes_processed/1e9 * 0.005 AS cost_usd,
query
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
WHERE creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
ORDER BY total_bytes_processed DESC LIMIT 20;
-- Materialised views
CREATE MATERIALIZED VIEW mv_daily_revenue AS
SELECT DATE(created_at) AS date, SUM(amount) AS revenue
FROM orders GROUP BY 1;
-- Auto-refreshed when base table changes
-- BigQuery ML
CREATE MODEL myds.churn_model OPTIONS (model_type='logistic_reg') AS
SELECT * FROM training_data;