Customer Churn Prediction
Objective
Build a model to predict which customers are likely to churn based on their account characteristics and usage patterns.
Step 1: Create Tables and Load Data
Create main customer data table
CREATE TABLE customer_data (
customer_id INT PRIMARY KEY,
age INT,
tenure_months INT,
monthly_charges DECIMAL(10,2),
total_charges DECIMAL(10,2),
num_services INT,
contract_type VARCHAR(20),
payment_method VARCHAR(50),
has_phone_service BOOLEAN,
has_internet BOOLEAN,
churned BOOLEAN
);
Insert training data
INSERT INTO customer_data (
customer_id, age, tenure_months, monthly_charges, total_charges,
num_services, contract_type, payment_method, has_phone_service,
has_internet, churned
) VALUES
(1, 29, 1, 29.85, 29.85, 1, 'Month-to-month', 'Electronic check', true, false, true),
(2, 34, 56, 80.85, 4532.60, 4, 'Two year', 'Bank transfer', true, true, false),
(3, 48, 21, 98.75, 2074.75, 6, 'One year', 'Credit card', true, true, false),
(4, 55, 72, 105.50, 7596.00, 5, 'Two year', 'Credit card', true, true, false),
(5, 23, 3, 45.25, 135.75, 2, 'Month-to-month', 'Electronic check', false, true, true),
(6, 67, 48, 89.95, 4317.60, 4, 'One year', 'Bank transfer', true, true, false),
(7, 31, 12, 55.00, 660.00, 3, 'Month-to-month', 'Mailed check', true, true, true),
(8, 42, 36, 75.45, 2716.20, 5, 'Two year', 'Credit card', true, true, false),
(9, 28, 8, 39.99, 319.92, 2, 'Month-to-month', 'Electronic check', false, true, true),
(10, 59, 65, 110.25, 7166.25, 6, 'Two year', 'Bank transfer', true, true, false),
(11, 37, 24, 65.30, 1567.20, 3, 'One year', 'Credit card', true, true, false),
(12, 44, 15, 49.95, 749.25, 2, 'Month-to-month', 'Mailed check', true, false, true),
(13, 52, 42, 95.00, 3990.00, 5, 'One year', 'Bank transfer', true, true, false),
(14, 26, 6, 35.50, 213.00, 1, 'Month-to-month', 'Electronic check', false, true, true),
(15, 61, 60, 120.75, 7245.00, 7, 'Two year', 'Credit card', true, true, false);
Create table for new customers to predict
CREATE TABLE new_customers (
customer_id INT PRIMARY KEY,
age INT,
tenure_months INT,
monthly_charges DECIMAL(10,2),
total_charges DECIMAL(10,2),
num_services INT,
contract_type VARCHAR(20),
payment_method VARCHAR(50),
has_phone_service BOOLEAN,
has_internet BOOLEAN
);
Insert new customers for prediction (no 'churned' column)
INSERT INTO new_customers (
customer_id, age, tenure_months, monthly_charges, total_charges,
num_services, contract_type, payment_method, has_phone_service, has_internet
) VALUES
(101, 35, 2, 45.50, 91.00, 3, 'Month-to-month', 'Electronic check', true, true),
(102, 42, 18, 78.90, 1420.20, 5, 'One year', 'Credit card', true, true),
(103, 28, 5, 32.75, 163.75, 2, 'Month-to-month', 'Mailed check', false, true),
(104, 51, 30, 92.00, 2760.00, 4, 'Two year', 'Bank transfer', true, true),
(105, 39, 10, 65.25, 652.50, 3, 'Month-to-month', 'Electronic check', true, true);
Create test customers table for validation
CREATE TABLE test_customers (
customer_id INT PRIMARY KEY,
age INT,
tenure_months INT,
monthly_charges DECIMAL(10,2),
total_charges DECIMAL(10,2),
num_services INT,
contract_type VARCHAR(20),
payment_method VARCHAR(50),
has_phone_service BOOLEAN,
has_internet BOOLEAN,
churned BOOLEAN -- Include actual values for testing accuracy
);
Insert test customers with known outcomes
INSERT INTO test_customers (
customer_id, age, tenure_months, monthly_charges, total_charges,
num_services, contract_type, payment_method, has_phone_service,
has_internet, churned
) VALUES
(201, 29, 2, 35.50, 71.00, 2, 'Month-to-month', 'Electronic check', true, false, true),
(202, 45, 40, 85.75, 3430.00, 5, 'Two year', 'Bank transfer', true, true, false),
(203, 31, 6, 42.25, 253.50, 2, 'Month-to-month', 'Mailed check', false, true, true),
(204, 55, 58, 99.99, 5799.42, 6, 'Two year', 'Credit card', true, true, false),
(205, 26, 3, 48.00, 144.00, 3, 'Month-to-month', 'Electronic check', true, true, true);
Step 2: Create and Run AutoML Experiment
CREATE EXPERIMENT churn_prediction_model AS
SELECT
age,
tenure_months,
monthly_charges,
total_charges,
num_services,
contract_type,
payment_method,
has_phone_service,
has_internet,
churned as target
FROM customer_data
WITH (
task_type = 'binary_classification',
target_column = 'target',
optimization_metric = 'auc',
max_trials = 50,
time_budget_seconds = 3600,
algorithms = ['logistic_regression', 'random_forest', 'gradient_boosting', 'neural_network'],
validation_strategy = 'stratified_kfold',
n_folds = 5,
feature_engineering = true,
hyperparameter_strategy = 'bayesian'
);
Alternative: Test with all algorithms including KNN
CREATE EXPERIMENT test_with_knn AS
SELECT
age,
tenure_months,
monthly_charges,
total_charges,
num_services,
contract_type,
payment_method,
has_phone_service,
has_internet,
churned as target
FROM customer_data
WITH (
target_column = 'target',
task_type = 'binary_classification',
algorithm_selection = 'all',
max_trials = 10
);
Step 3: Monitor Experiment Progress
SELECT * FROM automl_experiments
WHERE name = 'churn_prediction_model';
View trials and their performance
SELECT
trial_id,
model_type,
hyperparameters,
cv_score,
training_time
FROM automl_trials
WHERE experiment_id = (
SELECT id FROM automl_experiments
WHERE name = 'churn_prediction_model'
)
ORDER BY cv_score DESC
LIMIT 10;
Check algorithms tried (for KNN test)
SELECT algorithms_tried, best_algorithm
FROM automl_experiments
WHERE name = 'test_with_knn';
Step 4: Deploy Model
-- Deploy the best model
DEPLOY MODEL churn_predictor FROM EXPERIMENT churn_prediction_model;
Step 5: Make Predictions
This is the query that you would use to make predictions.
PREDICT churn_probability USING churn_predictor AS
SELECT
customer_id,
age,
tenure_months,
monthly_charges,
total_charges,
num_services,
contract_type,
payment_method,
has_phone_service,
has_internet
FROM new_customers;
Test batch predictions with validation
CREATE TABLE prediction_results AS
PREDICT churn_probability USING churn_predictor AS
SELECT * FROM test_customers;
Analyze prediction distribution
SELECT
CASE
WHEN churn_probability < 0.3 THEN 'Low Risk'
WHEN churn_probability < 0.7 THEN 'Medium Risk'
ELSE 'High Risk'
END as risk_category,
COUNT(*) as customer_count,
AVG(churn_probability) as avg_probability
FROM prediction_results
GROUP BY risk_category;
Expected Outcomes
- Best AUC score: 0.85-0.92
- Best performing models: Random Forest or Gradient Boosting
- Training time: 10-30 minutes for 50 trials
- Top features: Contract type, tenure, monthly charges