Customer Churn Prediction

Build a model to predict which customers are likely to churn based on their account characteristics and usage patterns.

All recipes· ml· 8 minutesintermediatesql

Customer Churn Prediction

Objective

Build a model to predict which customers are likely to churn based on their account characteristics and usage patterns.

Step 1: Create Tables and Load Data

Create main customer data table

CREATE TABLE customer_data (
    customer_id INT PRIMARY KEY,
    age INT,
    tenure_months INT,
    monthly_charges DECIMAL(10,2),
    total_charges DECIMAL(10,2),
    num_services INT,
    contract_type VARCHAR(20),
    payment_method VARCHAR(50),
    has_phone_service BOOLEAN,
    has_internet BOOLEAN,
    churned BOOLEAN
);

Insert training data

INSERT INTO customer_data (
    customer_id, age, tenure_months, monthly_charges, total_charges, 
    num_services, contract_type, payment_method, has_phone_service, 
    has_internet, churned
) VALUES
(1, 29, 1, 29.85, 29.85, 1, 'Month-to-month', 'Electronic check', true, false, true),
(2, 34, 56, 80.85, 4532.60, 4, 'Two year', 'Bank transfer', true, true, false),
(3, 48, 21, 98.75, 2074.75, 6, 'One year', 'Credit card', true, true, false),
(4, 55, 72, 105.50, 7596.00, 5, 'Two year', 'Credit card', true, true, false),
(5, 23, 3, 45.25, 135.75, 2, 'Month-to-month', 'Electronic check', false, true, true),
(6, 67, 48, 89.95, 4317.60, 4, 'One year', 'Bank transfer', true, true, false),
(7, 31, 12, 55.00, 660.00, 3, 'Month-to-month', 'Mailed check', true, true, true),
(8, 42, 36, 75.45, 2716.20, 5, 'Two year', 'Credit card', true, true, false),
(9, 28, 8, 39.99, 319.92, 2, 'Month-to-month', 'Electronic check', false, true, true),
(10, 59, 65, 110.25, 7166.25, 6, 'Two year', 'Bank transfer', true, true, false),
(11, 37, 24, 65.30, 1567.20, 3, 'One year', 'Credit card', true, true, false),
(12, 44, 15, 49.95, 749.25, 2, 'Month-to-month', 'Mailed check', true, false, true),
(13, 52, 42, 95.00, 3990.00, 5, 'One year', 'Bank transfer', true, true, false),
(14, 26, 6, 35.50, 213.00, 1, 'Month-to-month', 'Electronic check', false, true, true),
(15, 61, 60, 120.75, 7245.00, 7, 'Two year', 'Credit card', true, true, false);

Create table for new customers to predict

CREATE TABLE new_customers (
    customer_id INT PRIMARY KEY,
    age INT,
    tenure_months INT,
    monthly_charges DECIMAL(10,2),
    total_charges DECIMAL(10,2),
    num_services INT,
    contract_type VARCHAR(20),
    payment_method VARCHAR(50),
    has_phone_service BOOLEAN,
    has_internet BOOLEAN
);

Insert new customers for prediction (no 'churned' column)

INSERT INTO new_customers (
    customer_id, age, tenure_months, monthly_charges, total_charges,
    num_services, contract_type, payment_method, has_phone_service, has_internet
) VALUES
(101, 35, 2, 45.50, 91.00, 3, 'Month-to-month', 'Electronic check', true, true),
(102, 42, 18, 78.90, 1420.20, 5, 'One year', 'Credit card', true, true),
(103, 28, 5, 32.75, 163.75, 2, 'Month-to-month', 'Mailed check', false, true),
(104, 51, 30, 92.00, 2760.00, 4, 'Two year', 'Bank transfer', true, true),
(105, 39, 10, 65.25, 652.50, 3, 'Month-to-month', 'Electronic check', true, true);

Create test customers table for validation

CREATE TABLE test_customers (
    customer_id INT PRIMARY KEY,
    age INT,
    tenure_months INT,
    monthly_charges DECIMAL(10,2),
    total_charges DECIMAL(10,2),
    num_services INT,
    contract_type VARCHAR(20),
    payment_method VARCHAR(50),
    has_phone_service BOOLEAN,
    has_internet BOOLEAN,
    churned BOOLEAN  -- Include actual values for testing accuracy
);

Insert test customers with known outcomes

INSERT INTO test_customers (
    customer_id, age, tenure_months, monthly_charges, total_charges,
    num_services, contract_type, payment_method, has_phone_service,
    has_internet, churned
) VALUES
(201, 29, 2, 35.50, 71.00, 2, 'Month-to-month', 'Electronic check', true, false, true),
(202, 45, 40, 85.75, 3430.00, 5, 'Two year', 'Bank transfer', true, true, false),
(203, 31, 6, 42.25, 253.50, 2, 'Month-to-month', 'Mailed check', false, true, true),
(204, 55, 58, 99.99, 5799.42, 6, 'Two year', 'Credit card', true, true, false),
(205, 26, 3, 48.00, 144.00, 3, 'Month-to-month', 'Electronic check', true, true, true);

Step 2: Create and Run AutoML Experiment

CREATE EXPERIMENT churn_prediction_model AS
SELECT 
    age,
    tenure_months,
    monthly_charges,
    total_charges,
    num_services,
    contract_type,
    payment_method,
    has_phone_service,
    has_internet,
    churned as target
FROM customer_data
WITH (
    task_type = 'binary_classification',
    target_column = 'target',
    optimization_metric = 'auc',
    max_trials = 50,
    time_budget_seconds = 3600,
    algorithms = ['logistic_regression', 'random_forest', 'gradient_boosting', 'neural_network'],
    validation_strategy = 'stratified_kfold',
    n_folds = 5,
    feature_engineering = true,
    hyperparameter_strategy = 'bayesian'
);

Alternative: Test with all algorithms including KNN

CREATE EXPERIMENT test_with_knn AS
SELECT 
    age,
    tenure_months,
    monthly_charges,
    total_charges,
    num_services,
    contract_type,
    payment_method,
    has_phone_service,
    has_internet,
    churned as target
FROM customer_data
WITH (
    target_column = 'target',
    task_type = 'binary_classification',
    algorithm_selection = 'all',
    max_trials = 10
);

Step 3: Monitor Experiment Progress

SELECT * FROM automl_experiments 
WHERE name = 'churn_prediction_model';

View trials and their performance

SELECT 
    trial_id,
    model_type,
    hyperparameters,
    cv_score,
    training_time
FROM automl_trials 
WHERE experiment_id = (
    SELECT id FROM automl_experiments 
    WHERE name = 'churn_prediction_model'
)
ORDER BY cv_score DESC
LIMIT 10;

Check algorithms tried (for KNN test)

SELECT algorithms_tried, best_algorithm
FROM automl_experiments
WHERE name = 'test_with_knn';

Step 4: Deploy Model

-- Deploy the best model
DEPLOY MODEL churn_predictor FROM EXPERIMENT churn_prediction_model;

Step 5: Make Predictions

This is the query that you would use to make predictions.

PREDICT churn_probability USING churn_predictor AS
SELECT 
    customer_id,
    age,
    tenure_months,
    monthly_charges,
    total_charges,
    num_services,
    contract_type,
    payment_method,
    has_phone_service,
    has_internet
FROM new_customers;

Test batch predictions with validation

CREATE TABLE prediction_results AS
PREDICT churn_probability USING churn_predictor AS
SELECT * FROM test_customers;

Analyze prediction distribution

SELECT 
    CASE 
        WHEN churn_probability < 0.3 THEN 'Low Risk'
        WHEN churn_probability < 0.7 THEN 'Medium Risk'
        ELSE 'High Risk'
    END as risk_category,
    COUNT(*) as customer_count,
    AVG(churn_probability) as avg_probability
FROM prediction_results
GROUP BY risk_category;

Expected Outcomes

  • Best AUC score: 0.85-0.92
  • Best performing models: Random Forest or Gradient Boosting
  • Training time: 10-30 minutes for 50 trials
  • Top features: Contract type, tenure, monthly charges

Tags

mlsqlclassificationcustomer-analyticschurn

Run this on your own machine

Install SynapCores Community Edition free, paste the SQL or Cypher above into the bundled web UI, and watch it run.

Download Free CE