Payment Fraud Detection
Objective
Score incoming transactions for fraud risk in near real time. Block high-risk cases.
Step 1: Create tables and load data
CREATE TABLE transactions (
transaction_id BIGINT PRIMARY KEY,
account_id BIGINT,
amount DOUBLE,
merchant_type VARCHAR(50),
country VARCHAR(50),
timestamp TIMESTAMP,
label INT -- 1 fraud, 0 legit (null = unknown)
);
INSERT INTO transactions VALUES
(700001, 3001, 199.99, 'ecommerce', 'US', '2025-09-01 10:01:00', 0),
(700002, 3002, 2500.00, 'electronics', 'US', '2025-09-01 10:02:10', 1),
(700003, 3003, 15.00, 'food', 'FR', '2025-09-01 10:02:30', 0),
(700004, 3001, 999.00, 'ecommerce', 'US', '2025-09-01 10:03:05', 0),
(700005, 3004, 3800.50, 'luxury', 'RU', '2025-09-01 10:03:20', 1);
Step 2: Feature view for training
-- derive time features
CREATE VIEW v_tx_features AS
SELECT
transaction_id,
amount,
merchant_type,
country,
EXTRACT(HOUR FROM timestamp) AS hour_of_day,
label
FROM transactions;
Step 3: Create experiment
CREATE EXPERIMENT fraud_detection_rf AS
SELECT
amount,
merchant_type,
country,
hour_of_day,
label AS target
FROM v_tx_features
WHERE label IS NOT NULL
WITH (
task_type = 'classification',
target_column = 'target',
algorithms = ['random_forest'],
optimization_metric = 'f1_score',
validation_strategy = 'kfold',
n_folds = 5,
class_weight = 'balanced',
algorithm_params = {'n_estimators': 600, 'max_depth': 14, 'min_samples_leaf': 2}
);
Step 4: Deploy best model
DEPLOY MODEL fraud_model FROM EXPERIMENT fraud_detection_rf;
Step 5: Real-time scoring window
PREDICT fraud_probability USING fraud_model AS
SELECT
transaction_id,
amount,
merchant_type,
country,
EXTRACT(HOUR FROM timestamp) AS hour_of_day
FROM transactions
WHERE timestamp >= NOW() - INTERVAL '15 MINUTE';
Step 6: Action policy
-- high risk: block and require step-up verification
SELECT
transaction_id,
fraud_probability,
CASE
WHEN fraud_probability >= 0.90 THEN 'BLOCK'
WHEN fraud_probability >= 0.70 THEN 'CHALLENGE' -- OTP, doc check
ELSE 'ALLOW'
END AS action
FROM (
PREDICT fraud_probability USING fraud_model AS
SELECT transaction_id, amount, merchant_type, country, EXTRACT(HOUR FROM timestamp) AS hour_of_day
FROM transactions
WHERE timestamp >= NOW() - INTERVAL '15 MINUTE'
) s
ORDER BY fraud_probability DESC;
Optional: Post-hoc calibration and cost control
-- monitor precision/recall and false positive cost
WITH scored AS (
SELECT
t.transaction_id,
t.label,
p.fraud_probability
FROM transactions t
JOIN (
PREDICT fraud_probability USING fraud_model AS
SELECT transaction_id, amount, merchant_type, country, EXTRACT(HOUR FROM timestamp) AS hour_of_day
FROM transactions
WHERE label IS NOT NULL
) p USING(transaction_id)
)
SELECT
AVG(CASE WHEN fraud_probability>=0.7 AND label=1 THEN 1 ELSE 0 END) AS recall_at_70,
AVG(CASE WHEN fraud_probability>=0.7 AND label=0 THEN 1 ELSE 0 END) AS fp_rate_at_70
FROM scored;
Expected outcomes
- High recall on fraud with controlled false positives.
- Clear actions: block, challenge, allow.
- Live dashboard for risk ops.