Content Recommendation Engine
Objective
Create a content recommendation system that combines vector similarity with user behavior data to provide personalized content suggestions across different media types.
Step 1: Create Content Items Table
Store recommendable content.
CREATE TABLE content_items (
id INTEGER PRIMARY KEY,
item_code VARCHAR(50) NOT NULL UNIQUE,
title VARCHAR(300) NOT NULL,
description TEXT,
content_type VARCHAR(50),
category VARCHAR(100),
subcategory VARCHAR(100),
tags TEXT,
difficulty_level VARCHAR(20),
duration_minutes INTEGER,
author VARCHAR(200),
popularity_score DECIMAL(5, 2) DEFAULT 0,
quality_score DECIMAL(3, 2),
content_embedding VECTOR(384),
is_featured BOOLEAN DEFAULT FALSE,
status VARCHAR(50) DEFAULT 'active',
published_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Step 2: Create Users Table
Store user profiles.
CREATE TABLE rec_users (
id INTEGER PRIMARY KEY,
user_id VARCHAR(50) NOT NULL UNIQUE,
email VARCHAR(200),
user_name VARCHAR(200),
interests TEXT,
skill_level VARCHAR(20),
preferred_content_types TEXT,
preferred_categories TEXT,
user_embedding VECTOR(384),
last_active TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Step 3: Create User Interactions Table
Track engagement.
CREATE TABLE user_interactions (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL,
item_id INTEGER NOT NULL,
interaction_type VARCHAR(50),
rating DECIMAL(2, 1),
time_spent_seconds INTEGER,
completion_percent DECIMAL(5, 2),
interaction_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES rec_users(id),
FOREIGN KEY (item_id) REFERENCES content_items(id)
);
Step 4: Create User Preferences Table
Store explicit preferences.
CREATE TABLE user_preferences (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL,
preference_type VARCHAR(50),
preference_value VARCHAR(200),
weight DECIMAL(3, 2) DEFAULT 1.0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES rec_users(id)
);
Step 5: Create Recommendations Table
Store generated recommendations.
CREATE TABLE recommendations (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL,
item_id INTEGER NOT NULL,
recommendation_type VARCHAR(50),
score DECIMAL(5, 4),
reason TEXT,
is_shown BOOLEAN DEFAULT FALSE,
shown_at TIMESTAMP,
is_clicked BOOLEAN DEFAULT FALSE,
clicked_at TIMESTAMP,
generated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES rec_users(id),
FOREIGN KEY (item_id) REFERENCES content_items(id)
);
Step 6: Insert Sample Content
Add content catalog.
INSERT INTO content_items (id, item_code, title, description, content_type, category, subcategory, tags, difficulty_level, duration_minutes, author, quality_score, status, published_at) VALUES
(1, 'TUT-PY-001', 'Python Basics for Beginners', 'Learn Python fundamentals including variables, data types, and control flow', 'tutorial', 'programming', 'python', 'python,beginner,basics,programming', 'beginner', 45, 'Dr. Code', 4.8, 'active', '2024-01-01 00:00:00'),
(2, 'TUT-PY-002', 'Python Data Structures', 'Deep dive into lists, dictionaries, sets, and tuples', 'tutorial', 'programming', 'python', 'python,data-structures,intermediate', 'intermediate', 60, 'Dr. Code', 4.7, 'active', '2024-01-05 00:00:00'),
(3, 'TUT-JS-001', 'JavaScript Fundamentals', 'Modern JavaScript basics for web development', 'tutorial', 'programming', 'javascript', 'javascript,web,beginner', 'beginner', 50, 'Web Master', 4.6, 'active', '2024-01-03 00:00:00'),
(4, 'ART-ML-001', 'Introduction to Machine Learning', 'Overview of ML concepts and applications', 'article', 'data-science', 'machine-learning', 'ml,ai,data-science,overview', 'intermediate', 20, 'AI Expert', 4.9, 'active', '2024-01-08 00:00:00'),
(5, 'VID-ML-001', 'Neural Networks Explained', 'Visual explanation of neural network architectures', 'video', 'data-science', 'deep-learning', 'neural-networks,deep-learning,ai', 'advanced', 35, 'AI Expert', 4.8, 'active', '2024-01-10 00:00:00'),
(6, 'TUT-DB-001', 'SQL Database Design', 'Learn to design efficient database schemas', 'tutorial', 'databases', 'sql', 'sql,database,design,intermediate', 'intermediate', 55, 'Data Pro', 4.5, 'active', '2024-01-12 00:00:00'),
(7, 'VID-PY-001', 'Python for Data Analysis', 'Using pandas and numpy for data analysis', 'video', 'data-science', 'python', 'python,pandas,numpy,data-analysis', 'intermediate', 90, 'Data Pro', 4.7, 'active', '2024-01-15 00:00:00'),
(8, 'ART-WEB-001', 'Modern Web Architecture', 'Understanding microservices and cloud-native design', 'article', 'architecture', 'web', 'microservices,cloud,architecture', 'advanced', 25, 'Arch Expert', 4.6, 'active', '2024-01-18 00:00:00'),
(9, 'TUT-PY-003', 'Python Object-Oriented Programming', 'Master OOP concepts in Python', 'tutorial', 'programming', 'python', 'python,oop,classes,advanced', 'advanced', 75, 'Dr. Code', 4.7, 'active', '2024-01-20 00:00:00'),
(10, 'VID-JS-001', 'React Fundamentals', 'Building modern UIs with React', 'video', 'programming', 'javascript', 'react,javascript,frontend,ui', 'intermediate', 120, 'Web Master', 4.8, 'active', '2024-01-22 00:00:00');
UPDATE content_items
SET content_embedding = EMBED(title || ' ' || description || ' ' || tags)
WHERE content_embedding IS NULL;
Step 7: Insert Users and Interactions
Add user data.
INSERT INTO rec_users (id, user_id, email, user_name, interests, skill_level, preferred_content_types, preferred_categories) VALUES
(1, 'USR-001', 'alice@example.com', 'Alice Developer', 'python,data-science,machine-learning', 'intermediate', 'tutorial,video', 'programming,data-science'),
(2, 'USR-002', 'bob@example.com', 'Bob Coder', 'javascript,web-development,react', 'beginner', 'tutorial,article', 'programming,architecture'),
(3, 'USR-003', 'carol@example.com', 'Carol Data', 'machine-learning,deep-learning,python', 'advanced', 'video,article', 'data-science'),
(4, 'USR-004', 'dave@example.com', 'Dave Newbie', 'programming,basics', 'beginner', 'tutorial', 'programming');
UPDATE rec_users
SET user_embedding = EMBED(interests || ' ' || preferred_categories)
WHERE user_embedding IS NULL;
INSERT INTO user_interactions (id, user_id, item_id, interaction_type, rating, time_spent_seconds, completion_percent, interaction_time) VALUES
(1, 1, 1, 'complete', 5.0, 2700, 100.0, '2024-01-25 10:00:00'),
(2, 1, 2, 'complete', 4.5, 3600, 100.0, '2024-01-26 14:00:00'),
(3, 1, 4, 'read', 4.0, 1200, 100.0, '2024-01-27 09:00:00'),
(4, 1, 7, 'watch', NULL, 3000, 55.5, '2024-01-28 11:00:00'),
(5, 2, 3, 'complete', 4.5, 3000, 100.0, '2024-01-25 15:00:00'),
(6, 2, 10, 'watch', NULL, 4500, 62.5, '2024-01-26 16:00:00'),
(7, 3, 4, 'read', 5.0, 1200, 100.0, '2024-01-24 10:00:00'),
(8, 3, 5, 'complete', 5.0, 2100, 100.0, '2024-01-25 11:00:00'),
(9, 3, 9, 'complete', 4.0, 4500, 100.0, '2024-01-26 09:00:00'),
(10, 4, 1, 'complete', 5.0, 2700, 100.0, '2024-01-27 14:00:00');
Step 8: Insert Preferences
Add explicit preferences.
INSERT INTO user_preferences (id, user_id, preference_type, preference_value, weight) VALUES
(1, 1, 'category', 'data-science', 1.5),
(2, 1, 'content_type', 'tutorial', 1.2),
(3, 1, 'difficulty', 'intermediate', 1.0),
(4, 2, 'category', 'programming', 1.3),
(5, 2, 'subcategory', 'javascript', 1.5),
(6, 3, 'category', 'data-science', 1.8),
(7, 3, 'difficulty', 'advanced', 1.2),
(8, 4, 'difficulty', 'beginner', 1.5);
Step 9: Content-Based Recommendations
Find similar content.
-- Recommend content similar to what user has liked
SELECT
ci.item_code,
ci.title,
ci.content_type,
ci.category,
ci.difficulty_level,
COSINE_SIMILARITY(ci.content_embedding, liked.avg_embedding) as similarity
FROM content_items ci
CROSS JOIN (
SELECT AVG(ci2.content_embedding) as avg_embedding
FROM user_interactions ui
INNER JOIN content_items ci2 ON ui.item_id = ci2.id
WHERE ui.user_id = 1
AND ui.rating >= 4.0
) liked
WHERE ci.id NOT IN (
SELECT item_id FROM user_interactions WHERE user_id = 1
)
AND ci.content_embedding IS NOT NULL
ORDER BY similarity DESC
LIMIT 5;
Step 10: User-Profile Recommendations
Match user interests.
SELECT
ci.item_code,
ci.title,
ci.content_type,
ci.category,
COSINE_SIMILARITY(ci.content_embedding, ru.user_embedding) as relevance
FROM content_items ci
CROSS JOIN rec_users ru
WHERE ru.id = 1
AND ci.id NOT IN (
SELECT item_id FROM user_interactions WHERE user_id = 1
)
AND ci.content_embedding IS NOT NULL
AND ru.user_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 5;
Step 11: Popular in Category
Recommend trending content.
SELECT
ci.item_code,
ci.title,
ci.content_type,
ci.quality_score,
COUNT(ui.id) as interaction_count,
AVG(ui.rating) as avg_rating
FROM content_items ci
LEFT JOIN user_interactions ui ON ci.id = ui.item_id
WHERE ci.category = 'programming'
AND ci.status = 'active'
AND ci.id NOT IN (
SELECT item_id FROM user_interactions WHERE user_id = 4
)
GROUP BY ci.id, ci.item_code, ci.title, ci.content_type, ci.quality_score
ORDER BY avg_rating DESC NULLS LAST, interaction_count DESC
LIMIT 5;
Step 12: Generate Recommendations
Store computed recommendations.
INSERT INTO recommendations (user_id, item_id, recommendation_type, score, reason, expires_at)
SELECT
1 as user_id,
ci.id as item_id,
'content_based' as recommendation_type,
COSINE_SIMILARITY(ci.content_embedding, ru.user_embedding) as score,
'Based on your interests in ' || ru.interests as reason,
CURRENT_TIMESTAMP + INTERVAL '7 days' as expires_at
FROM content_items ci
CROSS JOIN rec_users ru
WHERE ru.id = 1
AND ci.id NOT IN (
SELECT item_id FROM user_interactions WHERE user_id = 1
)
AND ci.content_embedding IS NOT NULL
AND ru.user_embedding IS NOT NULL
ORDER BY score DESC
LIMIT 10;
Step 13: Get User Recommendations
Retrieve personalized suggestions.
SELECT
ci.item_code,
ci.title,
ci.content_type,
ci.category,
ci.difficulty_level,
ci.duration_minutes,
r.recommendation_type,
r.score,
r.reason
FROM recommendations r
INNER JOIN content_items ci ON r.item_id = ci.id
WHERE r.user_id = 1
AND r.expires_at > CURRENT_TIMESTAMP
AND r.is_clicked = FALSE
ORDER BY r.score DESC
LIMIT 5;
Step 14: Next in Sequence
Recommend logical next steps.
-- For user who completed Python Basics, recommend Python Data Structures
SELECT
ci.item_code,
ci.title,
ci.difficulty_level,
'next_in_series' as recommendation_type
FROM content_items ci
WHERE ci.subcategory = 'python'
AND ci.difficulty_level IN ('beginner', 'intermediate')
AND ci.id NOT IN (
SELECT item_id FROM user_interactions WHERE user_id = 4
)
ORDER BY
CASE ci.difficulty_level
WHEN 'beginner' THEN 1
WHEN 'intermediate' THEN 2
WHEN 'advanced' THEN 3
END
LIMIT 3;
Step 15: Recommendation Performance
Analyze effectiveness.
SELECT
r.recommendation_type,
COUNT(*) as total_recommendations,
COUNT(CASE WHEN r.is_shown THEN 1 END) as shown,
COUNT(CASE WHEN r.is_clicked THEN 1 END) as clicked,
CASE
WHEN COUNT(CASE WHEN r.is_shown THEN 1 END) > 0
THEN COUNT(CASE WHEN r.is_clicked THEN 1 END) * 100.0 / COUNT(CASE WHEN r.is_shown THEN 1 END)
ELSE 0
END as click_through_rate,
AVG(r.score) as avg_score
FROM recommendations r
WHERE r.generated_at >= CURRENT_DATE - 30
GROUP BY r.recommendation_type
ORDER BY click_through_rate DESC;
Cleanup (Optional)
DROP TABLE IF EXISTS recommendations;
DROP TABLE IF EXISTS user_preferences;
DROP TABLE IF EXISTS user_interactions;
DROP TABLE IF EXISTS rec_users;
DROP TABLE IF EXISTS content_items;
Expected Outcomes
- Content indexed with embeddings
- User profiles created
- Interactions tracked
- Recommendations generated
- Performance measured
Recommendation Types
| Type | Method |
|---|---|
| content_based | Similar to liked items |
| user_profile | Match user interests |
| popular | Trending in category |
| sequential | Next logical step |
| collaborative | Similar user preferences |
Key Concepts Learned
- Content-based filtering
- User embedding creation
- Interaction tracking
- Recommendation scoring
- A/B testing metrics