Multimodal Content Search
Objective
Build a unified search system that enables searching across different content types (images, videos, audio, documents) using a common semantic interface and vector embeddings.
Step 1: Create Unified Content Table
Store all content types together.
CREATE TABLE unified_content (
id INTEGER PRIMARY KEY,
content_code VARCHAR(50) NOT NULL UNIQUE,
content_type VARCHAR(50) NOT NULL,
title VARCHAR(300) NOT NULL,
description TEXT,
source_table VARCHAR(100),
source_id INTEGER,
category VARCHAR(100),
tags TEXT,
file_size_mb DECIMAL(10, 2),
duration_seconds INTEGER,
page_count INTEGER,
width INTEGER,
height INTEGER,
created_by VARCHAR(100),
content_embedding VECTOR(384),
indexed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Step 2: Create Content Relationships Table
Track content connections.
CREATE TABLE content_relationships (
id INTEGER PRIMARY KEY,
source_content_id INTEGER NOT NULL,
related_content_id INTEGER NOT NULL,
relationship_type VARCHAR(50),
similarity_score DECIMAL(5, 4),
is_manual BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (source_content_id) REFERENCES unified_content(id),
FOREIGN KEY (related_content_id) REFERENCES unified_content(id)
);
Step 3: Create Search Sessions Table
Track user search sessions.
CREATE TABLE search_sessions (
id INTEGER PRIMARY KEY,
session_id VARCHAR(100) NOT NULL UNIQUE,
user_id VARCHAR(50),
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
ended_at TIMESTAMP,
query_count INTEGER DEFAULT 0,
click_count INTEGER DEFAULT 0
);
Step 4: Create Search Queries Table
Log search requests.
CREATE TABLE search_queries (
id INTEGER PRIMARY KEY,
session_id INTEGER NOT NULL,
query_text TEXT,
query_embedding VECTOR(384),
content_type_filter VARCHAR(50),
category_filter VARCHAR(100),
results_count INTEGER,
search_time_ms INTEGER,
queried_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (session_id) REFERENCES search_sessions(id)
);
Step 5: Create Search Results Table
Track result interactions.
CREATE TABLE search_results (
id INTEGER PRIMARY KEY,
query_id INTEGER NOT NULL,
content_id INTEGER NOT NULL,
result_position INTEGER,
relevance_score DECIMAL(5, 4),
was_clicked BOOLEAN DEFAULT FALSE,
clicked_at TIMESTAMP,
time_on_content_seconds INTEGER,
FOREIGN KEY (query_id) REFERENCES search_queries(id),
FOREIGN KEY (content_id) REFERENCES unified_content(id)
);
Step 6: Insert Mixed Content
Add various content types.
-- Images
INSERT INTO unified_content (id, content_code, content_type, title, description, source_table, category, tags, file_size_mb, width, height, created_by) VALUES
(1, 'IMG-001', 'image', 'Product Hero Shot', 'High-resolution product photography showing the new Model X device from multiple angles', 'product_images', 'product', 'product,photography,hero,model-x', 15.2, 4000, 3000, 'design_team'),
(2, 'IMG-002', 'image', 'Team Photo 2024', 'Annual company team photo taken at headquarters', 'company_images', 'corporate', 'team,company,people,office', 8.5, 5000, 3333, 'hr_team'),
(3, 'IMG-003', 'image', 'Infographic - Sales Process', 'Visual guide to the sales process workflow', 'marketing_assets', 'marketing', 'infographic,sales,process,workflow', 2.1, 2000, 4000, 'marketing_team');
-- Videos
INSERT INTO unified_content (id, content_code, content_type, title, description, source_table, category, tags, file_size_mb, duration_seconds, created_by) VALUES
(4, 'VID-001', 'video', 'Product Launch Keynote', 'Annual product launch event featuring new announcements and demonstrations', 'video_library', 'corporate', 'keynote,launch,product,event', 2850.5, 5400, 'events_team'),
(5, 'VID-002', 'video', 'Customer Testimonial - TechCorp', 'Customer success story interview with TechCorp executives', 'video_library', 'marketing', 'testimonial,customer,success,interview', 580.2, 1200, 'marketing_team'),
(6, 'VID-003', 'video', 'Platform Tutorial Episode 1', 'Getting started tutorial covering basic features', 'video_library', 'education', 'tutorial,training,getting-started,beginner', 890.4, 1800, 'training_team');
-- Audio
INSERT INTO unified_content (id, content_code, content_type, title, description, source_table, category, tags, file_size_mb, duration_seconds, created_by) VALUES
(7, 'AUD-001', 'audio', 'Podcast Episode 45 - AI Trends', 'Discussion on emerging AI trends with industry experts', 'podcast_episodes', 'marketing', 'podcast,ai,trends,discussion,expert', 52.4, 4200, 'content_team'),
(8, 'AUD-002', 'audio', 'Q1 Earnings Call Recording', 'Quarterly earnings call with investors', 'earnings_calls', 'corporate', 'earnings,financial,investor,quarterly', 45.8, 3600, 'ir_team'),
(9, 'AUD-003', 'audio', 'Sales Training - Objection Handling', 'Audio training on handling common sales objections', 'training_audio', 'education', 'training,sales,objections,techniques', 28.5, 2400, 'sales_enablement');
-- Documents
INSERT INTO unified_content (id, content_code, content_type, title, description, source_table, category, tags, file_size_mb, page_count, created_by) VALUES
(10, 'DOC-001', 'document', 'Product Requirements Document', 'Detailed PRD for Product X version 3.0 release', 'documents', 'product', 'prd,requirements,product,specifications', 2.8, 35, 'product_team'),
(11, 'DOC-002', 'document', 'Employee Handbook 2024', 'Comprehensive guide to company policies and benefits', 'documents', 'hr', 'handbook,policies,hr,employee,benefits', 5.2, 85, 'hr_team'),
(12, 'DOC-003', 'document', 'Technical Architecture Guide', 'System architecture documentation for developers', 'documents', 'technical', 'architecture,technical,documentation,developer', 8.5, 120, 'engineering_team');
Step 7: Generate Embeddings
Create unified embeddings.
UPDATE unified_content
SET content_embedding = EMBED(title || ' ' || description),
indexed_at = CURRENT_TIMESTAMP
WHERE content_embedding IS NULL;
-- Verify indexing
SELECT
content_type,
COUNT(*) as total,
COUNT(content_embedding) as indexed
FROM unified_content
GROUP BY content_type;
Step 8: Unified Semantic Search
Search across all content.
SELECT
uc.content_code,
uc.content_type,
uc.title,
uc.description,
uc.category,
COSINE_SIMILARITY(uc.content_embedding, EMBED('product launch and new features announcement')) as relevance
FROM unified_content uc
WHERE uc.content_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 10;
Step 9: Filter by Content Type
Search specific media.
SELECT
uc.content_code,
uc.title,
uc.duration_seconds / 60 as duration_minutes,
COSINE_SIMILARITY(uc.content_embedding, EMBED('customer success story and testimonial')) as relevance
FROM unified_content uc
WHERE uc.content_type = 'video'
AND uc.content_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 5;
Step 10: Cross-Modal Search
Find related content across types.
-- Find content related to Product Launch Keynote (VID-001)
SELECT
target.content_code,
target.content_type,
target.title,
COSINE_SIMILARITY(source.content_embedding, target.content_embedding) as similarity
FROM unified_content source
CROSS JOIN unified_content target
WHERE source.content_code = 'VID-001'
AND target.content_code != 'VID-001'
AND source.content_embedding IS NOT NULL
AND target.content_embedding IS NOT NULL
ORDER BY similarity DESC
LIMIT 5;
Step 11: Create Search Session
Track user searches.
INSERT INTO search_sessions (id, session_id, user_id, started_at)
VALUES (1, 'SESS-2024-001', 'USR-001', CURRENT_TIMESTAMP);
INSERT INTO search_queries (id, session_id, query_text, content_type_filter, results_count, search_time_ms)
VALUES (1, 1, 'product launch and new features', NULL, 10, 125);
INSERT INTO search_results (query_id, content_id, result_position, relevance_score, was_clicked, clicked_at, time_on_content_seconds) VALUES
(1, 4, 1, 0.92, TRUE, CURRENT_TIMESTAMP, 180),
(1, 1, 2, 0.85, TRUE, CURRENT_TIMESTAMP, 45),
(1, 10, 3, 0.78, FALSE, NULL, NULL);
Step 12: Build Content Relationships
Auto-discover related content.
INSERT INTO content_relationships (source_content_id, related_content_id, relationship_type, similarity_score)
SELECT
source.id,
target.id,
CASE
WHEN source.content_type = target.content_type THEN 'same_type_similar'
ELSE 'cross_type_similar'
END,
COSINE_SIMILARITY(source.content_embedding, target.content_embedding)
FROM unified_content source
CROSS JOIN unified_content target
WHERE source.id < target.id
AND source.content_embedding IS NOT NULL
AND target.content_embedding IS NOT NULL
AND COSINE_SIMILARITY(source.content_embedding, target.content_embedding) > 0.65;
Step 13: Content Type Distribution
Analyze search by type.
SELECT
uc.content_type,
COUNT(sr.id) as times_in_results,
COUNT(CASE WHEN sr.was_clicked THEN 1 END) as clicks,
AVG(sr.result_position) as avg_position,
AVG(sr.relevance_score) as avg_relevance
FROM unified_content uc
LEFT JOIN search_results sr ON uc.id = sr.content_id
GROUP BY uc.content_type
ORDER BY times_in_results DESC;
Step 14: Category Search
Search within categories.
SELECT
uc.content_code,
uc.content_type,
uc.title,
uc.category,
COSINE_SIMILARITY(uc.content_embedding, EMBED('training and learning materials')) as relevance
FROM unified_content uc
WHERE uc.category = 'education'
AND uc.content_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 5;
Step 15: Search Analytics
Measure search effectiveness.
SELECT
sq.query_text,
sq.content_type_filter,
sq.results_count,
sq.search_time_ms,
COUNT(sr.id) as results_shown,
COUNT(CASE WHEN sr.was_clicked THEN 1 END) as clicks,
AVG(CASE WHEN sr.was_clicked THEN sr.result_position END) as avg_click_position,
AVG(sr.time_on_content_seconds) as avg_engagement_seconds
FROM search_queries sq
LEFT JOIN search_results sr ON sq.id = sr.query_id
GROUP BY sq.id, sq.query_text, sq.content_type_filter, sq.results_count, sq.search_time_ms
ORDER BY sq.queried_at DESC;
Cleanup (Optional)
DROP TABLE IF EXISTS search_results;
DROP TABLE IF EXISTS search_queries;
DROP TABLE IF EXISTS search_sessions;
DROP TABLE IF EXISTS content_relationships;
DROP TABLE IF EXISTS unified_content;
Expected Outcomes
- All content types unified
- Cross-modal search works
- Related content discovered
- Search sessions tracked
- Analytics available
Content Type Reference
| Type | Attributes Tracked |
|---|---|
| image | width, height, file_size |
| video | duration, file_size |
| audio | duration, file_size |
| document | page_count, file_size |
Key Concepts Learned
- Unified content indexing
- Cross-modal search
- Content relationships
- Search session tracking
- Engagement analytics