Vector Similarity Image Search

Implement semantic image search using vector embeddings and similarity functions

All recipes· advanced-patterns· 15 minutesadvanced

Vector Similarity Image Search

Objective

Build a semantic image search system using vector embeddings to find visually and conceptually similar images, enabling content-based image retrieval beyond simple keyword matching.

Step 1: Create Image Catalog Table

Store images with embeddings.

CREATE TABLE image_catalog (
    id INTEGER PRIMARY KEY,
    image_code VARCHAR(50) NOT NULL UNIQUE,
    image_name VARCHAR(200),
    description TEXT,
    image_file IMAGE(JPEG),
    thumbnail IMAGE(JPEG),
    category VARCHAR(100),
    tags TEXT,
    file_size_mb DECIMAL(10, 2),
    width INTEGER,
    height INTEGER,
    format VARCHAR(20),
    image_embedding VECTOR(512),
    description_embedding VECTOR(384),
    indexed_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

Step 2: Create Image Collections Table

Organize images into sets.

CREATE TABLE image_collections (
    id INTEGER PRIMARY KEY,
    collection_code VARCHAR(50) NOT NULL UNIQUE,
    collection_name VARCHAR(200),
    description TEXT,
    owner_id VARCHAR(50),
    is_public BOOLEAN DEFAULT FALSE,
    image_count INTEGER DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE collection_images (
    id INTEGER PRIMARY KEY,
    collection_id INTEGER NOT NULL,
    image_id INTEGER NOT NULL,
    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (collection_id) REFERENCES image_collections(id),
    FOREIGN KEY (image_id) REFERENCES image_catalog(id)
);

Step 3: Create Search History Table

Track search patterns.

CREATE TABLE image_search_history (
    id INTEGER PRIMARY KEY,
    user_id VARCHAR(50),
    search_type VARCHAR(50),
    query_text TEXT,
    query_image_id INTEGER,
    results_count INTEGER,
    top_result_id INTEGER,
    search_time_ms INTEGER,
    searched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (query_image_id) REFERENCES image_catalog(id),
    FOREIGN KEY (top_result_id) REFERENCES image_catalog(id)
);

Step 4: Create Similar Images Cache Table

Cache similarity results.

CREATE TABLE similar_images_cache (
    id INTEGER PRIMARY KEY,
    source_image_id INTEGER NOT NULL,
    similar_image_id INTEGER NOT NULL,
    similarity_score DECIMAL(5, 4),
    rank_position INTEGER,
    cache_method VARCHAR(50),
    cached_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    expires_at TIMESTAMP,
    FOREIGN KEY (source_image_id) REFERENCES image_catalog(id),
    FOREIGN KEY (similar_image_id) REFERENCES image_catalog(id)
);

Step 5: Insert Sample Images

Add image catalog.

INSERT INTO image_catalog (id, image_code, image_name, description, category, tags, file_size_mb, width, height, format, created_at) VALUES
    (1, 'IMG-001', 'Mountain Sunset', 'Beautiful sunset over mountain range with orange and purple sky', 'nature', 'sunset,mountains,landscape,sky,orange', 4.5, 4000, 3000, 'JPEG', '2024-01-01 10:00:00'),
    (2, 'IMG-002', 'Beach Sunrise', 'Calm beach scene with sunrise and gentle waves', 'nature', 'beach,sunrise,ocean,waves,calm', 3.8, 4000, 2667, 'JPEG', '2024-01-02 10:00:00'),
    (3, 'IMG-003', 'City Skyline Night', 'Urban cityscape at night with illuminated buildings', 'urban', 'city,night,skyline,buildings,lights', 5.2, 5000, 3333, 'JPEG', '2024-01-03 10:00:00'),
    (4, 'IMG-004', 'Forest Path', 'Winding path through autumn forest with fallen leaves', 'nature', 'forest,path,autumn,leaves,trees', 4.1, 4000, 3000, 'JPEG', '2024-01-04 10:00:00'),
    (5, 'IMG-005', 'Desert Dunes', 'Sand dunes in golden hour light', 'nature', 'desert,dunes,sand,golden,landscape', 3.5, 4000, 2667, 'JPEG', '2024-01-05 10:00:00'),
    (6, 'IMG-006', 'Snow Mountains', 'Snow-capped mountain peaks under blue sky', 'nature', 'mountains,snow,peaks,winter,blue', 4.8, 4500, 3000, 'JPEG', '2024-01-06 10:00:00'),
    (7, 'IMG-007', 'Tropical Beach', 'Palm trees on white sand tropical beach', 'nature', 'beach,tropical,palm,sand,paradise', 3.9, 4000, 3000, 'JPEG', '2024-01-07 10:00:00'),
    (8, 'IMG-008', 'Urban Street', 'Busy city street with people and shops', 'urban', 'city,street,people,shops,busy', 4.3, 4000, 2667, 'JPEG', '2024-01-08 10:00:00'),
    (9, 'IMG-009', 'Waterfall', 'Majestic waterfall in tropical rainforest', 'nature', 'waterfall,rainforest,tropical,water,green', 4.6, 3000, 4000, 'JPEG', '2024-01-09 10:00:00'),
    (10, 'IMG-010', 'Night Sky Stars', 'Milky Way visible in clear night sky', 'nature', 'stars,night,milkyway,sky,astronomy', 5.5, 5000, 3333, 'JPEG', '2024-01-10 10:00:00');

Step 6: Generate Embeddings

Create vector representations.

-- Update images with description embeddings
UPDATE image_catalog
SET description_embedding = EMBED(description)
WHERE description_embedding IS NULL;

-- Verify embedding generation
SELECT
    image_code,
    image_name,
    CASE
        WHEN description_embedding IS NOT NULL THEN 'Generated'
        ELSE 'Pending'
    END as embedding_status
FROM image_catalog
ORDER BY id;

Step 7: Insert Collections

Create image sets.

INSERT INTO image_collections (id, collection_code, collection_name, description, owner_id, is_public, image_count) VALUES
    (1, 'COL-NATURE', 'Nature Photography', 'Collection of nature and landscape images', 'USR-001', TRUE, 7),
    (2, 'COL-URBAN', 'Urban Scenes', 'City and urban photography', 'USR-001', TRUE, 2),
    (3, 'COL-SUNSET', 'Sunset Collection', 'Images featuring sunsets and golden hour', 'USR-002', TRUE, 3);

INSERT INTO collection_images (id, collection_id, image_id) VALUES
    (1, 1, 1), (2, 1, 2), (3, 1, 4), (4, 1, 5), (5, 1, 6), (6, 1, 7), (7, 1, 9),
    (8, 2, 3), (9, 2, 8),
    (10, 3, 1), (11, 3, 2), (12, 3, 5);

Step 8: Text-Based Semantic Search

Find images by description.

SELECT
    ic.image_code,
    ic.image_name,
    ic.description,
    ic.category,
    COSINE_SIMILARITY(ic.description_embedding, EMBED('mountain landscape with dramatic sky')) as similarity
FROM image_catalog ic
WHERE ic.description_embedding IS NOT NULL
ORDER BY similarity DESC
LIMIT 5;

Step 9: Find Similar Images

Search by image similarity.

-- Find images similar to Mountain Sunset (IMG-001)
SELECT
    target.image_code,
    target.image_name,
    target.description,
    target.category,
    COSINE_SIMILARITY(source.description_embedding, target.description_embedding) as similarity
FROM image_catalog source
CROSS JOIN image_catalog target
WHERE source.image_code = 'IMG-001'
  AND target.image_code != 'IMG-001'
  AND source.description_embedding IS NOT NULL
  AND target.description_embedding IS NOT NULL
ORDER BY similarity DESC
LIMIT 5;

Step 10: Category-Filtered Search

Search within categories.

SELECT
    ic.image_code,
    ic.image_name,
    ic.description,
    COSINE_SIMILARITY(ic.description_embedding, EMBED('peaceful water scene')) as similarity
FROM image_catalog ic
WHERE ic.category = 'nature'
  AND ic.description_embedding IS NOT NULL
ORDER BY similarity DESC
LIMIT 5;

Step 11: Cache Similar Images

Pre-compute similarities.

-- Calculate and cache similarities for frequently accessed images
INSERT INTO similar_images_cache (source_image_id, similar_image_id, similarity_score, rank_position, cache_method, expires_at)
SELECT
    source.id as source_image_id,
    target.id as similar_image_id,
    COSINE_SIMILARITY(source.description_embedding, target.description_embedding) as similarity_score,
    ROW_NUMBER() OVER (PARTITION BY source.id ORDER BY COSINE_SIMILARITY(source.description_embedding, target.description_embedding) DESC) as rank_position,
    'batch_compute' as cache_method,
    CURRENT_TIMESTAMP + INTERVAL '7 days' as expires_at
FROM image_catalog source
CROSS JOIN image_catalog target
WHERE source.id != target.id
  AND source.description_embedding IS NOT NULL
  AND target.description_embedding IS NOT NULL
  AND source.id = 1;

Step 12: Retrieve Cached Similarities

Use pre-computed results.

SELECT
    ic.image_code,
    ic.image_name,
    ic.description,
    sic.similarity_score,
    sic.rank_position
FROM similar_images_cache sic
INNER JOIN image_catalog ic ON sic.similar_image_id = ic.id
WHERE sic.source_image_id = 1
  AND sic.expires_at > CURRENT_TIMESTAMP
ORDER BY sic.rank_position
LIMIT 5;

Step 13: Log Search Queries

Track search history.

INSERT INTO image_search_history (user_id, search_type, query_text, results_count, top_result_id, search_time_ms)
VALUES ('USR-001', 'text', 'mountain landscape with dramatic sky', 5, 1, 125);

INSERT INTO image_search_history (user_id, search_type, query_image_id, results_count, top_result_id, search_time_ms)
VALUES ('USR-001', 'image', 1, 5, 6, 85);

Step 14: Search Analytics

Analyze search patterns.

SELECT
    search_type,
    COUNT(*) as search_count,
    AVG(results_count) as avg_results,
    AVG(search_time_ms) as avg_time_ms
FROM image_search_history
WHERE searched_at >= CURRENT_DATE - 30
GROUP BY search_type
ORDER BY search_count DESC;

Step 15: Collection Similarity Analysis

Find similar collections.

SELECT
    ic1.collection_name as collection,
    ic.image_name,
    COSINE_SIMILARITY(ic.description_embedding, EMBED('tropical paradise vacation')) as relevance
FROM collection_images ci
INNER JOIN image_collections ic1 ON ci.collection_id = ic1.id
INNER JOIN image_catalog ic ON ci.image_id = ic.id
WHERE ic.description_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 10;

Cleanup (Optional)

DROP TABLE IF EXISTS similar_images_cache;
DROP TABLE IF EXISTS image_search_history;
DROP TABLE IF EXISTS collection_images;
DROP TABLE IF EXISTS image_collections;
DROP TABLE IF EXISTS image_catalog;

Expected Outcomes

  • Images indexed with embeddings
  • Semantic search enabled
  • Similar images found
  • Results cached
  • Search history tracked

Similarity Score Interpretation

Score Range Interpretation
0.90-1.00 Nearly identical
0.75-0.89 Very similar
0.60-0.74 Related
0.40-0.59 Loosely related
< 0.40 Different

Key Concepts Learned

  • Vector embeddings
  • EMBED() function
  • COSINE_SIMILARITY()
  • Semantic search
  • Similarity caching

Tags

sqladvancedimagevectorsimilarityaiembeddings

Run this on your own machine

Install SynapCores Community Edition free, paste the SQL or Cypher above into the bundled web UI, and watch it run.

Download Free CE