Real Estate AI: Property Matching & Market Analysis

Build an intelligent real estate platform that matches buyers with properties, predicts market trends, analyzes property values, and generates insi...

All recipes· ml· 10 minutesadvancedsql

Real Estate AI: Property Matching & Market Analysis

Problem

Build an intelligent real estate platform that matches buyers with properties, predicts market trends, analyzes property values, and generates insights for better investment decisions using AI.

What You'll Learn

  • Match buyers with properties using semantic search
  • Predict property values and market trends
  • Analyze neighborhood characteristics and amenities
  • Generate automated property descriptions and market reports

Setup: Create Real Estate Database

CREATE TABLE properties (
    id INTEGER PRIMARY KEY,
    address TEXT NOT NULL,
    city TEXT,
    state TEXT,
    zip_code TEXT,
    property_type TEXT,
    bedrooms INTEGER,
    bathrooms REAL,
    square_feet INTEGER,
    lot_size REAL,
    year_built INTEGER,
    price DECIMAL(12,2),
    listing_date DATE,
    status TEXT,
    description TEXT,
    amenities TEXT,
    property_embedding VECTOR(384)
);

CREATE TABLE buyers (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT,
    phone TEXT,
    budget_min DECIMAL(12,2),
    budget_max DECIMAL(12,2),
    preferred_cities TEXT,
    property_preferences TEXT,
    lifestyle_preferences TEXT,
    buyer_profile VECTOR(384)
);

CREATE TABLE property_views (
    id INTEGER PRIMARY KEY,
    buyer_id INTEGER REFERENCES buyers(id),
    property_id INTEGER REFERENCES properties(id),
    view_date TIMESTAMP,
    view_type TEXT,
    interest_level INTEGER CHECK (interest_level >= 1 AND interest_level <= 5),
    feedback TEXT
);

CREATE TABLE market_data (
    id INTEGER PRIMARY KEY,
    city TEXT,
    state TEXT,
    month TEXT,
    median_price DECIMAL(12,2),
    avg_days_market INTEGER,
    inventory_count INTEGER,
    price_per_sqft DECIMAL(8,2),
    market_trend TEXT
);

CREATE TABLE neighborhoods (
    id INTEGER PRIMARY KEY,
    name TEXT,
    city TEXT,
    state TEXT,
    safety_score REAL,
    school_rating REAL,
    walkability_score REAL,
    transit_score REAL,
    amenities_nearby TEXT,
    demographics TEXT,
    neighborhood_embedding VECTOR(384)
);

Seed Data: Property Listings

INSERT INTO properties (id, address, city, state, zip_code, property_type, bedrooms, bathrooms, square_feet, lot_size, year_built, price, listing_date, status, description, amenities) VALUES
(1, '123 Oak Street', 'San Francisco', 'CA', '94102', 'Condo', 2, 2.0, 1200, 0, 2018, 850000.00, '2025-01-10', 'ACTIVE', 'Modern downtown condo with city views, updated kitchen, hardwood floors', 'Gym, Pool, Rooftop Deck, Parking'),
(2, '456 Pine Avenue', 'Los Angeles', 'CA', '90210', 'Single Family', 4, 3.0, 2500, 0.25, 1995, 1200000.00, '2025-01-12', 'ACTIVE', 'Beautiful family home in prestigious neighborhood, large backyard, updated bathrooms', 'Pool, Garage, Garden, Security System'),
(3, '789 Maple Drive', 'Austin', 'TX', '73301', 'Townhouse', 3, 2.5, 1800, 0.1, 2010, 450000.00, '2025-01-08', 'ACTIVE', 'Charming townhouse near downtown, open floor plan, granite countertops', 'Patio, Fireplace, Walk-in Closet'),
(4, '321 Elm Street', 'Seattle', 'WA', '98101', 'Condo', 1, 1.0, 800, 0, 2020, 520000.00, '2025-01-15', 'ACTIVE', 'Luxury studio in tech district, floor-to-ceiling windows, modern appliances', 'Concierge, Gym, Business Center'),
(5, '654 Cedar Lane', 'Denver', 'CO', '80202', 'Single Family', 5, 4.0, 3200, 0.3, 2005, 750000.00, '2025-01-14', 'ACTIVE', 'Spacious family home with mountain views, finished basement, large deck', 'Fireplace, Garage, Deck, Mountain Views'),
(6, '987 Birch Road', 'Miami', 'FL', '33101', 'Condo', 2, 2.0, 1400, 0, 2019, 650000.00, '2025-01-11', 'PENDING', 'Waterfront condo with ocean views, marble floors, premium finishes', 'Pool, Beach Access, Valet Parking'),
(7, '147 Willow Way', 'Nashville', 'TN', '37201', 'Single Family', 3, 2.0, 1600, 0.2, 2000, 380000.00, '2025-01-13', 'ACTIVE', 'Cozy home in music district, hardwood floors, renovated kitchen', 'Garden, Garage, Historic District'),
(8, '258 Spruce Court', 'Portland', 'OR', '97201', 'Townhouse', 2, 1.5, 1300, 0.05, 2015, 480000.00, '2025-01-09', 'ACTIVE', 'Eco-friendly townhouse, solar panels, energy-efficient appliances', 'Solar Panels, Garden, Bike Storage');

INSERT INTO buyers (id, name, email, phone, budget_min, budget_max, preferred_cities, property_preferences, lifestyle_preferences) VALUES
(1, 'Jennifer Adams', 'jennifer.a@email.com', '+1-555-2001', 400000, 600000, 'Austin, Nashville, Denver', '2-3 bedrooms, modern kitchen, garage', 'Family-friendly, good schools, walkable neighborhood'),
(2, 'Mark Thompson', 'mark.t@email.com', '+1-555-2002', 800000, 1200000, 'San Francisco, Seattle, Los Angeles', '2+ bedrooms, city views, luxury amenities', 'Urban lifestyle, tech hubs, public transportation'),
(3, 'Sarah Chen', 'sarah.c@email.com', '+1-555-2003', 300000, 500000, 'Portland, Austin, Nashville', '1-2 bedrooms, eco-friendly, outdoor space', 'Sustainable living, arts community, bike-friendly'),
(4, 'Robert Martinez', 'robert.m@email.com', '+1-555-2004', 600000, 900000, 'Miami, Los Angeles, Denver', '3+ bedrooms, pool, modern design', 'Luxury lifestyle, warm climate, entertainment options'),
(5, 'Lisa Park', 'lisa.p@email.com', '+1-555-2005', 450000, 700000, 'Seattle, Portland, San Francisco', '2-3 bedrooms, tech-friendly, views', 'Tech industry, coffee culture, outdoor activities');

INSERT INTO neighborhoods (id, name, city, state, safety_score, school_rating, walkability_score, transit_score, amenities_nearby, demographics) VALUES
(1, 'SOMA', 'San Francisco', 'CA', 7.5, 8.2, 9.1, 9.5, 'Restaurants, Tech companies, Museums, Shopping', 'Young professionals, Tech workers, Diverse'),
(2, 'Beverly Hills', 'Los Angeles', 'CA', 9.2, 9.5, 6.8, 6.2, 'Luxury shopping, Fine dining, Entertainment', 'Affluent families, Celebrities, Retirees'),
(3, 'South by Southwest', 'Austin', 'TX', 8.1, 7.8, 8.5, 7.2, 'Music venues, Restaurants, Bars, Parks', 'Young professionals, Musicians, Artists'),
(4, 'Belltown', 'Seattle', 'WA', 7.8, 8.0, 9.2, 8.8, 'Tech companies, Restaurants, Waterfront', 'Tech workers, Young professionals, Urban dwellers'),
(5, 'RiNo', 'Denver', 'CO', 8.0, 7.5, 7.8, 7.5, 'Art galleries, Breweries, Parks, Recreation', 'Artists, Young families, Professionals');

INSERT INTO market_data (id, city, state, month, median_price, avg_days_market, inventory_count, price_per_sqft, market_trend) VALUES
(1, 'San Francisco', 'CA', '2025-01', 950000, 25, 850, 1200, 'STABLE'),
(2, 'Los Angeles', 'CA', '2025-01', 750000, 30, 1200, 650, 'RISING'),
(3, 'Austin', 'TX', '2025-01', 420000, 20, 950, 280, 'RISING'),
(4, 'Seattle', 'WA', '2025-01', 680000, 18, 600, 520, 'STABLE'),
(5, 'Denver', 'CO', '2025-01', 580000, 22, 800, 320, 'RISING'),
(6, 'Miami', 'FL', '2025-01', 490000, 35, 1100, 450, 'STABLE'),
(7, 'Nashville', 'TN', '2025-01', 350000, 28, 750, 250, 'RISING'),
(8, 'Portland', 'OR', '2025-01', 480000, 32, 680, 380, 'DECLINING');

INSERT INTO property_views (id, buyer_id, property_id, view_date, view_type, interest_level, feedback) VALUES
(1, 1, 3, '2025-01-16 14:30:00', 'ONLINE', 4, 'Love the open floor plan and location near downtown'),
(2, 1, 7, '2025-01-17 10:15:00', 'IN_PERSON', 5, 'Perfect size for our family and great neighborhood'),
(3, 2, 1, '2025-01-16 16:45:00', 'ONLINE', 3, 'Nice views but concerned about HOA fees'),
(4, 2, 4, '2025-01-18 11:30:00', 'IN_PERSON', 4, 'Great location for work commute, modern amenities'),
(5, 3, 8, '2025-01-17 13:20:00', 'ONLINE', 5, 'Eco-friendly features are exactly what we want'),
(6, 4, 6, '2025-01-18 15:45:00', 'IN_PERSON', 4, 'Beautiful waterfront views, luxury finishes'),
(7, 5, 4, '2025-01-19 09:30:00', 'ONLINE', 3, 'Good location but a bit small for our needs');

Generate Embeddings

UPDATE properties 
SET property_embedding = EMBED(
    address || ' ' || city || ' ' || property_type || ' ' || bedrooms || ' bedrooms ' || 
    bathrooms || ' bathrooms ' || square_feet || ' sqft ' || description || ' ' || amenities
);

UPDATE buyers 
SET buyer_profile = EMBED(
    budget_min || ' to ' || budget_max || ' budget ' || preferred_cities || ' ' || 
    property_preferences || ' ' || lifestyle_preferences
);

UPDATE neighborhoods 
SET neighborhood_embedding = EMBED(
    name || ' ' || city || ' ' || amenities_nearby || ' ' || demographics
);

Smart Property Matching

SELECT 
    b.name as buyer_name,
    p.address,
    p.city,
    p.property_type,
    p.price,
    p.bedrooms,
    p.bathrooms,
    COSINE_SIMILARITY(b.buyer_profile, p.property_embedding) as match_score,
    CASE 
        WHEN p.price BETWEEN b.budget_min AND b.budget_max THEN 'Within Budget'
        WHEN p.price < b.budget_min THEN 'Below Budget'
        ELSE 'Over Budget'
    END as budget_fit
FROM buyers b
CROSS JOIN properties p
WHERE p.status = 'ACTIVE'
    AND p.city IN (
        SELECT TRIM(value) FROM (
            SELECT SUBSTR(b.preferred_cities, 1, INSTR(b.preferred_cities || ',', ',') - 1) as value
            UNION ALL
            SELECT SUBSTR(b.preferred_cities, INSTR(b.preferred_cities, ',') + 1) as value
            WHERE INSTR(b.preferred_cities, ',') > 0
        )
    )
    AND COSINE_SIMILARITY(b.buyer_profile, p.property_embedding) > 0.3
ORDER BY b.id, match_score DESC;

Property Value Prediction

WITH property_analysis AS (
    SELECT 
        p.*,
        md.median_price as market_median,
        md.price_per_sqft as market_price_sqft,
        md.market_trend,
        n.safety_score,
        n.school_rating,
        n.walkability_score,
        CASE 
            WHEN p.year_built > 2015 THEN 1.1
            WHEN p.year_built > 2000 THEN 1.0
            WHEN p.year_built > 1980 THEN 0.9
            ELSE 0.8
        END as age_factor
    FROM properties p
    LEFT JOIN market_data md ON p.city = md.city AND p.state = md.state
    LEFT JOIN neighborhoods n ON p.city = n.city AND p.state = n.state
)
SELECT 
    address,
    city,
    price as current_price,
    ROUND(market_median * age_factor * (safety_score/10) * (school_rating/10), 0) as estimated_value,
    ROUND((price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100, 1) as price_variance_pct,
    market_trend,
    CASE 
        WHEN (price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100 > 10 THEN 'Overpriced'
        WHEN (price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100 < -10 THEN 'Underpriced'
        ELSE 'Fair Value'
    END as price_assessment
FROM property_analysis
WHERE market_median IS NOT NULL
ORDER BY price_variance_pct DESC;

Neighborhood Analysis

SELECT 
    n.name as neighborhood,
    n.city,
    COUNT(p.id) as properties_available,
    AVG(p.price) as avg_price,
    AVG(p.price / p.square_feet) as avg_price_per_sqft,
    n.safety_score,
    n.school_rating,
    n.walkability_score,
    n.transit_score,
    ROUND((n.safety_score + n.school_rating + n.walkability_score + n.transit_score) / 4, 1) as overall_score,
    n.amenities_nearby
FROM neighborhoods n
LEFT JOIN properties p ON n.city = p.city AND n.state = p.state AND p.status = 'ACTIVE'
GROUP BY n.id, n.name, n.city, n.safety_score, n.school_rating, n.walkability_score, n.transit_score, n.amenities_nearby
ORDER BY overall_score DESC;

Generate Property Descriptions

SELECT 
    p.address,
    p.city,
    p.price,
    GENERATE(
        'Write an attractive real estate listing description for: ' || 
        p.bedrooms || ' bedroom, ' || p.bathrooms || ' bathroom ' || p.property_type || 
        ' in ' || p.city || ', ' || p.square_feet || ' square feet, built in ' || p.year_built || 
        '. Amenities: ' || p.amenities || '. Current description: ' || p.description,
        '{"max_length": 200, "tone": "professional", "focus": "real_estate_marketing"}'
    ) as enhanced_description
FROM properties p
WHERE p.id <= 4
ORDER BY p.price DESC;

Buyer Interest Analysis

SELECT 
    p.address,
    p.city,
    p.price,
    COUNT(pv.id) as total_views,
    AVG(pv.interest_level) as avg_interest,
    COUNT(CASE WHEN pv.view_type = 'IN_PERSON' THEN 1 END) as in_person_views,
    COUNT(CASE WHEN pv.interest_level >= 4 THEN 1 END) as high_interest_views,
    GROUP_CONCAT(pv.feedback, ' | ') as buyer_feedback,
    CASE 
        WHEN AVG(pv.interest_level) > 4 THEN 'High Demand'
        WHEN AVG(pv.interest_level) > 3 THEN 'Moderate Demand'
        ELSE 'Low Demand'
    END as demand_level
FROM properties p
LEFT JOIN property_views pv ON p.id = pv.property_id
GROUP BY p.id, p.address, p.city, p.price
HAVING COUNT(pv.id) > 0
ORDER BY avg_interest DESC, total_views DESC;

Market Trend Analysis

SELECT 
    city,
    state,
    median_price,
    price_per_sqft,
    avg_days_market,
    inventory_count,
    market_trend,
    CASE 
        WHEN avg_days_market < 20 THEN 'Seller Market'
        WHEN avg_days_market > 35 THEN 'Buyer Market'
        ELSE 'Balanced Market'
    END as market_type,
    ROUND(inventory_count / 30.0, 1) as months_supply
FROM market_data
WHERE month = '2025-01'
ORDER BY median_price DESC;

Investment Opportunity Analysis

WITH investment_analysis AS (
    SELECT 
        p.*,
        md.market_trend,
        md.price_per_sqft as market_avg_sqft,
        ROUND(p.price / p.square_feet, 2) as property_price_sqft,
        ROUND((md.price_per_sqft - (p.price / p.square_feet)) / md.price_per_sqft * 100, 1) as sqft_value_difference,
        n.overall_score
    FROM properties p
    JOIN market_data md ON p.city = md.city AND p.state = md.state
    JOIN (
        SELECT city, state, (safety_score + school_rating + walkability_score + transit_score) / 4 as overall_score
        FROM neighborhoods
    ) n ON p.city = n.city AND p.state = n.state
    WHERE p.status = 'ACTIVE'
)
SELECT 
    address,
    city,
    price,
    property_price_sqft,
    market_avg_sqft,
    sqft_value_difference,
    market_trend,
    overall_score,
    CASE 
        WHEN sqft_value_difference > 20 AND market_trend = 'RISING' THEN 'Excellent Investment'
        WHEN sqft_value_difference > 10 AND market_trend IN ('RISING', 'STABLE') THEN 'Good Investment'
        WHEN sqft_value_difference > 0 THEN 'Fair Investment'
        ELSE 'Risky Investment'
    END as investment_rating
FROM investment_analysis
ORDER BY sqft_value_difference DESC;

Find Similar Properties

WITH target_property AS (
    SELECT property_embedding FROM properties WHERE id = 1
)
SELECT 
    p.address,
    p.city,
    p.property_type,
    p.price,
    p.bedrooms,
    p.bathrooms,
    COSINE_SIMILARITY(tp.property_embedding, p.property_embedding) as similarity_score
FROM properties p
CROSS JOIN target_property tp
WHERE p.id != 1 AND p.status = 'ACTIVE'
ORDER BY similarity_score DESC
LIMIT 5;

Try These Additional Queries

-- Extract key features from property descriptions
SELECT 
    address,
    city,
    EXTRACT_KEYWORDS(description, 5) as key_features,
    amenities
FROM properties
WHERE description IS NOT NULL
LIMIT 5;

-- Generate market reports
SELECT 
    city,
    GENERATE(
        'Create a real estate market report for ' || city || ', ' || state || 
        '. Median price: $' || median_price || ', Average days on market: ' || avg_days_market || 
        ', Market trend: ' || market_trend || ', Inventory: ' || inventory_count || ' properties.',
        '{"max_length": 250, "focus": "market_analysis"}'
    ) as market_report
FROM market_data
WHERE month = '2025-01'
LIMIT 3;

-- Search properties by lifestyle preferences
SELECT address, city, property_type, price, amenities
FROM properties
WHERE SEMANTIC_MATCH('family friendly pool garage safe neighborhood', description || ' ' || amenities, 0.4)
AND status = 'ACTIVE';

-- Analyze buyer behavior patterns
SELECT 
    b.name,
    b.lifestyle_preferences,
    COUNT(pv.id) as properties_viewed,
    AVG(pv.interest_level) as avg_interest,
    GROUP_CONCAT(DISTINCT p.city) as cities_viewed
FROM buyers b
LEFT JOIN property_views pv ON b.id = pv.buyer_id
LEFT JOIN properties p ON pv.property_id = p.id
GROUP BY b.id, b.name, b.lifestyle_preferences
ORDER BY properties_viewed DESC;

-- Price prediction based on features
SELECT 
    property_type,
    city,
    AVG(price) as avg_price,
    AVG(price / square_feet) as avg_price_per_sqft,
    COUNT(*) as sample_size
FROM properties
WHERE status = 'ACTIVE'
GROUP BY property_type, city
HAVING COUNT(*) >= 1
ORDER BY avg_price DESC;

Key Takeaways

Intelligent Matching: AI-powered buyer-property matching using semantic similarity
Market Analysis: Automated property valuation and investment opportunity identification
Neighborhood Insights: Comprehensive area analysis including amenities and demographics
Predictive Analytics: Market trend analysis and price prediction capabilities

Next Steps

  • Implement automated property valuation models (AVM)
  • Add image analysis for property features extraction
  • Create virtual tour recommendations based on buyer preferences
  • Integrate with MLS data feeds for real-time updates

Note: This tutorial demonstrates real estate analytics using simulated property data for educational purposes. Property values and market data are fictional examples.

Tags

mlsqlreal-estatevector-searchrag

Run this on your own machine

Install SynapCores Community Edition free, paste the SQL or Cypher above into the bundled web UI, and watch it run.

Download Free CE