Real Estate AI: Property Matching & Market Analysis
Problem
Build an intelligent real estate platform that matches buyers with properties, predicts market trends, analyzes property values, and generates insights for better investment decisions using AI.
What You'll Learn
- Match buyers with properties using semantic search
- Predict property values and market trends
- Analyze neighborhood characteristics and amenities
- Generate automated property descriptions and market reports
Setup: Create Real Estate Database
CREATE TABLE properties (
id INTEGER PRIMARY KEY,
address TEXT NOT NULL,
city TEXT,
state TEXT,
zip_code TEXT,
property_type TEXT,
bedrooms INTEGER,
bathrooms REAL,
square_feet INTEGER,
lot_size REAL,
year_built INTEGER,
price DECIMAL(12,2),
listing_date DATE,
status TEXT,
description TEXT,
amenities TEXT,
property_embedding VECTOR(384)
);
CREATE TABLE buyers (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT,
phone TEXT,
budget_min DECIMAL(12,2),
budget_max DECIMAL(12,2),
preferred_cities TEXT,
property_preferences TEXT,
lifestyle_preferences TEXT,
buyer_profile VECTOR(384)
);
CREATE TABLE property_views (
id INTEGER PRIMARY KEY,
buyer_id INTEGER REFERENCES buyers(id),
property_id INTEGER REFERENCES properties(id),
view_date TIMESTAMP,
view_type TEXT,
interest_level INTEGER CHECK (interest_level >= 1 AND interest_level <= 5),
feedback TEXT
);
CREATE TABLE market_data (
id INTEGER PRIMARY KEY,
city TEXT,
state TEXT,
month TEXT,
median_price DECIMAL(12,2),
avg_days_market INTEGER,
inventory_count INTEGER,
price_per_sqft DECIMAL(8,2),
market_trend TEXT
);
CREATE TABLE neighborhoods (
id INTEGER PRIMARY KEY,
name TEXT,
city TEXT,
state TEXT,
safety_score REAL,
school_rating REAL,
walkability_score REAL,
transit_score REAL,
amenities_nearby TEXT,
demographics TEXT,
neighborhood_embedding VECTOR(384)
);
Seed Data: Property Listings
INSERT INTO properties (id, address, city, state, zip_code, property_type, bedrooms, bathrooms, square_feet, lot_size, year_built, price, listing_date, status, description, amenities) VALUES
(1, '123 Oak Street', 'San Francisco', 'CA', '94102', 'Condo', 2, 2.0, 1200, 0, 2018, 850000.00, '2025-01-10', 'ACTIVE', 'Modern downtown condo with city views, updated kitchen, hardwood floors', 'Gym, Pool, Rooftop Deck, Parking'),
(2, '456 Pine Avenue', 'Los Angeles', 'CA', '90210', 'Single Family', 4, 3.0, 2500, 0.25, 1995, 1200000.00, '2025-01-12', 'ACTIVE', 'Beautiful family home in prestigious neighborhood, large backyard, updated bathrooms', 'Pool, Garage, Garden, Security System'),
(3, '789 Maple Drive', 'Austin', 'TX', '73301', 'Townhouse', 3, 2.5, 1800, 0.1, 2010, 450000.00, '2025-01-08', 'ACTIVE', 'Charming townhouse near downtown, open floor plan, granite countertops', 'Patio, Fireplace, Walk-in Closet'),
(4, '321 Elm Street', 'Seattle', 'WA', '98101', 'Condo', 1, 1.0, 800, 0, 2020, 520000.00, '2025-01-15', 'ACTIVE', 'Luxury studio in tech district, floor-to-ceiling windows, modern appliances', 'Concierge, Gym, Business Center'),
(5, '654 Cedar Lane', 'Denver', 'CO', '80202', 'Single Family', 5, 4.0, 3200, 0.3, 2005, 750000.00, '2025-01-14', 'ACTIVE', 'Spacious family home with mountain views, finished basement, large deck', 'Fireplace, Garage, Deck, Mountain Views'),
(6, '987 Birch Road', 'Miami', 'FL', '33101', 'Condo', 2, 2.0, 1400, 0, 2019, 650000.00, '2025-01-11', 'PENDING', 'Waterfront condo with ocean views, marble floors, premium finishes', 'Pool, Beach Access, Valet Parking'),
(7, '147 Willow Way', 'Nashville', 'TN', '37201', 'Single Family', 3, 2.0, 1600, 0.2, 2000, 380000.00, '2025-01-13', 'ACTIVE', 'Cozy home in music district, hardwood floors, renovated kitchen', 'Garden, Garage, Historic District'),
(8, '258 Spruce Court', 'Portland', 'OR', '97201', 'Townhouse', 2, 1.5, 1300, 0.05, 2015, 480000.00, '2025-01-09', 'ACTIVE', 'Eco-friendly townhouse, solar panels, energy-efficient appliances', 'Solar Panels, Garden, Bike Storage');
INSERT INTO buyers (id, name, email, phone, budget_min, budget_max, preferred_cities, property_preferences, lifestyle_preferences) VALUES
(1, 'Jennifer Adams', 'jennifer.a@email.com', '+1-555-2001', 400000, 600000, 'Austin, Nashville, Denver', '2-3 bedrooms, modern kitchen, garage', 'Family-friendly, good schools, walkable neighborhood'),
(2, 'Mark Thompson', 'mark.t@email.com', '+1-555-2002', 800000, 1200000, 'San Francisco, Seattle, Los Angeles', '2+ bedrooms, city views, luxury amenities', 'Urban lifestyle, tech hubs, public transportation'),
(3, 'Sarah Chen', 'sarah.c@email.com', '+1-555-2003', 300000, 500000, 'Portland, Austin, Nashville', '1-2 bedrooms, eco-friendly, outdoor space', 'Sustainable living, arts community, bike-friendly'),
(4, 'Robert Martinez', 'robert.m@email.com', '+1-555-2004', 600000, 900000, 'Miami, Los Angeles, Denver', '3+ bedrooms, pool, modern design', 'Luxury lifestyle, warm climate, entertainment options'),
(5, 'Lisa Park', 'lisa.p@email.com', '+1-555-2005', 450000, 700000, 'Seattle, Portland, San Francisco', '2-3 bedrooms, tech-friendly, views', 'Tech industry, coffee culture, outdoor activities');
INSERT INTO neighborhoods (id, name, city, state, safety_score, school_rating, walkability_score, transit_score, amenities_nearby, demographics) VALUES
(1, 'SOMA', 'San Francisco', 'CA', 7.5, 8.2, 9.1, 9.5, 'Restaurants, Tech companies, Museums, Shopping', 'Young professionals, Tech workers, Diverse'),
(2, 'Beverly Hills', 'Los Angeles', 'CA', 9.2, 9.5, 6.8, 6.2, 'Luxury shopping, Fine dining, Entertainment', 'Affluent families, Celebrities, Retirees'),
(3, 'South by Southwest', 'Austin', 'TX', 8.1, 7.8, 8.5, 7.2, 'Music venues, Restaurants, Bars, Parks', 'Young professionals, Musicians, Artists'),
(4, 'Belltown', 'Seattle', 'WA', 7.8, 8.0, 9.2, 8.8, 'Tech companies, Restaurants, Waterfront', 'Tech workers, Young professionals, Urban dwellers'),
(5, 'RiNo', 'Denver', 'CO', 8.0, 7.5, 7.8, 7.5, 'Art galleries, Breweries, Parks, Recreation', 'Artists, Young families, Professionals');
INSERT INTO market_data (id, city, state, month, median_price, avg_days_market, inventory_count, price_per_sqft, market_trend) VALUES
(1, 'San Francisco', 'CA', '2025-01', 950000, 25, 850, 1200, 'STABLE'),
(2, 'Los Angeles', 'CA', '2025-01', 750000, 30, 1200, 650, 'RISING'),
(3, 'Austin', 'TX', '2025-01', 420000, 20, 950, 280, 'RISING'),
(4, 'Seattle', 'WA', '2025-01', 680000, 18, 600, 520, 'STABLE'),
(5, 'Denver', 'CO', '2025-01', 580000, 22, 800, 320, 'RISING'),
(6, 'Miami', 'FL', '2025-01', 490000, 35, 1100, 450, 'STABLE'),
(7, 'Nashville', 'TN', '2025-01', 350000, 28, 750, 250, 'RISING'),
(8, 'Portland', 'OR', '2025-01', 480000, 32, 680, 380, 'DECLINING');
INSERT INTO property_views (id, buyer_id, property_id, view_date, view_type, interest_level, feedback) VALUES
(1, 1, 3, '2025-01-16 14:30:00', 'ONLINE', 4, 'Love the open floor plan and location near downtown'),
(2, 1, 7, '2025-01-17 10:15:00', 'IN_PERSON', 5, 'Perfect size for our family and great neighborhood'),
(3, 2, 1, '2025-01-16 16:45:00', 'ONLINE', 3, 'Nice views but concerned about HOA fees'),
(4, 2, 4, '2025-01-18 11:30:00', 'IN_PERSON', 4, 'Great location for work commute, modern amenities'),
(5, 3, 8, '2025-01-17 13:20:00', 'ONLINE', 5, 'Eco-friendly features are exactly what we want'),
(6, 4, 6, '2025-01-18 15:45:00', 'IN_PERSON', 4, 'Beautiful waterfront views, luxury finishes'),
(7, 5, 4, '2025-01-19 09:30:00', 'ONLINE', 3, 'Good location but a bit small for our needs');
Generate Embeddings
UPDATE properties
SET property_embedding = EMBED(
address || ' ' || city || ' ' || property_type || ' ' || bedrooms || ' bedrooms ' ||
bathrooms || ' bathrooms ' || square_feet || ' sqft ' || description || ' ' || amenities
);
UPDATE buyers
SET buyer_profile = EMBED(
budget_min || ' to ' || budget_max || ' budget ' || preferred_cities || ' ' ||
property_preferences || ' ' || lifestyle_preferences
);
UPDATE neighborhoods
SET neighborhood_embedding = EMBED(
name || ' ' || city || ' ' || amenities_nearby || ' ' || demographics
);
Smart Property Matching
SELECT
b.name as buyer_name,
p.address,
p.city,
p.property_type,
p.price,
p.bedrooms,
p.bathrooms,
COSINE_SIMILARITY(b.buyer_profile, p.property_embedding) as match_score,
CASE
WHEN p.price BETWEEN b.budget_min AND b.budget_max THEN 'Within Budget'
WHEN p.price < b.budget_min THEN 'Below Budget'
ELSE 'Over Budget'
END as budget_fit
FROM buyers b
CROSS JOIN properties p
WHERE p.status = 'ACTIVE'
AND p.city IN (
SELECT TRIM(value) FROM (
SELECT SUBSTR(b.preferred_cities, 1, INSTR(b.preferred_cities || ',', ',') - 1) as value
UNION ALL
SELECT SUBSTR(b.preferred_cities, INSTR(b.preferred_cities, ',') + 1) as value
WHERE INSTR(b.preferred_cities, ',') > 0
)
)
AND COSINE_SIMILARITY(b.buyer_profile, p.property_embedding) > 0.3
ORDER BY b.id, match_score DESC;
Property Value Prediction
WITH property_analysis AS (
SELECT
p.*,
md.median_price as market_median,
md.price_per_sqft as market_price_sqft,
md.market_trend,
n.safety_score,
n.school_rating,
n.walkability_score,
CASE
WHEN p.year_built > 2015 THEN 1.1
WHEN p.year_built > 2000 THEN 1.0
WHEN p.year_built > 1980 THEN 0.9
ELSE 0.8
END as age_factor
FROM properties p
LEFT JOIN market_data md ON p.city = md.city AND p.state = md.state
LEFT JOIN neighborhoods n ON p.city = n.city AND p.state = n.state
)
SELECT
address,
city,
price as current_price,
ROUND(market_median * age_factor * (safety_score/10) * (school_rating/10), 0) as estimated_value,
ROUND((price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100, 1) as price_variance_pct,
market_trend,
CASE
WHEN (price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100 > 10 THEN 'Overpriced'
WHEN (price - (market_median * age_factor * (safety_score/10) * (school_rating/10))) / price * 100 < -10 THEN 'Underpriced'
ELSE 'Fair Value'
END as price_assessment
FROM property_analysis
WHERE market_median IS NOT NULL
ORDER BY price_variance_pct DESC;
Neighborhood Analysis
SELECT
n.name as neighborhood,
n.city,
COUNT(p.id) as properties_available,
AVG(p.price) as avg_price,
AVG(p.price / p.square_feet) as avg_price_per_sqft,
n.safety_score,
n.school_rating,
n.walkability_score,
n.transit_score,
ROUND((n.safety_score + n.school_rating + n.walkability_score + n.transit_score) / 4, 1) as overall_score,
n.amenities_nearby
FROM neighborhoods n
LEFT JOIN properties p ON n.city = p.city AND n.state = p.state AND p.status = 'ACTIVE'
GROUP BY n.id, n.name, n.city, n.safety_score, n.school_rating, n.walkability_score, n.transit_score, n.amenities_nearby
ORDER BY overall_score DESC;
Generate Property Descriptions
SELECT
p.address,
p.city,
p.price,
GENERATE(
'Write an attractive real estate listing description for: ' ||
p.bedrooms || ' bedroom, ' || p.bathrooms || ' bathroom ' || p.property_type ||
' in ' || p.city || ', ' || p.square_feet || ' square feet, built in ' || p.year_built ||
'. Amenities: ' || p.amenities || '. Current description: ' || p.description,
'{"max_length": 200, "tone": "professional", "focus": "real_estate_marketing"}'
) as enhanced_description
FROM properties p
WHERE p.id <= 4
ORDER BY p.price DESC;
Buyer Interest Analysis
SELECT
p.address,
p.city,
p.price,
COUNT(pv.id) as total_views,
AVG(pv.interest_level) as avg_interest,
COUNT(CASE WHEN pv.view_type = 'IN_PERSON' THEN 1 END) as in_person_views,
COUNT(CASE WHEN pv.interest_level >= 4 THEN 1 END) as high_interest_views,
GROUP_CONCAT(pv.feedback, ' | ') as buyer_feedback,
CASE
WHEN AVG(pv.interest_level) > 4 THEN 'High Demand'
WHEN AVG(pv.interest_level) > 3 THEN 'Moderate Demand'
ELSE 'Low Demand'
END as demand_level
FROM properties p
LEFT JOIN property_views pv ON p.id = pv.property_id
GROUP BY p.id, p.address, p.city, p.price
HAVING COUNT(pv.id) > 0
ORDER BY avg_interest DESC, total_views DESC;
Market Trend Analysis
SELECT
city,
state,
median_price,
price_per_sqft,
avg_days_market,
inventory_count,
market_trend,
CASE
WHEN avg_days_market < 20 THEN 'Seller Market'
WHEN avg_days_market > 35 THEN 'Buyer Market'
ELSE 'Balanced Market'
END as market_type,
ROUND(inventory_count / 30.0, 1) as months_supply
FROM market_data
WHERE month = '2025-01'
ORDER BY median_price DESC;
Investment Opportunity Analysis
WITH investment_analysis AS (
SELECT
p.*,
md.market_trend,
md.price_per_sqft as market_avg_sqft,
ROUND(p.price / p.square_feet, 2) as property_price_sqft,
ROUND((md.price_per_sqft - (p.price / p.square_feet)) / md.price_per_sqft * 100, 1) as sqft_value_difference,
n.overall_score
FROM properties p
JOIN market_data md ON p.city = md.city AND p.state = md.state
JOIN (
SELECT city, state, (safety_score + school_rating + walkability_score + transit_score) / 4 as overall_score
FROM neighborhoods
) n ON p.city = n.city AND p.state = n.state
WHERE p.status = 'ACTIVE'
)
SELECT
address,
city,
price,
property_price_sqft,
market_avg_sqft,
sqft_value_difference,
market_trend,
overall_score,
CASE
WHEN sqft_value_difference > 20 AND market_trend = 'RISING' THEN 'Excellent Investment'
WHEN sqft_value_difference > 10 AND market_trend IN ('RISING', 'STABLE') THEN 'Good Investment'
WHEN sqft_value_difference > 0 THEN 'Fair Investment'
ELSE 'Risky Investment'
END as investment_rating
FROM investment_analysis
ORDER BY sqft_value_difference DESC;
Find Similar Properties
WITH target_property AS (
SELECT property_embedding FROM properties WHERE id = 1
)
SELECT
p.address,
p.city,
p.property_type,
p.price,
p.bedrooms,
p.bathrooms,
COSINE_SIMILARITY(tp.property_embedding, p.property_embedding) as similarity_score
FROM properties p
CROSS JOIN target_property tp
WHERE p.id != 1 AND p.status = 'ACTIVE'
ORDER BY similarity_score DESC
LIMIT 5;
Try These Additional Queries
-- Extract key features from property descriptions
SELECT
address,
city,
EXTRACT_KEYWORDS(description, 5) as key_features,
amenities
FROM properties
WHERE description IS NOT NULL
LIMIT 5;
-- Generate market reports
SELECT
city,
GENERATE(
'Create a real estate market report for ' || city || ', ' || state ||
'. Median price: $' || median_price || ', Average days on market: ' || avg_days_market ||
', Market trend: ' || market_trend || ', Inventory: ' || inventory_count || ' properties.',
'{"max_length": 250, "focus": "market_analysis"}'
) as market_report
FROM market_data
WHERE month = '2025-01'
LIMIT 3;
-- Search properties by lifestyle preferences
SELECT address, city, property_type, price, amenities
FROM properties
WHERE SEMANTIC_MATCH('family friendly pool garage safe neighborhood', description || ' ' || amenities, 0.4)
AND status = 'ACTIVE';
-- Analyze buyer behavior patterns
SELECT
b.name,
b.lifestyle_preferences,
COUNT(pv.id) as properties_viewed,
AVG(pv.interest_level) as avg_interest,
GROUP_CONCAT(DISTINCT p.city) as cities_viewed
FROM buyers b
LEFT JOIN property_views pv ON b.id = pv.buyer_id
LEFT JOIN properties p ON pv.property_id = p.id
GROUP BY b.id, b.name, b.lifestyle_preferences
ORDER BY properties_viewed DESC;
-- Price prediction based on features
SELECT
property_type,
city,
AVG(price) as avg_price,
AVG(price / square_feet) as avg_price_per_sqft,
COUNT(*) as sample_size
FROM properties
WHERE status = 'ACTIVE'
GROUP BY property_type, city
HAVING COUNT(*) >= 1
ORDER BY avg_price DESC;
Key Takeaways
✅ Intelligent Matching: AI-powered buyer-property matching using semantic similarity
✅ Market Analysis: Automated property valuation and investment opportunity identification
✅ Neighborhood Insights: Comprehensive area analysis including amenities and demographics
✅ Predictive Analytics: Market trend analysis and price prediction capabilities
Next Steps
- Implement automated property valuation models (AVM)
- Add image analysis for property features extraction
- Create virtual tour recommendations based on buyer preferences
- Integrate with MLS data feeds for real-time updates
Note: This tutorial demonstrates real estate analytics using simulated property data for educational purposes. Property values and market data are fictional examples.