AI-Native Database SQL Reference

Published on January 20, 2025

SynapCores SQL Reference Guide

Core Architecture

  • AI-native database with unified SQL, Vector, and ML capabilities
  • Built for high-performance AI workloads
  • Tenant isolation, partitioning, vector operations

Data Types

Standard Types

BOOLEAN, INTEGER, BIGINT, REAL, DOUBLE, TEXT, VARCHAR(n), JSON, JSONB, UUID, TIMESTAMP, DATE, DECIMAL(p,s)

AI-Native Types

VECTOR(dimensions), AUDIO(format), VIDEO(format), IMAGE(format), PDF

Multimedia Formats

  • Audio: MP3, WAV, FLAC
  • Video: MP4, AVI, MKV
  • Image: JPEG, PNG, WEBP, GIF

SQL Extensions

Basic Operations

CREATE TABLE name (col type [CONSTRAINTS]) [PARTITION BY RANGE|LIST|HASH (col)];
INSERT [OR REPLACE] INTO table VALUES (...) [ON CONFLICT IGNORE];
SELECT [DISTINCT] cols FROM table [WHERE] [GROUP BY] [HAVING] [ORDER BY] [LIMIT n OFFSET m];
UPDATE table SET col=val WHERE condition;
DELETE FROM table WHERE condition;

Partitioning

-- Range partitioning
CREATE TABLE sales (...) PARTITION BY RANGE (date);
CREATE TABLE sales_q1 PARTITION OF sales FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

-- List partitioning
CREATE TABLE customers (...) PARTITION BY LIST (country);
CREATE TABLE customers_usa PARTITION OF customers FOR VALUES IN ('USA', 'US');

-- Hash partitioning
CREATE TABLE activity (...) PARTITION BY HASH (user_id);
CREATE TABLE activity_0 PARTITION OF activity FOR VALUES WITH (MODULUS 4, REMAINDER 0);

AI/ML Functions

-- Text Embeddings
EMBED(text), EMBEDDING(text)

-- NLP Functions
SUMMARIZE(text, max_length)
SENTIMENT_ANALYSIS(text)
EXTRACT_ENTITIES(text)
EXTRACT_KEYWORDS(text, count)
GENERATE(prompt, options)

-- Semantic Operations
SEMANTIC_MATCH('query', content, threshold)
COSINE_SIMILARITY(vec1, vec2)
EUCLIDEAN_DISTANCE(vec1, vec2)

Vector Operations

-- Distance Operators
embedding <=> query_vector < 0.5  -- Cosine distance
embedding <-> query_vector < 1.0  -- Euclidean distance
embedding <#> query_vector > 0.8  -- Inner product

-- Vector Functions
VECTOR_ADD(vec1, vec2), VECTOR_SUBTRACT(vec1, vec2)
VECTOR_MULTIPLY(vec, scalar), VECTOR_NORMALIZE(vec)
VECTOR_MAGNITUDE(vec), VECTOR_DOT(vec1, vec2)

AutoML

-- Create Experiment
CREATE AUTOML EXPERIMENT name
USING (SELECT * FROM data) TARGET column
OPTIONS (task_type='classification', max_trials=50);

-- Train Model
TRAIN MODEL name FROM data TARGET column TYPE classification
OPTIONS (algorithm='RandomForest');

-- Control
START EXPERIMENT name;
STOP EXPERIMENT name;
SHOW AUTOML EXPERIMENTS;

Natural Language SQL

ASK "Show customers from USA who bought in 2024"
EXPLAIN NATURAL "Find top products by revenue"
ASK "sales by region" WITH CONTEXT (tables=['sales','regions'])

Multimedia Operations

-- Processing
PROCESS_MULTIMEDIA(data)
EXTRACT_METADATA(content)
DETECT_FORMAT(content)
TRANSCRIBE(audio, 'whisper-base')
EXTRACT_TEXT(image)  -- OCR
EXTRACT_FRAMES(video, interval)
EXTRACT_AUDIO(video)
RESIZE_IMAGE(image, width, height)

-- Storage
CREATE TABLE media (audio AUDIO(MP3), video VIDEO(MP4), image IMAGE(JPEG), doc PDF);

Backup/Restore

-- Backup
BACKUP DATABASE TO 'path' WITH (TYPE='FULL|INCREMENTAL', COMPRESSION='ZSTD', ENCRYPTION=TRUE);
BACKUP TABLES table1, table2 TO 'path';

-- Restore
RESTORE DATABASE FROM 'path' WITH (OVERWRITE=TRUE, VERIFY_CHECKSUMS=TRUE);
CLONE DATABASE source TO target;
CLONE TABLE sales TO backup WHERE date > '2024-01-01';

Advanced Features

-- Transactions
BEGIN TRANSACTION; operations; COMMIT; -- or ROLLBACK;

-- Window Functions
SUM(value) OVER (PARTITION BY category ORDER BY date)

-- CTEs
WITH cte AS (SELECT ...) SELECT * FROM cte;

-- Statistical Functions
STDDEV(col), VARIANCE(col), PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY val)

Key Syntax Examples

-- AI-Enhanced Query
SELECT id, EMBED(description) as embedding,
       SENTIMENT_ANALYSIS(review) as sentiment
FROM products
WHERE description <=> EMBED('laptop computer') < 0.3;

-- Multimedia Analysis
SELECT filename, TRANSCRIBE(audio_data, 'whisper-base') as transcript,
       EXTRACT_TEXT(image_data) as ocr_text
FROM media_files;

-- AutoML Workflow
CREATE AUTOML EXPERIMENT customer_churn
USING (SELECT * FROM customer_features) TARGET churned
OPTIONS (task_type='classification');
START EXPERIMENT customer_churn;

Performance Features

  • Vector operations support up to 10M+ vectors
  • Partition pruning automatically optimizes queries
  • Natural language queries use schema context for best results
  • Multimedia files support streaming for large files
  • Backup encryption uses tenant-specific keys

Document Version: 1.0 Last Updated: December 2025 Website: https://synapcores.com