Audio Transcription and Analysis
Objective
Create an audio processing pipeline that transcribes audio content, extracts key information, performs sentiment analysis, and enables semantic search across audio transcripts.
Step 1: Create Audio Files Table
Store audio content.
CREATE TABLE audio_files (
id INTEGER PRIMARY KEY,
audio_code VARCHAR(50) NOT NULL UNIQUE,
title VARCHAR(300),
description TEXT,
audio_file AUDIO(MP3),
file_size_mb DECIMAL(10, 2),
duration_seconds INTEGER,
sample_rate INTEGER,
channels INTEGER,
audio_type VARCHAR(50),
language VARCHAR(20) DEFAULT 'en',
source VARCHAR(100),
recorded_at TIMESTAMP,
uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Step 2: Create Transcripts Table
Store transcription results.
CREATE TABLE audio_transcripts (
id INTEGER PRIMARY KEY,
audio_id INTEGER NOT NULL,
full_transcript TEXT,
word_count INTEGER,
transcript_language VARCHAR(20),
confidence_score DECIMAL(3, 2),
processing_time_seconds INTEGER,
transcript_embedding VECTOR(384),
is_reviewed BOOLEAN DEFAULT FALSE,
reviewed_by VARCHAR(100),
reviewed_at TIMESTAMP,
transcribed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (audio_id) REFERENCES audio_files(id)
);
Step 3: Create Speaker Segments Table
Track speaker turns.
CREATE TABLE speaker_segments (
id INTEGER PRIMARY KEY,
transcript_id INTEGER NOT NULL,
segment_number INTEGER,
speaker_label VARCHAR(100),
speaker_id VARCHAR(50),
start_time_seconds DECIMAL(10, 3),
end_time_seconds DECIMAL(10, 3),
segment_text TEXT,
segment_embedding VECTOR(384),
word_count INTEGER,
confidence DECIMAL(3, 2),
FOREIGN KEY (transcript_id) REFERENCES audio_transcripts(id)
);
Step 4: Create Analysis Results Table
Store AI analysis.
CREATE TABLE transcript_analysis (
id INTEGER PRIMARY KEY,
transcript_id INTEGER NOT NULL,
analysis_type VARCHAR(50),
summary TEXT,
key_topics TEXT,
sentiment_score DECIMAL(3, 2),
sentiment_label VARCHAR(20),
entities_extracted TEXT,
action_items TEXT,
analyzed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (transcript_id) REFERENCES audio_transcripts(id)
);
Step 5: Create Keywords Table
Extract important terms.
CREATE TABLE transcript_keywords (
id INTEGER PRIMARY KEY,
transcript_id INTEGER NOT NULL,
keyword VARCHAR(100),
keyword_type VARCHAR(50),
frequency INTEGER,
relevance_score DECIMAL(3, 2),
first_occurrence_seconds DECIMAL(10, 3),
FOREIGN KEY (transcript_id) REFERENCES audio_transcripts(id)
);
Step 6: Insert Sample Audio Files
Add audio content.
INSERT INTO audio_files (id, audio_code, title, description, file_size_mb, duration_seconds, sample_rate, channels, audio_type, language, source, recorded_at) VALUES
(1, 'AUD-001', 'Q1 Earnings Call', 'Quarterly earnings call with investors', 45.8, 3600, 44100, 2, 'conference_call', 'en', 'Investor Relations', '2024-01-25 14:00:00'),
(2, 'AUD-002', 'Customer Support Call #1234', 'Support call regarding product issue', 8.5, 720, 16000, 1, 'support_call', 'en', 'Support Center', '2024-01-20 10:30:00'),
(3, 'AUD-003', 'Product Strategy Meeting', 'Internal meeting on 2024 product roadmap', 28.4, 2400, 44100, 2, 'meeting', 'en', 'Product Team', '2024-01-18 09:00:00'),
(4, 'AUD-004', 'Sales Training Session', 'Training on new sales methodology', 65.2, 5400, 44100, 2, 'training', 'en', 'Sales Enablement', '2024-01-22 13:00:00'),
(5, 'AUD-005', 'Customer Interview', 'User research interview for mobile app', 22.6, 1800, 44100, 2, 'interview', 'en', 'UX Research', '2024-01-15 11:00:00'),
(6, 'AUD-006', 'Podcast Episode 45', 'Tech trends discussion with industry experts', 52.4, 4200, 44100, 2, 'podcast', 'en', 'Marketing', '2024-01-28 00:00:00');
Step 7: Insert Transcripts
Add transcription results.
INSERT INTO audio_transcripts (id, audio_id, full_transcript, word_count, transcript_language, confidence_score, processing_time_seconds) VALUES
(1, 1, 'Good afternoon everyone and welcome to our Q1 2024 earnings call. I am pleased to report strong financial results with revenue growth of 25% year over year. Our key metrics show continued momentum across all business segments. Customer acquisition costs decreased while lifetime value increased. We are raising our full year guidance based on this performance...', 8500, 'en', 0.94, 180),
(2, 2, 'Hello, thank you for calling support. How can I help you today? The customer explained they were having issues with the login feature on the mobile app. After troubleshooting, we identified the issue was related to cached credentials. The solution involved clearing the app cache and re-authenticating. Customer confirmed the issue was resolved.', 1200, 'en', 0.92, 45),
(3, 3, 'Today we will discuss our product roadmap for 2024. Our main priorities are improving user experience, expanding our API capabilities, and launching the mobile application. We have allocated resources across three main initiatives. Timeline shows Q2 for API launch and Q3 for mobile...', 4500, 'en', 0.95, 120),
(4, 4, 'Welcome to sales training. Today we cover the consultative selling methodology. The key principles are understanding customer needs, building value propositions, and handling objections effectively. We will practice with role-playing exercises...', 9200, 'en', 0.93, 210),
(5, 5, 'Thank you for participating in this research session. Can you tell me about your typical workflow when using our application? The user described their daily usage patterns and pain points with the current navigation. Key feedback included desire for better search functionality and faster load times...', 3200, 'en', 0.91, 90);
Step 8: Generate Embeddings
Create vector representations.
UPDATE audio_transcripts
SET transcript_embedding = EMBED(full_transcript)
WHERE transcript_embedding IS NULL;
SELECT
af.audio_code,
af.title,
CASE WHEN at.transcript_embedding IS NOT NULL THEN 'Yes' ELSE 'No' END as embedded
FROM audio_files af
LEFT JOIN audio_transcripts at ON af.id = at.audio_id
ORDER BY af.id;
Step 9: Insert Speaker Segments
Add speaker turns.
INSERT INTO speaker_segments (id, transcript_id, segment_number, speaker_label, start_time_seconds, end_time_seconds, segment_text, word_count, confidence) VALUES
(1, 1, 1, 'CEO', 0.0, 120.5, 'Good afternoon everyone and welcome to our Q1 2024 earnings call. I am pleased to report strong financial results.', 85, 0.96),
(2, 1, 2, 'CFO', 120.5, 360.2, 'Let me walk you through the financial details. Revenue came in at 125 million, representing 25% growth.', 245, 0.94),
(3, 1, 3, 'Analyst', 360.2, 400.0, 'Thank you for the presentation. Can you provide more details on the enterprise segment growth?', 42, 0.92),
(4, 2, 1, 'Agent', 0.0, 15.5, 'Hello, thank you for calling support. How can I help you today?', 28, 0.95),
(5, 2, 2, 'Customer', 15.5, 85.0, 'Hi, I am having trouble logging into the mobile app. It keeps saying invalid credentials.', 65, 0.90),
(6, 2, 3, 'Agent', 85.0, 180.5, 'I understand. Let me help you troubleshoot this issue. Can you try clearing the app cache?', 78, 0.93),
(7, 5, 1, 'Researcher', 0.0, 45.0, 'Thank you for participating in this research session. Can you tell me about your typical workflow?', 52, 0.94),
(8, 5, 2, 'Participant', 45.0, 240.5, 'Sure. I typically start my day by checking the dashboard and then moving to the task list. The navigation could be better.', 125, 0.89);
UPDATE speaker_segments
SET segment_embedding = EMBED(segment_text)
WHERE segment_embedding IS NULL;
Step 10: Analyze Transcripts
Extract insights with AI.
INSERT INTO transcript_analysis (id, transcript_id, analysis_type, summary, key_topics, sentiment_score, sentiment_label, entities_extracted, action_items) VALUES
(1, 1, 'earnings_call', 'Strong Q1 results with 25% revenue growth. Company raising full year guidance. All business segments showing momentum.', 'revenue growth, earnings, guidance, customer metrics', 0.85, 'positive', 'Q1 2024, 25% growth, enterprise segment', NULL),
(2, 2, 'support_call', 'Customer experienced login issues on mobile app. Issue resolved by clearing app cache and re-authenticating.', 'login issues, mobile app, troubleshooting, cache', 0.65, 'neutral', 'mobile app, login feature, cache', 'Follow up on mobile app authentication improvements'),
(3, 3, 'meeting', 'Product roadmap discussion covering UX improvements, API expansion, and mobile launch. Timeline set for Q2-Q3 deliveries.', 'product roadmap, API, mobile app, user experience', 0.75, 'positive', 'Q2 API launch, Q3 mobile launch', 'Finalize API specifications, Begin mobile development'),
(4, 4, 'training', 'Sales methodology training covering consultative selling principles and objection handling techniques.', 'sales methodology, consultative selling, objections', 0.70, 'neutral', 'consultative selling, value proposition', NULL),
(5, 5, 'research', 'User research revealing pain points with navigation and search. Key requests for performance improvements.', 'user research, navigation, search, performance', 0.55, 'mixed', 'search functionality, load times, navigation', 'Prioritize search improvements in backlog');
Step 11: Extract Keywords
Identify key terms.
INSERT INTO transcript_keywords (id, transcript_id, keyword, keyword_type, frequency, relevance_score, first_occurrence_seconds) VALUES
(1, 1, 'revenue growth', 'metric', 5, 0.95, 45.2),
(2, 1, 'Q1 2024', 'time_period', 8, 0.90, 12.5),
(3, 1, 'enterprise', 'segment', 4, 0.85, 180.0),
(4, 2, 'mobile app', 'product', 6, 0.92, 20.5),
(5, 2, 'login', 'feature', 4, 0.88, 18.2),
(6, 2, 'cache', 'technical', 3, 0.75, 95.0),
(7, 3, 'product roadmap', 'topic', 3, 0.94, 5.0),
(8, 3, 'API', 'feature', 5, 0.90, 120.5),
(9, 5, 'navigation', 'feature', 4, 0.88, 65.0),
(10, 5, 'search', 'feature', 3, 0.85, 180.2);
Step 12: Semantic Audio Search
Find audio by meaning.
SELECT
af.audio_code,
af.title,
af.audio_type,
at.word_count,
COSINE_SIMILARITY(at.transcript_embedding, EMBED('financial performance and revenue growth')) as relevance
FROM audio_files af
INNER JOIN audio_transcripts at ON af.id = at.audio_id
WHERE at.transcript_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 5;
Step 13: Search Speaker Segments
Find specific discussions.
SELECT
af.title as audio,
ss.speaker_label,
ss.segment_text,
ss.start_time_seconds,
COSINE_SIMILARITY(ss.segment_embedding, EMBED('customer issues and troubleshooting')) as relevance
FROM speaker_segments ss
INNER JOIN audio_transcripts at ON ss.transcript_id = at.id
INNER JOIN audio_files af ON at.audio_id = af.id
WHERE ss.segment_embedding IS NOT NULL
ORDER BY relevance DESC
LIMIT 5;
Step 14: Sentiment Summary
Analyze emotional tone.
SELECT
af.audio_type,
COUNT(*) as audio_count,
AVG(ta.sentiment_score) as avg_sentiment,
COUNT(CASE WHEN ta.sentiment_label = 'positive' THEN 1 END) as positive_count,
COUNT(CASE WHEN ta.sentiment_label = 'neutral' THEN 1 END) as neutral_count,
COUNT(CASE WHEN ta.sentiment_label = 'negative' THEN 1 END) as negative_count
FROM audio_files af
INNER JOIN audio_transcripts at ON af.id = at.audio_id
INNER JOIN transcript_analysis ta ON at.id = ta.transcript_id
GROUP BY af.audio_type
ORDER BY avg_sentiment DESC;
Step 15: Action Items Report
Extract tasks from transcripts.
SELECT
af.title,
af.audio_type,
af.recorded_at,
ta.action_items
FROM audio_files af
INNER JOIN audio_transcripts at ON af.id = at.audio_id
INNER JOIN transcript_analysis ta ON at.id = ta.transcript_id
WHERE ta.action_items IS NOT NULL
ORDER BY af.recorded_at DESC;
Cleanup (Optional)
DROP TABLE IF EXISTS transcript_keywords;
DROP TABLE IF EXISTS transcript_analysis;
DROP TABLE IF EXISTS speaker_segments;
DROP TABLE IF EXISTS audio_transcripts;
DROP TABLE IF EXISTS audio_files;
Expected Outcomes
- Audio files cataloged
- Transcripts generated
- Speakers identified
- Analysis extracted
- Semantic search enabled
Key Concepts Learned
- Audio transcription workflow
- Speaker diarization
- Sentiment analysis
- Keyword extraction
- Semantic audio search