Spaces:
Sleeping
Sleeping
feat: voice recording
Browse files- audio_processor.py +16 -0
- streamlit_app.py +125 -81
audio_processor.py
CHANGED
|
@@ -52,8 +52,24 @@ class AudioEmotionProcessor:
|
|
| 52 |
def load_audio(self, filepath):
|
| 53 |
"""Load audio file and resample to target sample rate"""
|
| 54 |
audio, sr = librosa.load(filepath, sr=self.sample_rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
return audio, sr
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def get_audio_duration(self, audio, sr):
|
| 58 |
"""Get duration of audio in seconds"""
|
| 59 |
return librosa.get_duration(y=audio, sr=sr)
|
|
|
|
| 52 |
def load_audio(self, filepath):
    """Load an audio file, resampled to ``self.sample_rate``.

    The decoded signal is peak-normalized via ``normalize_audio`` so
    quiet recordings are boosted before any downstream analysis.

    Args:
        filepath: Path to the audio file to decode.

    Returns:
        tuple: ``(audio, sr)`` — the normalized 1-D sample array and the
        sample rate actually used.
    """
    audio, sr = librosa.load(filepath, sr=self.sample_rate)

    # Boost quiet recordings to a consistent peak level.
    normalized = self.normalize_audio(audio)

    return normalized, sr
|
| 60 |
|
| 61 |
+
def normalize_audio(self, audio):
    """Peak-normalize *audio* so its loudest sample reaches 0.95.

    Scaling to 0.95 instead of 1.0 leaves headroom so later processing
    cannot clip. Silent input (all zeros) is returned unchanged to
    avoid dividing by zero.

    Args:
        audio: 1-D NumPy array of samples.

    Returns:
        The normalized sample array (unchanged if silent).
    """
    peak = np.max(np.abs(audio))

    # A silent buffer has peak == 0; skip scaling entirely.
    if peak > 0:
        return audio / peak * 0.95

    return audio
|
| 72 |
+
|
| 73 |
def get_audio_duration(self, audio, sr):
    """Return the length of *audio* in seconds at sample rate *sr*."""
    duration = librosa.get_duration(y=audio, sr=sr)
    return duration
|
streamlit_app.py
CHANGED
|
@@ -21,13 +21,13 @@ st.markdown("Analyze emotions from audio files with timeline visualization")
|
|
| 21 |
FLASK_URL = os.getenv("FLASK_URL", "http://localhost:5000")
|
| 22 |
|
| 23 |
# Create tabs
|
| 24 |
-
tab1, tab2 = st.tabs(["📁
|
| 25 |
|
| 26 |
# ============================================
|
| 27 |
-
# TAB 1:
|
| 28 |
# ============================================
|
| 29 |
with tab1:
|
| 30 |
-
st.header("📁
|
| 31 |
st.markdown("Upload a pre-recorded audio file for sentiment analysis")
|
| 32 |
|
| 33 |
# File selection option
|
|
@@ -300,63 +300,110 @@ with tab1:
|
|
| 300 |
# TAB 2: Audio Input Analysis (Live Recording)
|
| 301 |
# ============================================
|
| 302 |
with tab2:
|
| 303 |
-
st.header("🎙️
|
| 304 |
-
st.markdown("Record audio
|
| 305 |
|
| 306 |
-
#
|
| 307 |
-
|
|
|
|
| 308 |
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
with col2:
|
| 312 |
-
stop_btn = st.button("⏹️ Stop Recording", width="stretch")
|
| 313 |
-
with col3:
|
| 314 |
-
analyze_record_btn = st.button("🔍 Analyze Recording", width="stretch")
|
| 315 |
|
| 316 |
-
|
| 317 |
-
if record_btn:
|
| 318 |
-
st.warning("🔴 Recording... (This feature will be implemented)")
|
| 319 |
|
| 320 |
-
if
|
| 321 |
-
st.
|
| 322 |
|
| 323 |
-
#
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
with col1:
|
| 329 |
-
sample_rate = st.selectbox(
|
| 330 |
-
"Sample Rate",
|
| 331 |
-
options=[16000, 22050, 44100, 48000],
|
| 332 |
-
index=0,
|
| 333 |
-
help="Audio sample rate in Hz"
|
| 334 |
-
)
|
| 335 |
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
|
|
|
| 343 |
|
| 344 |
-
#
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
-
#
|
| 349 |
-
if
|
| 350 |
-
|
| 351 |
-
st.info("⚙️ Processing audio through Flask API...")
|
| 352 |
-
|
| 353 |
-
st.success("✅ Analysis Complete!")
|
| 354 |
|
| 355 |
-
# Results layout
|
| 356 |
st.markdown("---")
|
| 357 |
st.subheader("📊 Emotion Analysis Results")
|
| 358 |
|
| 359 |
-
# Emotion emoji mapping
|
| 360 |
emotion_emoji_map = {
|
| 361 |
'Happy': '😊',
|
| 362 |
'Sad': '😢',
|
|
@@ -368,26 +415,23 @@ with tab2:
|
|
| 368 |
'Calm': '😌'
|
| 369 |
}
|
| 370 |
|
| 371 |
-
#
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
'Confidence': [0.88, 0.85, 0.90, 0.72, 0.87]
|
| 376 |
-
})
|
| 377 |
|
| 378 |
# Add emoji column
|
| 379 |
sample_data['Emoji'] = sample_data['Emotion'].map(emotion_emoji_map)
|
| 380 |
|
| 381 |
-
# Calculate metrics
|
| 382 |
-
total_duration = "00:30"
|
| 383 |
-
unique_emotions = sample_data['Emotion'].nunique()
|
| 384 |
-
dominant_emotion = sample_data['Emotion'].mode()[0]
|
| 385 |
-
dominant_emoji = emotion_emoji_map[dominant_emotion]
|
| 386 |
-
|
| 387 |
# Metrics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
col1, col2, col3 = st.columns(3)
|
| 389 |
with col1:
|
| 390 |
-
st.metric("
|
| 391 |
with col2:
|
| 392 |
st.metric("Emotions Detected", unique_emotions, help="Number of unique emotions")
|
| 393 |
with col3:
|
|
@@ -401,9 +445,7 @@ with tab2:
|
|
| 401 |
with col1:
|
| 402 |
st.subheader("⏱️ Emotion Timeline")
|
| 403 |
|
| 404 |
-
#
|
| 405 |
-
fig_timeline = go.Figure()
|
| 406 |
-
|
| 407 |
colors = {
|
| 408 |
'Happy': '#FFD700',
|
| 409 |
'Sad': '#4169E1',
|
|
@@ -415,26 +457,28 @@ with tab2:
|
|
| 415 |
'Calm': '#87CEEB'
|
| 416 |
}
|
| 417 |
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
fig_timeline.update_layout(
|
| 431 |
xaxis_title="Time",
|
| 432 |
yaxis_title="Confidence",
|
| 433 |
yaxis_range=[0, 1.1],
|
| 434 |
-
barmode='group',
|
| 435 |
height=400,
|
| 436 |
-
|
| 437 |
-
hovermode='x unified'
|
| 438 |
)
|
| 439 |
|
| 440 |
st.plotly_chart(fig_timeline, width="stretch")
|
|
@@ -442,13 +486,13 @@ with tab2:
|
|
| 442 |
with col2:
|
| 443 |
st.subheader("📊 Distribution")
|
| 444 |
|
| 445 |
-
# Pie chart
|
| 446 |
emotion_counts = sample_data['Emotion'].value_counts()
|
| 447 |
|
| 448 |
fig_pie = go.Figure(data=[go.Pie(
|
| 449 |
-
labels=[f"{emotion_emoji_map
|
| 450 |
values=emotion_counts.values,
|
| 451 |
-
marker=dict(colors=[colors
|
| 452 |
textinfo='percent+label',
|
| 453 |
textfont=dict(size=12),
|
| 454 |
hole=0.3
|
|
|
|
| 21 |
FLASK_URL = os.getenv("FLASK_URL", "http://localhost:5000")
|
| 22 |
|
| 23 |
# Create tabs
|
| 24 |
+
tab1, tab2 = st.tabs(["📁 File Analysis", "🎙️ Audio Recording"])
|
| 25 |
|
| 26 |
# ============================================
|
| 27 |
+
# TAB 1: File Analysis
|
| 28 |
# ============================================
|
| 29 |
with tab1:
|
| 30 |
+
st.header("📁 File Analysis")
|
| 31 |
st.markdown("Upload a pre-recorded audio file for sentiment analysis")
|
| 32 |
|
| 33 |
# File selection option
|
|
|
|
| 300 |
# TAB 2: Audio Input Analysis (Live Recording)
|
| 301 |
# ============================================
|
| 302 |
with tab2:
|
| 303 |
+
st.header("🎙️ Audio Recording Analysis")
|
| 304 |
+
st.markdown("Record audio from your microphone for real-time sentiment analysis")
|
| 305 |
|
| 306 |
+
# Initialize session state for Tab 2
|
| 307 |
+
if 'tab2_results' not in st.session_state:
|
| 308 |
+
st.session_state.tab2_results = None
|
| 309 |
|
| 310 |
+
# Audio recorder widget
|
| 311 |
+
audio_data = st.audio_input("Record your audio")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
+
audio_filename = "recorded_audio.wav"
|
|
|
|
|
|
|
| 314 |
|
| 315 |
+
if audio_data:
|
| 316 |
+
st.success("✅ Recording complete! You can now analyze it.")
|
| 317 |
|
| 318 |
+
# Show audio player if available
|
| 319 |
+
if audio_data:
|
| 320 |
+
st.subheader("🎵 Audio Preview")
|
| 321 |
+
st.audio(audio_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
+
# Analyze button
|
| 324 |
+
analyze_btn_tab2 = st.button(
|
| 325 |
+
"🔍 Analyze Audio",
|
| 326 |
+
type="primary",
|
| 327 |
+
width="stretch",
|
| 328 |
+
disabled=(audio_data is None),
|
| 329 |
+
key="analyze_tab2"
|
| 330 |
+
)
|
| 331 |
|
| 332 |
+
# Analysis process
|
| 333 |
+
if analyze_btn_tab2 and audio_data:
|
| 334 |
+
try:
|
| 335 |
+
# Prepare file for upload
|
| 336 |
+
if hasattr(audio_data, 'seek'):
|
| 337 |
+
audio_data.seek(0)
|
| 338 |
+
|
| 339 |
+
files = {'file': (audio_filename, audio_data, 'audio/wav')}
|
| 340 |
+
|
| 341 |
+
# Upload to Flask
|
| 342 |
+
with st.spinner("📤 Uploading audio..."):
|
| 343 |
+
upload_response = requests.post(
|
| 344 |
+
f"{FLASK_URL}/upload",
|
| 345 |
+
files=files
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
if upload_response.status_code == 202:
|
| 349 |
+
job_data = upload_response.json()
|
| 350 |
+
job_id = job_data['job_id']
|
| 351 |
+
|
| 352 |
+
# Poll for status
|
| 353 |
+
progress_bar = st.progress(0)
|
| 354 |
+
status_text = st.empty()
|
| 355 |
+
|
| 356 |
+
import time
|
| 357 |
+
max_attempts = 60
|
| 358 |
+
attempt = 0
|
| 359 |
+
|
| 360 |
+
while attempt < max_attempts:
|
| 361 |
+
status_response = requests.get(f"{FLASK_URL}/status/{job_id}")
|
| 362 |
+
|
| 363 |
+
if status_response.status_code == 200:
|
| 364 |
+
status_data = status_response.json()
|
| 365 |
+
progress = status_data['progress']
|
| 366 |
+
message = status_data['message']
|
| 367 |
+
status = status_data['status']
|
| 368 |
+
|
| 369 |
+
progress_bar.progress(progress / 100)
|
| 370 |
+
status_text.text(f"⚙️ {message} ({progress}%)")
|
| 371 |
+
|
| 372 |
+
if status == "completed":
|
| 373 |
+
st.session_state.tab2_results = status_data['results']
|
| 374 |
+
progress_bar.progress(100)
|
| 375 |
+
status_text.empty()
|
| 376 |
+
st.success("✅ Analysis Complete!")
|
| 377 |
+
break
|
| 378 |
+
|
| 379 |
+
elif status == "failed":
|
| 380 |
+
error_msg = status_data.get('error', 'Unknown error')
|
| 381 |
+
st.error(f"❌ Processing failed: {error_msg}")
|
| 382 |
+
progress_bar.empty()
|
| 383 |
+
status_text.empty()
|
| 384 |
+
break
|
| 385 |
+
|
| 386 |
+
time.sleep(5)
|
| 387 |
+
attempt += 1
|
| 388 |
+
|
| 389 |
+
if attempt >= max_attempts:
|
| 390 |
+
st.error("⏱️ Processing timeout. Please try again.")
|
| 391 |
+
else:
|
| 392 |
+
st.error(f"❌ Upload failed: {upload_response.json().get('error', 'Unknown error')}")
|
| 393 |
+
|
| 394 |
+
except requests.exceptions.ConnectionError:
|
| 395 |
+
st.error("❌ Could not connect to Flask server. Make sure it's running on port 5000!")
|
| 396 |
+
except Exception as e:
|
| 397 |
+
st.error(f"❌ An error occurred: {str(e)}")
|
| 398 |
|
| 399 |
+
# Display results if available
|
| 400 |
+
if st.session_state.tab2_results:
|
| 401 |
+
results = st.session_state.tab2_results
|
|
|
|
|
|
|
|
|
|
| 402 |
|
|
|
|
| 403 |
st.markdown("---")
|
| 404 |
st.subheader("📊 Emotion Analysis Results")
|
| 405 |
|
| 406 |
+
# Emotion emoji mapping
|
| 407 |
emotion_emoji_map = {
|
| 408 |
'Happy': '😊',
|
| 409 |
'Sad': '😢',
|
|
|
|
| 415 |
'Calm': '😌'
|
| 416 |
}
|
| 417 |
|
| 418 |
+
# Convert timeline to DataFrame
|
| 419 |
+
timeline_data = results['timeline']
|
| 420 |
+
sample_data = pd.DataFrame(timeline_data)
|
| 421 |
+
sample_data.rename(columns={'time': 'Time (s)', 'emotion': 'Emotion', 'confidence': 'Confidence'}, inplace=True)
|
|
|
|
|
|
|
| 422 |
|
| 423 |
# Add emoji column
|
| 424 |
sample_data['Emoji'] = sample_data['Emotion'].map(emotion_emoji_map)
|
| 425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
# Metrics
|
| 427 |
+
total_duration = results['duration']
|
| 428 |
+
unique_emotions = results['emotions_detected']
|
| 429 |
+
dominant_emotion = results['dominant_emotion']
|
| 430 |
+
dominant_emoji = emotion_emoji_map.get(dominant_emotion, '❓')
|
| 431 |
+
|
| 432 |
col1, col2, col3 = st.columns(3)
|
| 433 |
with col1:
|
| 434 |
+
st.metric("Audio Duration", total_duration, help="Length of audio")
|
| 435 |
with col2:
|
| 436 |
st.metric("Emotions Detected", unique_emotions, help="Number of unique emotions")
|
| 437 |
with col3:
|
|
|
|
| 445 |
with col1:
|
| 446 |
st.subheader("⏱️ Emotion Timeline")
|
| 447 |
|
| 448 |
+
# Color mapping
|
|
|
|
|
|
|
| 449 |
colors = {
|
| 450 |
'Happy': '#FFD700',
|
| 451 |
'Sad': '#4169E1',
|
|
|
|
| 457 |
'Calm': '#87CEEB'
|
| 458 |
}
|
| 459 |
|
| 460 |
+
# Create bar chart
|
| 461 |
+
bar_colors = [colors.get(emotion, '#808080') for emotion in sample_data['Emotion']]
|
| 462 |
+
bar_text = [emotion_emoji_map.get(emotion, '❓') for emotion in sample_data['Emotion']]
|
| 463 |
+
|
| 464 |
+
fig_timeline = go.Figure()
|
| 465 |
+
fig_timeline.add_trace(go.Bar(
|
| 466 |
+
x=sample_data['Time (s)'],
|
| 467 |
+
y=sample_data['Confidence'],
|
| 468 |
+
marker_color=bar_colors,
|
| 469 |
+
text=bar_text,
|
| 470 |
+
textposition='outside',
|
| 471 |
+
textfont=dict(size=20),
|
| 472 |
+
hovertemplate='<b>%{x}</b><br>Confidence: %{y:.2%}<br><extra></extra>',
|
| 473 |
+
showlegend=False
|
| 474 |
+
))
|
| 475 |
|
| 476 |
fig_timeline.update_layout(
|
| 477 |
xaxis_title="Time",
|
| 478 |
yaxis_title="Confidence",
|
| 479 |
yaxis_range=[0, 1.1],
|
|
|
|
| 480 |
height=400,
|
| 481 |
+
hovermode='x'
|
|
|
|
| 482 |
)
|
| 483 |
|
| 484 |
st.plotly_chart(fig_timeline, width="stretch")
|
|
|
|
| 486 |
with col2:
|
| 487 |
st.subheader("📊 Distribution")
|
| 488 |
|
| 489 |
+
# Pie chart
|
| 490 |
emotion_counts = sample_data['Emotion'].value_counts()
|
| 491 |
|
| 492 |
fig_pie = go.Figure(data=[go.Pie(
|
| 493 |
+
labels=[f"{emotion_emoji_map.get(e, '❓')} {e}" for e in emotion_counts.index],
|
| 494 |
values=emotion_counts.values,
|
| 495 |
+
marker=dict(colors=[colors.get(e, '#808080') for e in emotion_counts.index]),
|
| 496 |
textinfo='percent+label',
|
| 497 |
textfont=dict(size=12),
|
| 498 |
hole=0.3
|