temp12821 committed on
Commit c9132cc · Parent: feaf7eb

feat: voice recording

Files changed (2)
  1. audio_processor.py +16 -0
  2. streamlit_app.py +125 -81
audio_processor.py CHANGED
@@ -52,8 +52,24 @@ class AudioEmotionProcessor:
     def load_audio(self, filepath):
         """Load audio file and resample to target sample rate"""
         audio, sr = librosa.load(filepath, sr=self.sample_rate)
+
+        # Normalize audio volume (boost quiet recordings)
+        audio = self.normalize_audio(audio)
+
         return audio, sr
 
+    def normalize_audio(self, audio):
+        """Normalize audio to increase volume"""
+        # Get max absolute value
+        max_val = np.max(np.abs(audio))
+
+        # Avoid division by zero
+        if max_val > 0:
+            # Normalize to 0.95 to prevent clipping
+            audio = audio / max_val * 0.95
+
+        return audio
+
     def get_audio_duration(self, audio, sr):
         """Get duration of audio in seconds"""
         return librosa.get_duration(y=audio, sr=sr)
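
The new `normalize_audio` is standard peak normalization: the clip is rescaled so its loudest sample sits at 0.95 of full scale, and silent input passes through unchanged (the method assumes the module already imports `numpy` as `np`). A minimal standalone sketch of the same logic; the `peak` parameter is an illustrative addition, not part of the committed code:

    import numpy as np

    def normalize_audio(audio, peak=0.95):
        # Peak normalization: scale so the loudest sample sits at `peak`.
        max_val = np.max(np.abs(audio))
        if max_val > 0:  # leave silent input unchanged
            audio = audio / max_val * peak
        return audio

    # A quiet 440 Hz tone (peak 0.1) is boosted to peak 0.95;
    # relative dynamics within the clip are preserved.
    t = np.linspace(0, 1, 16000, endpoint=False)
    quiet = 0.1 * np.sin(2 * np.pi * 440 * t)
    print(np.max(np.abs(normalize_audio(quiet))))  # ≈ 0.95
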
streamlit_app.py CHANGED
@@ -21,13 +21,13 @@ st.markdown("Analyze emotions from audio files with timeline visualization")
 FLASK_URL = os.getenv("FLASK_URL", "http://localhost:5000")
 
 # Create tabs
-tab1, tab2 = st.tabs(["📁 Test File Analysis", "🎙️ Audio Input Analysis"])
+tab1, tab2 = st.tabs(["📁 File Analysis", "🎙️ Audio Recording"])
 
 # ============================================
-# TAB 1: Test File Analysis
+# TAB 1: File Analysis
 # ============================================
 with tab1:
-    st.header("📁 Test File Analysis")
+    st.header("📁 File Analysis")
     st.markdown("Upload a pre-recorded audio file for sentiment analysis")
 
     # File selection option
@@ -300,63 +300,110 @@ with tab1:
 # TAB 2: Audio Input Analysis (Live Recording)
 # ============================================
 with tab2:
-    st.header("🎙️ Live Audio Input Analysis")
-    st.markdown("Record audio in real-time for sentiment analysis")
+    st.header("🎙️ Audio Recording Analysis")
+    st.markdown("Record audio from your microphone for real-time sentiment analysis")
 
-    # Recording controls
-    col1, col2, col3 = st.columns(3)
+    # Initialize session state for Tab 2
+    if 'tab2_results' not in st.session_state:
+        st.session_state.tab2_results = None
 
-    with col1:
-        record_btn = st.button("🔴 Start Recording", type="primary", width="stretch")
-    with col2:
-        stop_btn = st.button("⏹️ Stop Recording", width="stretch")
-    with col3:
-        analyze_record_btn = st.button("🔍 Analyze Recording", width="stretch")
+    # Audio recorder widget
+    audio_data = st.audio_input("Record your audio")
 
-    # Recording status
-    if record_btn:
-        st.warning("🔴 Recording... (This feature will be implemented)")
+    audio_filename = "recorded_audio.wav"
 
-    if stop_btn:
-        st.info("⏹️ Recording stopped")
+    if audio_data:
+        st.success("✅ Recording complete! You can now analyze it.")
 
-    # Audio input section
-    st.subheader("🎤 Audio Input Settings")
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        sample_rate = st.selectbox(
-            "Sample Rate",
-            options=[16000, 22050, 44100, 48000],
-            index=0,
-            help="Audio sample rate in Hz"
-        )
+    # Show audio player if available
+    if audio_data:
+        st.subheader("🎵 Audio Preview")
+        st.audio(audio_data)
 
-    with col2:
-        channels = st.selectbox(
-            "Channels",
-            options=["Mono", "Stereo"],
-            index=0,
-            help="Audio channel configuration"
-        )
+    # Analyze button
+    analyze_btn_tab2 = st.button(
+        "🔍 Analyze Audio",
+        type="primary",
+        width="stretch",
+        disabled=(audio_data is None),
+        key="analyze_tab2"
+    )
 
-    # Recorded audio preview (placeholder)
-    st.subheader("🎵 Recorded Audio Preview")
-    st.info("📝 No recording available yet. Click 'Start Recording' to begin.")
+    # Analysis process
+    if analyze_btn_tab2 and audio_data:
+        try:
+            # Prepare file for upload
+            if hasattr(audio_data, 'seek'):
+                audio_data.seek(0)
+
+            files = {'file': (audio_filename, audio_data, 'audio/wav')}
+
+            # Upload to Flask
+            with st.spinner("📤 Uploading audio..."):
+                upload_response = requests.post(
+                    f"{FLASK_URL}/upload",
+                    files=files
+                )
+
+            if upload_response.status_code == 202:
+                job_data = upload_response.json()
+                job_id = job_data['job_id']
+
+                # Poll for status
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                import time
+                max_attempts = 60
+                attempt = 0
+
+                while attempt < max_attempts:
+                    status_response = requests.get(f"{FLASK_URL}/status/{job_id}")
+
+                    if status_response.status_code == 200:
+                        status_data = status_response.json()
+                        progress = status_data['progress']
+                        message = status_data['message']
+                        status = status_data['status']
+
+                        progress_bar.progress(progress / 100)
+                        status_text.text(f"⚙️ {message} ({progress}%)")
+
+                        if status == "completed":
+                            st.session_state.tab2_results = status_data['results']
+                            progress_bar.progress(100)
+                            status_text.empty()
+                            st.success("✅ Analysis Complete!")
+                            break
+
+                        elif status == "failed":
+                            error_msg = status_data.get('error', 'Unknown error')
+                            st.error(f"❌ Processing failed: {error_msg}")
+                            progress_bar.empty()
+                            status_text.empty()
+                            break
+
+                    time.sleep(5)
+                    attempt += 1
+
+                if attempt >= max_attempts:
+                    st.error("⏱️ Processing timeout. Please try again.")
+            else:
+                st.error(f"❌ Upload failed: {upload_response.json().get('error', 'Unknown error')}")
+
+        except requests.exceptions.ConnectionError:
+            st.error("❌ Could not connect to Flask server. Make sure it's running on port 5000!")
+        except Exception as e:
+            st.error(f"❌ An error occurred: {str(e)}")
 
-    # Analysis results (placeholder)
-    if analyze_record_btn:
-        with st.spinner("🔄 Analyzing recorded audio..."):
-            st.info("⚙️ Processing audio through Flask API...")
-
-            st.success("✅ Analysis Complete!")
+    # Display results if available
+    if st.session_state.tab2_results:
+        results = st.session_state.tab2_results
 
-            # Results layout
         st.markdown("---")
         st.subheader("📊 Emotion Analysis Results")
 
-        # Emotion emoji mapping (supports all emotions)
+        # Emotion emoji mapping
        emotion_emoji_map = {
             'Happy': '😊',
             'Sad': '😢',
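
The new flow records with `st.audio_input`, which returns a WAV `UploadedFile`-like object (hence the defensive `seek(0)` before upload), then polls the job for up to 60 attempts at 5-second intervals (~5 minutes). The loop assumes a Flask backend that answers `/upload` with HTTP 202 plus a `job_id`, and `/status/<job_id>` with JSON fields `status`, `progress`, `message`, and (on completion) `results`. A minimal sketch of a compatible server — the in-memory `jobs` store and `run_analysis` worker are illustrative stand-ins, not this Space's actual backend:

    import threading
    import uuid

    from flask import Flask, jsonify, request

    app = Flask(__name__)
    jobs = {}  # job_id -> {"status", "progress", "message", ...}; illustrative in-memory store

    def run_analysis(job_id, wav_bytes):
        # Stand-in for the real emotion-analysis pipeline.
        jobs[job_id].update(status="completed", progress=100, message="done",
                            results={"timeline": [], "duration": "00:00",
                                     "emotions_detected": 0,
                                     "dominant_emotion": "Neutral"})

    @app.route("/upload", methods=["POST"])
    def upload():
        if "file" not in request.files:
            return jsonify({"error": "no file provided"}), 400
        job_id = str(uuid.uuid4())
        jobs[job_id] = {"status": "processing", "progress": 0, "message": "queued"}
        wav_bytes = request.files["file"].read()
        threading.Thread(target=run_analysis, args=(job_id, wav_bytes)).start()
        return jsonify({"job_id": job_id}), 202  # 202: accepted, processing continues

    @app.route("/status/<job_id>")
    def status(job_id):
        job = jobs.get(job_id)
        if job is None:
            return jsonify({"error": "unknown job"}), 404
        return jsonify(job)

    if __name__ == "__main__":
        app.run(port=5000)
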
@@ -368,26 +415,23 @@ with tab2:
             'Calm': '😌'
         }
 
-        # Sample data for recorded audio
-        sample_data = pd.DataFrame({
-            'Time (s)': ['00:00', '00:08', '00:15', '00:22', '00:28'],
-            'Emotion': ['Neutral', 'Happy', 'Neutral', 'Sad', 'Neutral'],
-            'Confidence': [0.88, 0.85, 0.90, 0.72, 0.87]
-        })
+        # Convert timeline to DataFrame
+        timeline_data = results['timeline']
+        sample_data = pd.DataFrame(timeline_data)
+        sample_data.rename(columns={'time': 'Time (s)', 'emotion': 'Emotion', 'confidence': 'Confidence'}, inplace=True)
 
         # Add emoji column
         sample_data['Emoji'] = sample_data['Emotion'].map(emotion_emoji_map)
 
-        # Calculate metrics
-        total_duration = "00:30"
-        unique_emotions = sample_data['Emotion'].nunique()
-        dominant_emotion = sample_data['Emotion'].mode()[0]
-        dominant_emoji = emotion_emoji_map[dominant_emotion]
-
         # Metrics
+        total_duration = results['duration']
+        unique_emotions = results['emotions_detected']
+        dominant_emotion = results['dominant_emotion']
+        dominant_emoji = emotion_emoji_map.get(dominant_emotion, '❓')
+
         col1, col2, col3 = st.columns(3)
         with col1:
-            st.metric("Recording Duration", total_duration, help="Length of recording")
+            st.metric("Audio Duration", total_duration, help="Length of audio")
         with col2:
             st.metric("Emotions Detected", unique_emotions, help="Number of unique emotions")
         with col3:
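
The column rename pins down the contract between the two services: each entry in `results['timeline']` arrives keyed `time` / `emotion` / `confidence`, with `duration`, `emotions_detected`, and `dominant_emotion` alongside. For reference, a `results` payload of the assumed shape — the values are borrowed from the sample data this commit deletes, purely for illustration:

    # Assumed shape of status_data['results']; values are illustrative.
    results = {
        "duration": "00:30",
        "emotions_detected": 2,
        "dominant_emotion": "Neutral",
        "timeline": [
            {"time": "00:00", "emotion": "Neutral", "confidence": 0.88},
            {"time": "00:08", "emotion": "Happy", "confidence": 0.85},
        ],
    }
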
@@ -401,9 +445,7 @@ with tab2:
         with col1:
             st.subheader("⏱️ Emotion Timeline")
 
-            # Bar chart with emojis
-            fig_timeline = go.Figure()
-
+            # Color mapping
             colors = {
                 'Happy': '#FFD700',
                 'Sad': '#4169E1',
@@ -415,26 +457,28 @@ with tab2:
                 'Calm': '#87CEEB'
             }
 
-            for emotion in sample_data['Emotion'].unique():
-                emotion_data = sample_data[sample_data['Emotion'] == emotion]
-                fig_timeline.add_trace(go.Bar(
-                    x=emotion_data['Time (s)'],
-                    y=emotion_data['Confidence'],
-                    name=f"{emotion_emoji_map[emotion]} {emotion}",
-                    marker_color=colors[emotion],
-                    text=[emotion_emoji_map[emotion]] * len(emotion_data),
-                    textposition='outside',
-                    textfont=dict(size=20)
-                ))
+            # Create bar chart
+            bar_colors = [colors.get(emotion, '#808080') for emotion in sample_data['Emotion']]
+            bar_text = [emotion_emoji_map.get(emotion, '❓') for emotion in sample_data['Emotion']]
+
+            fig_timeline = go.Figure()
+            fig_timeline.add_trace(go.Bar(
+                x=sample_data['Time (s)'],
+                y=sample_data['Confidence'],
+                marker_color=bar_colors,
+                text=bar_text,
+                textposition='outside',
+                textfont=dict(size=20),
+                hovertemplate='<b>%{x}</b><br>Confidence: %{y:.2%}<br><extra></extra>',
+                showlegend=False
+            ))
 
             fig_timeline.update_layout(
                 xaxis_title="Time",
                 yaxis_title="Confidence",
                 yaxis_range=[0, 1.1],
-                barmode='group',
                 height=400,
-                showlegend=True,
-                hovermode='x unified'
+                hovermode='x'
             )
 
             st.plotly_chart(fig_timeline, width="stretch")
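
The chart refactor replaces one `go.Bar` trace per emotion (grouped bars plus a legend) with a single trace colored per bar, so bars stay in chronological order and unseen emotions fall back to grey / ❓ through `.get`. A self-contained sketch of the pattern with dummy data — the emoji and colors here are placeholders, not the app's mapping:

    import plotly.graph_objects as go

    # One trace with per-bar colors keeps bars in timeline order without a legend.
    times = ["00:00", "00:08", "00:15"]
    confidences = [0.88, 0.85, 0.90]
    bar_colors = ["#808080", "#FFD700", "#808080"]  # fallback grey, Happy gold, fallback grey

    fig = go.Figure(go.Bar(x=times, y=confidences, marker_color=bar_colors,
                           text=["😐", "😊", "😐"], textposition="outside"))
    fig.update_layout(yaxis_range=[0, 1.1], height=400, showlegend=False)
    fig.show()
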
@@ -442,13 +486,13 @@ with tab2:
         with col2:
             st.subheader("📊 Distribution")
 
-            # Pie chart for emotion distribution
+            # Pie chart
             emotion_counts = sample_data['Emotion'].value_counts()
 
             fig_pie = go.Figure(data=[go.Pie(
-                labels=[f"{emotion_emoji_map[e]} {e}" for e in emotion_counts.index],
+                labels=[f"{emotion_emoji_map.get(e, '❓')} {e}" for e in emotion_counts.index],
                 values=emotion_counts.values,
-                marker=dict(colors=[colors[e] for e in emotion_counts.index]),
+                marker=dict(colors=[colors.get(e, '#808080') for e in emotion_counts.index]),
                 textinfo='percent+label',
                 textfont=dict(size=12),
                 hole=0.3
 