PsalmsJava committed on
Commit
32e24ba
·
verified ·
1 Parent(s): eaf1a0f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Mood Detector - Simple version for Hugging Face
3
+ """
4
+ import gradio as gr
5
+ import numpy as np
6
+ from transformers import pipeline
7
+ import warnings
8
+ warnings.filterwarnings("ignore")
9
+
10
# Initialize the emotion detection model.
# Two checkpoints are tried in order: if the first cannot be loaded, the
# second is used instead so the app can still start.
print("Loading emotion detection model...")
try:
    # Try the main model first
    pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
    )
except Exception as exc:
    # Catch Exception rather than using a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit still stop the process, and report why
    # the primary model was skipped instead of failing over silently.
    print(f"Primary model failed to load ({exc}); loading fallback model...")
    pipe = pipeline(
        "audio-classification",
        model="superb/wav2vec2-base-superb-ers",
    )

print("Model loaded successfully!")
26
+
27
# Display text for each canonical mood label.
_MOOD_DISPLAY = {
    "ANGER": "😠 Anger",
    "DISGUST": "🤢 Disgust",
    "FEAR": "😨 Fear",
    "HAPPY": "😊 Happy",
    "NEUTRAL": "😐 Neutral",
    "SADNESS": "😢 Sad",
    "SURPRISE": "😲 Surprise",
}

# Alternative spellings mapped onto the canonical labels above.
# NOTE(review): label names differ between checkpoints (adjective forms such
# as "angry", or abbreviations such as "ang") -- confirm against the
# ``id2label`` config of the models actually deployed.
_LABEL_ALIASES = {
    "ANGRY": "ANGER",
    "ANG": "ANGER",
    "FEARFUL": "FEAR",
    "HAP": "HAPPY",
    "NEU": "NEUTRAL",
    "SAD": "SADNESS",
    "SURPRISED": "SURPRISE",
}


def analyze_audio(audio):
    """Classify the mood of a recording and format the result for the UI.

    Args:
        audio: ``(sample_rate, audio_data)`` tuple as produced by a Gradio
            ``Audio`` component with ``type="numpy"``, or ``None`` when
            nothing was recorded or uploaded.

    Returns:
        A ``(mood, confidence, details)`` tuple of strings: the top label
        with an emoji, its score as a percentage, and a numbered list of up
        to five predictions. Missing or empty audio yields a "No audio"
        placeholder; any failure during analysis yields an "Error" tuple
        whose details carry the exception message (nothing is raised).
    """
    no_audio = ("🎤 No audio", "0%", "Please record or upload audio first")
    if audio is None:
        return no_audio

    try:
        # Get sample rate and audio data
        sample_rate, audio_data = audio
        audio_data = np.asarray(audio_data)
        if audio_data.size == 0:
            return no_audio

        # Gradio hands over integer PCM samples; scale them to float32 in
        # roughly [-1, 1], the waveform format the model pipeline works on.
        if np.issubdtype(audio_data.dtype, np.signedinteger):
            full_scale = float(np.iinfo(audio_data.dtype).max)
            audio_data = audio_data.astype(np.float32) / full_scale
        else:
            audio_data = audio_data.astype(np.float32)

        # Convert to mono if stereo. Gradio lays multi-channel audio out as
        # (samples, channels), so the channel axis is axis 1; averaging over
        # axis 0 would collapse time and leave one value per channel.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # Run prediction
        predictions = pipe({
            "raw": audio_data,
            "sampling_rate": sample_rate,
        })

        # Get top result (the first entry is treated as the best match)
        top = predictions[0]
        mood = top["label"].upper()
        confidence = f"{top['score'] * 100:.1f}%"

        # Unknown labels fall back to a generic microphone prefix.
        canonical = _LABEL_ALIASES.get(mood, mood)
        mood_display = _MOOD_DISPLAY.get(canonical, f"🎤 {mood}")

        # Create details: one numbered line per prediction, at most five.
        details = "All Predictions:\n" + "".join(
            f"{rank}. {pred['label'].upper()}: {pred['score'] * 100:.1f}%\n"
            for rank, pred in enumerate(predictions[:5], 1)
        )

        return mood_display, confidence, details

    except Exception as exc:
        # UI boundary: surface the failure in the output boxes instead of
        # letting the callback raise.
        return "❌ Error", "0%", f"Analysis failed: {exc}"
76
+
77
# Create Gradio interface: audio input and trigger button on the left,
# three read-only result boxes on the right, collapsible help underneath.
with gr.Blocks(title="Voice Mood Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎤 Voice Mood Detector")
    gr.Markdown("Record your voice or upload audio to detect emotional state")

    with gr.Row():
        with gr.Column():
            # type="numpy" makes the callback receive (sample_rate, data).
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
                label="Speak or Upload Audio",
                waveform_options={"show_controls": True},
            )
            btn = gr.Button("Analyze Mood 🎯", variant="primary")

        with gr.Column():
            mood_output = gr.Textbox(label="Detected Mood", interactive=False)
            confidence_output = gr.Textbox(label="Confidence", interactive=False)
            details_output = gr.Textbox(
                label="Detailed Results",
                lines=6,
                interactive=False,
            )

    # Instructions
    with gr.Accordion("📖 Instructions", open=False):
        gr.Markdown("""
        **How to use:**
        1. Click microphone icon and speak for 3-5 seconds
        2. OR upload an audio file (WAV/MP3)
        3. Click "Analyze Mood"
        4. View your emotional state

        **Tips for best results:**
        - Speak clearly in English
        - Keep background noise minimal
        - Optimal length: 3-5 seconds
        - Use mono audio if possible
        """)

    # Set up button action: the three return values of analyze_audio fill
    # the three output boxes in order.
    btn.click(
        fn=analyze_audio,
        inputs=audio_input,
        outputs=[mood_output, confidence_output, details_output],
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True)