Spaces:

Dpngtm
/

Audio-Emotion-Recognition

Sleeping

App Files Files Community

Dpngtm committed on Oct 29, 2024

Commit

56606dd

verified ·

1 Parent(s): 0a3c034

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -35

app.py CHANGED Viewed

@@ -18,10 +18,22 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 model.eval()
 def recognize_emotion(audio):
     try:
         if audio is None:
-            return {emotion: 0.0 for emotion in emotion_labels}
         audio_path = audio if isinstance(audio, str) else audio.name
         speech_array, sampling_rate = torchaudio.load(audio_path)
@@ -30,7 +42,7 @@ def recognize_emotion(audio):
         if duration > 60:
             return {
                 "Error": "Audio too long (max 1 minute)",
-                **{emotion: 0.0 for emotion in emotion_labels}
             }
         if sampling_rate != 16000:
@@ -52,7 +64,7 @@ def recognize_emotion(audio):
             probs = F.softmax(logits, dim=-1)[0].cpu().numpy()
             confidence_scores = {
-                emotion: round(float(prob) * 100, 2)
                 for emotion, prob in zip(emotion_labels, probs)
             }
@@ -67,51 +79,42 @@ def recognize_emotion(audio):
     except Exception as e:
         return {
             "Error": str(e),
-            **{emotion: 0.0 for emotion in emotion_labels}
         }
 interface = gr.Interface(
     fn=recognize_emotion,
     inputs=gr.Audio(
         sources=["microphone", "upload"],
         type="filepath",
-        label="Upload audio or record from microphone",
-        max_length=60
     ),
     outputs=gr.Label(
         num_top_classes=len(emotion_labels),
-        label="Emotion Predictions"
     ),
     title="Speech Emotion Recognition",
-    description="""
-    ## Speech Emotion Recognition using Wav2Vec2
-    This model recognizes emotions from speech audio in the following categories:
-    - Angry 😠
-    - Calm 😌
-    - Disgust 🤢
-    - Fearful 😨
-    - Happy 😊
-    - Neutral 😐
-    - Sad 😢
-    - Surprised 😲
-    ### Instructions:
-    1. Upload an audio file or record through the microphone
-    2. Wait for processing
-    3. View predicted emotions with confidence scores
-    ### Notes:
-    - Maximum audio length: 1 minute
-    - Best results with clear speech and minimal background noise
-    - Confidence scores are shown as percentages
     """
 )
-if __name__ == "__main__":
-    interface.launch(
-        share=True,
-        debug=True,
-        server_name="0.0.0.0",
-        server_port=7860
-    )

 model.to(device)
 model.eval()
+# At the top with other global variables
+emotion_icons = {
+    "angry": "😠",
+    "calm": "😌",
+    "disgust": "🤢",
+    "fearful": "😨",
+    "happy": "😊",
+    "neutral": "😐",
+    "sad": "😢",
+    "surprised": "😲"
+}
 def recognize_emotion(audio):
     try:
         if audio is None:
+            return {f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
         audio_path = audio if isinstance(audio, str) else audio.name
         speech_array, sampling_rate = torchaudio.load(audio_path)
         if duration > 60:
             return {
                 "Error": "Audio too long (max 1 minute)",
+                **{f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
             }
         if sampling_rate != 16000:
             probs = F.softmax(logits, dim=-1)[0].cpu().numpy()
             confidence_scores = {
+                f"{emotion} {emotion_icons[emotion]}": int(round(float(prob) * 100))
                 for emotion, prob in zip(emotion_labels, probs)
             }
     except Exception as e:
         return {
             "Error": str(e),
+            **{f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
         }
+# Create a formatted string of supported emotions
+supported_emotions = " | ".join([f"{emotion_icons[emotion]} {emotion}" for emotion in emotion_labels])
 interface = gr.Interface(
     fn=recognize_emotion,
     inputs=gr.Audio(
         sources=["microphone", "upload"],
         type="filepath",
+        label="Record or Upload Audio"
     ),
     outputs=gr.Label(
         num_top_classes=len(emotion_labels),
+        label="Detected Emotion"
     ),
     title="Speech Emotion Recognition",
+    description=f"""
+    ### Supported Emotions:
+    {supported_emotions}
+    Maximum audio length: 1 minute""",
+    theme=gr.themes.Soft(
+        primary_hue="orange",
+        secondary_hue="blue"
+    ),
+    css="""
+        .gradio-container {max-width: 800px}
+        .label {font-size: 18px}
     """
 )
+interface.launch(
+    share=True,
+    debug=True,
+    server_name="0.0.0.0",
+    server_port=7860
+)