minhaj-ripon committed
Commit 0d69f41 · 1 Parent(s): aab2349

Update app.py

Files changed (1)
  1. app.py +46 -65
app.py CHANGED
@@ -6,44 +6,37 @@ from radar_chart import radar_factory
 
 from keras.models import load_model
 import os
-#import joblib
 import numpy as np
-import soundfile as sf
 
 
-#model = joblib.load('Speech_Emotion_Detection_Model.pkl')
+
 model = load_model(os.path.join("lstm_all_four_complex.h5"))
 
+
 def convert_class_to_emotion(pred):
     """
     Method to convert the predictions (int) into human readable strings.
     """
 
-    label_conversion = {0: 'disgust',
-                        1: 'happy',
-                        2: 'sad',
-                        3: 'fear',
-                        4: 'angry',
-                        5: 'neutral',
-                        6: 'surprised',
-                        7: 'calm'}
-    # Convert the prediction to a string
-    #pred_str = str(pred)
-
-    # Check if the string exists in the dictionary, if not, return 'Unknown'
-    return label_conversion.get(int(pred))#, 'Unknown')
-
-    # label_conversion = {'neutral':'neutral',
-    #                     'calm': 'calm',
-    #                     'happy':'happy',
-    #                     'sad':'sad',
-    #                     'angry':'angry',
-    #                     'fearful':'fearful',
-    #                     'disgust':'disgust',
-    #                     'ps': 'surprised'
-    #                     }
-    # pred_str = str(pred)
-    # return label_conversion.get(pred_str)
+    label_conversion = {0: 'neutral',
+                        1: 'calm',
+                        2: 'happy',
+                        3: 'sad',
+                        4: 'angry',
+                        5: 'fearful',
+                        6: 'disgust',
+                        7: 'surprised'}
+
+    # label_conversion = {0: 'very happy',
+    #                     1: 'happy',
+    #                     2: 'very happy',
+    #                     3: 'very unhappy',
+    #                     4: 'very unhappy',
+    #                     5: 'unhappy',
+    #                     6: 'unhappy',
+    #                     7: 'happy'}
+
+    return label_conversion[int(pred)]
 
 
 def make_predictions(file, micro=None):
@@ -66,56 +59,44 @@ def make_predictions(file, micro=None):
     x = np.expand_dims(x, axis=0)
     predictions = np.argmax(model.predict(x), axis=1)
 
-    # N = 8
-    # theta = radar_factory(N, frame='polygon')
-    # spoke_labels = np.array(['neutral',
-    #                          'calm',
-    #                          'happy',
-    #                          'sad',
-    #                          'angry',
-    #                          'fearful',
-    #                          'disgust',
-    #                          'surprised'])
-    # fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
-    #                               subplot_kw=dict(projection='radar'))
-    # vec = model.predict(x)[0]
-    # axs.plot(theta, vec, color="b")
-    # axs.fill(theta, vec, alpha=0.3)
-
-    # axs.set_varlabels(spoke_labels)
-
-    # fig = plt.figure()
-    # plt.plot(data, alpha=0.8)
-    # plt.xlabel("temps")
-    # plt.ylabel("amplitude")
-
-    # Get the class probabilities
-    class_probs = model.predict(x)[0]
-
-    # Convert class index to emotion label
-    emotion = convert_class_to_emotion(predictions[0])
+    N = 8
+    theta = radar_factory(N, frame='polygon')
+    spoke_labels = np.array(['neutral',
+                             'calm',
+                             'happy',
+                             'sad',
+                             'angry',
+                             'fearful',
+                             'disgust',
+                             'surprised'])
+    fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
+                                  subplot_kw=dict(projection='radar'))
+    vec = model.predict(x)[0]
+    axs.plot(theta, vec, color="b")
+    axs.fill(theta, vec, alpha=0.3)
 
-    emotion_labels = [
-        'disgust', 'happy', 'sad', 'fear', 'angry', 'neutral', 'surprised', 'calm'
-    ]
+    axs.set_varlabels(spoke_labels)
 
+    fig = plt.figure()
+    plt.plot(data, alpha=0.8)
+    plt.xlabel("temps")
+    plt.ylabel("amplitude")
 
-    # Convert class probabilities to a string
-    class_probs_str = ", ".join([f"{label}: {prob:.2f}" for label, prob in zip(emotion_labels, class_probs)])
 
-    return emotion, class_probs_str#, fig, fig_radar
+    return convert_class_to_emotion(predictions), fig, fig_radar
 
 
 
 # Set the starting state to an empty string
 iface = gr.Interface(
     fn=make_predictions,
-    title="identify emotion of a chunk of audio speech",
+    title="Identify emotion of a chunk of audio speech",
     description="a simple interface to perform emotion recognition from an audio file",
     #article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
     inputs=[gr.Audio(source="upload", type="filepath", label="File"),
-            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
+            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")]
+            ,
    examples=[[os.path.join("examples", filename)] for filename in os.listdir("examples")],
-    outputs=[gr.Textbox(label="Text output")]#, gr.Plot(), gr.Plot()]
+    outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
 )
 iface.launch(debug=True)
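
For reference, a minimal sketch (not part of this commit) of how the restored label mapping is expected to behave, assuming the model's softmax output follows the same eight-class order as label_conversion above; the probability vector below is invented for illustration:

import numpy as np

# Hypothetical softmax output for one clip, ordered as
# neutral, calm, happy, sad, angry, fearful, disgust, surprised.
probs = np.array([[0.05, 0.02, 0.70, 0.05, 0.05, 0.05, 0.03, 0.05]])

label_conversion = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad',
                    4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'}

predictions = np.argmax(probs, axis=1)        # array([2])
print(label_conversion[int(predictions[0])])  # happy

The three values returned by make_predictions (the label string and the two matplotlib figures) line up positionally with the three outputs declared on the interface (gr.Textbox, gr.Plot, gr.Plot).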