minhaj-ripon committed
Commit 0d69f41 · 1 Parent(s): aab2349

Update app.py

Files changed (1)
  1. app.py +46 -65
app.py CHANGED
@@ -6,44 +6,37 @@ from radar_chart import radar_factory
 
 from keras.models import load_model
 import os
-#import joblib
 import numpy as np
-import soundfile as sf
 
 
-#model = joblib.load('Speech_Emotion_Detection_Model.pkl')
+
 model = load_model(os.path.join("lstm_all_four_complex.h5"))
 
+
 def convert_class_to_emotion(pred):
     """
     Method to convert the predictions (int) into human readable strings.
     """
 
-    label_conversion = {0: 'disgust',
-                        1: 'happy',
-                        2: 'sad',
-                        3: 'fear',
-                        4: 'angry',
-                        5: 'neutral',
-                        6: 'surprised',
-                        7: 'calm'}
-    # Convert the prediction to a string
-    #pred_str = str(pred)
-
-    # Check if the string exists in the dictionary, if not, return 'Unknown'
-    return label_conversion.get(int(pred))#, 'Unknown')
-
-    # label_conversion = {'neutral':'neutral',
-    #                     'calm': 'calm',
-    #                     'happy':'happy',
-    #                     'sad':'sad',
-    #                     'angry':'angry',
-    #                     'fearful':'fearful',
-    #                     'disgust':'disgust',
-    #                     'ps': 'surprised'
-    #                     }
-    # pred_str = str(pred)
-    # return label_conversion.get(pred_str)
+    label_conversion = {0: 'neutral',
+                        1: 'calm',
+                        2: 'happy',
+                        3: 'sad',
+                        4: 'angry',
+                        5: 'fearful',
+                        6: 'disgust',
+                        7: 'surprised'}
+
+    # label_conversion = {0: 'very happy',
+    #                     1: 'happy',
+    #                     2: 'very happy',
+    #                     3: 'very unhappy',
+    #                     4: 'very unhappy',
+    #                     5: 'unhappy',
+    #                     6: 'unhappy',
+    #                     7: 'happy'}
+
+    return label_conversion[int(pred)]
 
 
 def make_predictions(file, micro=None):
@@ -66,56 +59,44 @@ def make_predictions(file, micro=None):
     x = np.expand_dims(x, axis=0)
     predictions = np.argmax(model.predict(x), axis=1)
 
-    # N = 8
-    # theta = radar_factory(N, frame='polygon')
-    # spoke_labels = np.array(['neutral',
-    #                          'calm',
-    #                          'happy',
-    #                          'sad',
-    #                          'angry',
-    #                          'fearful',
-    #                          'disgust',
-    #                          'surprised'])
-    # fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
-    #                               subplot_kw=dict(projection='radar'))
-    # vec = model.predict(x)[0]
-    # axs.plot(theta, vec, color="b")
-    # axs.fill(theta, vec, alpha=0.3)
-
-    # axs.set_varlabels(spoke_labels)
-
-    # fig = plt.figure()
-    # plt.plot(data, alpha=0.8)
-    # plt.xlabel("temps")
-    # plt.ylabel("amplitude")
-
-    # Get the class probabilities
-    class_probs = model.predict(x)[0]
-
-    # Convert class index to emotion label
-    emotion = convert_class_to_emotion(predictions[0])
+    N = 8
+    theta = radar_factory(N, frame='polygon')
+    spoke_labels = np.array(['neutral',
+                             'calm',
+                             'happy',
+                             'sad',
+                             'angry',
+                             'fearful',
+                             'disgust',
+                             'surprised'])
+    fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
+                                  subplot_kw=dict(projection='radar'))
+    vec = model.predict(x)[0]
+    axs.plot(theta, vec, color="b")
+    axs.fill(theta, vec, alpha=0.3)
 
-    emotion_labels = [
-        'disgust', 'happy', 'sad', 'fear', 'angry', 'neutral', 'surprised', 'calm'
-    ]
+    axs.set_varlabels(spoke_labels)
 
+    fig = plt.figure()
+    plt.plot(data, alpha=0.8)
+    plt.xlabel("temps")
+    plt.ylabel("amplitude")
 
-    # Convert class probabilities to a string
-    class_probs_str = ", ".join([f"{label}: {prob:.2f}" for label, prob in zip(emotion_labels, class_probs)])
 
-    return emotion, class_probs_str#, fig, fig_radar
+    return convert_class_to_emotion(predictions), fig, fig_radar
 
 
 
 # Set the starting state to an empty string
 iface = gr.Interface(
     fn=make_predictions,
-    title="identify emotion of a chunk of audio speech",
+    title="Identify emotion of a chunk of audio speech",
     description="a simple interface to perform emotion recognition from an audio file",
     #article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
     inputs=[gr.Audio(source="upload", type="filepath", label="File"),
-            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
+            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")]
+            ,
    examples=[[os.path.join("examples", filename)] for filename in os.listdir("examples")],
-    outputs=[gr.Textbox(label="Text output")]#, gr.Plot(), gr.Plot()]
+    outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
 )
 iface.launch(debug=True)
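
For reference, a minimal sketch (not part of this commit) of how the restored label mapping is expected to behave, assuming the model's softmax output follows the same eight-class order as label_conversion above; the probability vector below is invented for illustration:

import numpy as np

# Hypothetical softmax output for one clip, ordered as
# neutral, calm, happy, sad, angry, fearful, disgust, surprised.
probs = np.array([[0.05, 0.02, 0.70, 0.05, 0.05, 0.05, 0.03, 0.05]])

label_conversion = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad',
                    4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'}

predictions = np.argmax(probs, axis=1)        # array([2])
print(label_conversion[int(predictions[0])])  # happy

The three values returned by make_predictions (the label string and the two matplotlib figures) line up positionally with the three outputs declared on the interface (gr.Textbox, gr.Plot, gr.Plot).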