Dpngtm committed on
Commit
56606dd
·
verified ·
1 Parent(s): 0a3c034

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -35
app.py CHANGED
@@ -18,10 +18,22 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
18
  model.to(device)
19
  model.eval()
20
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def recognize_emotion(audio):
22
  try:
23
  if audio is None:
24
- return {emotion: 0.0 for emotion in emotion_labels}
25
 
26
  audio_path = audio if isinstance(audio, str) else audio.name
27
  speech_array, sampling_rate = torchaudio.load(audio_path)
@@ -30,7 +42,7 @@ def recognize_emotion(audio):
30
  if duration > 60:
31
  return {
32
  "Error": "Audio too long (max 1 minute)",
33
- **{emotion: 0.0 for emotion in emotion_labels}
34
  }
35
 
36
  if sampling_rate != 16000:
@@ -52,7 +64,7 @@ def recognize_emotion(audio):
52
  probs = F.softmax(logits, dim=-1)[0].cpu().numpy()
53
 
54
  confidence_scores = {
55
- emotion: round(float(prob) * 100, 2)
56
  for emotion, prob in zip(emotion_labels, probs)
57
  }
58
 
@@ -67,51 +79,42 @@ def recognize_emotion(audio):
67
  except Exception as e:
68
  return {
69
  "Error": str(e),
70
- **{emotion: 0.0 for emotion in emotion_labels}
71
  }
72
 
 
 
 
73
  interface = gr.Interface(
74
  fn=recognize_emotion,
75
  inputs=gr.Audio(
76
  sources=["microphone", "upload"],
77
  type="filepath",
78
- label="Upload audio or record from microphone",
79
- max_length=60
80
  ),
81
  outputs=gr.Label(
82
  num_top_classes=len(emotion_labels),
83
- label="Emotion Predictions"
84
  ),
85
  title="Speech Emotion Recognition",
86
- description="""
87
- ## Speech Emotion Recognition using Wav2Vec2
88
-
89
- This model recognizes emotions from speech audio in the following categories:
90
- - Angry 😠
91
- - Calm 😌
92
- Disgust 🤢
93
- - Fearful 😨
94
- - Happy 😊
95
- - Neutral 😐
96
- Sad 😢
97
- - Surprised 😲
98
 
99
- ### Instructions:
100
- 1. Upload an audio file or record through the microphone
101
- 2. Wait for processing
102
- 3. View predicted emotions with confidence scores
103
-
104
- ### Notes:
105
- - Maximum audio length: 1 minute
106
- - Best results with clear speech and minimal background noise
107
- - Confidence scores are shown as percentages
108
  """
109
  )
110
 
111
- if __name__ == "__main__":
112
- interface.launch(
113
- share=True,
114
- debug=True,
115
- server_name="0.0.0.0",
116
- server_port=7860
117
- )
 
18
  model.to(device)
19
  model.eval()
20
 
21
+ # At the top with other global variables
22
+ emotion_icons = {
23
+ "angry": "😠",
24
+ "calm": "😌",
25
+ "disgust": "🤢",
26
+ "fearful": "😨",
27
+ "happy": "😊",
28
+ "neutral": "😐",
29
+ "sad": "😢",
30
+ "surprised": "😲"
31
+ }
32
+
33
  def recognize_emotion(audio):
34
  try:
35
  if audio is None:
36
+ return {f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
37
 
38
  audio_path = audio if isinstance(audio, str) else audio.name
39
  speech_array, sampling_rate = torchaudio.load(audio_path)
 
42
  if duration > 60:
43
  return {
44
  "Error": "Audio too long (max 1 minute)",
45
+ **{f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
46
  }
47
 
48
  if sampling_rate != 16000:
 
64
  probs = F.softmax(logits, dim=-1)[0].cpu().numpy()
65
 
66
  confidence_scores = {
67
+ f"{emotion} {emotion_icons[emotion]}": int(round(float(prob) * 100))
68
  for emotion, prob in zip(emotion_labels, probs)
69
  }
70
 
 
79
  except Exception as e:
80
  return {
81
  "Error": str(e),
82
+ **{f"{emotion} {emotion_icons[emotion]}": 0 for emotion in emotion_labels}
83
  }
84
 
85
+ # Create a formatted string of supported emotions
86
+ supported_emotions = " | ".join([f"{emotion_icons[emotion]} {emotion}" for emotion in emotion_labels])
87
+
88
  interface = gr.Interface(
89
  fn=recognize_emotion,
90
  inputs=gr.Audio(
91
  sources=["microphone", "upload"],
92
  type="filepath",
93
+ label="Record or Upload Audio"
 
94
  ),
95
  outputs=gr.Label(
96
  num_top_classes=len(emotion_labels),
97
+ label="Detected Emotion"
98
  ),
99
  title="Speech Emotion Recognition",
100
+ description=f"""
101
+ ### Supported Emotions:
102
+ {supported_emotions}
 
 
 
 
 
 
 
 
 
103
 
104
+ Maximum audio length: 1 minute""",
105
+ theme=gr.themes.Soft(
106
+ primary_hue="orange",
107
+ secondary_hue="blue"
108
+ ),
109
+ css="""
110
+ .gradio-container {max-width: 800px}
111
+ .label {font-size: 18px}
 
112
  """
113
  )
114
 
115
+ interface.launch(
116
+ share=True,
117
+ debug=True,
118
+ server_name="0.0.0.0",
119
+ server_port=7860
120
+ )