File size: 2,497 Bytes
4eecd2e
 
 
 
 
 
61502a4
4eecd2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61502a4
4eecd2e
61502a4
0f36c84
2ea00df
0f36c84
2ea00df
0f36c84
 
 
 
 
4eecd2e
7c04964
4eecd2e
7c04964
0f36c84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# AUTOGENERATED! DO NOT EDIT! 

# %% auto 0
__all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio']

from fastai.vision.all import *
import gradio as gr
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import os

def extract_emotion(file_name: str) -> str:
    """
    Given the name of an audio file, return the emotion label
    encoded in the filename.

    Filenames follow the pattern ``<speaker>_<word>_<emotion>.<ext>``
    (e.g. ``OAF_back_angry.wav``), so the label is the last
    underscore-separated component with the extension removed.
    """
    # The emotion label is the final underscore-separated part.
    label_with_extension = file_name.split('_')[-1]
    # Strip the extension robustly with splitext instead of slicing
    # off a fixed 4 characters, which silently corrupts the label for
    # extensions that are not exactly 3 characters (e.g. ".flac").
    label, _ext = os.path.splitext(label_with_extension)
    return label

def get_y(filepath): return extract_emotion(str(filepath).split("/")[-1])

# Load the exported fastai learner. NOTE: `get_y` must be defined in
# this module before loading, because the pickled learner references
# it from training time.
learn = load_learner("emotion_model.pkl")
# Ordered emotion class names from the learner's DataLoaders vocabulary;
# indices align with the probability vector returned by learn.predict.
categories = learn.dls.vocab

def classify_audio(audio_file):
  """
  Classify the emotion expressed in an audio file.

  Converts up to the first 20 seconds of the audio into a
  mel-spectrogram image, runs it through the learner, and returns a
  dict mapping each emotion label to its predicted probability
  (the format gr.Label expects).
  """
  # Load the audio at its native sampling rate, capped at 20 seconds.
  sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
  # Compute the mel spectrogram and convert power to decibels.
  S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
  S_DB = librosa.power_to_db(S, ref=np.max)
  # Render the spectrogram with no axes or padding, matching the
  # image format the model was trained on.
  temp_img_path = Path("temp_spectogram.png")
  fig, ax = plt.subplots()
  try:
    fig.tight_layout(pad=0)
    librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
                             y_axis='mel', ax=ax)
    ax.axis('off')
    # Save the spectrogram temporarily for prediction.
    fig.savefig(temp_img_path)
  finally:
    # Close the figure: the original leaked one matplotlib figure
    # per request, which accumulates in a long-running server.
    plt.close(fig)

  try:
    pred, idx, probs = learn.predict(temp_img_path)
  finally:
    # Always remove the temporary image, even if prediction fails.
    os.remove(temp_img_path)

  return dict(zip(categories, map(float, probs)))

description = """
## Welcome to the app that recognizes emotion from the audio! Upload/record your audio (no more than 20 seconds) and see the model prediction of the emotions.

## 7 Emotions the app recognizes: Anger, Disgust, Fear, Happiness, Pleasant Surprise (ps), Sadness, Neutral

For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
"""

audio = gr.Audio(type="filepath", label="Upload Audio")
label = gr.Label()

# Gradio Interface
inf = gr.Interface(fn=classify_audio, inputs=audio, outputs=label, title="Emotion Recognition", description=description)
inf.launch(share=True)