Spaces:
Runtime error
Runtime error
File size: 2,497 Bytes
4eecd2e 61502a4 4eecd2e 61502a4 4eecd2e 61502a4 0f36c84 2ea00df 0f36c84 2ea00df 0f36c84 4eecd2e 7c04964 4eecd2e 7c04964 0f36c84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# AUTOGENERATED! DO NOT EDIT!
# %% auto 0
__all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio']
from fastai.vision.all import *
import gradio as gr
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import os
def extract_emotion(file_name: str) -> str:
    """
    Return the emotion label encoded in an audio file's name.

    File names follow the pattern ``<speaker>_<word>_<emotion>.<ext>``
    (e.g. ``OAF_back_happy.wav`` -> ``"happy"``): the label is the last
    underscore-separated part with the file extension removed.

    Parameters
    ----------
    file_name : str
        Bare file name (no directory components).

    Returns
    -------
    str
        The emotion label.
    """
    # The last underscore-separated chunk holds "<label>.<ext>".
    label_with_extension = file_name.split('_')[-1]
    # splitext strips any extension length correctly, unlike the
    # previous fixed [:-4] slice which assumed a 3-char extension.
    label, _ext = os.path.splitext(label_with_extension)
    return label
def get_y(filepath): return extract_emotion(str(filepath).split("/")[-1])
# Load Learner
# Deserialize the fastai Learner exported during training; the file must
# sit next to this script. NOTE(review): load_learner unpickles the file —
# only load models from a trusted source.
learn = load_learner("emotion_model.pkl")
# Ordered list of class labels the model predicts; probabilities returned
# by learn.predict follow this order.
categories = learn.dls.vocab
def classify_audio(audio_file):
    """
    Classify the emotion in an audio file.

    Renders the audio as a mel-spectrogram image (the representation the
    model was trained on), runs the loaded fastai learner on it, and
    returns the per-category probabilities.

    Parameters
    ----------
    audio_file : str
        Path to the audio file, as supplied by the Gradio component.

    Returns
    -------
    dict
        Mapping of each emotion category to its probability (float).
    """
    # Load at the file's native sampling rate; cap at 20 s to bound work.
    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
    # Mel spectrogram converted to decibels, scaled relative to the peak.
    S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
    S_DB = librosa.power_to_db(S, ref=np.max)
    # Prepare the figure for saving the spectrogram
    fig, ax = plt.subplots()
    fig.tight_layout(pad=0)
    librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
                             y_axis='mel', ax=ax)
    # Hide the axes so the saved image contains only the spectrogram.
    ax.axis('off')
    temp_img_path = Path("temp_spectogram.png")
    try:
        fig.savefig(temp_img_path)
        pred, idx, probs = learn.predict(temp_img_path)
    finally:
        # Close the figure — otherwise every request leaks one open
        # matplotlib figure — and always remove the temporary image,
        # even if prediction raises.
        plt.close(fig)
        if temp_img_path.exists():
            os.remove(temp_img_path)
    return dict(zip(categories, map(float, probs)))
description = """
## Welcome to the app that recognizes emotion from the audio! Upload/record your audio (no more than 20 seconds) and see the model prediction of the emotions.
## 7 Emotions the app recognizes: Anger, Disgust, Fear, Happiness, Pleasant Surprise (ps), Sadness, Neutral
For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
"""
audio = gr.Audio(type="filepath", label="Upload Audio")
label = gr.Label()
# Gradio Interface
inf = gr.Interface(fn=classify_audio, inputs=audio, outputs=label, title="Emotion Recognition", description=description)
inf.launch(share=True)
|