'''
Outline:
- Create animation: animate charts (potentially using streamlit)
'''
import librosa
import librosa.display #Explicit import so waveshow/specshow also work on older librosa versions
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import keras
import tensorflow
import matplotlib.animation as animation
model_path = "model_simple.sav" #Defines the path to the model file
emotion_map = {
    'Disgust': 0,
    'Happiness': 1,
    'Sadness': 2,
    'Neutral': 3,
    'Fear': 4,
    'Anger': 5,
    'Surprise': 6
} #Maps emotions to integers: taken from data preprocessing
reversed_emotion_map = {value: key for key, value in emotion_map.items()}
#Reverses the emotion mapping so that predicted integer indices can be turned back into emotion labels
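#For example, reversed_emotion_map[1] is 'Happiness' and reversed_emotion_map[6] is 'Surprise';
#this lookup is what turns the model's argmax index back into a label in get_emotion_prediction below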
#Uses librosa to load the uploaded audio file as a waveform (amplitude samples) plus its sample rate
@st.cache_data
def process_audio(input_file):
    st.audio(input_file) #Creates an audio player within the streamlit app
    audio_signal, sample_rate = librosa.load(input_file)
    return audio_signal, sample_rate
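#Note: librosa.load resamples to mono at 22,050 Hz by default, which is what the
#66,150-sample (3 second) standardization further below assumes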
#Creates an animated line chart of the frequency spectrum, one STFT frame per animation frame
def display_spectrum_animation(audio_signal, sample_rate):
    S = np.abs(librosa.stft(audio_signal))
    frequencies = librosa.fft_frequencies(sr=sample_rate)
    fig, ax = plt.subplots()
    def update_spectrum(num, S, ax):
        ax.clear()
        ax.plot(frequencies, S[:, num])
        ax.set_xlabel("Frequency (Hz)")
        ax.set_ylabel("Amplitude")
    ani = animation.FuncAnimation(fig, update_spectrum, frames=S.shape[1], fargs=[S, ax], blit=False)
    ani.save("spectrum_animation.gif", writer="imagemagick")
    st.image("spectrum_animation.gif")
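#Each animation frame above plots one STFT column; with librosa's default n_fft of 2048 that is
#1,025 frequency bins per frame, and columns are spaced hop_length = 512 samples apart
#(roughly 23 ms at the default 22,050 Hz sample rate)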
#Displays a static waveform plot of the audio signal (not currently wired into create_selections)
@st.cache_data
def display_frequency(audio_signal, sample_rate):
    librosa.display.waveshow(audio_signal, sr=sample_rate)
    st.pyplot(plt.gcf())
#Creates and animates a dB-scaled spectrogram with a log frequency axis using librosa
@st.cache_data
def display_mel_spectrogram(audio_signal, sample_rate):
    fig, ax = plt.subplots()
    audio_time = audio_signal.shape[0] / sample_rate #Clip duration in seconds
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_signal)), ref=np.max)
    amt_to_add = int(D.shape[-1] / audio_time) #Approximate number of STFT columns per second of audio
    librosa.display.specshow(D, sr=sample_rate, x_axis="time", y_axis="log", ax=ax) #Initial full frame
    def update_spectrogram(num, D, ax, plus):
        ax.clear()
        librosa.display.specshow(D[:, :num + plus], sr=sample_rate, x_axis="time", y_axis="log", ax=ax)
    ani = animation.FuncAnimation(fig, update_spectrogram, frames=np.arange(1, D.shape[1]), fargs=[D, ax, amt_to_add], blit=False)
    ani.save("spectrogram_animation.gif", writer="imagemagick")
    st.image("spectrogram_animation.gif")
#Creates the interface allowing users to select which plot they want displayed
def create_selections(audio_signal, sample_rate):
    chart_options = ["Spectrum", "Mel-Spectrogram"] #Graph titles go here
    functions = [display_spectrum_animation, display_mel_spectrogram] #Graphing functions go here
    chart_selector = st.radio(
        label="",
        options=chart_options,
        horizontal=True
    )
    selection_index = chart_options.index(chart_selector)
    functions[selection_index](audio_signal, sample_rate)
#Helper function that forces a given waveform array to a fixed length
#Currently, this length is hard-coded at 66,150 samples, though that may change in the future
@st.cache_data
def standardize_waveform_length(waveform):
    audio_length = 66150
    if len(waveform) > audio_length:
        waveform = waveform[:audio_length] #Truncate waveforms that are too long
    else:
        waveform = np.pad(waveform, (0, max(0, audio_length - len(waveform))), "constant") #Zero-pad waveforms that are too short
    return waveform
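#Illustrative sketch (not called by the app): 66,150 samples is 3 seconds at librosa's default
#22,050 Hz rate, so a short clip is zero-padded up to that length and a long clip is truncated, e.g.
#    standardize_waveform_length(np.zeros(10)).shape      -> (66150,)
#    standardize_waveform_length(np.zeros(100_000)).shape -> (66150,)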
#Takes in a given audio signal and returns its mel-frequency cepstral coefficients
@st.cache_data
def preprocess_audio_for_prediction(audio_signal, sample_rate):
    waveform = standardize_waveform_length(waveform=audio_signal)
    mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mels=128)
    mfcc = mfcc.reshape(-1)
    return mfcc
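#Shape check (assuming librosa's defaults of n_mfcc = 20 and hop_length = 512): a 66,150-sample
#waveform yields 1 + 66150 // 512 = 130 frames, so the flattened MFCC vector has length
#20 * 130 = 2,600, which must match the model's expected input size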
#Loads the model given in model_path and returns a Keras Sequential model
@st.cache_data
def load_model(model_path):
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    return model
#Uses the model to predict the speaker's emotion in the given audio clip
@st.cache_data
def get_emotion_prediction(mfcc):
    model = load_model(model_path)
    prediction = model.predict(mfcc[None])
    predicted_index = np.argmax(prediction)
    emotion = reversed_emotion_map[predicted_index]
    return emotion
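#mfcc[None] adds a leading batch dimension (shape (1, 2600) under the defaults noted above) so the
#model sees a batch of one sample; np.argmax then picks the highest-scoring class index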
#Combines all model functions and displays the model output as a subheader
@st.cache_data
def display_prediction(audio_signal, sample_rate):
    mfcc = preprocess_audio_for_prediction(audio_signal, sample_rate)
    prediction = get_emotion_prediction(mfcc)
    st.subheader("Predicted Emotion: " + prediction, divider=True)
#Defines the entire process of inputting audio, displaying the model's predictions, and displaying graphs
def run(input_file):
    audio_signal, sample_rate = process_audio(input_file)
    display_prediction(audio_signal, sample_rate)
    create_selections(audio_signal, sample_rate)
#Creates an input area to upload the file
def main():
    st.header("Upload your file here")
    file_uploader = st.file_uploader("", type="wav")
    if file_uploader is not None:
        run(file_uploader)

if __name__ == "__main__":
    main()
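#To run locally (assuming the imported packages are installed and model_simple.sav sits next to
#this script):
#    streamlit run app.py
#Saving the GIF animations uses matplotlib's "imagemagick" writer, so ImageMagick must be available
#on the host; the "pillow" writer is a possible drop-in alternative if it is not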