'''
Outline:
- Create animation: animate charts (potentially using streamlit)
'''
import librosa
import librosa.display  # Explicit import so librosa.display.waveshow/specshow are available
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import keras  # Not referenced directly, but must be importable for the pickled Keras model to load
import tensorflow
import matplotlib.animation as animation
model_path = "model_simple.sav"  # Path to the pickled model file

# Maps emotions to integers: taken from data preprocessing
emotion_map = {
    'Disgust': 0,
    'Happiness': 1,
    'Sadness': 2,
    'Neutral': 3,
    'Fear': 4,
    'Anger': 5,
    'Surprise': 6
}

# Reverses the emotion mapping so predicted integers can be mapped back to emotion labels
reversed_emotion_map = {value: key for key, value in emotion_map.items()}
# Uses librosa to load the uploaded audio file as an array of amplitude samples
def process_audio(input_file):
    st.audio(input_file)  # Creates an audio player within the Streamlit app
    audio_signal, sample_rate = librosa.load(input_file)
    return audio_signal, sample_rate
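
# Note: librosa.load defaults to mono and resamples to 22050 Hz, so sample_rate is
# 22050 for every upload unless sr=None is passed to preserve the native rate.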
# Animates the frequency spectrum, plotting one STFT frame per frame of the GIF
def display_spectrum_animation(audio_signal, sample_rate):
    S = np.abs(librosa.stft(audio_signal))
    frequencies = librosa.fft_frequencies(sr=sample_rate)
    fig, ax = plt.subplots()
    def update_spectrum(num, S, ax):
        ax.clear()
        ax.plot(frequencies, S[:, num])
        ax.set_xlabel("Frequency (Hz)")
        ax.set_ylabel("Amplitude")
    ani = animation.FuncAnimation(fig, update_spectrum, frames=S.shape[1], fargs=(S, ax), blit=False)
    ani.save("spectrum_animation.gif", writer="pillow")  # Pillow ships with Matplotlib; "imagemagick" requires a separate system install
    st.image("spectrum_animation.gif")
# Creates a line chart displaying the audio waveform using librosa
def display_frequency(audio_signal, sample_rate):
    fig, ax = plt.subplots()  # Fresh figure, so repeated calls don't draw on top of each other
    librosa.display.waveshow(audio_signal, sr=sample_rate, ax=ax)
    st.pyplot(fig)
# Creates and animates a spectrogram using librosa
def display_mel_spectrogram(audio_signal, sample_rate):
    fig, ax = plt.subplots()
    audio_time = audio_signal.shape[0] / sample_rate  # Clip duration in seconds
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_signal)), ref=np.max)
    amt_to_add = int(D.shape[-1] / audio_time)  # STFT frames per second of audio
    librosa.display.specshow(D, sr=sample_rate, x_axis="time", y_axis="log", ax=ax)
    def update_spectrogram(num, D, ax, plus):
        ax.clear()
        librosa.display.specshow(D[:, :num + plus], sr=sample_rate, x_axis="time", y_axis="log", ax=ax)
    ani = animation.FuncAnimation(fig, update_spectrogram, frames=np.arange(1, D.shape[1]), fargs=(D, ax, amt_to_add), blit=False)
    ani.save("spectrogram_animation.gif", writer="pillow")  # Pillow writer, as above
    st.image("spectrogram_animation.gif")
# Creates the interface allowing users to select which plot they want displayed
def create_selections(audio_signal, sample_rate):
    chart_options = ["Spectrum", "Mel-Spectrogram"]  # Graph titles go here
    functions = [display_spectrum_animation, display_mel_spectrogram]  # Graphing functions go here
    chart_selector = st.radio(
        label="Chart type",
        options=chart_options,
        horizontal=True,
        label_visibility="collapsed"  # Keeps the original label-free look without the empty-label warning
    )
    selection_index = chart_options.index(chart_selector)
    functions[selection_index](audio_signal, sample_rate)
# Helper function to force a given waveform to a fixed length by truncating or zero-padding
# Currently, this length is hard-coded at 66,150 samples, though that may change in the future
def standardize_waveform_length(waveform):
    audio_length = 66150
    if len(waveform) > audio_length:
        waveform = waveform[:audio_length]
    else:
        waveform = np.pad(waveform, (0, max(0, audio_length - len(waveform))), "constant")
    return waveform
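
# At librosa's default 22050 Hz sample rate, 66,150 samples is exactly 3 seconds
# (66150 / 22050 = 3), so every clip is truncated or zero-padded to 3 s.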
# Takes in a given audio signal and returns its mel-frequency cepstral coefficients (MFCCs)
def preprocess_audio_for_prediction(audio_signal, sample_rate):
    waveform = standardize_waveform_length(waveform=audio_signal)
    mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mels=128)
    mfcc = mfcc.reshape(-1)  # Flattens the (n_mfcc, n_frames) matrix into a 1-D feature vector
    return mfcc
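
# With librosa defaults (n_mfcc=20, hop_length=512) and the fixed 66,150-sample input,
# the MFCC matrix is (20, 130), so the flattened vector has 2600 features; the pickled
# model is assumed to expect an input vector of exactly this length.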
# Loads the model given in model_path and returns a Keras Sequential model
def load_model(model_path):
    with open(model_path, "rb") as model_file:  # Context manager ensures the file handle is closed
        model = pickle.load(model_file)
    return model
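
# Caveat: unpickling a Keras model generally requires the same keras/tensorflow
# versions that saved it, which is presumably why both are imported above despite
# never being referenced directly.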
# Uses the model to predict the speaker's emotion in the given audio clip
def get_emotion_prediction(mfcc):
    model = load_model(model_path)
    prediction = model.predict(mfcc[None])  # mfcc[None] adds a batch dimension of size 1
    predicted_index = np.argmax(prediction)
    emotion = reversed_emotion_map[predicted_index]
    return emotion
# Combines all model functions and displays the model output as a subheader
def display_prediction(audio_signal, sample_rate):
    mfcc = preprocess_audio_for_prediction(audio_signal, sample_rate)
    prediction = get_emotion_prediction(mfcc)
    st.subheader("Predicted Emotion: " + prediction, divider=True)
# Defines the entire process of inputting audio, displaying the model's prediction, and displaying graphs
def run(input_file):
    audio_signal, sample_rate = process_audio(input_file)
    display_prediction(audio_signal, sample_rate)
    create_selections(audio_signal, sample_rate)
# Creates an input area to upload the file
def main():
    st.header("Upload your file here")
    uploaded_file = st.file_uploader("Audio file", type="wav", label_visibility="collapsed")
    if uploaded_file is not None:
        run(uploaded_file)

if __name__ == "__main__":
    main()
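
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py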