Spaces:
Build error
Build error
| #IMPORT THE LIBRARIES | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import os | |
| import sys | |
| # librosa is a Python library for analyzing audio and music; it is used below to load audio files and extract features from them. | |
| import librosa | |
| import librosa.display | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.metrics import confusion_matrix, classification_report | |
| from sklearn.model_selection import train_test_split | |
| # to play the audio files | |
| import keras | |
| from keras.preprocessing import sequence | |
| from keras.models import Sequential,model_from_json | |
| from keras.layers import Dense, Embedding | |
| from keras.layers import LSTM,BatchNormalization , GRU | |
| from keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.utils import to_categorical | |
| from keras.layers import Input, Flatten, Dropout, Activation | |
| from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D | |
| from keras.models import Model | |
| from keras.callbacks import ModelCheckpoint | |
| from tensorflow.keras.optimizers import SGD | |
| import warnings | |
# Suppress all warnings unless the interpreter was started with explicit
# -W options; DeprecationWarning is always ignored.
no_cli_warning_options = not sys.warnoptions
if no_cli_warning_options:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)
| import tensorflow as tf | |
| from huggingface_hub import from_pretrained_keras | |
| import gradio as gr | |
| from huggingface_hub import from_pretrained_keras | |
# Fetch the pretrained Keras model from the Hugging Face Hub at import time.
model = from_pretrained_keras("Mohamed41/MODEL_EMOTION_AR_TEXT_72P")
def feat_ext(data, sr=22050, n_mfcc=40):
    """Extract a flat feature vector from an audio signal.

    The vector is the frame-averaged zero-crossing rate followed by the
    frame-averaged MFCCs.

    Args:
        data: Audio samples as accepted by librosa (presumably a 1-D mono
            signal, as returned by ``librosa.load``).
        sr: Sampling rate of ``data`` in Hz, forwarded to
            ``librosa.feature.mfcc``. Defaults to 22050, the value that was
            previously hard-coded.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 40, the
            value that was previously hard-coded.

    Returns:
        A 1-D NumPy array: the mean zero-crossing rate followed by the
        ``n_mfcc`` mean MFCC values.
    """
    # Time-domain feature: zero-crossing rate (a low-level acoustic /
    # prosodic feature), averaged over frames.
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)

    # Frequency-domain (spectral) feature: MFCCs, averaged over frames.
    mfcc = np.mean(
        librosa.feature.mfcc(y=data, sr=sr, n_mfcc=n_mfcc).T,
        axis=0,
    )

    # Stack horizontally into one flat vector: [zcr, mfcc_0, ..., mfcc_n].
    return np.hstack((zcr, mfcc))
# Load the persisted scaler and label encoder used at inference time.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
scaler, encoder = (
    joblib.load(filename)
    for filename in ('scaler.joblib', 'encoder.joblib')
)
def get_predict_feat(path):
    """Load an audio clip and turn it into a model-ready feature array.

    Reads up to 2.5 seconds of audio starting 0.6 seconds into the file,
    extracts features with ``feat_ext``, scales them with the module-level
    ``scaler`` and appends a trailing axis.

    Args:
        path: Path of the audio file to load.

    Returns:
        The scaled features with an extra last axis; shape
        ``(1, n_features, 1)`` assuming ``scaler.transform`` preserves the
        ``(1, n_features)`` input shape.

    Raises:
        ValueError: If the loaded clip contains no samples (for example when
            the recording is shorter than the 0.6 s offset).
    """
    # librosa.load resamples to its default rate; the returned rate is unused.
    samples, _ = librosa.load(path, duration=2.5, offset=0.6)
    if samples.size == 0:
        raise ValueError(
            "Audio clip is empty after skipping the first 0.6 seconds."
        )

    # One row, as many columns as feat_ext produced. This avoids both the
    # hard-coded feature count and the deprecated ``newshape=`` keyword of
    # ``np.reshape``.
    features = np.asarray(feat_ext(samples)).reshape(1, -1)
    scaled = scaler.transform(features)

    # Add a trailing axis of size 1 for the model input.
    return np.expand_dims(scaled, axis=2)
# Lookup table from 1-based emotion code to its display name.
emotions1 = dict(enumerate(
    [
        'Neutral',
        'Calm',
        'Happy',
        'Sad',
        'Angry',
        'Fear',
        'Disgust',
        'Surprise',
    ],
    start=1,
))
def prediction(path1):
    """Run the model on one audio file and return the decoded label.

    Args:
        path1: Path of the audio file to classify.

    Returns:
        The first element of the first row produced by
        ``encoder.inverse_transform`` on the model output.
    """
    model_input = get_predict_feat(path1)
    scores = model.predict(model_input)
    decoded = encoder.inverse_transform(scores)
    first_row = decoded[0]
    return first_row[0]
def mainfunc(data):
    """Gradio callback: classify the emotion of the submitted audio.

    Previously this only echoed its argument back, so the model pipeline
    (``prediction``) was never reached from the UI.

    Args:
        data: Path of the submitted audio file (the interface uses an audio
            input with ``type="filepath"``), or ``None`` when nothing was
            submitted.

    Returns:
        The predicted label as a string, or a short message when no audio
        was provided.
    """
    print(data)  # Keep the received path in the application log.
    if data is None:
        return "No audio provided."
    return str(prediction(data))
# Build and start the Gradio UI.
# ``gr.inputs`` was deprecated in Gradio 3 and removed in Gradio 4, so prefer
# the top-level ``gr.Audio`` component and fall back to the legacy namespace
# only on old releases that lack it.
if hasattr(gr, "Audio"):
    audio_input = gr.Audio(type="filepath")
else:
    audio_input = gr.inputs.Audio(type="filepath")

iface = gr.Interface(fn=mainfunc, inputs=audio_input, outputs="text")
iface.launch(inline=False)