import numpy as np
import librosa
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# TF_ENABLE_ONEDNN_OPTS=0

classes_path = r'./classes_new.npy'
model_path = r'./Model_Audio_Classification_new.h5'


def audio_class(audio_path):
    # Restore the label encoder fitted during training from the saved classes.
    labelencoder = LabelEncoder()
    labelencoder.classes_ = np.load(classes_path)

    # Load the trained Keras model.
    model_h5 = tf.keras.models.load_model(model_path)

    # Load the audio and extract 40 MFCCs, averaged over time,
    # to form a single fixed-length feature vector.
    audio, sample_rate = librosa.load(audio_path, res_type='soxr_vhq')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)

    # Predict class probabilities and map the argmax back to its class label.
    predict_x = model_h5.predict(mfccs_scaled_features)
    predicted_label = np.argmax(predict_x, axis=1)
    prediction_class = labelencoder.inverse_transform(predicted_label)
    return str(prediction_class[0])


des = '''Upload a .wav / .mp3 file or record audio and wait until the upload or recording finishes.
Press the Submit button to classify the audio into one of the predefined classes, or the Clear button to remove the audio file from the current directory.
Audio classes for classification: air conditioner, car horn, children playing, dog bark, drilling, engine idling, gun shot, jackhammer, siren, and street music.'''

x = gr.Interface(
    audio_class,
    gr.Audio(sources=["upload", "microphone"], type="filepath"),
    "text",
    title="Audio Classification Using Deep Learning",
    description=des)
x.launch()