import numpy as np
import librosa
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# TF_ENABLE_ONEDNN_OPTS=0

classes_path = r'./classes_new.npy'
model_path = r'./Model_Audio_Classification_new.h5'


def audio_class(audio_path):
    # Restore the label encoder fitted during training from the saved classes.
    labelencoder = LabelEncoder()
    labelencoder.classes_ = np.load(classes_path)

    # Load the trained Keras model.
    model_h5 = tf.keras.models.load_model(model_path)

    # Load the audio and extract 40 MFCCs, averaged over time,
    # to form a single fixed-length feature vector.
    audio, sample_rate = librosa.load(audio_path, res_type='soxr_vhq')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)

    # Predict class probabilities and map the argmax back to its class label.
    predict_x = model_h5.predict(mfccs_scaled_features)
    predicted_label = np.argmax(predict_x, axis=1)
    prediction_class = labelencoder.inverse_transform(predicted_label)
    return str(prediction_class[0])


des = '''Upload a .wav / .mp3 file or record audio and wait until the upload or recording finishes.
Press the Submit button to classify the audio into one of the predefined classes, or the Clear button to remove the audio file from the current directory.
Audio classes for classification: air conditioner, car horn, children playing, dog bark, drilling, engine idling, gun shot, jackhammer, siren, and street music.'''

x = gr.Interface(
    audio_class,
    gr.Audio(sources=["upload", "microphone"], type="filepath"),
    "text",
    title="Audio Classification Using Deep Learning",
    description=des)
x.launch()