# Imports
import functools

import gradio as gr
import tensorflow_io as tfio
from speechbrain.pretrained import EncoderClassifier
import torchaudio
from sklearn.linear_model import LogisticRegression
import joblib
import tensorflow as tf
import numpy as np


# Utility function for loading audio files and making sure the sample rate is correct.
@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return its samples as single-channel 16 kHz audio.

    The file is decoded to one channel, the channel axis is squeezed away,
    and the samples are resampled from the file's own rate to 16 kHz.
    """
    target_rate = 16000
    raw_bytes = tf.io.read_file(filename)
    audio, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono = tf.squeeze(audio, axis=-1)
    # The decoded sample rate is cast to int64 before being handed to the resampler.
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(native_rate, dtype=tf.int64),
        rate_out=target_rate,
    )

def extract_audio_embeddings(model, wav_audio_file_path: str):
    """Embed an audio file into a vector using a pretrained encoder.

    Args:
        model: Encoder object exposing ``encode_batch`` (the caller in this
            file passes a SpeechBrain ``EncoderClassifier``).
        wav_audio_file_path: Path of the audio file, loaded with torchaudio.

    Returns:
        Whatever ``model.encode_batch`` returns for the loaded signal. This is
        not a tuple (the caller in this file calls ``.cpu().numpy()`` on it),
        so the former ``-> tuple`` annotation was dropped.
    """
    # NOTE(review): the file's sample rate is discarded and no resampling is
    # done here -- confirm inputs already match the rate the encoder expects.
    signal, _sample_rate = torchaudio.load(wav_audio_file_path)
    # Pass the waveform tensor through the pretrained network.
    return model.encode_batch(signal)


@functools.lru_cache(maxsize=1)
def _load_feature_extractor():
    """Build the pretrained SpeechBrain encoder once and reuse it afterwards."""
    return EncoderClassifier.from_hparams(
        "speechbrain/spkrec-xvect-voxceleb",
        # run_opts={"device": "cuda"}  # Uncomment to run on GPU (optional)
    )


@functools.lru_cache(maxsize=1)
def _load_snore_classifier():
    """Load the fitted classifier from disk once and reuse it afterwards."""
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point this at a model file you trust.
    return joblib.load("finalized_model.sav")


def detect_snoring(audio):
    """Tell whether an audio clip contains snoring.

    Args:
        audio: Path to the audio file to analyse. ``None`` or an empty path
            means no clip was supplied.

    Returns:
        ``"No audio provided"`` when ``audio`` is missing,
        ``"Snoring detected"`` when any row's positive-class probability
        exceeds 0.5, and ``"Snoring is not detected"`` otherwise.
    """
    # Without this guard a missing input would crash inside torchaudio.load.
    if not audio:
        return "No audio provided"

    # Both models are cached, so only the first request pays the loading cost.
    feature_extractor = _load_feature_extractor()
    classifier = _load_snore_classifier()

    embeddings = extract_audio_embeddings(feature_extractor, audio)
    # Keep only the first item of the batch. predict_proba needs a 2-D array,
    # so this assumes the embeddings are (batch, 1, dim) -- TODO confirm.
    features = embeddings.cpu().numpy()[0]

    # Column 1 holds the probability of the positive ("snoring") class.
    snore_probability = classifier.predict_proba(features)[:, 1]
    if (snore_probability > 0.5).any():
        return "Snoring detected"
    return "Snoring is not detected"


# Audio input component; the selected clip reaches the callback as a file path.
audio = gr.inputs.Audio(type="filepath")

# Build the Gradio interface around the detector and start serving it.
iface = gr.Interface(
    fn=detect_snoring,
    inputs=audio,
    outputs="text",
    title="Snore.AI",
    description="Detect Snoring with artificial intelligence.",
)
iface.launch()