Snore.Ai / app.py
samusander's picture
Update app.py
ac9e7de
# Imports
import functools

import gradio as gr
import joblib
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
import torch
import torchaudio
from sklearn.linear_model import LogisticRegression
from speechbrain.pretrained import EncoderClassifier
# Utility function for loading audio files and making sure the sample rate is correct.
@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as single-channel audio resampled to 16 kHz.

    Args:
        filename: Path of the WAV file, passed straight to ``tf.io.read_file``.

    Returns:
        The decoded waveform with its channel axis squeezed away, after
        resampling from the file's own sample rate to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    samples, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # The resampler is given the source rate as an int64 tensor.
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    mono = tf.squeeze(samples, axis=-1)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)
def extract_audio_embeddings(model, wav_audio_file_path: str) -> torch.Tensor:
    """Embed an audio file into a vector using a pretrained encoder.

    Args:
        model: Encoder object exposing ``encode_batch`` (the caller in this
            file passes a SpeechBrain ``EncoderClassifier``).
        wav_audio_file_path: Path of the audio file, loaded with
            ``torchaudio.load``.

    Returns:
        Whatever ``model.encode_batch`` produces for the loaded signal; the
        caller in this file treats it as a tensor (``.cpu().numpy()``).

    NOTE(review): the file's sample rate is read but never checked or
    resampled, so the encoder sees the audio at its native rate. Confirm this
    matches how the downstream classifier's training embeddings were produced.
    """
    # torchaudio.load returns (waveform, sample_rate); only the waveform is used.
    signal, _sample_rate = torchaudio.load(wav_audio_file_path)
    # Pass the waveform through the pretrained network and keep its representation.
    return model.encode_batch(signal)
@functools.lru_cache(maxsize=1)
def _load_feature_extractor():
    """Load the pretrained SpeechBrain encoder once and reuse it across calls."""
    return EncoderClassifier.from_hparams(
        "speechbrain/spkrec-xvect-voxceleb",
        # run_opts={"device":"cuda"} # Uncomment this to run on GPU if you have one (optional)
    )


@functools.lru_cache(maxsize=1)
def _load_classifier():
    """Load the fitted classifier from ``finalized_model.sav`` once and reuse it."""
    # joblib.load unpickles the file; it must only ever point at a trusted,
    # locally shipped model.
    return joblib.load('finalized_model.sav')


def detect_snoring(audio):
    """Classify an audio file as containing snoring or not.

    Args:
        audio: Path of the audio file to analyse. ``None`` or an empty path
            (no clip submitted) is reported instead of raising.

    Returns:
        ``"Snoring detected"`` when any rounded class-1 probability equals 1,
        ``"Snoring is not detected"`` otherwise, or ``"No audio provided"``
        when no input was given.
    """
    if not audio:
        return "No audio provided"

    # Both models are cached so each request does not reload them from disk.
    feature_extractor = _load_feature_extractor()
    model = _load_classifier()

    embeddings = extract_audio_embeddings(feature_extractor, audio)
    # Only the first entry of the encoder output is classified.
    embeddings_array = embeddings.cpu().numpy()[0]

    probabilities = model.predict_proba(embeddings_array)
    # Column 1 is presumably the "snoring" class -- verify against the
    # training labels. Rounding turns each probability into a 0/1 decision.
    decisions = np.round(probabilities[:, 1])
    if 1 in decisions:
        return "Snoring detected"
    return "Snoring is not detected"
# Audio input component; type="filepath" makes Gradio pass the clip to
# detect_snoring as a path on disk.
audio = gr.inputs.Audio(type="filepath")

# Build the Gradio interface around the detector and start serving it.
iface = gr.Interface(
    fn=detect_snoring,
    inputs=audio,
    outputs="text",
    title="Snore.AI",
    description="Detect Snoring with artificial intelligence.",
)
iface.launch()