# Imports
import functools

import gradio as gr
import tensorflow_io as tfio
from speechbrain.pretrained import EncoderClassifier
import torchaudio
# Not referenced directly in this file; presumably the type of the pickled
# classifier stored in finalized_model.sav -- confirm before removing.
from sklearn.linear_model import LogisticRegression
import joblib
import tensorflow as tf
import numpy as np

# Source identifier of the pretrained SpeechBrain embedding network.
_EMBEDDING_MODEL_SOURCE = "speechbrain/spkrec-xvect-voxceleb"
# Path (relative to the working directory) of the serialized classifier.
_CLASSIFIER_PATH = "finalized_model.sav"


# Utility function for loading audio files and making sure the sample rate is correct.
@tf.function
def load_wav_16k_mono(filename):
    """Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio.

    Not called by any other code visible in this file.

    Args:
        filename: Path of the WAV file, passed to ``tf.io.read_file``.

    Returns:
        The decoded single-channel waveform resampled to 16000 Hz.
    """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
    # decode_wav was asked for one channel; drop that trailing channel axis.
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav


def extract_audio_embeddings(model, wav_audio_file_path: str):
    """Feature extractor that embeds audio into a vector.

    Args:
        model: Object exposing ``encode_batch`` (here a SpeechBrain
            ``EncoderClassifier``).
        wav_audio_file_path: Path of the audio file to embed.

    Returns:
        Whatever ``model.encode_batch`` returns for the loaded signal; the
        caller in this file treats it as a tensor (``.cpu().numpy()``).
    """
    # Reformat audio signal into a tensor.
    # NOTE(review): the file's sample rate is read but never checked and the
    # signal is not resampled; presumably the pretrained model expects a
    # specific rate -- confirm against the model card.
    signal, _sample_rate = torchaudio.load(wav_audio_file_path)
    # Pass tensor through pretrained neural net and extract representation.
    return model.encode_batch(signal)


@functools.lru_cache(maxsize=None)
def _load_models():
    """Load the embedding network and the snoring classifier once and memoize them."""
    feature_extractor = EncoderClassifier.from_hparams(
        _EMBEDDING_MODEL_SOURCE,
        # run_opts={"device":"cuda"}  # Uncomment this to run on GPU if you have one (optional)
    )
    # NOTE: joblib.load unpickles arbitrary Python objects; only ever point
    # _CLASSIFIER_PATH at a file from a trusted source.
    classifier = joblib.load(_CLASSIFIER_PATH)
    return feature_extractor, classifier


def detect_snoring(audio):
    """Classify an audio recording as containing snoring or not.

    Args:
        audio: Path of the audio file to analyse (Gradio supplies it because
            the input component is created with ``type="filepath"``), or
            ``None`` when no audio was supplied.

    Returns:
        ``"Snoring detected"`` or ``"Snoring is not detected"``, or
        ``"No audio provided"`` when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing to load; avoid an opaque failure inside torchaudio.load.
        return "No audio provided"

    # Models are loaded lazily on the first request and reused afterwards
    # instead of being rebuilt from disk for every call.
    feature_extractor, classifier = _load_models()

    embeddings = extract_audio_embeddings(feature_extractor, audio)
    # Keep only the first entry along the leading axis.
    # Assumes the result is 2-D (samples x features), as predict_proba
    # requires -- TODO confirm against encode_batch's output shape.
    embeddings_array = embeddings.cpu().numpy()[0]

    probabilities = classifier.predict_proba(embeddings_array)
    # Column 1 is the probability of the second class (presumably "snoring");
    # rounding turns it into a 0/1 decision per row.
    predicted_labels = np.round(probabilities[:, 1])

    if 1 in predicted_labels:
        return "Snoring detected"
    return "Snoring is not detected"


# Defining the audio input; it passes the recording to detect_snoring as a file path.
# gr.inputs.Audio is the legacy namespace; prefer the top-level gr.Audio when
# the installed Gradio provides it and fall back to the old spelling otherwise.
_audio_component = getattr(gr, "Audio", None) or gr.inputs.Audio
audio = _audio_component(type="filepath")

# Loading the gradio framework
iface = gr.Interface(
    fn=detect_snoring,
    inputs=audio,
    outputs="text",
    title="Snore.AI",
    description="Detect Snoring with artificial intelligence.",
)
iface.launch()