samusander committed on
Commit ac9e7de
1 Parent(s): e39bb38

Update app.py

Files changed (1)
  1. app.py +45 -16
app.py CHANGED
@@ -1,29 +1,58 @@
  # Imports
- from sklearn.linear_model import LogisticRegression
- from sklearn.model_selection import cross_val_predict
- from sklearn.metrics import accuracy_score
- import joblib
- import os
- import pandas as pd
- import numpy as np
- import random
- import tensorflow as tf
- import torch
- from tqdm import tqdm
+ import gradio as gr
  import tensorflow_io as tfio
- from pathlib import Path
  from speechbrain.pretrained import EncoderClassifier
  import torchaudio
+ from sklearn.linear_model import LogisticRegression
+ import joblib
+ import tensorflow as tf
+ import numpy as np
+
+
+
+ # Utility function for loading audio files and making sure the sample rate is correct.
+ @tf.function
+ def load_wav_16k_mono(filename):
+     """Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio."""
+     file_contents = tf.io.read_file(filename)
+     wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
+     wav = tf.squeeze(wav, axis=-1)
+     sample_rate = tf.cast(sample_rate, dtype=tf.int64)
+     wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
+     return wav
+
+ def extract_audio_embeddings(model, wav_audio_file_path: str) -> tuple:
+     """Feature extractor that embeds audio into a vector."""
+     signal, fs = torchaudio.load(wav_audio_file_path)  # Reformat audio signal into a tensor
+     embeddings = model.encode_batch(
+         signal
+     )  # Pass tensor through pretrained neural net and extract representation
+     return embeddings
+
+
+ def detect_snoring(audio):
+     feature_extractor = EncoderClassifier.from_hparams(
+         "speechbrain/spkrec-xvect-voxceleb",
+         # run_opts={"device":"cuda"}  # Uncomment this to run on GPU if you have one (optional)
+     )
+     filename = 'finalized_model.sav'
+     model = joblib.load(filename)

+     embeddings = extract_audio_embeddings(feature_extractor, audio)
+     embeddings_array = embeddings.cpu().numpy()[0]
+     output = model.predict_proba(embeddings_array)
+     output = np.round(output[:, 1])
+     if 1 in output:
+         output = "Snoring detected"
+     else:
+         output = "Snoring is not detected"
+     return output


- # Defining the transcription function
- def snoring(audio):
-     return text

  # Defining the audio filepaths
  audio = gr.inputs.Audio(type="filepath")

  # Loading the gradio framework
- iface = gr.Interface(fn=snoring,inputs=audio, outputs="text", title="Snore.AI", description="Detect Snotring with artificial intelligence.")
+ iface = gr.Interface(fn=detect_snoring,inputs=audio, outputs="text", title="Snore.AI", description="Detect snoring with artificial intelligence.")
  iface.launch()
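The new detect_snoring function loads a scikit-learn classifier from finalized_model.sav, a file that is not part of this commit. The sketch below is a hypothetical illustration of how such a model could be trained and serialized from the same x-vector embeddings, using the LogisticRegression and joblib imports the updated app.py brings in; the file paths and labels are placeholders, not files from this repository.

# Hypothetical training sketch (not part of this commit): fit a LogisticRegression
# on x-vector embeddings and serialize it as finalized_model.sav for app.py to load.
import joblib
import numpy as np
import torchaudio
from sklearn.linear_model import LogisticRegression
from speechbrain.pretrained import EncoderClassifier

feature_extractor = EncoderClassifier.from_hparams("speechbrain/spkrec-xvect-voxceleb")

def embed(wav_path):
    # Mirror extract_audio_embeddings() in app.py: one x-vector per audio file.
    signal, _ = torchaudio.load(wav_path)
    return feature_extractor.encode_batch(signal).cpu().numpy()[0][0]

# Placeholder dataset: (path, label) pairs, 1 = snoring, 0 = no snoring.
samples = [("snore_01.wav", 1), ("snore_02.wav", 1), ("speech_01.wav", 0), ("speech_02.wav", 0)]
X = np.stack([embed(path) for path, _ in samples])
y = np.array([label for _, label in samples])

model = LogisticRegression(max_iter=1000).fit(X, y)
joblib.dump(model, "finalized_model.sav")  # detect_snoring() loads this file at inference time

Serializing with joblib matches the joblib.load call in detect_snoring, and predict_proba on the embedding array then yields the snoring probability that the app rounds into its "Snoring detected" / "Snoring is not detected" output.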