deepsync committed on
Commit
d710575
·
verified ·
1 Parent(s): f773b6c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import os
import torchaudio
from uuid import uuid4

import torch

# Keep PyTorch single-threaded: VAD inference is lightweight and this avoids
# thread oversubscription on small shared hosts (e.g. a Hugging Face Space).
torch.set_num_threads(1)

# Whether to load the ONNX export of the model instead of the TorchScript one.
# BUGFIX: this constant was referenced below but never defined, so the script
# crashed with NameError on import. Default to the TorchScript model.
USE_ONNX = False

# Download the Silero VAD model plus its helper utilities from torch.hub.
# NOTE: this performs a network download at import time (force_reload=True
# re-downloads on every start).
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              force_reload=True,
                              onnx=USE_ONNX)

# Unpack the helper callables bundled with the model.
(get_speech_timestamps,
 save_audio,
 read_audio,
 VADIterator,
 collect_chunks) = utils
19
+
20
+
21
def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms):
    """Detect speech segments in an audio file and write them as a label file.

    Runs Silero VAD over the audio at ``audio_fp`` and writes one
    tab-separated line per detected segment (``start<TAB>end<TAB>Sound N``,
    Audacity label-track format) to a uniquely named ``.txt`` file in the
    current working directory.

    Parameters mirror the Silero ``get_speech_timestamps`` tuning knobs:
    ``threshold`` (speech probability cutoff), ``min_speech_duration_ms``
    and ``min_silence_duration_ms``.

    Returns the path of the written label file.
    """
    # NOTE(review): torchaudio.load returns a (channels, samples) tensor;
    # presumably inputs are mono here — confirm for multi-channel files.
    waveform, sample_rate = torchaudio.load(audio_fp)
    segments = get_speech_timestamps(
        waveform,
        model,
        sampling_rate=sample_rate,
        threshold=threshold,
        min_speech_duration_ms=min_speech_duration_ms,
        min_silence_duration_ms=min_silence_duration_ms,
        return_seconds=True,
    )
    label_lines = [
        f"{segment['start']}\t{segment['end']}\tSound {index + 1}"
        for index, segment in enumerate(segments)
    ]
    out_path = str(uuid4()) + ".txt"
    with open(out_path, "w") as label_file:
        label_file.write("\n".join(label_lines))
    return out_path
37
+
38
+
39
# Gradio UI: an audio upload plus the three VAD tuning controls in,
# the generated label file out.
# BUGFIX: gr.Slider's third positional parameter is `value`, so the original
# `gr.Slider(0, 1, 0.01, ..., value=0.5)` passed `value` twice (TypeError).
# The 0.01 was clearly intended as the step size, so pass it as `step=`.
interface = gr.Interface(
    get_labels,
    [
        gr.Audio(type="filepath", label="Audio file"),
        gr.Slider(0, 1, step=0.01, label="Threshold", value=0.5),
        gr.Number(label="min_speech_duration_ms", value=250),
        gr.Number(label="min_silence_duration_ms", value=100),
    ],
    gr.File(label="Labels"),
)
44
+
45
+ if __name__ == "__main__":
46
+ interface.queue().launch(auth=os.environ.get("USERNAME"), os.environ.get("PASSWORD"))