deepsync committed on
Commit
d710575
·
verified ·
1 Parent(s): f773b6c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import os
import torchaudio
from uuid import uuid4

import torch

# Keep PyTorch single-threaded: VAD inference is lightweight and this avoids
# thread oversubscription on small shared hosts (e.g. a Hugging Face Space).
torch.set_num_threads(1)

# Whether to load the ONNX export of the model instead of the TorchScript one.
# BUGFIX: this constant was referenced below but never defined, so the script
# crashed with NameError on import. Default to the TorchScript model.
USE_ONNX = False

# Download the Silero VAD model plus its helper utilities from torch.hub.
# NOTE: this performs a network download at import time (force_reload=True
# re-downloads on every start).
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              force_reload=True,
                              onnx=USE_ONNX)

# Unpack the helper callables bundled with the model.
(get_speech_timestamps,
 save_audio,
 read_audio,
 VADIterator,
 collect_chunks) = utils
19
+
20
+
21
def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms):
    """Detect speech segments in an audio file and write them as a label file.

    Runs Silero VAD over the audio at ``audio_fp`` and writes one
    tab-separated line per detected segment (``start<TAB>end<TAB>Sound N``,
    Audacity label-track format) to a uniquely named ``.txt`` file in the
    current working directory.

    Parameters mirror the Silero ``get_speech_timestamps`` tuning knobs:
    ``threshold`` (speech probability cutoff), ``min_speech_duration_ms``
    and ``min_silence_duration_ms``.

    Returns the path of the written label file.
    """
    # NOTE(review): torchaudio.load returns a (channels, samples) tensor;
    # presumably inputs are mono here — confirm for multi-channel files.
    waveform, sample_rate = torchaudio.load(audio_fp)
    segments = get_speech_timestamps(
        waveform,
        model,
        sampling_rate=sample_rate,
        threshold=threshold,
        min_speech_duration_ms=min_speech_duration_ms,
        min_silence_duration_ms=min_silence_duration_ms,
        return_seconds=True,
    )
    label_lines = [
        f"{segment['start']}\t{segment['end']}\tSound {index + 1}"
        for index, segment in enumerate(segments)
    ]
    out_path = str(uuid4()) + ".txt"
    with open(out_path, "w") as label_file:
        label_file.write("\n".join(label_lines))
    return out_path
37
+
38
+
39
# Gradio UI: an audio upload plus the three VAD tuning controls in,
# the generated label file out.
# BUGFIX: gr.Slider's third positional parameter is `value`, so the original
# `gr.Slider(0, 1, 0.01, ..., value=0.5)` passed `value` twice (TypeError).
# The 0.01 was clearly intended as the step size, so pass it as `step=`.
interface = gr.Interface(
    get_labels,
    [
        gr.Audio(type="filepath", label="Audio file"),
        gr.Slider(0, 1, step=0.01, label="Threshold", value=0.5),
        gr.Number(label="min_speech_duration_ms", value=250),
        gr.Number(label="min_silence_duration_ms", value=100),
    ],
    gr.File(label="Labels"),
)
44
+
45
+ if __name__ == "__main__":
46
+ interface.queue().launch(auth=os.environ.get("USERNAME"), os.environ.get("PASSWORD"))