Spaces:
Runtime error
Runtime error
agorlanov
committed on
Commit
•
da7b5b9
1
Parent(s):
93c280c
diar
Browse files- app.py +23 -0
- packages.txt +1 -0
- requirements.txt +21 -0
- utils/diarization_pipeline.py +34 -0
app.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
from scipy.io.wavfile import write
|
4 |
+
from simple_diarizer.diarizer import Diarizer
|
5 |
+
from simple_diarizer.utils import (check_wav_16khz_mono, convert_wavfile)
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def inference(audio):
    """Separate vocals from an input recording with Demucs.

    Args:
        audio: Gradio "numpy" audio value — a ``(sample_rate, data)`` tuple.

    Returns:
        Tuple of paths to the separated vocals and instrumental WAV files.
    """
    # Local import: avoids touching the module's public import block.
    import subprocess

    os.makedirs("out", exist_ok=True)
    # scipy expects (filename, rate, data); Gradio supplies (rate, data).
    write('test.wav', audio[0], audio[1])
    # Run Demucs via an argument list (shell=False) instead of os.system:
    # no shell quoting/injection pitfalls, same command line.
    subprocess.run(
        ["python3", "-m", "demucs.separate", "-n", "htdemucs",
         "--two-stems=vocals", "-d", "cpu", "test.wav", "-o", "out"],
        check=False,  # mirror os.system: do not raise on a non-zero exit code
    )
    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"
|
14 |
+
|
15 |
+
|
16 |
+
# User-facing title shown as the page heading on the Space.
# (Fixes the "speakser" typo in the original string.)
title = "audio denoise and speaker diarization"

# Wire the separation function into a simple Gradio UI: one numpy audio input,
# two filepath audio outputs (vocals / instrumental).
# `enable_queue=True` serializes requests — needed because Demucs separation is
# long-running; on Gradio 3.x this kwarg still lives on `launch`.
gr.Interface(
    inference,
    gr.Audio(type="numpy", label="Input"),
    [gr.Audio(type="filepath", label="Vocal"),
     gr.Audio(type="filepath", label="No Vocals / Instrumental")],
    title=title,
).launch(enable_queue=True)
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/openai/whisper.git
|
2 |
+
demucs
|
3 |
+
pydub
|
4 |
+
gradio==3.12
|
5 |
+
ffmpeg-python
|
6 |
+
torch
|
7 |
+
torchaudio
|
8 |
+
tqdm==4.64.1
|
9 |
+
EasyNMT==2.0.2
|
10 |
+
nltk
|
11 |
+
transformers
|
12 |
+
pysrt
|
13 |
+
psutil==5.9.2
|
14 |
+
requests
|
15 |
+
gpuinfo
|
16 |
+
faster-whisper
|
17 |
+
yt-dlp
|
18 |
+
lightning_fabric
|
19 |
+
modelscope
|
20 |
+
rotary_embedding_torch
|
21 |
+
simple-diarizer
|
utils/diarization_pipeline.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from simple_diarizer.diarizer import Diarizer
|
2 |
+
from simple_diarizer.utils import (check_wav_16khz_mono, convert_wavfile)
|
3 |
+
|
4 |
+
import soundfile as sf
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
class DiarizationPipeline:
    """Thin callable wrapper around simple_diarizer's Diarizer.

    Builds one Diarizer at construction time and exposes diarization as a
    single call: ``DiarizationPipeline()(wav_file) -> segments``.
    """

    def __init__(self, mode='torch'):
        # `mode` is accepted for backward compatibility with existing callers;
        # it is not consumed by Diarizer, so just record it.
        self.mode = mode
        # Removed: a dead bare-expression statement (`self.diar`) and a
        # needless super().__init__() on a plain (non-subclassing) class.
        self.diar = Diarizer(
            embed_model='ecapa',    # supported types: ['xvec', 'ecapa']
            cluster_method='ahc',   # supported types: ['ahc', 'sc']
            window=1,               # size of window to extract embeddings (in seconds)
            period=0.1,             # hop of window (in seconds)
        )

    def __call__(self, wav_file):
        """Diarize a WAV file and return the detected speaker segments.

        Args:
            wav_file: Path to the input audio (presumably 16 kHz mono WAV, as
                required by simple_diarizer — confirm against callers).

        Returns:
            The segments produced by ``Diarizer.diarize``.
        """
        return self.diar.diarize(
            wav_file,
            num_speakers=None,  # unknown speaker count: let the threshold decide
            threshold=9e-1,
        )
|
30 |
+
|
31 |
+
|
32 |
+
if __name__ == '__main__':
    # Manual smoke test: build the pipeline and diarize a placeholder path.
    DiarizationPipeline('torch')('path_audio')
|