agorlanov committed on
Commit
da7b5b9
1 Parent(s): 93c280c
Files changed (4) hide show
  1. app.py +23 -0
  2. packages.txt +1 -0
  3. requirements.txt +21 -0
  4. utils/diarization_pipeline.py +34 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess

import gradio as gr
from scipy.io.wavfile import write

from simple_diarizer.diarizer import Diarizer
from simple_diarizer.utils import (check_wav_16khz_mono, convert_wavfile)
6
+
7
+
8
+
9
def inference(audio):
    """Split an uploaded recording into vocal and instrumental stems.

    Parameters
    ----------
    audio : tuple
        Gradio ``numpy`` audio input: ``(sample_rate, samples)``.

    Returns
    -------
    tuple[str, str]
        Paths to the separated vocals and accompaniment WAV files
        written by Demucs under ``./out``.

    Raises
    ------
    subprocess.CalledProcessError
        If the Demucs separation process exits with a non-zero status.
    """
    os.makedirs("out", exist_ok=True)
    # Gradio supplies (rate, data); scipy expects write(path, rate, data).
    write('test.wav', audio[0], audio[1])
    # Run Demucs source separation on CPU. subprocess.run with an argument
    # list avoids shell injection, and check=True surfaces failures instead
    # of silently returning stale/missing output paths (os.system discarded
    # the exit status).
    subprocess.run(
        ["python3", "-m", "demucs.separate", "-n", "htdemucs",
         "--two-stems=vocals", "-d", "cpu", "test.wav", "-o", "out"],
        check=True,
    )
    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"
14
+
15
+
16
# Fixed typo in the user-facing title ("speakser" -> "speaker").
title = "audio_denoise and speaker diarization"

# Build and launch the Gradio demo: one numpy audio input, two filepath
# audio outputs (the vocal and instrumental stems produced by `inference`).
gr.Interface(
    inference,
    gr.Audio(type="numpy", label="Input"),
    [gr.Audio(type="filepath", label="Vocal"),
     gr.Audio(type="filepath", label="No Vocals / Instrumental")],
    title=title,
).launch(enable_queue=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/openai/whisper.git
2
+ demucs
3
+ pydub
4
+ gradio==3.12
5
+ ffmpeg-python
6
+ torch
7
+ torchaudio
8
+ tqdm==4.64.1
9
+ EasyNMT==2.0.2
10
+ nltk
11
+ transformers
12
+ pysrt
13
+ psutil==5.9.2
14
+ requests
15
+ gpuinfo
16
+ faster-whisper
17
+ yt-dlp
18
+ lightning_fabric
19
+ modelscope
20
+ rotary_embedding_torch
21
+ simple-diarizer
utils/diarization_pipeline.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from simple_diarizer.diarizer import Diarizer
2
+ from simple_diarizer.utils import (check_wav_16khz_mono, convert_wavfile)
3
+
4
+ import soundfile as sf
5
+
6
+
7
+
8
+
9
class DiarizationPipeline:
    """Thin wrapper around ``simple_diarizer``'s ``Diarizer``.

    Configures an ECAPA embedding model with agglomerative clustering and
    exposes diarization as a callable.
    """

    def __init__(self, mode='torch'):
        # NOTE(review): `mode` is accepted but never used — kept for
        # backward compatibility with existing callers; confirm intent.
        # (Removed a no-op bare `self.diar` expression statement and an
        # unnecessary super().__init__() call on this plain class.)
        self.diar = Diarizer(
            embed_model='ecapa',   # supported types: ['xvec', 'ecapa']
            cluster_method='ahc',  # supported types: ['ahc', 'sc']
            window=1,              # size of embedding window (seconds)
            period=0.1,            # hop of window (seconds)
        )

    def __call__(self, wav_file):
        """Diarize ``wav_file`` and return the detected speaker segments.

        Parameters
        ----------
        wav_file : str
            Path to the input WAV file.

        Returns
        -------
        The segment list produced by ``Diarizer.diarize`` with the speaker
        count inferred automatically (``num_speakers=None``) and an AHC
        distance threshold of 0.9.
        """
        return self.diar.diarize(
            wav_file,
            num_speakers=None,
            threshold=9e-1,
        )
30
+
31
+
32
if __name__ == '__main__':
    # Manual smoke test: diarize a placeholder audio path.
    DiarizationPipeline('torch')('path_audio')