Spaces:
Runtime error
Runtime error
agorlanov
committed on
Commit
•
ea4b219
1
Parent(s):
4c18976
mvp
Browse files
- app.py +7 -1
- main_pipeline.py +1 -2
- utils/denoise_pipeline.py +5 -2
- utils/diarization_pipeline.py +1 -8
app.py
CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
|
|
3 |
from main_pipeline import main_pipeline
|
4 |
from scipy.io.wavfile import write
|
5 |
|
6 |
-
title = "audio_denoise and speakser diarization"
|
7 |
|
8 |
|
9 |
def app_pipeline(audio):
|
@@ -14,9 +14,15 @@ def app_pipeline(audio):
|
|
14 |
return result_diarization + [None] * (10 - len(result_diarization))
|
15 |
|
16 |
|
|
|
|
|
|
|
17 |
gr.Interface(
|
18 |
app_pipeline,
|
19 |
gr.Audio(type="numpy", label="Input"),
|
20 |
[gr.Audio(visible=True) for i in range(10)],
|
21 |
title=title,
|
|
|
|
|
|
|
22 |
).launch(enable_queue=True)
|
|
|
3 |
from main_pipeline import main_pipeline
|
4 |
from scipy.io.wavfile import write
|
5 |
|
6 |
+
title = "audio_denoise and speakser diarization. Fast inference https://t.me/diarizarion_bot"
|
7 |
|
8 |
|
9 |
def app_pipeline(audio):
|
|
|
14 |
return result_diarization + [None] * (10 - len(result_diarization))
|
15 |
|
16 |
|
17 |
+
example_list = [
|
18 |
+
["dialog.mp3"]
|
19 |
+
]
|
20 |
gr.Interface(
|
21 |
app_pipeline,
|
22 |
gr.Audio(type="numpy", label="Input"),
|
23 |
[gr.Audio(visible=True) for i in range(10)],
|
24 |
title=title,
|
25 |
+
examples=example_list,
|
26 |
+
cache_examples=False
|
27 |
+
|
28 |
).launch(enable_queue=True)
|
main_pipeline.py
CHANGED
@@ -21,7 +21,6 @@ def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=4
|
|
21 |
for _, r in temp_df.iterrows():
|
22 |
start = int(r["start"] * out_f)
|
23 |
end = int(r["end"] * out_f)
|
24 |
-
# output_signal[start:end] = signal[start:end]
|
25 |
output_signal.append(signal[start:end])
|
26 |
|
27 |
out_wav_path = f'{out_folder}/{label}.wav'
|
@@ -42,4 +41,4 @@ def main_pipeline(audio_path):
|
|
42 |
|
43 |
|
44 |
if __name__ == '__main__':
|
45 |
-
main_pipeline('
|
|
|
21 |
for _, r in temp_df.iterrows():
|
22 |
start = int(r["start"] * out_f)
|
23 |
end = int(r["end"] * out_f)
|
|
|
24 |
output_signal.append(signal[start:end])
|
25 |
|
26 |
out_wav_path = f'{out_folder}/{label}.wav'
|
|
|
41 |
|
42 |
|
43 |
if __name__ == '__main__':
|
44 |
+
main_pipeline('dialog.mp3')
|
utils/denoise_pipeline.py
CHANGED
@@ -10,9 +10,12 @@ from demucs.pretrained import get_model
|
|
10 |
demucs_model = get_model('htdemucs')
|
11 |
|
12 |
|
13 |
-
def denoise(filename, device):
|
|
|
14 |
wav_ref, sr = librosa.load(filename, mono=False, sr=44100)
|
|
|
15 |
wav = torch.tensor(wav_ref)
|
|
|
16 |
ref = wav.mean(0)
|
17 |
wav = (wav - ref.mean()) / wav.std()
|
18 |
sources = apply_model(
|
@@ -32,4 +35,4 @@ def denoise(filename, device):
|
|
32 |
|
33 |
if __name__ == '__main__':
|
34 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
35 |
-
denoise(filename='../
|
|
|
10 |
demucs_model = get_model('htdemucs')
|
11 |
|
12 |
|
13 |
+
def denoise(filename: str, device: str) -> str:
|
14 |
+
|
15 |
wav_ref, sr = librosa.load(filename, mono=False, sr=44100)
|
16 |
+
|
17 |
wav = torch.tensor(wav_ref)
|
18 |
+
wav = torch.cat([wav.unsqueeze(0), wav.unsqueeze(0)]) if len(wav.shape) == 1 else wav
|
19 |
ref = wav.mean(0)
|
20 |
wav = (wav - ref.mean()) / wav.std()
|
21 |
sources = apply_model(
|
|
|
35 |
|
36 |
if __name__ == '__main__':
|
37 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
38 |
+
denoise(filename='../oxx.wav', device=device)
|
utils/diarization_pipeline.py
CHANGED
@@ -1,9 +1,5 @@
|
|
1 |
from simple_diarizer.diarizer import Diarizer
|
2 |
|
3 |
-
import librosa
|
4 |
-
|
5 |
-
import soundfile as sf
|
6 |
-
|
7 |
|
8 |
class DiarizationPipeline:
|
9 |
def __init__(self, ):
|
@@ -16,10 +12,7 @@ class DiarizationPipeline:
|
|
16 |
)
|
17 |
|
18 |
def __call__(self, wav_file):
|
19 |
-
|
20 |
-
sf.write("converted.wav", y_16k, 16000, 'PCM_24')
|
21 |
-
|
22 |
-
segments = self.diar.diarize("converted.wav",
|
23 |
num_speakers=None,
|
24 |
threshold=9e-1, )
|
25 |
|
|
|
1 |
from simple_diarizer.diarizer import Diarizer
|
2 |
|
|
|
|
|
|
|
|
|
3 |
|
4 |
class DiarizationPipeline:
|
5 |
def __init__(self, ):
|
|
|
12 |
)
|
13 |
|
14 |
def __call__(self, wav_file):
|
15 |
+
segments = self.diar.diarize(wav_file,
|
|
|
|
|
|
|
16 |
num_speakers=None,
|
17 |
threshold=9e-1, )
|
18 |
|