File size: 1,377 Bytes
a227627
 
c39b8bf
3ff6c9f
 
c39b8bf
3ff6c9f
 
 
 
 
 
 
d6b32ee
3ff6c9f
 
 
c39b8bf
3ff6c9f
 
 
c39b8bf
3ff6c9f
 
 
 
c39b8bf
3ff6c9f
 
c39b8bf
 
 
a227627
2f53d2f
3ff6c9f
d172563
a227627
3ff6c9f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import argparse

from utils.denoise_pipeline import denoise
from utils.diarization_pipeline import DiarizationPipeline


class CleaningPipeline:
    """
    Cleaning audio pipeline. Contains:
        - denoising
        - diarization

    Calling an instance runs both stages and returns a flat list: the
    denoiser's result first, then the diarization output paths, then
    ``None`` padding so that the speaker part fills ``max_speakers`` slots.
    """

    # Default number of speaker slots in the returned list (after the
    # denoised entry). Kept as a class attribute so it can be overridden
    # per instance without changing the constructor call used elsewhere.
    max_speakers = 19

    def __init__(self, device, max_speakers=None):
        """
        Args:
            device: Forwarded to ``DiarizationPipeline`` and, on every call,
                to the denoiser.
            max_speakers: Optional override for the number of speaker slots
                used when padding the result. ``None`` keeps the class
                default.
        """
        self.device = device
        self.denoiser = denoise
        self.diarization = DiarizationPipeline(device)
        if max_speakers is not None:
            self.max_speakers = max_speakers

    def __call__(self, input_audio_path: str) -> list:
        """
        Denoise ``input_audio_path`` and diarize the denoised result.

        Returns:
            ``[]`` when the diarization stage reports nothing (empty or
            ``None`` result). Otherwise a list made of the denoiser's
            result, the entries of ``'output_diar_audio_paths'`` and
            ``max_speakers - count_speakers`` trailing ``None`` values
            (no padding when ``count_speakers`` exceeds ``max_speakers``).
        """
        denoised_audio_path = self.denoiser(input_audio_path, self.device)
        result_diarization = self.diarization(denoised_audio_path)

        # Guard clause: treat a missing result the same as an empty dict
        # instead of failing on the key lookups below.
        if not result_diarization:
            return []

        output_diar_audio_paths = result_diarization['output_diar_audio_paths']
        count_speakers = result_diarization['count_speakers']

        # Clamp explicitly: more speakers than slots means no padding at all.
        padding_len = max(0, self.max_speakers - count_speakers)
        return [denoised_audio_path] + list(output_diar_audio_paths) + [None] * padding_len


if __name__ == '__main__':
    # Command-line entry point: parse the options, build the pipeline on the
    # requested device and run it once on the given audio file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio-path', default='dialog.mp3', help='Path to audio')
    parser.add_argument('--device', default='cpu',
                        help='Device to run the pipeline on (e.g. cpu or cuda:0)')
    # NOTE(review): --out-folder-path is kept for CLI compatibility, but
    # nothing in this script consumes it; CleaningPipeline is only given the
    # device and the audio path.
    parser.add_argument('--out-folder-path', default='out', help='Path to result folder')
    opt = parser.parse_args()

    # Honor --device instead of a hard-coded 'cuda:0'.
    cleaning_pipeline = CleaningPipeline(opt.device)
    cleaning_pipeline(input_audio_path=opt.audio_path)