File size: 1,543 Bytes
e17ddde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Module-level setup: load the pretrained speaker-diarization pipeline once
# so every call to run_diarization() reuses it.
import os
from pathlib import Path

import torch
from pydub import AudioSegment
from pyannote.audio import Pipeline

pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1")

# Send the pipeline to the GPU only when one is present; the original
# unconditional .to("cuda") crashes on CPU-only machines.
if torch.cuda.is_available():
    pipeline.to(torch.device("cuda"))

def run_diarization(input_file):
    """Run speaker diarization on a wav file and export one clip per turn.

    Side effects:
      - writes '<stem>.rttm' (RTTM diarization output) in the current dir
      - creates a '<stem>_segments' directory containing one wav file per
        speaker turn, named '<n>_<speaker>_[<start>_<end>].wav'

    Args:
        input_file: path to a wav file readable by pyannote and pydub.
    """
    # Apply the pretrained pipeline (module-level `pipeline`).
    diarization = pipeline(input_file)

    stem = Path(input_file).stem

    # Persist the RTTM output. Bug fix: the original wrote
    # `rttm_file.close` without parentheses, so the handle was never
    # explicitly closed; a context manager guarantees it.
    with open(stem + '.rttm', 'w') as rttm_file:
        rttm_file.write(diarization.to_rttm())

    # Collect one (filename, start_ms, end_ms) entry per speaker turn.
    # Bug fix: the original round-tripped times through a '.1f'-formatted
    # string and split(' '), truncating segment boundaries to 100 ms;
    # keep the exact float times for slicing.
    audio_segments = []
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")
        # Names keep the original rounded-to-0.1s convention.
        name = f"{speaker}_[{turn.start:.1f}_{turn.end:.1f}].wav"
        audio_segments.append([name, turn.start * 1000, turn.end * 1000])

    sound = AudioSegment.from_wav(input_file)
    output_directory = stem + "_segments"
    # exist_ok avoids FileExistsError when re-running on the same input
    # (the original os.mkdir raised on the second run).
    os.makedirs(output_directory, exist_ok=True)
    for counter, (name, start_ms, end_ms) in enumerate(audio_segments, start=1):
        extract = sound[start_ms:end_ms]
        extract.export(f"{output_directory}/{counter}_{name}", format='wav')