# run_diarization.py — diarization-demo (author: cherif54)
# Speaker diarization script: writes an RTTM file and per-speaker WAV segments.
# Standard library
import os
from pathlib import Path

# Third-party
import torch
from pydub import AudioSegment
from pyannote.audio import Pipeline

# Load the pretrained speaker-diarization pipeline once at import time.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")

# Send pipeline to GPU only when one is actually available; an unconditional
# .to("cuda") raises on CPU-only machines.
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
def run_diarization(input_file):
    """Run speaker diarization on a WAV file and export per-turn segments.

    Applies the module-level pretrained ``pipeline`` to ``input_file``,
    writes the result as ``<stem>.rttm`` in the current working directory,
    and exports one WAV clip per speaker turn into a ``<stem>_segments``
    directory.

    Args:
        input_file: Path to the input WAV file (str or os.PathLike).

    Returns:
        None. Side effects only: the RTTM file and the segment WAVs.
    """
    # Apply the pretrained pipeline.
    diarization = pipeline(input_file)

    stem = Path(input_file).stem

    # Persist the result in RTTM format. The original code called
    # ``rttm_file.close`` without parentheses, so the handle was never
    # explicitly closed; a context manager guarantees cleanup.
    with open(stem + '.rttm', 'w') as rttm_file:
        rttm_file.write(diarization.to_rttm())

    # Collect [start, end, speaker] (start/end as strings rounded to 0.1 s,
    # matching the original formatted output) for every speaker turn.
    diarization_result = []
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        diarization_result.append([f"{turn.start:.1f}", f"{turn.end:.1f}", speaker])
        print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")

    # Build export jobs: (file name, start ms, end ms) per turn.
    audio_segments = []
    for start_str, end_str, speaker in diarization_result:
        start = float(start_str)
        end = float(end_str)
        name = f"{speaker}_[{start}_{end}]"
        audio_segments.append([name + '.wav', start * 1000, end * 1000])

    # Slice the source audio and export one WAV per turn.
    sound = AudioSegment.from_wav(input_file)
    output_directory = stem + "_segments"
    # exist_ok avoids a crash when re-running on the same input file.
    os.makedirs(output_directory, exist_ok=True)
    for counter, (seg_name, start_ms, end_ms) in enumerate(audio_segments, start=1):
        extract = sound[start_ms:end_ms]
        extract.export(f"{output_directory}/{counter}_{seg_name}", format='wav')