from pathlib import Path
import os

import torch
from pydub import AudioSegment
from pyannote.audio import Pipeline

# Load the pretrained diarization pipeline (a Hugging Face access token may be
# required for this model; pass it via use_auth_token= if so).
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")

# Send the pipeline to GPU (when available).
if torch.cuda.is_available():
    pipeline.to(torch.device("cuda"))


def run_diarization(input_file):
    # Apply the pretrained pipeline.
    diarization = pipeline(input_file)

    # Save the diarization output in RTTM format.
    with open(Path(input_file).stem + ".rttm", "w") as rttm_file:
        rttm_file.write(diarization.to_rttm())

    # Collect (start, end, speaker) turns, rounded to 0.1 s, and print each one.
    diarization_result = []
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        diarization_result.append((round(turn.start, 1), round(turn.end, 1), speaker))
        print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")

    # Build a (filename, start_ms, end_ms) entry for each speaker turn.
    audio_segments = []
    for start, end, speaker in diarization_result:
        name = f"{speaker}_[{start}_{end}].wav"
        audio_segments.append((name, int(start * 1000), int(end * 1000)))

    # Slice the source audio and export one WAV file per speaker turn.
    sound = AudioSegment.from_wav(input_file)
    output_directory = Path(input_file).stem + "_segments"
    os.makedirs(output_directory, exist_ok=True)
    for counter, (name, start_ms, end_ms) in enumerate(audio_segments, start=1):
        extract = sound[start_ms:end_ms]
        extract.export(f"{output_directory}/{counter}_{name}", format="wav")
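
# Minimal usage sketch: the filename "meeting.wav" below is a placeholder for
# whatever WAV file you want to diarize. Per-speaker clips land in a
# "meeting_segments/" directory next to the input, and the full diarization
# is written to "meeting.rttm".
if __name__ == "__main__":
    run_diarization("meeting.wav")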