import os

import gradio as gr
from pyannote.audio import Pipeline

# Load the pretrained speaker diarization pipeline once at startup.
# The gated model requires a Hugging Face access token, read here from
# the HF_TOKEN environment variable.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-community-1", token=os.getenv("HF_TOKEN")
)


def diarize(audio_file_path):
    """Perform speaker diarization on an uploaded audio file."""
    if audio_file_path is None:
        return "Please upload an audio file first."

    # Run the pipeline on the file path provided by the Gradio Audio component.
    diarization = pipeline(audio_file_path)

    # Format each speaker turn as a human-readable line.
    results = []
    for turn, speaker in diarization.speaker_diarization:
        results.append(
            f"{speaker} speaks between t={turn.start:.3f}s and t={turn.end:.3f}s"
        )
    return "\n".join(results)


demo = gr.Interface(
    fn=diarize,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.wav)"),
    outputs=gr.Textbox(label="Speaker Segments"),
    title="πŸŽ™οΈ Speaker Diarization Demo",
    description="Upload an audio file and detect who speaks when using Pyannote Audio.",
)

if __name__ == "__main__":
    demo.launch()