|
import os |
|
from huggingface_hub import login |
|
from pyannote.audio import Pipeline |
|
import gradio as gr |
|
|
|
|
|
# Read the Hugging Face access token once and reuse it for both the length
# report and the login call.
token = os.getenv("HUGGINGFACE_HUB_TOKEN")

if not token:
    # Fail fast with an actionable message; without this guard the len() call
    # below raises an opaque TypeError when the variable is unset.
    raise RuntimeError(
        "HUGGINGFACE_HUB_TOKEN is not set. Export a Hugging Face access token "
        "in that environment variable before starting the app."
    )

# Only the length is printed so the secret itself never reaches the logs.
print(f"Token length: {len(token)}")

login(token=token)

# Load the diarization pipeline once at start-up so every request reuses it.
# use_auth_token=True relies on the credentials stored by login() above.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=True,
)
|
def diarization(audio_file):
    """Run speaker diarization on an audio file and return the speaker turns.

    Args:
        audio_file: Audio input handed to the module-level ``pipeline``; the
            Gradio interface in this file supplies a file path.

    Returns:
        A list of dicts, one per track yielded by the pipeline result, each
        with the keys ``"start"``, ``"end"`` and ``"speaker"``.
        (``start``/``end`` are presumably offsets in seconds -- confirm
        against the pyannote documentation.)

    Raises:
        ValueError: If no audio input was provided, or if the module-level
            pipeline is ``None``.
    """
    # The UI passes None when the user submits without uploading anything;
    # reject that up front instead of failing somewhere inside the pipeline.
    if not audio_file:
        raise ValueError("No audio file provided. Upload or record audio before running diarization.")

    if pipeline is None:
        raise ValueError("Pipeline could not be initialized. Check your Hugging Face token and permissions.")

    diarization_result = pipeline(audio_file)

    # itertracks(yield_label=True) yields (segment, track, label) triples;
    # the track identifier is not needed in the output.
    return [
        {
            "start": turn.start,
            "end": turn.end,
            "speaker": speaker,
        }
        for turn, _, speaker in diarization_result.itertracks(yield_label=True)
    ]
|
|
|
|
|
# Web UI: one audio input, delivered to the callback as a file path, and a
# JSON view that displays the list returned by diarization().
audio_input = gr.Audio(type="filepath")
json_output = gr.JSON()

interface = gr.Interface(fn=diarization, inputs=audio_input, outputs=json_output)

# Start the Gradio server with its default settings.
interface.launch()
|
|