# on1onmangoes's picture
# Update app.py
# 1d48696 verified
# raw
# history blame
# 1.58 kB
import gradio as gr
import torch
import io
import base64
import numpy as np
import scipy.io.wavfile
from typing import Text
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from pyannote.core import Segment
import gradio as gr
import os
import yt_dlp as youtube_dl
from gradio_client import Client
from transformers.pipelines.audio_utils import ffmpeg_read
# Access token for the Hugging Face Hub, read from the environment
# (None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN")

# Load the pretrained speaker-diarization pipeline once at import time.
# Earlier revisions of this file tried "pyannote/speaker-diarization-3.0" and
# "pyannote/speaker-diarization"; 3.1 is the checkpoint currently in use.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=HF_TOKEN,
)

# Move the pipeline to the GPU when one is available.
if torch.cuda.is_available():
    cuda_device = torch.device("cuda")
    diarization_pipeline.to(cuda_device)
import gradio as gr
def transcribe(audio_path, num_speakers=2):
    """Run speaker diarization on an audio file.

    Args:
        audio_path: Path of the audio file to diarize; handed unchanged to
            ``diarization_pipeline``.
        num_speakers: Speaker count forwarded to the pipeline's
            ``num_speakers`` argument. Defaults to 2.

    Returns:
        The object returned by ``diarization_pipeline`` for this file.

    Raises:
        ValueError: If ``audio_path`` is ``None`` (no file was supplied).
    """
    # Fail with a clear message instead of an opaque error from deep inside
    # the pipeline when the caller supplies no audio.
    if audio_path is None:
        raise ValueError("No audio file was provided.")

    # Forward the caller's speaker count; previously the value 2 was
    # hard-coded here, so the `num_speakers` parameter was silently ignored.
    return diarization_pipeline(audio_path, num_speakers=num_speakers)
# Human-readable labels describing this demo.
title = "SAML Speaker Diarization ⚡️ "
description = " pyannote speaker diarization running locally"
article = "SAMLOne Speaker Segmentation or Diarization"
import gradio as gr
def greet(name):
    """Return a greeting of the form ``Hello <name>!!``."""
    salutation = "Hello " + name
    return salutation + "!!"
# Build the web UI: a single uploaded audio file goes to `transcribe`, and its
# result is rendered in a text output. The title/description/article strings
# defined earlier in this file are now passed through so the page shows them;
# previously they were defined but never used.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.inputs.Audio(
        source="upload",
        optional=True,
        label="Audio file",
        type="filepath",
    ),
    outputs="text",
    title=title,
    description=description,
    article=article,
)

# Start the Gradio server.
iface.launch()