Spaces:
Sleeping
Sleeping
File size: 1,910 Bytes
763f6d6 c153aa4 7c5e2f5 c153aa4 ecbfc2d 3ddb6ef c153aa4 93471da 73b9216 2d076c6 7a90588 0256fc1 6185734 c6d9314 7104546 d7511d4 2d076c6 08aba45 39fde0b 1d48696 39fde0b 1d48696 39fde0b 3680dfd c153aa4 94c77bb 9324d2a 0f9bdc0 08aba45 5af6446 08aba45 7bb0efa be4e1da 6c3fca7 be4e1da 08aba45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import gradio as gr
import torch
import io
import base64
import numpy as np
import scipy.io.wavfile
from typing import Text
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from pyannote.core import Segment
import gradio as gr
import os
import yt_dlp as youtube_dl
from gradio_client import Client
from transformers.pipelines.audio_utils import ffmpeg_read
# Hugging Face access token read from the environment; needed to download the
# gated pyannote models. May be None if unset (loading a gated repo will then fail).
HF_TOKEN = os.environ.get("HF_TOKEN")
# set up the diarization pipeline
# NOTE(review): older pipeline versions kept for reference.
#diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.0", use_auth_token=HF_TOKEN)
#diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HF_TOKEN)
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN)
# Move inference to the GPU when one is available; otherwise the pipeline stays on CPU.
if torch.cuda.is_available():
    diarization_pipeline.to(torch.device("cuda"))
import gradio as gr
def transcribe(audio_path, num_speakers=2):
    """Run speaker diarization on an audio file.

    Args:
        audio_path: Filesystem path to the audio file to diarize.
        num_speakers: Expected number of speakers (default 2).

    Returns:
        The diarization result produced by the pyannote pipeline.
    """
    # Bug fix: the call previously hard-coded num_speakers=2, silently
    # ignoring the num_speakers argument. Pass the parameter through.
    diarization = diarization_pipeline(audio_path, num_speakers=num_speakers)
    return diarization
# UI copy for the Gradio app (title, description, footer article).
# NOTE(review): these are not passed to the gr.Blocks UI below — presumably
# leftovers from an earlier gr.Interface version; confirm before removing.
title = "SAML Speaker Diarization ⚡️ "
description = """ pyannote speaker diarization running locally"""
article = """SAMLOne Speaker Segmentation or Diarization"""
import gradio as gr
def greet(name):
    """Return a friendly greeting for *name*.

    Args:
        name: The name to greet.

    Returns:
        The string "Hello <name>!!".
    """
    # f-string instead of manual concatenation (idiomatic, single allocation).
    return f"Hello {name}!!"
# Earlier single-call gr.Interface wiring, kept for reference:
# iface = gr.Interface(fn=transcribe, inputs=gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"), outputs="text")
# iface.launch()
# Gradio Blocks UI: one audio upload feeding transcribe(), one textbox for the
# result (the returned diarization object is stringified by the Textbox).
with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    # type="filepath" delivers the upload to transcribe() as a path string.
    audio_input = gr.Audio(type="filepath")
    text_output = gr.Textbox( label="speaker diarization")
    speaker_diarization_button = gr.Button("Submit")
    # Only the audio path is wired in, so transcribe's num_speakers always
    # takes its default value here.
    speaker_diarization_button.click(fn=transcribe, inputs=[audio_input], outputs=[text_output])
# debug=True surfaces server-side errors in the console/UI.
demo.launch(debug=True)
|