# Hugging Face Space — status when this page was captured: Runtime error
import gradio as gr | |
import torch | |
import io | |
import base64 | |
import numpy as np | |
import scipy.io.wavfile | |
from typing import Text | |
from pyannote.audio import Pipeline | |
from pyannote.audio import Audio | |
from pyannote.core import Segment | |
import gradio as gr | |
import yt_dlp as youtube_dl | |
from gradio_client import Client | |
from transformers.pipelines.audio_utils import ffmpeg_read | |
import pyannote.core.json | |
import os

# Hugging Face access token used to download the gated pyannote model.
# It is read from the environment (e.g. a Space secret named HF_TOKEN) rather
# than stored in the source file. NOTE: a token was previously committed here
# in plain text; that token should be considered leaked and revoked.
# When the variable is unset this is None and the value is passed through
# unchanged to `Pipeline.from_pretrained`.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Set up the diarization pipeline once at import time so every request reuses it.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.0",
    use_auth_token=HF_TOKEN,
)
def transcribe(audio_path, task="transcribe", group_by_speaker=True, progress=gr.Progress()):
    """Diarize an audio file and return its speaker turns.

    Only the diarization step is active at the moment; the Whisper
    transcription and alignment steps are disabled (see the TODO below), so
    the returned "transcription" is the raw list of diarization segments.

    Args:
        audio_path: Path of the audio file to process, as provided by the
            Gradio audio input. ``None`` when nothing was uploaded.
        task: ``"transcribe"`` or ``"translate"``. Currently unused; kept for
            the disabled Whisper step.
        group_by_speaker: Currently unused; kept for the disabled alignment
            step.
        progress: Gradio progress tracker, injected by Gradio at call time.

    Returns:
        A list of dicts, one per speaker turn, of the form
        ``{"segment": {"start": float, "end": float}, "track": ..., "label": ...}``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_path is None:
        # The audio input is declared optional, so a submit without an upload
        # reaches this function with None; report it in the UI instead of
        # letting the pipeline fail on it.
        raise gr.Error("Please upload an audio file before submitting.")

    # TODO: re-enable Whisper JAX. The intended flow is to submit `audio_path`
    # and `task` to the remote endpoint (api_name="/predict_1") through the
    # Gradio client before diarizing, fetch the result afterwards, and then
    # call align(transcription, segments, group_by_speaker=group_by_speaker).

    progress(0, desc="Diarizing...")
    # The original code used `diarization` without ever assigning it (the
    # pipeline call was commented out), which raised NameError on every call.
    diarization = diarization_pipeline(audio_path)

    # Build the JSON-friendly segment list directly from the annotation so the
    # result does not depend on a JSON helper module.
    segments = [
        {
            "segment": {"start": turn.start, "end": turn.end},
            "track": track,
            "label": speaker,
        }
        for turn, track, speaker in diarization.itertracks(yield_label=True)
    ]

    return segments
# Gradio UI: one uploaded audio file plus two options in, the result of
# `transcribe` rendered as text out.
# NOTE(review): `gr.inputs` / `gr.outputs` and `.style()` look like legacy
# Gradio 3 APIs — confirm the pinned Gradio version before migrating them.
audio_file = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=True, label="Group by speaker"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Transcription").style(show_copy_button=True),
    ],
    # allow_flagging="never",
    # title=title,
    # description=description,
    # article=article,
)

# `transcribe` takes a `gr.Progress` tracker, and progress tracking needs the
# request queue. Enabling it explicitly keeps the app launchable in
# environments where the queue is not turned on by default.
audio_file.queue()
audio_file.launch()
# demo = gr.Blocks() | |
# demo.queue(max_size=10) | |
# demo.launch() | |
# def greet(name): | |
# return "Hello " + name + "!!" | |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text") | |
# iface.launch() |