# app.py — Hugging Face Space by on1onmangoes (commit 8e06021)
# NOTE(review): the lines above were web-page chrome ("raw / history blame / 2.37 kB")
# captured when the file was scraped; folded into this comment so the module parses.
import gradio as gr
import torch
import io
import base64
import numpy as np
import scipy.io.wavfile
from typing import Text
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from pyannote.core import Segment
import gradio as gr
import yt_dlp as youtube_dl
from gradio_client import Client
from transformers.pipelines.audio_utils import ffmpeg_read
import pyannote.core.json
# SECURITY: the Hugging Face access token was previously hard-coded here in
# plain text. Read it from the environment instead, falling back to the
# previously committed value so existing deployments keep working.
# NOTE(review): the committed token is now public and should be revoked and
# rotated on huggingface.co regardless of this change.
import os

HF_TOKEN = os.environ.get("HF_TOKEN", "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")

# Load the pretrained speaker-diarization pipeline once at import time so
# every request served by the app reuses the same model instance.
# (Downloads model weights on first run — requires network access.)
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.0", use_auth_token=HF_TOKEN)
def transcribe(audio_path, task="transcribe", group_by_speaker=True, progress=gr.Progress()):
    """Diarize an audio file and return its speaker segments.

    The Whisper-JAX transcription and alignment steps are currently disabled
    (see the commented-out client code below), so the function returns the
    raw diarization segments directly.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the audio file (Gradio passes type="filepath").
    task : str
        "transcribe" or "translate" — unused while the ASR step is disabled.
    group_by_speaker : bool
        Whether to merge segments per speaker — unused while alignment is disabled.
    progress : gr.Progress
        Gradio progress tracker (default-instance pattern is the documented
        Gradio idiom for progress reporting).

    Returns
    -------
    list
        JSON-serializable diarization segments (the "content" list of the
        pyannote Annotation's JSON representation).
    """
    # # run Whisper JAX asynchronously using Gradio client (endpoint)
    # job = client.submit(
    #     audio_path,
    #     task,
    #     True,
    #     api_name="/predict_1",
    # )
    progress(0, desc="Diarizing...")
    # BUG FIX: `diarization` was referenced below without ever being assigned
    # (the pipeline call had been commented out), so every request raised
    # NameError. Run the module-level diarization pipeline here.
    diarization = diarization_pipeline(audio_path)
    # BUG FIX: pyannote.core.json exposes dumps()/loads(), not for_json();
    # for_json() is a method on the Annotation object itself (as the original
    # commented-out code also shows).
    diarization_json = diarization.for_json()
    segments = diarization_json["content"]
    transcription = segments
    # # only fetch the transcription result after performing diarization
    # progress(0.33, desc="Transcribing...")
    # transcription, _ = job.result()
    # # align the ASR transcriptions and diarization timestamps
    # progress(0.66, desc="Aligning...")
    # transcription = align(transcription, segments, group_by_speaker=group_by_speaker)
    return transcription
# Build the Gradio UI around `transcribe`.
# NOTE(review): gr.inputs / gr.outputs and .style() are the legacy Gradio 3.x
# API and were removed in Gradio 4 — pin gradio<4, or migrate to gr.Audio /
# gr.Radio / gr.Checkbox / gr.Textbox when upgrading.
audio_file = gr.Interface(
    fn=transcribe,
    inputs=[
        # optional=True lets the form submit without a file;
        # type="filepath" hands transcribe() a path on disk, not raw samples.
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=True, label="Group by speaker"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Transcription").style(show_copy_button=True),
    ]
    # allow_flagging="never",
    # title=title,
    # description=description,
    # article=article,
)
# Start the web app (blocking call).
audio_file.launch()
# demo = gr.Blocks()
# demo.queue(max_size=10)
# demo.launch()
# def greet(name):
# return "Hello " + name + "!!"
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()