chinhon's picture
ver1.0
56745c1
raw
history blame
1.31 kB
import gradio as gr
from pytube import YouTube
import whisper
# Define the transcription function.
def whisper_transcript(model_size, url, audio_file):
    """Transcribe a YouTube video's audio track or an uploaded audio file.

    Args:
        model_size: Name of the Whisper model, passed unchanged to
            ``whisper.load_model``.
        url: YouTube link. When non-empty it takes precedence over
            ``audio_file``.
        audio_file: Audio source handed to the model's ``transcribe`` when
            no link is given.

    Returns:
        The ``"text"`` entry of the result returned by ``transcribe``.

    Raises:
        ValueError: If neither a link nor an audio file is supplied, or if
            the linked video exposes no audio-only stream.
    """
    # Treat None and whitespace-only links the same as an empty link.
    url = (url or "").strip()

    # Resolve the audio source before loading the model so that invalid
    # input fails fast with a clear message.
    if url:
        audio_stream = YouTube(url).streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise ValueError(
                "No audio-only stream is available for this YouTube link."
            )
        source = audio_stream.download(filename="audio.mp4")
    elif audio_file:
        source = audio_file
    else:
        raise ValueError(
            "Please paste a YouTube link or upload an audio clip."
        )

    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source)
    return transcript["text"]
# Define the Gradio interface.
# The input components are listed in the same order as the parameters of
# whisper_transcript: model_size, url, audio_file.
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    title="Transcribe multi-lingual audio clips with Whisper",
    description="**How to use**: Select a model, paste in a Youtube link or upload an audio clip, then click submit.",
    article="**Note**: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=["base", "small", "medium", "large"],
            value="base",
        ),
        gr.Textbox(label="Paste YouTube link here"),
        gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    ],
    # Use the top-level component, matching the inputs above, instead of the
    # deprecated gr.outputs namespace.
    outputs=gr.Textbox(label="Whisper Transcript"),
)

# Call queue() on the interface, then launch the app.
gradio_ui.queue().launch()