import functools
import tempfile

import gradio as gr
from pytube import YouTube
import whisper

#define transcription function
def whisper_transcript(model_size, url, audio_file):
    if url:
        link = YouTube(url)
        source = link.streams.filter(only_audio=True)[0].download(filename="audio.mp4")

    else:
        source = audio_file

    options = whisper.DecodingOptions(without_timestamps=True)

    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source)

    return transcript["text"]

#DEFINE GRADIO INTERFACE
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    title="Transcribe multi-lingual audio clips with Whisper",
    description= "**How to use**: Select a model, paste in a Youtube link or upload an audio clip, then click submit.",
    article="**Note**: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=["base", "small", "medium", "large"],
            value="base",
        ),
        gr.Textbox(label="Paste YouTube link here"),
        gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    ],
    outputs=gr.outputs.Textbox(label="Whisper Transcript"),
)

gradio_ui.queue().launch()