import gradio as gr
from pytube import YouTube
import whisper

# define function for transcription
def whisper_transcript(model_size, url, audio_file):
    if url:
        link = YouTube(url)
        source = link.streams.filter(only_audio=True)[0].download(filename="audio.mp4")

    else:
        source = audio_file

    if model_size.endswith(".en"):
        language = "english"

    else:
        language = None

    options = whisper.DecodingOptions(without_timestamps=True)

    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source, language=language)

    return transcript["text"]

# define Gradio app interface
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    title="Transcribe multi-lingual audio clips with Whisper",
    description="**How to use**: Select a model, paste in a Youtube link or upload an audio clip, then click submit. Select models ending in '.en' if your clip is in English. For clips in other languages, select models without '.en'",
    article="**Note**: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=[
                "tiny.en",
                "base.en",
                "small.en",
                "medium.en",
                "tiny",
                "base",
                "small",
                "medium",
                "large",
            ],
            value="base",
        ),
        gr.Textbox(label="Paste YouTube link here"),
        gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    ],
    outputs=gr.outputs.Textbox(label="Whisper Transcript"),
)

gradio_ui.queue().launch()