import tempfile

import gradio as gr
import whisper
from pytube import YouTube

# define function for transcription
def whisper_transcript(model_size, url, audio_file):
    """Transcribe a YouTube video's audio track or a local audio file.

    When ``url`` is non-blank it takes precedence over ``audio_file``: the
    first audio-only stream is downloaded into a temporary directory that is
    removed once transcription finishes. Otherwise ``audio_file`` is
    transcribed directly.

    Args:
        model_size: Whisper model name passed to ``whisper.load_model``.
            Names ending in ``".en"`` are transcribed with
            ``language="english"``; for every other name the language is
            left as ``None``.
        url: YouTube link, or an empty/blank string (or ``None``) to use
            ``audio_file`` instead.
        audio_file: Path to an audio file, used only when ``url`` is blank.

    Returns:
        The ``"text"`` entry of the result returned by ``transcribe``.

    Raises:
        ValueError: If neither a URL nor an audio file is supplied, or if the
            video exposes no audio-only stream.
    """
    # Treat None and whitespace-only text the same as "no link given".
    url = (url or "").strip()
    if not url and not audio_file:
        raise ValueError(
            "Provide a YouTube link or upload an audio file to transcribe."
        )

    language = "english" if model_size.endswith(".en") else None

    if not url:
        return _run_whisper(model_size, audio_file, language)

    # Download into a per-call temporary directory so simultaneous requests
    # cannot overwrite each other's file and nothing is left behind on disk.
    with tempfile.TemporaryDirectory() as workdir:
        stream = YouTube(url).streams.filter(only_audio=True).first()
        if stream is None:
            raise ValueError("No audio stream is available for this YouTube link.")
        source = stream.download(output_path=workdir, filename="audio.mp4")
        return _run_whisper(model_size, source, language)


def _run_whisper(model_size, source, language):
    """Load the requested Whisper model and return the transcript text of ``source``."""
    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source, language=language)
    return transcript["text"]

# Gradio app interface: build the three input widgets first, then wire them
# to whisper_transcript and start the queued app.
_ENGLISH_ONLY_MODELS = ["tiny.en", "base.en", "small.en", "medium.en"]
_MULTILINGUAL_MODELS = ["tiny", "base", "small", "medium", "large"]

# Input order must match the parameter order of whisper_transcript:
# (model_size, url, audio_file).
_model_picker = gr.Dropdown(
    label="Select Model",
    choices=_ENGLISH_ONLY_MODELS + _MULTILINGUAL_MODELS,
    value="base",
)
_link_box = gr.Textbox(label="Paste YouTube link here")
_audio_input = gr.Audio(
    label="Upload Audio File",
    sources=["upload", "microphone"],
    type="filepath",
)

gradio_ui = gr.Interface(
    fn=whisper_transcript,
    inputs=[_model_picker, _link_box, _audio_input],
    outputs=gr.Textbox(label="Whisper Transcript"),
    title="Transcribe multi-lingual audio clips with Whisper",
    description="**How to use**: Select a model, paste in a Youtube link or upload an audio clip, then click submit. If your clip is **100% in English, select models ending in ‘.en’**. If the clip is in other languages, or a mix of languages, select models without ‘.en’",
    article="**Note**: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
)

gradio_ui.queue().launch()