"""Gradio app that transcribes a YouTube link or an uploaded audio clip with Whisper."""

import tempfile
from functools import lru_cache

import gradio as gr
import whisper
from pytube import YouTube


@lru_cache(maxsize=1)
def _load_model(model_size):
    """Return the Whisper model for *model_size*, reusing the last one loaded.

    Only a single model is kept cached so that switching sizes does not
    accumulate several large models in memory.
    """
    return whisper.load_model(model_size)


# Define transcription function
def whisper_transcript(model_size, url, audio_file):
    """Transcribe audio from a YouTube link or an uploaded file.

    Args:
        model_size: Name of the Whisper model to load (e.g. ``"base"``).
        url: YouTube link; takes precedence over ``audio_file`` when non-blank.
        audio_file: Path to an uploaded audio file, used when ``url`` is blank.

    Returns:
        The transcript text produced by Whisper.

    Raises:
        gr.Error: If neither a link nor an audio file was provided, or the
            video exposes no audio-only stream.
    """
    url = (url or "").strip()

    if url:
        stream = YouTube(url).streams.filter(only_audio=True).first()
        if stream is None:
            raise gr.Error("No audio-only stream was found for this YouTube link.")
        # Download into a per-request temporary directory so the file is
        # removed afterwards and overlapping requests cannot overwrite
        # each other's "audio.mp4".
        with tempfile.TemporaryDirectory() as workdir:
            source = stream.download(output_path=workdir, filename="audio.mp4")
            return _load_model(model_size).transcribe(source)["text"]

    if not audio_file:
        raise gr.Error("Please paste a YouTube link or upload an audio clip.")

    return _load_model(model_size).transcribe(audio_file)["text"]


# Define Gradio interface
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    title="Transcribe multi-lingual audio clips with Whisper",
    description="**How to use**: Select a model, paste in a Youtube link or upload an audio clip, then click submit.",
    article="**Note**: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=["base", "small", "medium", "large"],
            value="base",
        ),
        gr.Textbox(label="Paste YouTube link here"),
        gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    ],
    # gr.Textbox replaces the deprecated gr.outputs.Textbox namespace.
    outputs=gr.Textbox(label="Whisper Transcript"),
)

# Launched at import time, as in the original script.
gradio_ui.queue().launch()