import gradio as gr import whisper from pytube import YouTube loaded_model = whisper.load_model("base") current_size = 'base' def inference(link): yt = YouTube(link) path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4") options = whisper.DecodingOptions(without_timestamps=True) results = loaded_model.transcribe(path) return results['text'] def change_model(size): if size == current_size: return loaded_model = whisper.load_model(size) current_size = size def populate_metadata(link): yt = YouTube(link) return yt.thumbnail_url, yt.title title="Youtube Whisperer" description="Speech to text transcription of Youtube videos using OpenAI's Whisper" block = gr.Blocks() with block: gr.HTML( """
Speech to text transcription of Youtube videos using OpenAI's Whisper