"""Gradio app that transcribes the audio track of a YouTube video with Whisper."""

import os
import re

import gradio as gr
import whisper
import yt_dlp

# Loaded once at import time so every request reuses the same model instance.
model = whisper.load_model("base")

# A single whitespace character that directly follows '.', ':' or ';'.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.:;])\s')


def get_audio(url):
    """Download the best available audio stream for *url*.

    Args:
        url: Address of the video to fetch. Playlists are not expanded
            ('noplaylist' is set).

    Returns:
        The file name yt-dlp derives from the output template
        ('<title>.<ext>', relative to the current working directory).

    Raises:
        gr.Error: If yt-dlp fails for any reason; the original exception is
            chained as the cause.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'noplaylist': True,
        'quiet': True,
        # Output template; the same template is what prepare_filename()
        # expands below to obtain the path of the downloaded file.
        'outtmpl': '%(title)s.%(ext)s',
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            return ydl.prepare_filename(info)
    except Exception as exc:
        raise gr.Error(f"Exception: {exc}") from exc


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is missing or locked."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup is best-effort; a leftover file must not fail the request.
        pass


def get_text(url):
    """Transcribe the audio of the video at *url*.

    Args:
        url: Video address as typed by the user.

    Returns:
        The stripped transcript text, or a prompt message when *url* is
        empty, whitespace-only or None.

    Raises:
        gr.Error: If the download or the transcription fails.
    """
    if not url or not url.strip():
        return "Please enter a YouTube video URL."

    try:
        audio_file = get_audio(url.strip())
        try:
            result = model.transcribe(audio_file)
        finally:
            # The audio is only needed for transcription; remove it so that
            # downloads do not pile up in the working directory.
            _remove_quietly(audio_file)
        return result['text'].strip()
    except gr.Error:
        # Already a user-facing error (e.g. from get_audio); re-wrapping it
        # would produce a doubled "Exception: Exception: ..." message.
        raise
    except Exception as exc:
        raise gr.Error(f"Exception: {exc}") from exc


def get_summary(article):
    """Return the first five sentence-like chunks of *article*, space-joined.

    Chunks are delimited by whitespace that follows '.', ':' or ';'.

    Raises:
        gr.Error: If *article* cannot be split (e.g. it is not a string).
    """
    try:
        # maxsplit=5 yields at most six pieces; only the first five are kept,
        # so the remainder of the text is never split needlessly.
        chunks = _SENTENCE_BOUNDARY.split(article, maxsplit=5)[:5]
        return ' '.join(chunks)
    except Exception as exc:
        raise gr.Error(f"Exception: {exc}") from exc


with gr.Blocks() as demo:
    gr.Markdown(
        "\n\nFree Fast YouTube URL Video-to-Text using OpenAI's Whisper Model\n\n"
    )
    gr.Markdown(
        "\nEnter the link of any YouTube video to generate a text transcript "
        "of the video.\n"
    )
    gr.Markdown(
        "\n'Whisper is a neural net that approaches human level robustness "
        "and accuracy on English speech recognition.'\n"
    )
    gr.Markdown(
        "\nTranscription takes 5-10 seconds per minute of the video "
        "(bad audio/hard accents slow it down a bit). #patience\n"
        "If you have time while waiting, check out my AI blog "
        "(opens in new tab).\n"
    )

    input_text_url = gr.Textbox(placeholder='Youtube video URL', label='URL')
    result_button_transcribe = gr.Button('1. Transcribe')
    output_text_transcribe = gr.Textbox(
        placeholder='Transcript of the YouTube video.',
        label='Transcript',
    )

    result_button_transcribe.click(
        get_text,
        inputs=input_text_url,
        outputs=output_text_transcribe,
    )

# NOTE(review): `default_enabled` looks like a legacy Gradio argument; confirm
# it is still accepted by the installed Gradio version before upgrading.
demo.queue(default_enabled=True).launch(debug=True)