free-fast-youtube-url-video-to-text-using-openai-whisper

Runtime error

App Files Files Community

syedusama5556 committed on Jan 11, 2024

Commit

cd2dfd4

verified ·

1 Parent(s): 4a71c2c

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -56

app.py CHANGED Viewed

@@ -1,61 +1,81 @@
-import whisper
-from pytube import YouTube
 import gradio as gr
 import os
-import re
 import logging
-logging.basicConfig(level=logging.INFO)
-model = whisper.load_model("base")
-def get_text(url):
-    #try:
-    if url != '':
-        output_text_transcribe = ''
-    yt = YouTube(url)
-    #video_length = yt.length --- doesn't work anymore - using byte file size of the audio file instead now
-    #if video_length < 5400:
-    video = yt.streams.filter(only_audio=True).first()
-    out_file=video.download(output_path=".")
-    file_stats = os.stat(out_file)
-    logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')
-    if file_stats.st_size <= 30000000:
-        base, ext = os.path.splitext(out_file)
-        new_file = base+'.mp3'
-        os.rename(out_file, new_file)
-        a = new_file
-        result = model.transcribe(a)
-        return result['text'].strip()
     else:
-        logging.error('Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.')
-    #finally:
-    #    raise gr.Error("Exception: There was a problem transcribing the audio.")
-def get_summary(article):
-    first_sentences = ' '.join(re.split(r'(?<=[.:;])\s', article)[:5])
-    b = summarizer(first_sentences, min_length = 20, max_length = 120, do_sample = False)
-    b = b[0]['summary_text'].replace(' .', '.').strip()
-    return b
-with gr.Blocks() as demo:
-    gr.Markdown("<h1><center>Free Fast YouTube URL Video-to-Text using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a> Model</center></h1>")
-    #gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video and then create a summary of the video transcript.</center>")
-    gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video.</center>")
-    gr.Markdown("<center><b>'Whisper is a neural net that approaches human level robustness and accuracy on English speech recognition.'</b></center>")
-    gr.Markdown("<center>Transcription takes 5-10 seconds per minute of the video (bad audio/hard accents slow it down a bit). #patience<br />If you have time while waiting, drop a ♥️ and check out my <a href=https://www.artificial-intelligence.blog target=_blank>AI blog</a> (opens in new tab).</center>")
-    input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL')
-    result_button_transcribe = gr.Button('Transcribe')
-    output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')
-    #result_button_summary = gr.Button('2. Create Summary')
-    #output_text_summary = gr.Textbox(placeholder='Summary of the YouTube video transcript.', label='Summary')
-    result_button_transcribe.click(get_text, inputs = input_text_url, outputs = output_text_transcribe)
-    #result_button_summary.click(get_summary, inputs = output_text_transcribe, outputs = output_text_summary)
-demo.queue(default_enabled = True).launch(debug = True)

 import gradio as gr
+import yt_dlp as yt
+import whisper
 import os
+import torch
 import logging
+# Set up logging
+logging.basicConfig(filename='transcription_logs.txt', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Create a temporary download folder if it doesn't exist
+temp_download_dir = os.path.join(os.getcwd(), "temp_download")
+os.makedirs(temp_download_dir, exist_ok=True)
+# Function to download audio from the given URL
+def download_audio(url):
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'outtmpl': os.path.join(temp_download_dir, '%(title)s.%(ext)s'),
+    }
+    with yt.YoutubeDL(ydl_opts) as ydl:
+        info_dict = ydl.extract_info(url, download=True)
+        downloaded_file = ydl.prepare_filename(info_dict)
+        # Generate a new file name by replacing spaces with underscores
+        new_filename = os.path.join(temp_download_dir, os.path.basename(downloaded_file).replace(" ", "_"))
+        # Check if the new file name exists and create a unique name if necessary
+        base, extension = os.path.splitext(new_filename)
+        counter = 1
+        while os.path.exists(new_filename):
+            new_filename = f"{base}_{counter}{extension}"
+            counter += 1
+        # Rename the file
+        os.rename(downloaded_file, new_filename)
+        if os.path.exists(new_filename):
+            return new_filename
+        else:
+            raise Exception("Failed to download and rename audio file.")
+# Function to transcribe audio to SRT format
+def transcribe_to_srt(file_path):
+    if torch.cuda.is_available():
+        model = whisper.load_model("medium", device="cuda")
     else:
+        model = whisper.load_model("medium")
+    result = model.transcribe(file_path)
+    srt_content = ""
+    for i, segment in enumerate(result["segments"]):
+        start = segment["start"]
+        end = segment["end"]
+        text = segment["text"]
+        srt_content += f"{i + 1}\n"
+        srt_content += f"{start:.3f}".replace(".", ",") + " --> " + f"{end:.3f}".replace(".", ",") + "\n"
+        srt_content += text + "\n\n"
+    return srt_content
+def transcribe_video(url):
+    try:
+        logging.info(f"Transcribing video from URL: {url}")
+        audio_file = download_audio(url)
+        logging.info(f"Downloaded audio file: {audio_file}")
+        srt_content = transcribe_to_srt(audio_file)
+        logging.info("Transcription completed successfully!")
+        # Optionally, remove the audio file after transcription
+        # os.remove(audio_file)
+        return srt_content
+    except Exception as e:
+        logging.error(f"An error occurred: {e}")
+        return f"An error occurred: {e}"
+iface = gr.Interface(fn=transcribe_video, inputs="text", outputs="text", live=True, title="YouTube/TikTok Video to SRT Transcription")
+# Display the logs in the interface
+log_viewer = gr.Textbox(text="Logs will appear here...", readonly=True, height=200)
+log_handler = logging.StreamHandler(log_viewer)
+log_handler.setLevel(logging.INFO)
+logging.getLogger().addHandler(log_handler)
+iface.launch()