Spaces:

danilotpnta
/

Youtube-Whisper

Runtime error

App Files Community

danilotpnta committed on Sep 12, 2024

Commit

59fe5e1

1 Parent(s): 051ee03

feat: transcribe works now (2min base -> 9sec)

Browse files

Files changed (1) hide show

app.py +46 -22

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ import whisper
 import gradio as gr
 import os
-# Function to download the audio from YouTube using yt-dlp
-def download_audio(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'outtmpl': 'audio.%(ext)s',
@@ -17,35 +17,59 @@ def download_audio(url):
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
         audio_file = "audio.mp3"
-        return audio_file
     except Exception as e:
-        return str(e)  # Return the error message for debugging
 # Function to transcribe the downloaded audio using Whisper
-def transcribe_audio(audio_path):
-    model = whisper.load_model("base")  # Use other models like "small", "medium", "large" if necessary
     result = model.transcribe(audio_path)
     return result['text']
-# Main function to integrate download and transcription
-def transcribe_youtube_video(youtube_url):
-    audio_path = download_audio(youtube_url)
-    if not os.path.exists(audio_path):  # Check if an error was returned
-        return f"Error: {audio_path}"  # Return the error message to the user
-    transcription = transcribe_audio(audio_path)
-    return transcription
 # Gradio interface setup using gradio.components
-interface = gr.Interface(
-    fn=transcribe_youtube_video,
-    inputs=gr.components.Textbox(label="YouTube URL"),
-    outputs=gr.components.Textbox(label="Transcription"),
-    title="YouTube Video Transcription",
-    description="Paste a YouTube video link to get the audio transcribed using Whisper."
-)
 # Launch the app
 if __name__ == "__main__":
-    interface.launch(share=True)  # Enables sharing with public link

 import gradio as gr
 import os
+# Function to download the audio and extract metadata from YouTube
+def download_video_info(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'outtmpl': 'audio.%(ext)s',
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=False)  # Extract video info
+            title = info.get('title', 'Unknown Title')
+            thumbnail_url = info.get('thumbnail', '')
+            ydl.download([url])  # Download the audio
         audio_file = "audio.mp3"
+        return audio_file, title, thumbnail_url
     except Exception as e:
+        return None, None, str(e)
 # Function to transcribe the downloaded audio using Whisper
+def transcribe_audio(audio_path, model_size="base"):
+    model = whisper.load_model(model_size)
     result = model.transcribe(audio_path)
     return result['text']
+# Split logic: First fetch title and thumbnail, then transcribe
+def get_video_info_and_transcribe(youtube_url, model_size="base"):
+    # Fetch title and thumbnail first
+    audio_path, title, thumbnail_url = download_video_info(youtube_url)
+    # If fetching video info fails
+    if not audio_path or not os.path.exists(audio_path):
+        return gr.update(value=f"Error fetching video: {thumbnail_url}"), None, None, None
+    # Show title and thumbnail to the user while the transcription is happening
+    title_output = gr.update(value=title)
+    thumbnail_output = gr.update(value=thumbnail_url)
+    # Start transcription
+    transcription = transcribe_audio(audio_path, model_size)
+    return title_output, thumbnail_output, gr.update(value=transcription)
 # Gradio interface setup using gradio.components
+with gr.Blocks() as interface:
+    with gr.Row():
+        youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5)
+        model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1)
+    title_output = gr.Textbox(label="Video Title", interactive=False)
+    with gr.Row():
+        thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1)
+        transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1)
+    transcribe_button = gr.Button("Transcribe")
+    transcribe_button.click(
+        get_video_info_and_transcribe,
+        inputs=[youtube_url, model_size],
+        outputs=[title_output, thumbnail_output, transcription_output]
+    )
 # Launch the app
 if __name__ == "__main__":
+    interface.launch(share=True)