whisper-large-v2

Running on T4

App Files Files Community

alamin655 committed on May 1, 2023

Commit

33ac9e4

•

1 Parent(s): 66efbc3

Add support for file size limits in audio and YouTube transcription, and use yt_dlp for video downloads

Browse files

This pull request adds support for handling file size limits when transcribing audio files and YouTube videos. The transcribe function has been modified to check the size of the uploaded file and raise an error if it exceeds the limit. Additionally, the yt_dlp library is now used in place of pytube to download YouTube videos, and the size of the downloaded file is checked against the same limit before transcription.

Files changed (1) hide show

app.py +16 -22

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import torch
 import gradio as gr
-import pytube as pt
 from transformers import pipeline
 MODEL_NAME = "openai/whisper-large-v2"
 BATCH_SIZE = 8
@@ -35,7 +35,7 @@ def transcribe(microphone, file_upload, task):
     elif (microphone is None) and (file_upload is None):
         raise gr.Error("You have to either use the microphone or upload an audio file")
-    file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
     if file_size_mb > FILE_LIMIT_MB:
         raise gr.Error(
                 f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
@@ -59,25 +59,19 @@ def _return_yt_html_embed(yt_url):
     return HTML_str
-def yt_transcribe(yt_url, task, max_filesize=75.0):
-    yt = pt.YouTube(yt_url)
-    html_embed_str = _return_yt_html_embed(yt_url)
-    for attempt in range(YT_ATTEMPT_LIMIT):
         try:
-            yt = pytube.YouTube(yt_url)
-            stream = yt.streams.filter(only_audio=True)[0]
-            break
-        except KeyError:
-            if attempt + 1 == YT_ATTEMPT_LIMIT:
-                raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
-    if stream.filesize_mb > max_filesize:
-        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
     pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
-    text = pipe("audio.mp3", batch_size=BATCH_SIZE)["text"]
     return html_embed_str, text
@@ -120,8 +114,8 @@ yt_transcribe = gr.Interface(
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
-demo.launch(enable_queue=True)

 import torch
 import gradio as gr
+import yt_dlp
 from transformers import pipeline
+import os
 MODEL_NAME = "openai/whisper-large-v2"
 BATCH_SIZE = 8
     elif (microphone is None) and (file_upload is None):
         raise gr.Error("You have to either use the microphone or upload an audio file")
+    file_size_mb = os.stat(file_upload).st_size / (1024 * 1024)
     if file_size_mb > FILE_LIMIT_MB:
         raise gr.Error(
                 f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
     return HTML_str
+def yt_transcribe(yt_url, task, max_filesize=FILE_LIMIT_MB):
+    with yt_dlp.YoutubeDL({'format': 'bestaudio/best'}) as ydl:
         try:
+            info_dict = ydl.extract_info(yt_url, download=True)
+            a = ydl.prepare_filename(info_dict)
+        except Exception as e:
+            raise gr.Error(f"Error downloading YouTube video: {str(e)}")
+    html_embed_str = _return_yt_html_embed(yt_url)
+    if os.stat(a).st_size / (1024 * 1024) > max_filesize:
+        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {os.stat(a).st_size / (1024 * 1024):.2f}MB.")
     pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
+    text = pipe(a, batch_size=BATCH_SIZE)["text"]
+    os.remove(a)
     return html_embed_str, text
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
+demo.launch(enable_queue=True)