alamin655 committed on
Commit
33ac9e4
1 Parent(s): 66efbc3

Add support for file size limits in audio and YouTube transcription, and use yt_dlp for video downloads

Browse files

This pull request adds file size limits to both audio and YouTube transcription. The transcribe function now checks the size of the uploaded file and raises an error if it exceeds the limit. In addition, YouTube videos are now downloaded with the yt_dlp library instead of pytube; the size of the downloaded file is checked against the same limit before it is transcribed.

Files changed (1) hide show
  1. app.py +16 -22
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import torch
2
-
3
  import gradio as gr
4
- import pytube as pt
5
  from transformers import pipeline
 
6
 
7
  MODEL_NAME = "openai/whisper-large-v2"
8
  BATCH_SIZE = 8
@@ -35,7 +35,7 @@ def transcribe(microphone, file_upload, task):
35
  elif (microphone is None) and (file_upload is None):
36
  raise gr.Error("You have to either use the microphone or upload an audio file")
37
 
38
- file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
39
  if file_size_mb > FILE_LIMIT_MB:
40
  raise gr.Error(
41
  f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
@@ -59,25 +59,19 @@ def _return_yt_html_embed(yt_url):
59
  return HTML_str
60
 
61
 
62
- def yt_transcribe(yt_url, task, max_filesize=75.0):
63
- yt = pt.YouTube(yt_url)
64
- html_embed_str = _return_yt_html_embed(yt_url)
65
- for attempt in range(YT_ATTEMPT_LIMIT):
66
  try:
67
- yt = pytube.YouTube(yt_url)
68
- stream = yt.streams.filter(only_audio=True)[0]
69
- break
70
- except KeyError:
71
- if attempt + 1 == YT_ATTEMPT_LIMIT:
72
- raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
73
-
74
- if stream.filesize_mb > max_filesize:
75
- raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
76
-
77
  pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
78
-
79
- text = pipe("audio.mp3", batch_size=BATCH_SIZE)["text"]
80
-
81
  return html_embed_str, text
82
 
83
 
@@ -120,8 +114,8 @@ yt_transcribe = gr.Interface(
120
  allow_flagging="never",
121
  )
122
 
 
123
  with demo:
124
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
125
 
126
- demo.launch(enable_queue=True)
127
-
 
1
  import torch
 
2
  import gradio as gr
3
+ import yt_dlp
4
  from transformers import pipeline
5
+ import os
6
 
7
  MODEL_NAME = "openai/whisper-large-v2"
8
  BATCH_SIZE = 8
 
35
  elif (microphone is None) and (file_upload is None):
36
  raise gr.Error("You have to either use the microphone or upload an audio file")
37
 
38
+ file_size_mb = os.stat(file_upload).st_size / (1024 * 1024)
39
  if file_size_mb > FILE_LIMIT_MB:
40
  raise gr.Error(
41
  f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
 
59
  return HTML_str
60
 
61
 
62
+ def yt_transcribe(yt_url, task, max_filesize=FILE_LIMIT_MB):
63
+ with yt_dlp.YoutubeDL({'format': 'bestaudio/best'}) as ydl:
 
 
64
  try:
65
+ info_dict = ydl.extract_info(yt_url, download=True)
66
+ a = ydl.prepare_filename(info_dict)
67
+ except Exception as e:
68
+ raise gr.Error(f"Error downloading YouTube video: {str(e)}")
69
+ html_embed_str = _return_yt_html_embed(yt_url)
70
+ if os.stat(a).st_size / (1024 * 1024) > max_filesize:
71
+ raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {os.stat(a).st_size / (1024 * 1024):.2f}MB.")
 
 
 
72
  pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
73
+ text = pipe(a, batch_size=BATCH_SIZE)["text"]
74
+ os.remove(a)
 
75
  return html_embed_str, text
76
 
77
 
 
114
  allow_flagging="never",
115
  )
116
 
117
+
118
  with demo:
119
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
120
 
121
+ demo.launch(enable_queue=True)