Jesuscarr committed on
Commit
8748207
1 Parent(s): b97a3c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -25
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import torch
2
-
3
  import gradio as gr
4
  import yt_dlp as youtube_dl
5
  from transformers import pipeline
6
  from transformers.pipelines.audio_utils import ffmpeg_read
7
-
8
  import tempfile
9
  import os
10
 
@@ -22,7 +20,6 @@ pipe = pipeline(
22
  device=device,
23
  )
24
 
25
-
26
  def transcribe(microphone, file_upload, task):
27
  warn_output = ""
28
  if (microphone is not None) and (file_upload is not None):
@@ -34,19 +31,18 @@ def transcribe(microphone, file_upload, task):
34
  elif (microphone is None) and (file_upload is None):
35
  raise gr.Error("You have to either use the microphone or upload an audio file")
36
 
37
- file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
 
38
  if file_size_mb > FILE_LIMIT_MB:
39
  raise gr.Error(
40
- f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
41
  )
42
 
43
- file = microphone if microphone is not None else file_upload
44
-
45
- text = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task})["text"]
46
 
47
  return warn_output + text
48
 
49
-
50
  def _return_yt_html_embed(yt_url):
51
  video_id = yt_url.split("?v=")[-1]
52
  HTML_str = (
@@ -61,18 +57,9 @@ def download_yt_audio(yt_url, filename):
61
  info = info_loader.extract_info(yt_url, download=False)
62
  except youtube_dl.utils.DownloadError as err:
63
  raise gr.Error(str(err))
64
- file_length = info["duration_string"]
65
- file_h_m_s = file_length.split(":")
66
- file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
67
- if len(file_h_m_s) == 1:
68
- file_h_m_s.insert(0, 0)
69
- if len(file_h_m_s) == 2:
70
- file_h_m_s.insert(0, 0)
71
- file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
72
  if file_length_s > YT_LENGTH_LIMIT_S:
73
- yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
74
- file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
75
- raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
76
  ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
77
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
78
  try:
@@ -80,7 +67,6 @@ def download_yt_audio(yt_url, filename):
80
  except youtube_dl.utils.ExtractorError as err:
81
  raise gr.Error(str(err))
82
 
83
-
84
  def yt_transcribe(yt_url, task, max_filesize=75.0):
85
  html_embed_str = _return_yt_html_embed(yt_url)
86
 
@@ -93,11 +79,10 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
93
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
94
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
95
 
96
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task})["text"]
97
 
98
  return html_embed_str, text
99
 
100
-
101
  demo = gr.Blocks()
102
 
103
  mf_transcribe = gr.Interface(
@@ -140,5 +125,4 @@ yt_transcribe = gr.Interface(
140
  with demo:
141
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
142
 
143
- demo.launch(enable_queue=True)
144
-
 
1
  import torch
 
2
  import gradio as gr
3
  import yt_dlp as youtube_dl
4
  from transformers import pipeline
5
  from transformers.pipelines.audio_utils import ffmpeg_read
 
6
  import tempfile
7
  import os
8
 
 
20
  device=device,
21
  )
22
 
 
23
  def transcribe(microphone, file_upload, task):
24
  warn_output = ""
25
  if (microphone is not None) and (file_upload is not None):
 
31
  elif (microphone is None) and (file_upload is None):
32
  raise gr.Error("You have to either use the microphone or upload an audio file")
33
 
34
+ file = microphone if microphone is not None else file_upload
35
+ file_size_mb = os.stat(file).st_size / (1024 * 1024)
36
  if file_size_mb > FILE_LIMIT_MB:
37
  raise gr.Error(
38
+ f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
39
  )
40
 
41
+ text = pipe(file, batch_size=BATCH_SIZE,
42
+ generate_kwargs={"task": task})["text"]
 
43
 
44
  return warn_output + text
45
 
 
46
  def _return_yt_html_embed(yt_url):
47
  video_id = yt_url.split("?v=")[-1]
48
  HTML_str = (
 
57
  info = info_loader.extract_info(yt_url, download=False)
58
  except youtube_dl.utils.DownloadError as err:
59
  raise gr.Error(str(err))
60
+ file_length_s = info.get('duration', 0)
 
 
 
 
 
 
 
61
  if file_length_s > YT_LENGTH_LIMIT_S:
62
+ raise gr.Error(f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, got {file_length_s} seconds YouTube video.")
 
 
63
  ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
64
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
65
  try:
 
67
  except youtube_dl.utils.ExtractorError as err:
68
  raise gr.Error(str(err))
69
 
 
70
  def yt_transcribe(yt_url, task, max_filesize=75.0):
71
  html_embed_str = _return_yt_html_embed(yt_url)
72
 
 
79
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
80
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
81
 
82
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task})["text"]
83
 
84
  return html_embed_str, text
85
 
 
86
  demo = gr.Blocks()
87
 
88
  mf_transcribe = gr.Interface(
 
125
  with demo:
126
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
127
 
128
+ demo.launch(enable_queue=True)