nithinraok committed on
Commit
ce8a201
1 Parent(s): 5ffda4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -1,15 +1,20 @@
1
  from nemo.collections.asr.models import EncDecRNNTBPEModel
 
 
 
 
2
  import gradio as gr
3
  from pydub import AudioSegment
4
 
5
  device = "cuda" if torch.cuda.is_available() else "cpu"
6
  MODEL_NAME="nvidia/parakeet-rnnt-1.1b"
 
7
 
 
 
8
 
9
- def get_transcripts(audio_path):
10
 
11
- model = EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-rnnt-1.1b").to(device)
12
- model.eval()
13
  text = model.transcribe([audio_path])[0][0]
14
  return text
15
 
@@ -65,7 +70,7 @@ def download_yt_audio(yt_url, filename):
65
  raise gr.Error(str(err))
66
 
67
 
68
- def yt_transcribe(yt_url, task, max_filesize=75.0):
69
  html_embed_str = _return_yt_html_embed(yt_url)
70
 
71
  with tempfile.TemporaryDirectory() as tmpdirname:
@@ -82,12 +87,11 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
82
  demo = gr.Blocks()
83
 
84
  mf_transcribe = gr.Interface(
85
- fn=transcribe,
86
  inputs=[
87
- gr.inputs.Audio(source="microphone", type="filepath", optional=True)
88
  ],
89
  outputs="text",
90
- layout="horizontal",
91
  theme="huggingface",
92
  title="Parakeet RNNT 1.1B: Transcribe Audio",
93
  description=(
@@ -99,12 +103,11 @@ mf_transcribe = gr.Interface(
99
  )
100
 
101
  file_transcribe = gr.Interface(
102
- fn=transcribe,
103
  inputs=[
104
- gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
105
  ],
106
  outputs="text",
107
- layout="horizontal",
108
  theme="huggingface",
109
  title="Parakeet RNNT 1.1B: Transcribe Audio",
110
  description=(
@@ -115,13 +118,12 @@ file_transcribe = gr.Interface(
115
  allow_flagging="never",
116
  )
117
 
118
- yt_transcribe = gr.Interface(
119
  fn=yt_transcribe,
120
  inputs=[
121
- gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
122
  ],
123
  outputs=["html", "text"],
124
- layout="horizontal",
125
  theme="huggingface",
126
  title="Parakeet RNNT 1.1B: Transcribe Audio",
127
  description=(
@@ -135,4 +137,4 @@ yt_transcribe = gr.Interface(
135
  with demo:
136
  gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
137
 
138
- demo.launch(enable_queue=True)
 
1
  from nemo.collections.asr.models import EncDecRNNTBPEModel
2
+ import yt_dlp as youtube_dl
3
+ import os
4
+ import tempfile
5
+ import torch
6
  import gradio as gr
7
  from pydub import AudioSegment
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  MODEL_NAME="nvidia/parakeet-rnnt-1.1b"
11
+ YT_LENGTH_LIMIT_S=3600
12
 
13
+ model = EncDecRNNTBPEModel.from_pretrained(model_name=MODEL_NAME).to(device)
14
+ model.eval()
15
 
 
16
 
17
+ def get_transcripts(audio_path):
 
18
  text = model.transcribe([audio_path])[0][0]
19
  return text
20
 
 
70
  raise gr.Error(str(err))
71
 
72
 
73
+ def yt_transcribe(yt_url, max_filesize=75.0):
74
  html_embed_str = _return_yt_html_embed(yt_url)
75
 
76
  with tempfile.TemporaryDirectory() as tmpdirname:
 
87
  demo = gr.Blocks()
88
 
89
  mf_transcribe = gr.Interface(
90
+ fn=get_transcripts,
91
  inputs=[
92
+ gr.Audio(sources="microphone", type="filepath")
93
  ],
94
  outputs="text",
 
95
  theme="huggingface",
96
  title="Parakeet RNNT 1.1B: Transcribe Audio",
97
  description=(
 
103
  )
104
 
105
  file_transcribe = gr.Interface(
106
+ fn=get_transcripts,
107
  inputs=[
108
+ gr.Audio(sources="upload", type="filepath", label="Audio file"),
109
  ],
110
  outputs="text",
 
111
  theme="huggingface",
112
  title="Parakeet RNNT 1.1B: Transcribe Audio",
113
  description=(
 
118
  allow_flagging="never",
119
  )
120
 
121
+ youtube_transcribe = gr.Interface(
122
  fn=yt_transcribe,
123
  inputs=[
124
+ gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
125
  ],
126
  outputs=["html", "text"],
 
127
  theme="huggingface",
128
  title="Parakeet RNNT 1.1B: Transcribe Audio",
129
  description=(
 
137
  with demo:
138
  gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
139
 
140
+ demo.launch()