nithinraok committed on
Commit
50262ab
•
1 Parent(s): ede25a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from nemo.collections.asr.models import EncDecRNNTBPEModel
2
  import yt_dlp as youtube_dl
3
  import os
4
  import tempfile
@@ -7,10 +7,10 @@ import gradio as gr
7
  from pydub import AudioSegment
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
- MODEL_NAME="nvidia/parakeet-rnnt-1.1b"
11
  YT_LENGTH_LIMIT_S=3600
12
 
13
- model = EncDecRNNTBPEModel.from_pretrained(model_name=MODEL_NAME).to(device)
14
  model.eval()
15
 
16
  def get_transcripts(audio_path):
@@ -19,8 +19,8 @@ def get_transcripts(audio_path):
19
 
20
  article = (
21
  "<p style='text-align: center'>"
22
- "<a href='https://huggingface.co/nvidia/parakeet-rnnt-1.1b' target='_blank'>🎙️ Learn more about Parakeet model</a> | "
23
- "<a href='https://arxiv.org/abs/2305.05084' target='_blank'>📚 FastConformer paper</a> | "
24
  "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
25
  "</p>"
26
  )
@@ -92,11 +92,11 @@ mf_transcribe = gr.Interface(
92
  ],
93
  outputs="text",
94
  theme="huggingface",
95
- title="Parakeet RNNT 1.1B: Transcribe Audio",
96
  description=(
97
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
98
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
99
- " of arbitrary length."
100
  ),
101
  allow_flagging="never",
102
  )
@@ -108,11 +108,11 @@ file_transcribe = gr.Interface(
108
  ],
109
  outputs="text",
110
  theme="huggingface",
111
- title="Parakeet RNNT 1.1B: Transcribe Audio",
112
  description=(
113
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
114
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
115
- " of arbitrary length."
116
  ),
117
  allow_flagging="never",
118
  )
@@ -124,11 +124,11 @@ youtube_transcribe = gr.Interface(
124
  ],
125
  outputs=["html", "text"],
126
  theme="huggingface",
127
- title="Parakeet RNNT 1.1B: Transcribe Audio",
128
  description=(
129
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
130
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
131
- " of arbitrary length."
132
  ),
133
  allow_flagging="never",
134
  )
 
1
+ from nemo.collections.asr.models import ASRModel
2
  import yt_dlp as youtube_dl
3
  import os
4
  import tempfile
 
7
  from pydub import AudioSegment
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
+ MODEL_NAME="nvidia/parakeet-tdt-1.1b"
11
  YT_LENGTH_LIMIT_S=3600
12
 
13
+ model = ASRModel.from_pretrained(model_name=MODEL_NAME).to(device)
14
  model.eval()
15
 
16
  def get_transcripts(audio_path):
 
19
 
20
  article = (
21
  "<p style='text-align: center'>"
22
+ "<a href='https://huggingface.co/nvidia/parakeet-tdt-1.1b' target='_blank'>🎙️ Learn more about Parakeet TDT model</a> | "
23
+ "<a href='https://arxiv.org/abs/2304.06795' target='_blank'>📚 TDT ICML paper</a> | "
24
  "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
25
  "</p>"
26
  )
 
92
  ],
93
  outputs="text",
94
  theme="huggingface",
95
+ title="Parakeet TDT 1.1B: Transcribe Audio",
96
  description=(
97
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
98
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
99
+ " of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
100
  ),
101
  allow_flagging="never",
102
  )
 
108
  ],
109
  outputs="text",
110
  theme="huggingface",
111
+ title="Parakeet TDT 1.1B: Transcribe Audio",
112
  description=(
113
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
114
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
115
+ " of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
116
  ),
117
  allow_flagging="never",
118
  )
 
124
  ],
125
  outputs=["html", "text"],
126
  theme="huggingface",
127
+ title="Parakeet TDT 1.1B: Transcribe Audio",
128
  description=(
129
  "Transcribe microphone or audio inputs with the click of a button! Demo uses the"
130
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
131
+ " of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
132
  ),
133
  allow_flagging="never",
134
  )