Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import tempfile
|
|
7 |
import uuid
|
8 |
|
9 |
import torch
|
|
|
10 |
|
11 |
from nemo.collections.asr.models import ASRModel
|
12 |
from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
|
@@ -149,23 +150,6 @@ def on_src_or_tgt_lang_change(src_lang_value, tgt_lang_value, pnc_value):
|
|
149 |
chosen "values" of each Gradio component
|
150 |
Returns:
|
151 |
src_lang, tgt_lang, pnc - these are the new Gradio components that will be displayed
|
152 |
-
|
153 |
-
Note: I found the required logic is easier to understand if you think about the possible src & tgt langs as
|
154 |
-
a matrix, e.g. with English, Spanish, French, German as the langs, and only transcription in the same language,
|
155 |
-
and X -> English and English -> X translation being allowed, the matrix looks like the diagram below ("Y" means it is
|
156 |
-
allowed to go into that state).
|
157 |
-
It is easier to understand the code if you think about which state you are in, given the current src_lang_value and
|
158 |
-
tgt_lang_value, and then which states you can go to from there.
|
159 |
-
tgt lang
|
160 |
-
- |EN |ES |FR |DE
|
161 |
-
------------------
|
162 |
-
EN| Y | Y | Y | Y
|
163 |
-
------------------
|
164 |
-
src ES| Y | Y | |
|
165 |
-
lang ------------------
|
166 |
-
FR| Y | | Y |
|
167 |
-
------------------
|
168 |
-
DE| Y | | | Y
|
169 |
"""
|
170 |
|
171 |
if src_lang_value == "English" and tgt_lang_value == "English":
|
@@ -238,7 +222,7 @@ def on_src_or_tgt_lang_change(src_lang_value, tgt_lang_value, pnc_value):
|
|
238 |
|
239 |
|
240 |
with gr.Blocks(
|
241 |
-
title="
|
242 |
css="""
|
243 |
textarea { font-size: 18px;}
|
244 |
#model_output_text_box span {
|
@@ -249,21 +233,17 @@ with gr.Blocks(
|
|
249 |
theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
|
250 |
) as demo:
|
251 |
|
252 |
-
gr.HTML("<h1 style='text-align: center'>
|
253 |
|
254 |
with gr.Row():
|
255 |
with gr.Column():
|
256 |
gr.HTML(
|
257 |
-
"<p
|
258 |
-
|
259 |
-
"<p style='color: #A0A0A0;'>This demo supports audio files up to 10 mins long. "
|
260 |
-
"You can transcribe longer files locally with this NeMo "
|
261 |
-
"<a href='https://github.com/NVIDIA/NeMo/blob/main/examples/asr/speech_multitask/speech_to_text_aed_chunked_infer.py'>script</a>.</p>"
|
262 |
)
|
263 |
|
264 |
audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
|
265 |
|
266 |
-
gr.HTML("<p
|
267 |
|
268 |
src_lang = gr.Dropdown(
|
269 |
choices=["English", "Spanish", "French", "German"],
|
@@ -300,14 +280,6 @@ with gr.Blocks(
|
|
300 |
# elem_id="llm_output_text_box",
|
301 |
# )
|
302 |
|
303 |
-
with gr.Row():
|
304 |
-
|
305 |
-
gr.HTML(
|
306 |
-
"<p style='text-align: center'>"
|
307 |
-
"π€ <a href='https://huggingface.co/nvidia/canary-1b' target='_blank'>Canary model</a> | "
|
308 |
-
"π§βπ» <a href='https://github.com/NVIDIA/NeMo' target='_blank'>NeMo Repository</a>"
|
309 |
-
"</p>"
|
310 |
-
)
|
311 |
|
312 |
go_button.click(
|
313 |
fn=transcribe,
|
|
|
7 |
import uuid
|
8 |
|
9 |
import torch
|
10 |
+
import transformers
|
11 |
|
12 |
from nemo.collections.asr.models import ASRModel
|
13 |
from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
|
|
|
150 |
chosen "values" of each Gradio component
|
151 |
Returns:
|
152 |
src_lang, tgt_lang, pnc - these are the new Gradio components that will be displayed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
"""
|
154 |
|
155 |
if src_lang_value == "English" and tgt_lang_value == "English":
|
|
|
222 |
|
223 |
|
224 |
with gr.Blocks(
|
225 |
+
title="MyAlexa",
|
226 |
css="""
|
227 |
textarea { font-size: 18px;}
|
228 |
#model_output_text_box span {
|
|
|
233 |
theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
|
234 |
) as demo:
|
235 |
|
236 |
+
gr.HTML("<h1 style='text-align: center'>MyAlexa</h1>")
|
237 |
|
238 |
with gr.Row():
|
239 |
with gr.Column():
|
240 |
gr.HTML(
|
241 |
+
"<p>Upload an audio file or record with your microphone.</p>"
|
|
|
|
|
|
|
|
|
242 |
)
|
243 |
|
244 |
audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
|
245 |
|
246 |
+
gr.HTML("<p>Choose the input and output language.</p>")
|
247 |
|
248 |
src_lang = gr.Dropdown(
|
249 |
choices=["English", "Spanish", "French", "German"],
|
|
|
280 |
# elem_id="llm_output_text_box",
|
281 |
# )
|
282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
go_button.click(
|
285 |
fn=transcribe,
|