almncarlo committed on
Commit
5d407ad
β€’
1 Parent(s): c099276

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -33
app.py CHANGED
@@ -7,6 +7,7 @@ import tempfile
7
  import uuid
8
 
9
  import torch
 
10
 
11
  from nemo.collections.asr.models import ASRModel
12
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
@@ -149,23 +150,6 @@ def on_src_or_tgt_lang_change(src_lang_value, tgt_lang_value, pnc_value):
149
  chosen "values" of each Gradio component
150
  Returns:
151
  src_lang, tgt_lang, pnc - these are the new Gradio components that will be displayed
152
-
153
- Note: I found the required logic is easier to understand if you think about the possible src & tgt langs as
154
- a matrix, e.g. with English, Spanish, French, German as the langs, and only transcription in the same language,
155
- and X -> English and English -> X translation being allowed, the matrix looks like the diagram below ("Y" means it is
156
- allowed to go into that state).
157
- It is easier to understand the code if you think about which state you are in, given the current src_lang_value and
158
- tgt_lang_value, and then which states you can go to from there.
159
- tgt lang
160
- - |EN |ES |FR |DE
161
- ------------------
162
- EN| Y | Y | Y | Y
163
- ------------------
164
- src ES| Y | Y | |
165
- lang ------------------
166
- FR| Y | | Y |
167
- ------------------
168
- DE| Y | | | Y
169
  """
170
 
171
  if src_lang_value == "English" and tgt_lang_value == "English":
@@ -238,7 +222,7 @@ def on_src_or_tgt_lang_change(src_lang_value, tgt_lang_value, pnc_value):
238
 
239
 
240
  with gr.Blocks(
241
- title="NeMo Canary Model",
242
  css="""
243
  textarea { font-size: 18px;}
244
  #model_output_text_box span {
@@ -249,21 +233,17 @@ with gr.Blocks(
249
  theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
250
  ) as demo:
251
 
252
- gr.HTML("<h1 style='text-align: center'>NeMo Canary model: Transcribe & Translate audio</h1>")
253
 
254
  with gr.Row():
255
  with gr.Column():
256
  gr.HTML(
257
- "<p><b>Step 1:</b> Upload an audio file or record with your microphone.</p>"
258
-
259
- "<p style='color: #A0A0A0;'>This demo supports audio files up to 10 mins long. "
260
- "You can transcribe longer files locally with this NeMo "
261
- "<a href='https://github.com/NVIDIA/NeMo/blob/main/examples/asr/speech_multitask/speech_to_text_aed_chunked_infer.py'>script</a>.</p>"
262
  )
263
 
264
  audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
265
 
266
- gr.HTML("<p><b>Step 2:</b> Choose the input and output language.</p>")
267
 
268
  src_lang = gr.Dropdown(
269
  choices=["English", "Spanish", "French", "German"],
@@ -300,14 +280,6 @@ with gr.Blocks(
300
  # elem_id="llm_output_text_box",
301
  # )
302
 
303
- with gr.Row():
304
-
305
- gr.HTML(
306
- "<p style='text-align: center'>"
307
- "🐀 <a href='https://huggingface.co/nvidia/canary-1b' target='_blank'>Canary model</a> | "
308
- "πŸ§‘β€πŸ’» <a href='https://github.com/NVIDIA/NeMo' target='_blank'>NeMo Repository</a>"
309
- "</p>"
310
- )
311
 
312
  go_button.click(
313
  fn=transcribe,
 
7
  import uuid
8
 
9
  import torch
10
+ import transformers
11
 
12
  from nemo.collections.asr.models import ASRModel
13
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
 
150
  chosen "values" of each Gradio component
151
  Returns:
152
  src_lang, tgt_lang, pnc - these are the new Gradio components that will be displayed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  """
154
 
155
  if src_lang_value == "English" and tgt_lang_value == "English":
 
222
 
223
 
224
  with gr.Blocks(
225
+ title="MyAlexa",
226
  css="""
227
  textarea { font-size: 18px;}
228
  #model_output_text_box span {
 
233
  theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
234
  ) as demo:
235
 
236
+ gr.HTML("<h1 style='text-align: center'>MyAlexa</h1>")
237
 
238
  with gr.Row():
239
  with gr.Column():
240
  gr.HTML(
241
+ "<p>Upload an audio file or record with your microphone.</p>"
 
 
 
 
242
  )
243
 
244
  audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
245
 
246
+ gr.HTML("<p>Choose the input and output language.</p>")
247
 
248
  src_lang = gr.Dropdown(
249
  choices=["English", "Spanish", "French", "German"],
 
280
  # elem_id="llm_output_text_box",
281
  # )
282
 
 
 
 
 
 
 
 
 
283
 
284
  go_button.click(
285
  fn=transcribe,