reach-vb (HF staff) and hysts (HF staff) committed
Commit
ee0444e
1 Parent(s): 5f23a53

fix type annotation and add ngram filtering (#7)


- Fix type annotation and add ngram_filtering (412664701c7f9488addf9005fd9a5a81a0d0179c)


Co-authored-by: hysts <hysts@users.noreply.huggingface.co>
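The annotation fix swaps `typing.Union[X, None]` for the PEP 604 `X | None` spelling. Because `str | None` only evaluates successfully at runtime on Python 3.10+, the diff also adds `from __future__ import annotations` (PEP 563), which defers evaluation of all annotations so the new syntax is safe on older interpreters. A minimal standalone sketch of the pattern (the `greet` function is hypothetical, not taken from app.py):

    from __future__ import annotations  # defer annotation evaluation (PEP 563)

    def greet(name: str | None) -> str:
        # Without the future import, "str | None" raises TypeError at
        # definition time on Python < 3.10; with it, the annotation is
        # stored as a string and never evaluated.
        return f"Hello, {name}!" if name else "Hello!"

    print(greet(None))   # -> Hello!
    print(greet("Ada"))  # -> Hello, Ada!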

Files changed (1): app.py (+38 −20)
app.py CHANGED
@@ -1,6 +1,7 @@
+from __future__ import annotations
+
 import os
 
-from typing import Union
 import gradio as gr
 import numpy as np
 import torch
@@ -49,14 +50,14 @@ translator = Translator(
 def predict(
     task_name: str,
     audio_source: str,
-    input_audio_mic: Union[str, None],
-    input_audio_file: Union[str, None],
-    input_text: Union[str, None],
-    source_language: Union[str, None],
+    input_audio_mic: str | None,
+    input_audio_file: str | None,
+    input_text: str | None,
+    source_language: str | None,
     target_language: str,
-) -> tuple[Union[tuple[int, np.ndarray], None], str]:
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     task_name = task_name.split()[0]
-    source_language_code = LANGUAGE_NAME_TO_CODE.get(source_language, None)
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language] if source_language else None
     target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
 
     if task_name in ["S2ST", "S2TT", "ASR"]:
@@ -79,6 +80,7 @@ def predict(
         task_str=task_name,
         tgt_lang=target_language_code,
         src_lang=source_language_code,
+        ngram_filtering=True,
     )
     if task_name in ["S2ST", "T2ST"]:
         return (sr, wav.cpu().detach().numpy()), text_out
@@ -86,7 +88,7 @@
     return None, text_out
 
 
-def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="S2ST",
         audio_source="file",
@@ -98,7 +100,7 @@ def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[s
     )
 
 
-def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="S2TT",
         audio_source="file",
@@ -110,7 +112,9 @@ def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[s
     )
 
 
-def process_t2st_example(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
+def process_t2st_example(
+    input_text: str, source_language: str, target_language: str
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="T2ST",
         audio_source="",
@@ -122,7 +126,9 @@ def process_t2st_example(input_text: str, source_language: str, target_language:
     )
 
 
-def process_t2tt_example(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
+def process_t2tt_example(
+    input_text: str, source_language: str, target_language: str
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="T2TT",
         audio_source="",
@@ -134,7 +140,7 @@ def process_t2tt_example(input_text: str, source_language: str, target_language:
     )
 
 
-def process_asr_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_asr_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="ASR",
         audio_source="file",
@@ -317,10 +323,16 @@ with gr.Blocks(css="style.css") as demo:
             examples=[
                 ["My favorite animal is the elephant.", "English", "French"],
                 ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
-                ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
-                 "English", "Hindi"],
-                ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
-                 "English", "Spanish"],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Hindi",
+                ],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Spanish",
+                ],
             ],
             inputs=[input_text, source_language, target_language],
             outputs=[output_audio, output_text],
@@ -332,10 +344,16 @@ with gr.Blocks(css="style.css") as demo:
             examples=[
                 ["My favorite animal is the elephant.", "English", "French"],
                 ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
-                ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
-                 "English", "Hindi"],
-                ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
-                 "English", "Spanish"],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Hindi",
+                ],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Spanish",
+                ],
             ],
             inputs=[input_text, source_language, target_language],
             outputs=[output_audio, output_text],
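
Beyond the annotations, the diff makes two behavioral changes. First, `source_language_code` now uses explicit indexing guarded by a truthiness check, so an unrecognized (non-empty) language name raises a KeyError instead of being silently mapped to None by `dict.get`. Second, the `translator.predict(...)` call gains `ngram_filtering=True`, which asks the model to suppress degenerate repeated n-grams in the generated output. A hedged sketch of the call shape, mirroring the keywords in the diff; the import path, model and vocoder card names, and the exact `Translator` signature are assumptions about the seamless_communication version this Space pins, so verify them against the installed package:

    import torch
    from seamless_communication.models.inference import Translator  # assumed import path

    # Assumed card names; the Space's own Translator(...) setup is outside this diff.
    translator = Translator(
        "seamlessM4T_large",
        "vocoder_36langs",
        device=torch.device("cuda:0"),
    )

    # Same keywords the diff passes, including the new ngram_filtering flag.
    text_out, wav, sr = translator.predict(
        "My favorite animal is the elephant.",
        task_str="T2TT",
        tgt_lang="fra",  # code looked up via LANGUAGE_NAME_TO_CODE in app.py
        src_lang="eng",
        ngram_filtering=True,  # filter repeated n-grams from the hypothesis
    )
    print(text_out)  # wav and sr are unused for a text-to-text task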