k1ngtai committed on
Commit
5361747
1 Parent(s): 4f3218b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -47
app.py CHANGED
@@ -1,54 +1,41 @@
1
  import gradio as gr
2
  import librosa
3
- from asr import transcribe
4
- from tts import synthesize, TTS_EXAMPLES
5
-
6
- ALL_LANGUAGES = {}
7
-
8
- for task in ["asr", "tts", "lid"]:
9
- ALL_LANGUAGES.setdefault(task, {})
10
- with open(f"data/{task}/all_langs.tsv") as f:
11
- for line in f:
12
- iso, name = line.split(" ", 1)
13
- ALL_LANGUAGES[task][iso] = name
14
-
15
-
16
- def identify(microphone, file_upload):
17
- LID_SAMPLING_RATE = 16_000
18
-
19
- warn_output = ""
20
- if (microphone is not None) and (file_upload is not None):
21
- warn_output = (
22
- "WARNING: You've uploaded an audio file and used the microphone. "
23
- "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
24
- )
25
-
26
- elif (microphone is None) and (file_upload is None):
27
- return "ERROR: You have to either use the microphone or upload an audio file"
28
-
29
- audio_fp = microphone if microphone is not None else file_upload
30
- inputs = librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)[0]
31
-
32
- raw_output = {"eng": 0.9, "hin": 0.04, "heb": 0.03, "ara": 0.02, "fra": 0.01}
33
- return {(k + ": " + ALL_LANGUAGES["lid"][k]): v for k, v in raw_output.items()}
34
 
35
 
36
  demo = gr.Blocks()
37
 
 
 
 
 
 
 
 
 
 
38
  mms_transcribe = gr.Interface(
39
  fn=transcribe,
40
  inputs=[
41
- gr.Audio(source="microphone", type="filepath"),
42
- gr.Audio(source="upload", type="filepath"),
 
43
  gr.Dropdown(
44
- [f"{k}: {v}" for k, v in ALL_LANGUAGES["asr"].items()],
45
  label="Language",
46
- value="shn: Shan",
47
  ),
 
48
  ],
49
  outputs="text",
 
50
  title="Speech-to-text",
51
- description=("Transcribe audio!"),
 
 
 
52
  allow_flagging="never",
53
  )
54
 
@@ -57,9 +44,9 @@ mms_synthesize = gr.Interface(
57
  inputs=[
58
  gr.Text(label="Input text"),
59
  gr.Dropdown(
60
- [f"{k}: {v}" for k, v in ALL_LANGUAGES["tts"].items()],
61
  label="Language",
62
- value="shn: Shan",
63
  ),
64
  gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
65
  ],
@@ -69,26 +56,77 @@ mms_synthesize = gr.Interface(
69
  ],
70
  examples=TTS_EXAMPLES,
71
  title="Text-to-speech",
72
- description=("Generate audio!"),
73
  allow_flagging="never",
74
  )
75
 
 
 
 
 
 
 
 
 
 
76
  mms_identify = gr.Interface(
77
  fn=identify,
78
  inputs=[
79
- gr.Audio(source="microphone", type="filepath"),
80
- gr.Audio(source="upload", type="filepath"),
 
81
  ],
82
  outputs=gr.Label(num_top_classes=10),
 
83
  title="Language Identification",
84
- description=("Identity the language of audio!"),
85
  allow_flagging="never",
86
  )
87
 
88
- with demo:
89
- gr.TabbedInterface(
90
- [mms_transcribe, mms_synthesize, mms_identify],
91
- ["Speech-to-text", "Text-to-speech", "Language Identification"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- demo.launch()
 
 
1
  import gradio as gr
2
  import librosa
3
+ from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
4
+ from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
5
+ from lid import identify, LID_EXAMPLES
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  demo = gr.Blocks()
9
 
10
+ mms_select_source_trans = gr.Radio(
11
+ ["Record from Mic", "Upload audio"],
12
+ label="Audio input",
13
+ value="Record from Mic",
14
+ )
15
+ mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
16
+ mms_upload_source_trans = gr.Audio(
17
+ source="upload", type="filepath", label="Upload file", visible=False
18
+ )
19
  mms_transcribe = gr.Interface(
20
  fn=transcribe,
21
  inputs=[
22
+ mms_select_source_trans,
23
+ mms_mic_source_trans,
24
+ mms_upload_source_trans,
25
  gr.Dropdown(
26
+ [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
27
  label="Language",
28
+ value="eng English",
29
  ),
30
+ # gr.Checkbox(label="Use Language Model (if available)", default=True),
31
  ],
32
  outputs="text",
33
+ examples=ASR_EXAMPLES,
34
  title="Speech-to-text",
35
+ description=(
36
+ "Transcribe audio from a microphone or input file in your desired language."
37
+ ),
38
+ article=ASR_NOTE,
39
  allow_flagging="never",
40
  )
41
 
 
44
  inputs=[
45
  gr.Text(label="Input text"),
46
  gr.Dropdown(
47
+ [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
48
  label="Language",
49
+ value="eng English",
50
  ),
51
  gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
52
  ],
 
56
  ],
57
  examples=TTS_EXAMPLES,
58
  title="Text-to-speech",
59
+ description=("Generate audio in your desired language from input text."),
60
  allow_flagging="never",
61
  )
62
 
63
+ mms_select_source_iden = gr.Radio(
64
+ ["Record from Mic", "Upload audio"],
65
+ label="Audio input",
66
+ value="Record from Mic",
67
+ )
68
+ mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
69
+ mms_upload_source_iden = gr.Audio(
70
+ source="upload", type="filepath", label="Upload file", visible=False
71
+ )
72
  mms_identify = gr.Interface(
73
  fn=identify,
74
  inputs=[
75
+ mms_select_source_iden,
76
+ mms_mic_source_iden,
77
+ mms_upload_source_iden,
78
  ],
79
  outputs=gr.Label(num_top_classes=10),
80
+ examples=LID_EXAMPLES,
81
  title="Language Identification",
82
+ description=("Identify the language of input audio."),
83
  allow_flagging="never",
84
  )
85
 
86
+ tabbed_interface = gr.TabbedInterface(
87
+ [mms_transcribe, mms_synthesize, mms_identify],
88
+ ["Speech-to-text", "Text-to-speech", "Language Identification"],
89
+ )
90
+
91
+ with gr.Blocks() as demo:
92
+ gr.Markdown(
93
+ "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
94
+ )
95
+ gr.HTML(
96
+ """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
97
+ )
98
+ gr.HTML(
99
+ """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
100
+ )
101
+
102
+ tabbed_interface.render()
103
+ mms_select_source_trans.change(
104
+ lambda x: [
105
+ gr.update(visible=True if x == "Record from Mic" else False),
106
+ gr.update(visible=True if x == "Upload audio" else False),
107
+ ],
108
+ inputs=[mms_select_source_trans],
109
+ outputs=[mms_mic_source_trans, mms_upload_source_trans],
110
+ queue=False,
111
  )
112
+ mms_select_source_iden.change(
113
+ lambda x: [
114
+ gr.update(visible=True if x == "Record from Mic" else False),
115
+ gr.update(visible=True if x == "Upload audio" else False),
116
+ ],
117
+ inputs=[mms_select_source_iden],
118
+ outputs=[mms_mic_source_iden, mms_upload_source_iden],
119
+ queue=False,
120
+ )
121
+ gr.HTML(
122
+ """
123
+ <div class="footer" style="text-align:center">
124
+ <p>
125
+ Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
126
+ </p>
127
+ </div>
128
+ """
129
+ )
130
 
131
+ demo.queue(concurrency_count=3)
132
+ demo.launch()