DHEIVER committed on
Commit
5b5f8d9
1 Parent(s): bfd6986

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -157
app.py CHANGED
@@ -2,214 +2,81 @@ import logging
2
  import os
3
  import time
4
  import uuid
5
-
6
  import gradio as gr
7
  import soundfile as sf
8
-
9
  from model import get_pretrained_model, language_to_models
10
 
11
  title = "# Next-gen Kaldi: Text-to-speech (TTS)"
12
 
13
  description = """
14
  This space shows how to convert text to speech with Next-gen Kaldi.
15
-
16
  It is running on CPU within a docker container provided by Hugging Face.
17
-
18
  See more information by visiting the following links:
19
-
20
  - <https://github.com/k2-fsa/sherpa-onnx>
21
-
22
- If you want to deploy it locally, please see
23
- <https://k2-fsa.github.io/sherpa/>
24
-
25
- If you want to use Android APKs, please see
26
- <https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
27
-
28
- If you want to use Android text-to-speech engine APKs, please see
29
- <https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
30
-
31
- If you want to download an all-in-one exe for Windows, please see
32
- <https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
33
  """
34
 
35
- css = """
36
- .result {display:flex;flex-direction:column}
37
- .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
38
- .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
39
- .result_item_error {background-color:#ff7070;color:white;align-self:start}
40
- """
41
 
42
- # Simplified examples for Portuguese only
43
- examples = [
44
- ["Portuguese", "csukuangfj/vits-mms-por", "Eu desejo uma versão simplificada para português.", 0, 1.0],
45
- ]
46
 
47
- # Use only Portuguese as a language choice
48
  language_choices = ["Portuguese"]
49
 
50
- def update_model_dropdown(language: str):
51
- if language in language_to_models:
52
- choices = language_to_models[language]
53
- return gr.Dropdown(
54
- choices=choices,
55
- value=choices[0],
56
- interactive=True,
57
- )
58
-
59
- raise ValueError(f"Unsupported language: {language}")
60
-
61
-
62
- def build_html_output(s: str, style: str = "result_item_success"):
63
- return f"""
64
- <div class='result'>
65
- <div class='result_item {style}'>
66
- {s}
67
- </div>
68
- </div>
69
- """
70
 
 
 
71
 
72
- def process(language: str, repo_id: str, text: str, sid: str, speed: float):
73
  logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
74
  sid = int(sid)
75
  tts = get_pretrained_model(repo_id, speed)
76
-
77
  start = time.time()
78
  audio = tts.generate(text, sid=sid)
79
  end = time.time()
80
-
81
  if len(audio.samples) == 0:
82
- raise ValueError(
83
- "Error in generating audios. Please read previous error messages."
84
- )
85
-
86
  duration = len(audio.samples) / audio.sample_rate
87
-
88
  elapsed_seconds = end - start
89
  rtf = elapsed_seconds / duration
90
-
91
- info = f"""
92
- Wave duration : {duration:.3f} s <br/>
93
- Processing time: {elapsed_seconds:.3f} s <br/>
94
- RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>
95
- """
96
-
97
  logging.info(info)
98
  logging.info(f"\nrepo_id: {repo_id}\ntext: {text}\nsid: {sid}\nspeed: {speed}")
99
-
100
- filename = str(uuid.uuid4())
101
- filename = f"{filename}.wav"
102
- sf.write(
103
- filename,
104
- audio.samples,
105
- samplerate=audio.sample_rate,
106
- subtype="PCM_16",
107
- )
108
-
109
  return filename, build_html_output(info)
110
 
111
-
112
  demo = gr.Blocks(css=css)
113
 
114
  with demo:
115
  gr.Markdown(title)
116
-
117
- # Use Radio instead of Dropdown for language choice
118
- language_radio = gr.Radio(
119
- label="Language",
120
- choices=language_choices,
121
- value=language_choices[0],
122
- )
123
-
124
- # Initialize model_dropdown with Portuguese models
125
- model_dropdown = gr.Dropdown(
126
- choices=language_to_models["Portuguese"],
127
- label="Select a model",
128
- value=language_to_models["Portuguese"][0],
129
- )
130
-
131
- # No need to update model_dropdown for a single language
132
 
133
  with gr.Tabs():
134
  with gr.TabItem("Please input your text"):
135
- input_text = gr.Textbox(
136
- label="Input text",
137
- info="Your text",
138
- lines=3,
139
- placeholder="Please input your text here",
140
- )
141
-
142
- input_sid = gr.Textbox(
143
- label="Speaker ID",
144
- info="Speaker ID",
145
- lines=1,
146
- max_lines=1,
147
- value="0",
148
- placeholder="Speaker ID. Valid only for mult-speaker model",
149
- )
150
-
151
- input_speed = gr.Slider(
152
- minimum=0.1,
153
- maximum=10,
154
- value=1,
155
- step=0.1,
156
- label="Speed (larger->faster; smaller->slower)",
157
- )
158
-
159
  input_button = gr.Button("Submit")
160
-
161
  output_audio = gr.Audio(label="Output")
162
-
163
  output_info = gr.HTML(label="Info")
 
164
 
165
- gr.Examples(
166
- examples=examples,
167
- fn=process,
168
- inputs=[
169
- language_radio,
170
- model_dropdown,
171
- input_text,
172
- input_sid,
173
- input_speed,
174
- ],
175
- outputs=[
176
- output_audio,
177
- output_info,
178
- ],
179
- )
180
-
181
- input_button.click(
182
- process,
183
- inputs=[
184
- language_radio,
185
- model_dropdown,
186
- input_text,
187
- input_sid,
188
- input_speed,
189
- ],
190
- outputs=[
191
- output_audio,
192
- output_info,
193
- ],
194
- )
195
 
196
  gr.Markdown(description)
197
 
198
-
199
  def download_espeak_ng_data():
200
- os.system(
201
- """
202
- cd /tmp
203
- wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
204
- tar xf espeak-ng-data.tar.bz2
205
- """
206
- )
207
-
208
 
209
  if __name__ == "__main__":
210
  download_espeak_ng_data()
211
  formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
212
-
213
  logging.basicConfig(format=formatter, level=logging.INFO)
214
-
215
  demo.launch()
 
2
  import os
3
  import time
4
  import uuid
 
5
  import gradio as gr
6
  import soundfile as sf
 
7
  from model import get_pretrained_model, language_to_models
8
 
9
  title = "# Next-gen Kaldi: Text-to-speech (TTS)"
10
 
11
  description = """
12
  This space shows how to convert text to speech with Next-gen Kaldi.
 
13
  It is running on CPU within a docker container provided by Hugging Face.
 
14
  See more information by visiting the following links:
 
15
  - <https://github.com/k2-fsa/sherpa-onnx>
16
+ If you want to deploy it locally, please see <https://k2-fsa.github.io/sherpa/>
17
+ If you want to use Android APKs, please see <https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
18
+ If you want to use Android text-to-speech engine APKs, please see <https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
19
+ If you want to download an all-in-one exe for Windows, please see <https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
 
 
 
 
 
 
 
 
20
  """
21
 
22
+ css = """.result {display:flex;flex-direction:column}.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}.result_item_error {background-color:#ff7070;color:white;align-self:start}"""
 
 
 
 
 
23
 
24
+ examples = [["Portuguese", "csukuangfj/vits-mms-por", "Eu desejo uma versão simplificada para português.", 0, 1.0]]
 
 
 
25
 
 
26
  language_choices = ["Portuguese"]
27
 
28
+ def update_model_dropdown(language):
29
+ return gr.Dropdown(choices=language_to_models.get(language, []), value=language_to_models.get(language, [""])[0], interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ def build_html_output(s, style="result_item_success"):
32
+ return f"""<div class='result'><div class='result_item {style}'>{s}</div></div>"""
33
 
34
+ def process(language, repo_id, text, sid, speed):
35
  logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
36
  sid = int(sid)
37
  tts = get_pretrained_model(repo_id, speed)
 
38
  start = time.time()
39
  audio = tts.generate(text, sid=sid)
40
  end = time.time()
 
41
  if len(audio.samples) == 0:
42
+ raise ValueError("Error in generating audios. Please read previous error messages.")
 
 
 
43
  duration = len(audio.samples) / audio.sample_rate
 
44
  elapsed_seconds = end - start
45
  rtf = elapsed_seconds / duration
46
+ info = f"""Wave duration : {duration:.3f} s <br/>Processing time: {elapsed_seconds:.3f} s <br/>RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>"""
 
 
 
 
 
 
47
  logging.info(info)
48
  logging.info(f"\nrepo_id: {repo_id}\ntext: {text}\nsid: {sid}\nspeed: {speed}")
49
+ filename = str(uuid.uuid4()) + ".wav"
50
+ sf.write(filename, audio.samples, samplerate=audio.sample_rate, subtype="PCM_16")
 
 
 
 
 
 
 
 
51
  return filename, build_html_output(info)
52
 
 
53
  demo = gr.Blocks(css=css)
54
 
55
  with demo:
56
  gr.Markdown(title)
57
+ language_radio = gr.Radio(label="Language", choices=language_choices, value=language_choices[0])
58
+ model_dropdown = gr.Dropdown(choices=language_to_models["Portuguese"], label="Select a model", value=language_to_models["Portuguese"][0])
59
+ language_radio.change(update_model_dropdown, inputs=language_radio, outputs=model_dropdown)
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  with gr.Tabs():
62
  with gr.TabItem("Please input your text"):
63
+ input_text = gr.Textbox(label="Input text", info="Your text", lines=3, placeholder="Please input your text here")
64
+ input_sid = gr.Textbox(label="Speaker ID", info="Speaker ID", lines=1, max_lines=1, value="0", placeholder="Speaker ID. Valid only for mult-speaker model")
65
+ input_speed = gr.Slider(minimum=0.1, maximum=10, value=1, step=0.1, label="Speed (larger->faster; smaller->slower)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  input_button = gr.Button("Submit")
 
67
  output_audio = gr.Audio(label="Output")
 
68
  output_info = gr.HTML(label="Info")
69
+ gr.Examples(examples=examples, fn=process, inputs=[language_radio, model_dropdown, input_text, input_sid, input_speed], outputs=[output_audio, output_info])
70
 
71
+ input_button.click(process, inputs=[language_radio, model_dropdown, input_text, input_sid, input_speed], outputs=[output_audio, output_info])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  gr.Markdown(description)
74
 
 
75
  def download_espeak_ng_data():
76
+ os.system("""cd /tmp; wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2; tar xf espeak-ng-data.tar.bz2""")
 
 
 
 
 
 
 
77
 
78
  if __name__ == "__main__":
79
  download_espeak_ng_data()
80
  formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
 
81
  logging.basicConfig(format=formatter, level=logging.INFO)
 
82
  demo.launch()