Kit-Lemonfoot
committed on
Upload app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ from infer import get_net_g, infer
|
|
15 |
from tools.log import logger
|
16 |
|
17 |
is_hf_spaces = os.getenv("SYSTEM") == "spaces"
|
18 |
-
limit =
|
19 |
|
20 |
|
21 |
class Model:
|
@@ -186,9 +186,11 @@ class ModelHolder:
|
|
186 |
device=self.device,
|
187 |
)
|
188 |
styles = list(self.current_model.style2id.keys())
|
|
|
189 |
return (
|
190 |
gr.Dropdown(choices=styles, value=styles[0]),
|
191 |
-
gr.update(interactive=True, value="
|
|
|
192 |
)
|
193 |
|
194 |
def update_model_files_dropdown(self, model_name):
|
@@ -207,6 +209,8 @@ class ModelHolder:
|
|
207 |
|
208 |
|
209 |
def tts_fn(
|
|
|
|
|
210 |
text,
|
211 |
language,
|
212 |
reference_audio_path,
|
@@ -221,25 +225,32 @@ def tts_fn(
|
|
221 |
use_style_text,
|
222 |
emotion,
|
223 |
emotion_weight,
|
|
|
224 |
):
|
225 |
-
|
226 |
-
|
227 |
-
logger.info(f"
|
228 |
-
logger.info(f"
|
229 |
-
logger.info(f"
|
|
|
|
|
230 |
|
231 |
if is_hf_spaces and len(text) > limit:
|
232 |
-
|
233 |
-
# raise Exception(f"文字数が{limit}文字を超えています")
|
234 |
-
return f"文字数が{limit}文字を超えています", (44100, "")
|
235 |
|
236 |
assert model_holder.current_model is not None
|
237 |
|
|
|
|
|
|
|
|
|
|
|
238 |
start_time = datetime.datetime.now()
|
239 |
|
240 |
sr, audio = model_holder.current_model.infer(
|
241 |
text=text,
|
242 |
language=language,
|
|
|
243 |
reference_audio_path=reference_audio_path,
|
244 |
sdp_ratio=sdp_ratio,
|
245 |
noise=noise_scale,
|
@@ -256,112 +267,41 @@ def tts_fn(
|
|
256 |
|
257 |
end_time = datetime.datetime.now()
|
258 |
duration = (end_time - start_time).total_seconds()
|
259 |
-
logger.info(f"
|
260 |
return f"Success, time: {duration} seconds.", (sr, audio)
|
261 |
|
262 |
|
263 |
-
initial_text = "
|
264 |
-
|
265 |
-
example_local = [
|
266 |
-
[initial_text, "JP"],
|
267 |
-
[
|
268 |
-
"""あなたがそんなこと言うなんて、私はとっても嬉しい。
|
269 |
-
あなたがそんなこと言うなんて、私はとっても怒ってる。
|
270 |
-
あなたがそんなこと言うなんて、私はとっても驚いてる。
|
271 |
-
あなたがそんなこと言うなんて、私はとっても辛い。""",
|
272 |
-
"JP",
|
273 |
-
],
|
274 |
-
[ # ChatGPTに考えてもらった告白セリフ
|
275 |
-
"""私、ずっと前からあなたのことを見てきました。あなたの笑顔、優しさ、強さに、心惹かれていたんです。
|
276 |
-
友達として過ごす中で、あなたのことがだんだんと特別な存在になっていくのがわかりました。
|
277 |
-
えっと、私、あなたのことが好きです!もしよければ、私と付き合ってくれませんか?""",
|
278 |
-
"JP",
|
279 |
-
],
|
280 |
-
[ # 夏目漱石『吾輩は猫である』
|
281 |
-
"""吾輩は猫である。名前はまだ無い。
|
282 |
-
どこで生れたかとんと見当がつかぬ。なんでも薄暗いじめじめした所でニャーニャー泣いていた事だけは記憶している。
|
283 |
-
吾輩はここで始めて人間というものを見た。しかもあとで聞くと、それは書生という、人間中で一番獰悪な種族であったそうだ。
|
284 |
-
この書生というのは時々我々を捕まえて煮て食うという話である。""",
|
285 |
-
"JP",
|
286 |
-
],
|
287 |
-
[ # 梶井基次郎『桜の樹の下には』
|
288 |
-
"""桜の樹の下には屍体が埋まっている!これは信じていいことなんだよ。
|
289 |
-
何故って、桜の花があんなにも見事に咲くなんて信じられないことじゃないか。俺はあの美しさが信じられないので、このにさんにち不安だった。
|
290 |
-
しかしいま、やっとわかるときが来た。桜の樹の下には屍体が埋まっている。これは信じていいことだ。""",
|
291 |
-
"JP",
|
292 |
-
],
|
293 |
-
[ # ChatGPTと考えた、感情を表すセリフ
|
294 |
-
"""やったー!テストで満点取れた!私とっても嬉しいな!
|
295 |
-
どうして私の意見を無視するの?許せない!ムカつく!あんたなんか死ねばいいのに。
|
296 |
-
あはははっ!この漫画めっちゃ笑える、見てよこれ、ふふふ、あはは。
|
297 |
-
あなたがいなくなって、私は一人になっちゃって、泣いちゃいそうなほど悲しい。""",
|
298 |
-
"JP",
|
299 |
-
],
|
300 |
-
[ # 上の丁寧語バージョン
|
301 |
-
"""やりました!テストで満点取れましたよ!私とっても嬉しいです!
|
302 |
-
どうして私の意見を無視するんですか?許せません!ムカつきます!あんたなんか死んでください。
|
303 |
-
あはははっ!この漫画めっちゃ笑えます、見てくださいこれ、ふふふ、あはは。
|
304 |
-
あなたがいなくなって、私は一人になっちゃって、泣いちゃいそうなほど悲しいです。""",
|
305 |
-
"JP",
|
306 |
-
],
|
307 |
-
[ # ChatGPTに考えてもらった音声合成の説明文章
|
308 |
-
"""音声合成は、機械学習を活用して、テキストから人の声を再現する技術です。この技術は、言語の構造を解析し、それに基づいて音声を生成します。
|
309 |
-
この分野の最新の研究成果を使うと、より自然で表現豊かな音声の生成が可能である。深層学習の応用により、感情やアクセントを含む声質の微妙な変化も再現することが出来る。""",
|
310 |
-
"JP",
|
311 |
-
],
|
312 |
-
[
|
313 |
-
"Speech synthesis is the artificial production of human speech. A computer system used for this purpose is called a speech synthesizer, and can be implemented in software or hardware products.",
|
314 |
-
"EN",
|
315 |
-
],
|
316 |
-
["语音合成是人工制造人类语音。用于此目的的计算机系统称为语音合成器,可以通过软件或硬件产品实现。", "ZH"],
|
317 |
-
]
|
318 |
-
|
319 |
-
example_hf_spaces = [
|
320 |
-
[initial_text, "JP"],
|
321 |
-
["えっと、私、あなたのことが好きです!もしよければ付き合ってくれませんか?", "JP"],
|
322 |
-
["吾輩は猫である。名前はまだ無い。", "JP"],
|
323 |
-
["桜の樹の下には屍体が埋まっている!これは信じていいことなんだよ。", "JP"],
|
324 |
-
["やったー!テストで満点取れたよ!私とっても嬉しいな!", "JP"],
|
325 |
-
["どうして私の意見を無視するの?許せない!ムカつく!あんたなんか死ねばいいのに。", "JP"],
|
326 |
-
["あはははっ!この漫画めっちゃ笑える、見てよこれ、ふふふ、あはは。", "JP"],
|
327 |
-
["あなたがいなくなって、私は一人になっちゃって、泣いちゃいそうなほど悲しい。", "JP"],
|
328 |
-
["深層学習の応用により、感情やアクセントを含む声質の微妙な変化も再現されている。", "JP"],
|
329 |
-
[
|
330 |
-
"Speech synthesis is the artificial production of human speech.",
|
331 |
-
"EN",
|
332 |
-
],
|
333 |
-
["语音合成是人工制造人类语音。用于此目的的计算机系统称为语音合成器,可以通过软件或硬件产品实现。", "ZH"],
|
334 |
-
]
|
335 |
|
336 |
initial_md = """
|
337 |
-
#
|
338 |
-
|
339 |
-
|
|
|
340 |
|
341 |
-
|
342 |
|
343 |
-
このデモでは[jvnvのモデル](https://huggingface.co/litagin/style_bert_vits2_jvnv)を使っており、[JVNVコーパス(言語音声と非言語音声を持つ日本語感情音声コーパス)](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus)で学習されたモデルです。
|
344 |
"""
|
345 |
|
346 |
style_md = """
|
347 |
-
-
|
348 |
-
-
|
349 |
-
-
|
350 |
-
-
|
351 |
-
-
|
352 |
"""
|
353 |
|
354 |
|
355 |
def make_interactive():
|
356 |
-
return gr.update(interactive=True, value="
|
357 |
|
358 |
|
359 |
def make_non_interactive():
|
360 |
-
return gr.update(interactive=False, value="
|
361 |
|
362 |
|
363 |
def gr_util(item):
|
364 |
-
if item == "
|
365 |
return (gr.update(visible=True), gr.Audio(visible=False, value=None))
|
366 |
else:
|
367 |
return (gr.update(visible=False), gr.update(visible=True))
|
@@ -383,46 +323,46 @@ if __name__ == "__main__":
|
|
383 |
|
384 |
model_holder = ModelHolder(model_dir, device)
|
385 |
|
386 |
-
languages = ["
|
387 |
-
examples = example_hf_spaces if is_hf_spaces else example_local
|
388 |
|
389 |
model_names = model_holder.model_names
|
390 |
if len(model_names) == 0:
|
391 |
-
logger.error(f"
|
392 |
sys.exit(1)
|
393 |
initial_id = 0
|
394 |
initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
|
395 |
|
396 |
-
with gr.Blocks(theme="
|
397 |
gr.Markdown(initial_md)
|
398 |
with gr.Row():
|
399 |
with gr.Column():
|
400 |
with gr.Row():
|
401 |
with gr.Column(scale=3):
|
402 |
model_name = gr.Dropdown(
|
403 |
-
label="
|
404 |
choices=model_names,
|
405 |
value=model_names[initial_id],
|
406 |
)
|
407 |
model_path = gr.Dropdown(
|
408 |
-
label="
|
409 |
choices=initial_pth_files,
|
410 |
value=initial_pth_files[0],
|
411 |
)
|
412 |
-
refresh_button = gr.Button("
|
413 |
-
load_button = gr.Button("
|
414 |
-
text_input = gr.TextArea(label="
|
415 |
|
416 |
-
line_split = gr.Checkbox(label="
|
417 |
split_interval = gr.Slider(
|
418 |
minimum=0.0,
|
419 |
maximum=2,
|
420 |
value=0.5,
|
421 |
step=0.1,
|
422 |
-
label="
|
423 |
)
|
424 |
-
language = gr.Dropdown(choices=languages, value="
|
425 |
-
|
|
|
426 |
sdp_ratio = gr.Slider(
|
427 |
minimum=0, maximum=1, value=0.2, step=0.1, label="SDP Ratio"
|
428 |
)
|
@@ -435,11 +375,11 @@ if __name__ == "__main__":
|
|
435 |
length_scale = gr.Slider(
|
436 |
minimum=0.1, maximum=2, value=1.0, step=0.1, label="Length"
|
437 |
)
|
438 |
-
use_style_text = gr.Checkbox(label="
|
439 |
style_text = gr.Textbox(
|
440 |
label="Style text",
|
441 |
-
placeholder="
|
442 |
-
info="
|
443 |
visible=False,
|
444 |
)
|
445 |
style_text_weight = gr.Slider(
|
@@ -447,7 +387,7 @@ if __name__ == "__main__":
|
|
447 |
maximum=1,
|
448 |
value=0.7,
|
449 |
step=0.1,
|
450 |
-
label="
|
451 |
visible=False,
|
452 |
)
|
453 |
use_style_text.change(
|
@@ -456,37 +396,37 @@ if __name__ == "__main__":
|
|
456 |
outputs=[style_text, style_text_weight],
|
457 |
)
|
458 |
with gr.Column():
|
459 |
-
with gr.Accordion("
|
460 |
gr.Markdown(style_md)
|
461 |
style_mode = gr.Radio(
|
462 |
-
["
|
463 |
-
label="
|
464 |
-
value="
|
465 |
)
|
466 |
style = gr.Dropdown(
|
467 |
-
label="
|
468 |
-
choices=["
|
469 |
-
value="
|
470 |
)
|
471 |
style_weight = gr.Slider(
|
472 |
minimum=0,
|
473 |
maximum=50,
|
474 |
value=5,
|
475 |
step=0.1,
|
476 |
-
label="
|
477 |
)
|
478 |
-
ref_audio_path = gr.Audio(label="
|
479 |
tts_button = gr.Button(
|
480 |
-
"
|
481 |
)
|
482 |
-
text_output = gr.Textbox(label="
|
483 |
-
audio_output = gr.Audio(label="
|
484 |
-
with gr.Accordion("テキスト例", open=False):
|
485 |
-
gr.Examples(examples, inputs=[text_input, language])
|
486 |
|
487 |
tts_button.click(
|
488 |
tts_fn,
|
489 |
inputs=[
|
|
|
|
|
490 |
text_input,
|
491 |
language,
|
492 |
ref_audio_path,
|
@@ -501,6 +441,7 @@ if __name__ == "__main__":
|
|
501 |
use_style_text,
|
502 |
style,
|
503 |
style_weight,
|
|
|
504 |
],
|
505 |
outputs=[text_output, audio_output],
|
506 |
)
|
@@ -521,7 +462,7 @@ if __name__ == "__main__":
|
|
521 |
load_button.click(
|
522 |
model_holder.load_model,
|
523 |
inputs=[model_name, model_path],
|
524 |
-
outputs=[style, tts_button],
|
525 |
)
|
526 |
|
527 |
style_mode.change(
|
|
|
15 |
from tools.log import logger
|
16 |
|
17 |
is_hf_spaces = os.getenv("SYSTEM") == "spaces"
|
18 |
+
limit = 150
|
19 |
|
20 |
|
21 |
class Model:
|
|
|
186 |
device=self.device,
|
187 |
)
|
188 |
styles = list(self.current_model.style2id.keys())
|
189 |
+
speakers = list(self.current_model.spk2id.keys())
|
190 |
return (
|
191 |
gr.Dropdown(choices=styles, value=styles[0]),
|
192 |
+
gr.update(interactive=True, value="Synthesize"),
|
193 |
+
gr.Dropdown(choices=speakers, value=speakers[0]),
|
194 |
)
|
195 |
|
196 |
def update_model_files_dropdown(self, model_name):
|
|
|
209 |
|
210 |
|
211 |
def tts_fn(
|
212 |
+
model_name,
|
213 |
+
model_path,
|
214 |
text,
|
215 |
language,
|
216 |
reference_audio_path,
|
|
|
225 |
use_style_text,
|
226 |
emotion,
|
227 |
emotion_weight,
|
228 |
+
speaker,
|
229 |
):
|
230 |
+
if not text:
|
231 |
+
return "Please enter some text.", (44100, None)
|
232 |
+
#logger.info(f"Start TTS with {language}:\n{text}")
|
233 |
+
#logger.info(f"Model: {model_holder.current_model.model_path}")
|
234 |
+
#logger.info(f"SDP: {sdp_ratio}, Noise: {noise_scale}, Noise_W: {noise_scale_w}, Length: {length_scale}")
|
235 |
+
#logger.info(f"Style text enabled: {use_style_text}, Style text: {style_text}, Style weight: {style_weight}")
|
236 |
+
#logger.info(f"Style: {emotion}, Style weight: {emotion_weight}")
|
237 |
|
238 |
if is_hf_spaces and len(text) > limit:
|
239 |
+
return f"Too long! There is a character limit of {limit} characters.", (44100, None)
|
|
|
|
|
240 |
|
241 |
assert model_holder.current_model is not None
|
242 |
|
243 |
+
if(model_holder.current_model.model_path != model_path):
|
244 |
+
model_holder.load_model(model_name, model_path)
|
245 |
+
|
246 |
+
speaker_id = model_holder.current_model.spk2id[speaker]
|
247 |
+
|
248 |
start_time = datetime.datetime.now()
|
249 |
|
250 |
sr, audio = model_holder.current_model.infer(
|
251 |
text=text,
|
252 |
language=language,
|
253 |
+
sid=speaker_id,
|
254 |
reference_audio_path=reference_audio_path,
|
255 |
sdp_ratio=sdp_ratio,
|
256 |
noise=noise_scale,
|
|
|
267 |
|
268 |
end_time = datetime.datetime.now()
|
269 |
duration = (end_time - start_time).total_seconds()
|
270 |
+
logger.info(f"Successful inference, took {duration}s | {speaker} | {sdp_ratio}/{noise_scale}/{noise_scale_w}/{length_scale} | {text}")
|
271 |
return f"Success, time: {duration} seconds.", (sr, audio)
|
272 |
|
273 |
|
274 |
+
initial_text = "Hi there! How are you doing?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
initial_md = """
|
277 |
+
# LemonfootSBV2 😊🍋
|
278 |
+
### Space by [Kit Lemonfoot](https://huggingface.co/Kit-Lemonfoot) / [Noel Shirogane's High Flying Birds](https://www.youtube.com/channel/UCG9A0OJsJTluLOXfMZjJ9xA)
|
279 |
+
### Based on code originally by [fishaudio](https://github.com/fishaudio) and [litagin02](https://github.com/litagin02)
|
280 |
+
This HuggingFace space is designed to demonstrate multiple experimental [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2) models made by Kit Lemonfoot.
|
281 |
|
282 |
+
Do no evil.
|
283 |
|
|
|
284 |
"""
|
285 |
|
286 |
style_md = """
|
287 |
+
- You can control things like voice tone, emotion, and reading style through presets or through voice files.
|
288 |
+
- Neutral acts as an average across all speakers. Styling options act as an override to Neutral.
|
289 |
+
- Setting the intensity too high will likely break the output.
|
290 |
+
- The required intensity will depend based on the speaker and the desired style.
|
291 |
+
- If you're using preexisting audio data to style the output, try to use a voice that is similar to the desired speaker.
|
292 |
"""
|
293 |
|
294 |
|
295 |
def make_interactive():
|
296 |
+
return gr.update(interactive=True, value="Synthesize")
|
297 |
|
298 |
|
299 |
def make_non_interactive():
|
300 |
+
return gr.update(interactive=False, value="Synthesize (Please load a model!)")
|
301 |
|
302 |
|
303 |
def gr_util(item):
|
304 |
+
if item == "Select from presets":
|
305 |
return (gr.update(visible=True), gr.Audio(visible=False, value=None))
|
306 |
else:
|
307 |
return (gr.update(visible=False), gr.update(visible=True))
|
|
|
323 |
|
324 |
model_holder = ModelHolder(model_dir, device)
|
325 |
|
326 |
+
languages = ["EN", "JP", "ZH"]
|
|
|
327 |
|
328 |
model_names = model_holder.model_names
|
329 |
if len(model_names) == 0:
|
330 |
+
logger.error(f"No models found. Please place the model in {model_dir}.")
|
331 |
sys.exit(1)
|
332 |
initial_id = 0
|
333 |
initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
|
334 |
|
335 |
+
with gr.Blocks(theme=gr.themes.Base(primary_hue="emerald", secondary_hue="green"), title="LemonfootSBV2") as app:
|
336 |
gr.Markdown(initial_md)
|
337 |
with gr.Row():
|
338 |
with gr.Column():
|
339 |
with gr.Row():
|
340 |
with gr.Column(scale=3):
|
341 |
model_name = gr.Dropdown(
|
342 |
+
label="Available Models",
|
343 |
choices=model_names,
|
344 |
value=model_names[initial_id],
|
345 |
)
|
346 |
model_path = gr.Dropdown(
|
347 |
+
label="Model File",
|
348 |
choices=initial_pth_files,
|
349 |
value=initial_pth_files[0],
|
350 |
)
|
351 |
+
refresh_button = gr.Button("Refresh", scale=1, visible=not is_hf_spaces)
|
352 |
+
load_button = gr.Button("Load", scale=1, variant="primary")
|
353 |
+
text_input = gr.TextArea(label="Text", value=initial_text)
|
354 |
|
355 |
+
line_split = gr.Checkbox(label="Divide text seperately by line breaks", value=True)
|
356 |
split_interval = gr.Slider(
|
357 |
minimum=0.0,
|
358 |
maximum=2,
|
359 |
value=0.5,
|
360 |
step=0.1,
|
361 |
+
label="Length of division seperation time (in seconds)",
|
362 |
)
|
363 |
+
language = gr.Dropdown(choices=languages, value="EN", label="Language")
|
364 |
+
speaker = gr.Dropdown(label="Speaker")
|
365 |
+
with gr.Accordion(label="Advanced Settings", open=False):
|
366 |
sdp_ratio = gr.Slider(
|
367 |
minimum=0, maximum=1, value=0.2, step=0.1, label="SDP Ratio"
|
368 |
)
|
|
|
375 |
length_scale = gr.Slider(
|
376 |
minimum=0.1, maximum=2, value=1.0, step=0.1, label="Length"
|
377 |
)
|
378 |
+
use_style_text = gr.Checkbox(label="Use stylization text", value=False)
|
379 |
style_text = gr.Textbox(
|
380 |
label="Style text",
|
381 |
+
placeholder="Why are you ignoring me? You're unforgivable and disgusting! I hope you die.",
|
382 |
+
info="The voice will be similar in tone and emotion to the text, however inflection and tempo may be worse as a result.",
|
383 |
visible=False,
|
384 |
)
|
385 |
style_text_weight = gr.Slider(
|
|
|
387 |
maximum=1,
|
388 |
value=0.7,
|
389 |
step=0.1,
|
390 |
+
label="Text stylization strength",
|
391 |
visible=False,
|
392 |
)
|
393 |
use_style_text.change(
|
|
|
396 |
outputs=[style_text, style_text_weight],
|
397 |
)
|
398 |
with gr.Column():
|
399 |
+
with gr.Accordion("Styling Guide", open=False):
|
400 |
gr.Markdown(style_md)
|
401 |
style_mode = gr.Radio(
|
402 |
+
["Select from presets", "Use an audio file"],
|
403 |
+
label="Style Specification",
|
404 |
+
value="Select from presets",
|
405 |
)
|
406 |
style = gr.Dropdown(
|
407 |
+
label="Current style (Neutral is an average style)",
|
408 |
+
choices=["Please load a model first!"],
|
409 |
+
value="Please load a model first!",
|
410 |
)
|
411 |
style_weight = gr.Slider(
|
412 |
minimum=0,
|
413 |
maximum=50,
|
414 |
value=5,
|
415 |
step=0.1,
|
416 |
+
label="Style strength",
|
417 |
)
|
418 |
+
ref_audio_path = gr.Audio(label="Reference Audio", type="filepath", visible=False)
|
419 |
tts_button = gr.Button(
|
420 |
+
"Synthesize (Please load a model!)", variant="primary", interactive=False
|
421 |
)
|
422 |
+
text_output = gr.Textbox(label="Info")
|
423 |
+
audio_output = gr.Audio(label="Result")
|
|
|
|
|
424 |
|
425 |
tts_button.click(
|
426 |
tts_fn,
|
427 |
inputs=[
|
428 |
+
model_name,
|
429 |
+
model_path,
|
430 |
text_input,
|
431 |
language,
|
432 |
ref_audio_path,
|
|
|
441 |
use_style_text,
|
442 |
style,
|
443 |
style_weight,
|
444 |
+
speaker,
|
445 |
],
|
446 |
outputs=[text_output, audio_output],
|
447 |
)
|
|
|
462 |
load_button.click(
|
463 |
model_holder.load_model,
|
464 |
inputs=[model_name, model_path],
|
465 |
+
outputs=[style, tts_button, speaker],
|
466 |
)
|
467 |
|
468 |
style_mode.change(
|