zhzluke96 commited on
Commit
bf13828
·
1 Parent(s): 00c033c
.env.webui CHANGED
@@ -18,3 +18,5 @@ MAX_BATCH_SIZE=12
18
 
19
  V_GIT_TAG=🤗hf
20
  V_GIT_COMMIT=main
 
 
 
18
 
19
  V_GIT_TAG=🤗hf
20
  V_GIT_COMMIT=main
21
+
22
+ LANGUAGE=zh-CN
language/en.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "TTS": "TTS",
3
+ "🎛️Sampling": "🎛️Sampling Configuration",
4
+ "Temperature": "Temperature",
5
+ "Top P": "Top P",
6
+ "Top K": "Top K",
7
+ "Batch Size": "Batch Size",
8
+ "🎭Style": "🎭Style",
9
+ "🗣️Speaker": "🗣️Voice",
10
+ "Pick": "Select",
11
+ "🎲": "🎲",
12
+ "Upload": "Upload",
13
+ "Speaker (Upload)": "Voice (Upload)",
14
+ "📝Speaker info": "📝Speaker Information",
15
+ "empty": "empty",
16
+ "💃Inference Seed": "💃Inference Seed",
17
+ "Inference Seed": "Inference Seed",
18
+ "Use Decoder": "Use Decoder",
19
+ "📝Text Input": "📝Text Input",
20
+ "[laugh]": "[laugh]",
21
+ "[uv_break]": "[uv_break]",
22
+ "[v_break]": "[v_break]",
23
+ "[lbreak]": "[lbreak]",
24
+ "🎄Examples": "🎄Examples",
25
+ "🎨Output": "🎨Output",
26
+ "Generated Audio": "Generated Audio",
27
+ "🎶Refiner": "🎶Refiner",
28
+ "✍️Refine Text": "✍️Refine Text",
29
+ "🔧Prompt engineering": "🔧Prompt Engineering",
30
+ "prompt_audio": "prompt_audio",
31
+ "🔊Generate": "🔊Generate",
32
+ "Disable Normalize": "Disable Normalize",
33
+ "💪🏼Enhance": "💪🏼Enhance",
34
+ "Enable Enhance": "Enable Enhance",
35
+ "Enable De-noise": "Enable De-noise",
36
+ "🔊Generate Audio": "🔊Generate Audio",
37
+ "SSML": "SSML",
38
+ "Editor": "Editor",
39
+ "📝SSML Input": "📝SSML Input",
40
+ "🔊Synthesize SSML": "🔊Synthesize SSML",
41
+ "🎛️Parameters": "🎛️Parameters",
42
+ "Spilter": "Splitter",
43
+ "🗣️Seed": "🗣️Seed",
44
+ "📩Send to SSML": "📩Send to Editor",
45
+ "📝Long Text Input": "📝Long Text Input",
46
+ "🔪Split Text": "🔪Split Text",
47
+ "Podcast": "Podcast",
48
+ "Add": "Add",
49
+ "Undo": "Undo",
50
+ "Clear": "Clear",
51
+ "📔Script": "📔Script",
52
+ "Speaker": "Voice",
53
+ "Creator": "Creator",
54
+ "ℹ️Speaker info": "ℹ️Speaker Information",
55
+ "Seed": "Seed",
56
+ "Random Speaker": "Random Voice",
57
+ "🔊Generate speaker.pt": "🔊Generate speaker.pt",
58
+ "Save .pt file": "Save .pt file",
59
+ "Save to File": "Save to File",
60
+ "🎤Test voice": "🎤Test Voice",
61
+ "Test Voice": "Test Voice",
62
+ "Current Seed": "Current Seed",
63
+ "Output Audio": "Output Audio",
64
+ "Merger": "Merger",
65
+ "🔄": "🔄",
66
+ "Weight A": "Weight A",
67
+ "Weight B": "Weight B",
68
+ "Weight C": "Weight C",
69
+ "Weight D": "Weight D",
70
+ "🗃️Save to file": "🗃️Save to File",
71
+ "Save Speaker": "Save Voice",
72
+ "Merged Speaker": "Merged Voice",
73
+ "Inpainting": "Inpainting",
74
+ "🚧 Under construction": "🚧 Under Construction",
75
+ "ASR": "ASR",
76
+ "System": "System",
77
+ "info": "info",
78
+ "Enable Experimental Features": "Enable Experimental Features",
79
+ "README": "README",
80
+ "readme": "readme",
81
+ "changelog": "changelog",
82
+ "TTS_STYLE_GUIDE": [
83
+ "Suffix _p indicates prompt, which has stronger effect but may impact quality."
84
+ ],
85
+ "SSML_SPLITER_GUIDE": [
86
+ "- Character limit details can be found in README. Excess will be truncated.",
87
+ "- If the last character is swallowed and not read, try adding `[lbreak]` at the end.",
88
+ "- If the text is all in English, it is recommended to disable text standardization."
89
+ ],
90
+ "SPEAKER_CREATOR_GUIDE": [
91
+ "### Speaker Creator",
92
+ "Use this panel to quickly roll random voices (gacha-style) and generate speaker.pt files.",
93
+ "",
94
+ "1. Generate Speaker: Enter a seed, name, gender, and description. Click the \"Generate speaker.pt\" button, and the generated speaker configuration will be saved as a .pt file.",
95
+ "2. Test Speaker Voice: Enter a test text. Click the \"Test Voice\" button, and the generated audio will play in the \"Output Audio\" section.",
96
+ "3. Randomly Generate Speaker: Click the \"Random Speaker\" button to randomly generate a seed and name, which can then be further edited and tested."
97
+ ],
98
+
99
+ "SSML_TEXT_GUIDE": [
100
+ "- Maximum {webui_config.ssml_max:,} characters. Excess will be truncated.",
101
+ "- For more information about SSML, refer to this [documentation](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
102
+ ],
103
+
104
+ "TTS_TEXT_GUIDE": [
105
+ "- Character limit. Excess will be truncated.",
106
+ "- If the last character is swallowed and not read, try adding `[lbreak]` at the end.",
107
+ "- If the input text is all in English, it is recommended to disable text standardization."
108
+ ],
109
+
110
+ "SPEAKER_MERGER_GUIDE": [
111
+ "### Speaker Merger",
112
+ "In this panel, you can select multiple speakers and specify their weights to synthesize a new voice and test it. Below are detailed explanations of each feature:",
113
+ "",
114
+ "1. Select Speakers: You can choose up to four speakers (A, B, C, D) from the dropdown menu, each with a corresponding weight slider ranging from 0 to 10. The weight determines the influence of each speaker on the synthesized voice.",
115
+ "2. Synthesize Voice: After selecting the speakers and setting the weights, you can input a test text in the \"Test Text\" box and click the \"Test Voice\" button to generate and play the synthesized voice.",
116
+ "3. Save Speaker: You can also fill in a new speaker's name, gender, and description in the \"Speaker Information\" section on the right, and click \"Save Speaker\" to save the synthesized voice. The saved speaker file will be displayed in the \"Merged Speaker\" section for download."
117
+ ]
118
+ }
language/zh-CN.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "TTS": "TTS",
3
+ "🎛️Sampling": "🎛️采样配置",
4
+ "Temperature": "温度",
5
+ "Top P": "Top P",
6
+ "Top K": "Top K",
7
+ "Batch Size": "批量大小",
8
+ "🎭Style": "🎭风格",
9
+ "🗣️Speaker": "🗣️音色",
10
+ "Pick": "选择",
11
+ "🎲": "🎲",
12
+ "Upload": "上传",
13
+ "Speaker (Upload)": "音色 (上传)",
14
+ "📝Speaker info": "📝音色信息",
15
+ "empty": "empty",
16
+ "💃Inference Seed": "💃推理种子",
17
+ "Inference Seed": "推理种子",
18
+ "Use Decoder": "使用解码器",
19
+ "📝Text Input": "📝文本输入",
20
+ "[laugh]": "[laugh]",
21
+ "[uv_break]": "[uv_break]",
22
+ "[v_break]": "[v_break]",
23
+ "[lbreak]": "[lbreak]",
24
+ "🎄Examples": "🎄示例",
25
+ "🎨Output": "🎨输出",
26
+ "Generated Audio": "生成的音频",
27
+ "🎶Refiner": "🎶优化器",
28
+ "✍️Refine Text": "✍️优化文本",
29
+ "🔧Prompt engineering": "🔧提示工程",
30
+ "prompt_audio": "提示音频",
31
+ "🔊Generate": "🔊生成",
32
+ "Disable Normalize": "禁用归一化",
33
+ "💪🏼Enhance": "💪🏼增强",
34
+ "Enable Enhance": "启用增强",
35
+ "Enable De-noise": "启用降噪",
36
+ "🔊Generate Audio": "🔊生成音频",
37
+ "SSML": "SSML",
38
+ "Editor": "编辑器",
39
+ "📝SSML Input": "📝SSML输入",
40
+ "🔊Synthesize SSML": "🔊合成SSML",
41
+ "🎛️Parameters": "🎛️参数",
42
+ "Spilter": "分割器",
43
+ "🗣️Seed": "🗣️种子",
44
+ "📩Send to SSML": "📩发送到Editor",
45
+ "📝Long Text Input": "📝长文本输入",
46
+ "🔪Split Text": "🔪分割文本",
47
+ "Podcast": "播客",
48
+ "Add": "添加",
49
+ "Undo": "撤销",
50
+ "Clear": "清除",
51
+ "📔Script": "📔脚本",
52
+ "Speaker": "音色",
53
+ "Creator": "创建者",
54
+ "ℹ️Speaker info": "ℹ️音色信息",
55
+ "Seed": "种子",
56
+ "Random Speaker": "随机音色",
57
+ "🔊Generate speaker.pt": "🔊生成 speaker.pt",
58
+ "Save .pt file": "保存.pt文件",
59
+ "Save to File": "保存到文件",
60
+ "🎤Test voice": "🎤测试语音",
61
+ "Test Voice": "测试语音",
62
+ "Current Seed": "当前种子",
63
+ "Output Audio": "输出音频",
64
+ "Merger": "融合",
65
+ "🔄": "🔄",
66
+ "Weight A": "权重A",
67
+ "Weight B": "权重B",
68
+ "Weight C": "权重C",
69
+ "Weight D": "权重D",
70
+ "🗃️Save to file": "🗃️保存到文件",
71
+ "Save Speaker": "保存音色",
72
+ "Merged Speaker": "融合的音色",
73
+ "Inpainting": "修复",
74
+ "🚧 Under construction": "🚧 施工中",
75
+ "ASR": "ASR",
76
+ "System": "系统",
77
+ "info": "信息",
78
+ "Enable Experimental Features": "启用实验性功能",
79
+ "README": "README",
80
+ "readme": "readme",
81
+ "changelog": "changelog",
82
+ "TTS_STYLE_GUIDE": ["后缀为 _p 表示带prompt,效果更强但是影响质量"],
83
+ "SSML_SPLITER_GUIDE": [
84
+ "- 字数限制详见README,超过部分将截断",
85
+ "- 如果尾字吞字不读,可以试试结尾加上 `[lbreak]`",
86
+ "- 如果文本为全英文,建议关闭文本标准化"
87
+ ],
88
+ "SPEAKER_CREATOR_GUIDE": [
89
+ "### Speaker Creator",
90
+ "使用本面板快捷抽卡生成 speaker.pt 文件。",
91
+ "",
92
+ "1. 生成说话人:输入种子、名字、性别和描述。点击 \"Generate speaker.pt\" 按钮,生成的说话人配置会保存为.pt文件。",
93
+ "2. 测试说话人声音:输入测试文本。点击 \"Test Voice\" 按钮,生成的音频会在 \"Output Audio\" 中播放。",
94
+ "3. 随机生成说话人:点击 \"Random Speaker\" 按钮,随机生成一个种子和名字,可以进一步编辑其他信息并测试。"
95
+ ],
96
+
97
+ "SSML_TEXT_GUIDE": [
98
+ "- 最长{webui_config.ssml_max:,}字符,超过会被截断",
99
+ "- 关于SSML可以看这个 [文档](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
100
+ ],
101
+
102
+ "TTS_TEXT_GUIDE": [
103
+ "- 字数限制,超过部分将截断",
104
+ "- 如果尾字吞字不读,可以试试结尾加上 `[lbreak]`",
105
+ "- 如果输入文本为全英文,建议关闭文本标准化"
106
+ ],
107
+
108
+ "SPEAKER_MERGER_GUIDE": [
109
+ "### Speaker Merger",
110
+ "在本面板中,您可以选择多个说话人并指定他们的权重,合成新的语音并进行测试。以下是各个功能的详细说明:",
111
+ "",
112
+ "1. 选择说话人: 您可以从下拉菜单中选择最多四个说话人(A、B、C、D),每个说话人都有一个对应的权重滑块,范围从0到10。权重决定了每个说话人在合成语音中的影响程度。",
113
+ "2. 合成语音: 在选择好说话人和设置好权重后,您可以在“Test Text”框中输入要测试的文本,然后点击“测试语音”按钮来生成并播放合成的语音。",
114
+ "3. 保存说话人: 您还可以在右侧的“说话人信息”部分填写新的说话人的名称、性别和描述,并点击“Save Speaker”按钮来保存合成的说话人。保存后的说话人文件将显示在“Merged Speaker”栏中,供下载使用。"
115
+ ]
116
+ }
modules/ChatTTS/ChatTTS/core.py CHANGED
@@ -1,13 +1,11 @@
1
  import os
2
  import logging
3
- from functools import partial
4
  from omegaconf import OmegaConf
5
 
6
  import torch
7
  from vocos import Vocos
8
  from .model.dvae import DVAE
9
  from .model.gpt import GPT_warpper
10
- from .utils.gpu_utils import select_device
11
  from .utils.infer_utils import (
12
  count_invalid_characters,
13
  detect_language,
@@ -107,9 +105,7 @@ class Chat:
107
  dtype_gpt: torch.dtype = None,
108
  dtype_decoder: torch.dtype = None,
109
  ):
110
- if not device:
111
- device = select_device(4096)
112
- self.logger.log(logging.INFO, f"use {device}")
113
 
114
  dtype_vocos = dtype_vocos or dtype
115
  dtype_dvae = dtype_dvae or dtype
@@ -179,8 +175,6 @@ class Chat:
179
  params_refine_text={},
180
  params_infer_code={"prompt": "[speed_5]"},
181
  use_decoder=True,
182
- do_text_normalization=True,
183
- lang=None,
184
  ):
185
 
186
  assert self.check_model(use_decoder=use_decoder)
@@ -188,14 +182,6 @@ class Chat:
188
  if not isinstance(text, list):
189
  text = [text]
190
 
191
- if do_text_normalization:
192
- for i, t in enumerate(text):
193
- _lang = detect_language(t) if lang is None else lang
194
- self.init_normalizer(_lang)
195
- text[i] = self.normalizer[_lang](t)
196
- if _lang == "zh":
197
- text[i] = apply_half2full_map(text[i])
198
-
199
  for i, t in enumerate(text):
200
  reserved_tokens = self.pretrain_models[
201
  "tokenizer"
@@ -251,8 +237,6 @@ class Chat:
251
  self,
252
  text,
253
  params_refine_text={},
254
- do_text_normalization=True,
255
- lang=None,
256
  ) -> str:
257
 
258
  # assert self.check_model(use_decoder=False)
@@ -260,14 +244,6 @@ class Chat:
260
  if not isinstance(text, list):
261
  text = [text]
262
 
263
- if do_text_normalization:
264
- for i, t in enumerate(text):
265
- _lang = detect_language(t) if lang is None else lang
266
- self.init_normalizer(_lang)
267
- text[i] = self.normalizer[_lang](t)
268
- if _lang == "zh":
269
- text[i] = apply_half2full_map(text[i])
270
-
271
  for i, t in enumerate(text):
272
  reserved_tokens = self.pretrain_models[
273
  "tokenizer"
@@ -305,7 +281,10 @@ class Chat:
305
  prompt = [params_infer_code.get("prompt", "") + i for i in prompt]
306
  params_infer_code.pop("prompt", "")
307
  result = infer_code(
308
- self.pretrain_models, prompt, **params_infer_code, return_hidden=use_decoder
 
 
 
309
  )
310
 
311
  if use_decoder:
@@ -326,37 +305,7 @@ class Chat:
326
  def sample_random_speaker(
327
  self,
328
  ) -> torch.Tensor:
329
-
330
  dim = self.pretrain_models["gpt"].gpt.layers[0].mlp.gate_proj.in_features
331
  std, mean = self.pretrain_models["spk_stat"].chunk(2)
332
  return torch.randn(dim, device=std.device) * std + mean
333
-
334
- def init_normalizer(self, lang):
335
-
336
- if lang not in self.normalizer:
337
- if lang == "zh":
338
- try:
339
- from tn.chinese.normalizer import Normalizer
340
- except:
341
- self.logger.log(
342
- logging.WARNING,
343
- f"Package WeTextProcessing not found! \
344
- Run: conda install -c conda-forge pynini=2.1.5 && pip install WeTextProcessing",
345
- )
346
- self.normalizer[lang] = Normalizer().normalize
347
- else:
348
- try:
349
- from nemo_text_processing.text_normalization.normalize import (
350
- Normalizer,
351
- )
352
- except:
353
- self.logger.log(
354
- logging.WARNING,
355
- f"Package nemo_text_processing not found! \
356
- Run: conda install -c conda-forge pynini=2.1.5 && pip install nemo_text_processing",
357
- )
358
- self.normalizer[lang] = partial(
359
- Normalizer(input_case="cased", lang=lang).normalize,
360
- verbose=False,
361
- punct_post_process=True,
362
- )
 
1
  import os
2
  import logging
 
3
  from omegaconf import OmegaConf
4
 
5
  import torch
6
  from vocos import Vocos
7
  from .model.dvae import DVAE
8
  from .model.gpt import GPT_warpper
 
9
  from .utils.infer_utils import (
10
  count_invalid_characters,
11
  detect_language,
 
105
  dtype_gpt: torch.dtype = None,
106
  dtype_decoder: torch.dtype = None,
107
  ):
108
+ assert device is not None, "device should not be None"
 
 
109
 
110
  dtype_vocos = dtype_vocos or dtype
111
  dtype_dvae = dtype_dvae or dtype
 
175
  params_refine_text={},
176
  params_infer_code={"prompt": "[speed_5]"},
177
  use_decoder=True,
 
 
178
  ):
179
 
180
  assert self.check_model(use_decoder=use_decoder)
 
182
  if not isinstance(text, list):
183
  text = [text]
184
 
 
 
 
 
 
 
 
 
185
  for i, t in enumerate(text):
186
  reserved_tokens = self.pretrain_models[
187
  "tokenizer"
 
237
  self,
238
  text,
239
  params_refine_text={},
 
 
240
  ) -> str:
241
 
242
  # assert self.check_model(use_decoder=False)
 
244
  if not isinstance(text, list):
245
  text = [text]
246
 
 
 
 
 
 
 
 
 
247
  for i, t in enumerate(text):
248
  reserved_tokens = self.pretrain_models[
249
  "tokenizer"
 
281
  prompt = [params_infer_code.get("prompt", "") + i for i in prompt]
282
  params_infer_code.pop("prompt", "")
283
  result = infer_code(
284
+ self.pretrain_models,
285
+ prompt,
286
+ return_hidden=use_decoder,
287
+ **params_infer_code,
288
  )
289
 
290
  if use_decoder:
 
305
  def sample_random_speaker(
306
  self,
307
  ) -> torch.Tensor:
308
+ assert self.pretrain_models["gpt"] is not None, "gpt model not loaded"
309
  dim = self.pretrain_models["gpt"].gpt.layers[0].mlp.gate_proj.in_features
310
  std, mean = self.pretrain_models["spk_stat"].chunk(2)
311
  return torch.randn(dim, device=std.device) * std + mean
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/webui/app.py CHANGED
@@ -5,9 +5,10 @@ import torch
5
  import gradio as gr
6
 
7
  from modules import config
8
- from modules.webui import webui_config
9
 
10
  from modules.webui.changelog_tab import create_changelog_tab
 
11
  from modules.webui.ssml.podcast_tab import create_ssml_podcast_tab
12
  from modules.webui.system_tab import create_system_tab
13
  from modules.webui.tts_tab import create_tts_interface
@@ -27,6 +28,11 @@ def webui_init():
27
  torch._dynamo.config.suppress_errors = True
28
  torch.set_float32_matmul_precision("high")
29
 
 
 
 
 
 
30
  logger.info("WebUI module initialized")
31
 
32
 
@@ -44,11 +50,13 @@ def create_app_footer():
44
  f"""
45
  🍦 [ChatTTS-Forge](https://github.com/lenML/ChatTTS-Forge)
46
  version: [{git_tag}](https://github.com/lenML/ChatTTS-Forge/commit/{git_commit}) | branch: `{git_branch}` | python: `{python_version}` | torch: `{torch_version}`
47
- """
 
48
  )
49
 
50
 
51
  def create_interface():
 
52
 
53
  js_func = """
54
  function refresh() {
@@ -117,4 +125,8 @@ def create_interface():
117
  create_changelog_tab()
118
 
119
  create_app_footer()
 
 
 
 
120
  return demo
 
5
  import gradio as gr
6
 
7
  from modules import config
8
+ from modules.webui import gradio_extensions, localization, webui_config, gradio_hijack
9
 
10
  from modules.webui.changelog_tab import create_changelog_tab
11
+ from modules.webui.localization_runtime import ENLocalizationVars, ZHLocalizationVars
12
  from modules.webui.ssml.podcast_tab import create_ssml_podcast_tab
13
  from modules.webui.system_tab import create_system_tab
14
  from modules.webui.tts_tab import create_tts_interface
 
28
  torch._dynamo.config.suppress_errors = True
29
  torch.set_float32_matmul_precision("high")
30
 
31
+ if config.runtime_env_vars.language == "en":
32
+ webui_config.localization = ENLocalizationVars()
33
+ else:
34
+ webui_config.localization = ZHLocalizationVars()
35
+
36
  logger.info("WebUI module initialized")
37
 
38
 
 
50
  f"""
51
  🍦 [ChatTTS-Forge](https://github.com/lenML/ChatTTS-Forge)
52
  version: [{git_tag}](https://github.com/lenML/ChatTTS-Forge/commit/{git_commit}) | branch: `{git_branch}` | python: `{python_version}` | torch: `{torch_version}`
53
+ """,
54
+ elem_classes=["no-translate"],
55
  )
56
 
57
 
58
  def create_interface():
59
+ gradio_extensions.reload_javascript()
60
 
61
  js_func = """
62
  function refresh() {
 
125
  create_changelog_tab()
126
 
127
  create_app_footer()
128
+
129
+ # Dump the English config for the localization
130
+ # ** JUST for developer
131
+ # localization.dump_english_config(gradio_hijack.all_components)
132
  return demo
modules/webui/changelog_tab.py CHANGED
@@ -10,4 +10,4 @@ def read_local_changelog():
10
 
11
  def create_changelog_tab():
12
  changelog_content = read_local_changelog()
13
- gr.Markdown(changelog_content)
 
10
 
11
  def create_changelog_tab():
12
  changelog_content = read_local_changelog()
13
+ gr.Markdown(changelog_content, elem_classes=["no-translate"])
modules/webui/css/style.css ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/style.css */
2
+
3
+ .loader-container {
4
+ display: flex; /* Use flex to align items horizontally */
5
+ align-items: center; /* Center items vertically within the container */
6
+ white-space: nowrap; /* Prevent line breaks within the container */
7
+ }
8
+
9
+ .loader {
10
+ border: 8px solid #f3f3f3; /* Light grey */
11
+ border-top: 8px solid #3498db; /* Blue */
12
+ border-radius: 50%;
13
+ width: 30px;
14
+ height: 30px;
15
+ animation: spin 2s linear infinite;
16
+ }
17
+
18
+ @keyframes spin {
19
+ 0% { transform: rotate(0deg); }
20
+ 100% { transform: rotate(360deg); }
21
+ }
22
+
23
+ /* Style the progress bar */
24
+ progress {
25
+ appearance: none; /* Remove default styling */
26
+ height: 20px; /* Set the height of the progress bar */
27
+ border-radius: 5px; /* Round the corners of the progress bar */
28
+ background-color: #f3f3f3; /* Light grey background */
29
+ width: 100%;
30
+ vertical-align: middle !important;
31
+ }
32
+
33
+ /* Style the progress bar container */
34
+ .progress-container {
35
+ margin-left: 20px;
36
+ margin-right: 20px;
37
+ flex-grow: 1; /* Allow the progress container to take up remaining space */
38
+ }
39
+
40
+ /* Set the color of the progress bar fill */
41
+ progress::-webkit-progress-value {
42
+ background-color: #3498db; /* Blue color for the fill */
43
+ }
44
+
45
+ progress::-moz-progress-bar {
46
+ background-color: #3498db; /* Blue color for the fill in Firefox */
47
+ }
48
+
49
+ /* Style the text on the progress bar */
50
+ progress::after {
51
+ content: attr(value '%'); /* Display the progress value followed by '%' */
52
+ position: absolute;
53
+ top: 50%;
54
+ left: 50%;
55
+ transform: translate(-50%, -50%);
56
+ color: white; /* Set text color */
57
+ font-size: 14px; /* Set font size */
58
+ }
59
+
60
+ /* Style other texts */
61
+ .loader-container > span {
62
+ margin-left: 5px; /* Add spacing between the progress bar and the text */
63
+ }
64
+
65
+ .progress-bar > .generating {
66
+ display: none !important;
67
+ }
68
+
69
+ .progress-bar{
70
+ height: 30px !important;
71
+ }
72
+
73
+ .progress-bar span {
74
+ text-align: right;
75
+ width: 215px;
76
+ }
77
+ div:has(> #positive_prompt) {
78
+ border: none;
79
+ }
80
+
81
+ #positive_prompt {
82
+ padding: 1px;
83
+ background: var(--background-fill-primary);
84
+ }
85
+
86
+ .type_row {
87
+ height: 84px !important;
88
+ }
89
+
90
+ .type_row_half {
91
+ height: 34px !important;
92
+ }
93
+
94
+ .refresh_button {
95
+ border: none !important;
96
+ background: none !important;
97
+ font-size: none !important;
98
+ box-shadow: none !important;
99
+ }
100
+
101
+ .advanced_check_row {
102
+ width: 250px !important;
103
+ }
104
+
105
+ .min_check {
106
+ min-width: min(1px, 100%) !important;
107
+ }
108
+
109
+ .resizable_area {
110
+ resize: vertical;
111
+ overflow: auto !important;
112
+ }
113
+
114
+ .performance_selection label {
115
+ width: 140px !important;
116
+ }
117
+
118
+ .aspect_ratios label {
119
+ flex: calc(50% - 5px) !important;
120
+ }
121
+
122
+ .aspect_ratios label span {
123
+ white-space: nowrap !important;
124
+ }
125
+
126
+ .aspect_ratios label input {
127
+ margin-left: -5px !important;
128
+ }
129
+
130
+ .lora_enable label {
131
+ height: 100%;
132
+ }
133
+
134
+ .lora_enable label input {
135
+ margin: auto;
136
+ }
137
+
138
+ .lora_enable label span {
139
+ display: none;
140
+ }
141
+
142
+ @-moz-document url-prefix() {
143
+ .lora_weight input[type=number] {
144
+ width: 80px;
145
+ }
146
+ }
147
+
148
+ #context-menu{
149
+ z-index:9999;
150
+ position:absolute;
151
+ display:block;
152
+ padding:0px 0;
153
+ border:2px solid #a55000;
154
+ border-radius:8px;
155
+ box-shadow:1px 1px 2px #CE6400;
156
+ width: 200px;
157
+ }
158
+
159
+ .context-menu-items{
160
+ list-style: none;
161
+ margin: 0;
162
+ padding: 0;
163
+ }
164
+
165
+ .context-menu-items a{
166
+ display:block;
167
+ padding:5px;
168
+ cursor:pointer;
169
+ }
170
+
171
+ .context-menu-items a:hover{
172
+ background: #a55000;
173
+ }
174
+
175
+ .canvas-tooltip-info {
176
+ position: absolute;
177
+ top: 28px;
178
+ left: 2px;
179
+ cursor: help;
180
+ background-color: rgba(0, 0, 0, 0.3);
181
+ width: 20px;
182
+ height: 20px;
183
+ border-radius: 50%;
184
+ display: flex;
185
+ align-items: center;
186
+ justify-content: center;
187
+ flex-direction: column;
188
+
189
+ z-index: 100;
190
+ }
191
+
192
+ .canvas-tooltip-info::after {
193
+ content: '';
194
+ display: block;
195
+ width: 2px;
196
+ height: 7px;
197
+ background-color: white;
198
+ margin-top: 2px;
199
+ }
200
+
201
+ .canvas-tooltip-info::before {
202
+ content: '';
203
+ display: block;
204
+ width: 2px;
205
+ height: 2px;
206
+ background-color: white;
207
+ }
208
+
209
+ .canvas-tooltip-content {
210
+ display: none;
211
+ background-color: #f9f9f9;
212
+ color: #333;
213
+ border: 1px solid #ddd;
214
+ padding: 15px;
215
+ position: absolute;
216
+ top: 40px;
217
+ left: 10px;
218
+ width: 250px;
219
+ font-size: 16px;
220
+ opacity: 0;
221
+ border-radius: 8px;
222
+ box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
223
+
224
+ z-index: 100;
225
+ }
226
+
227
+ .canvas-tooltip:hover .canvas-tooltip-content {
228
+ display: block;
229
+ animation: fadeIn 0.5s;
230
+ opacity: 1;
231
+ }
232
+
233
+ @keyframes fadeIn {
234
+ from {opacity: 0;}
235
+ to {opacity: 1;}
236
+ }
237
+
238
+ .styler {
239
+ overflow:inherit !important;
240
+ }
241
+
242
+ .gradio-container{
243
+ overflow: visible;
244
+ }
245
+
246
+ /* fullpage image viewer */
247
+
248
+ #lightboxModal{
249
+ display: none;
250
+ position: fixed;
251
+ z-index: 1001;
252
+ left: 0;
253
+ top: 0;
254
+ width: 100%;
255
+ height: 100%;
256
+ overflow: auto;
257
+ background-color: rgba(20, 20, 20, 0.95);
258
+ user-select: none;
259
+ -webkit-user-select: none;
260
+ flex-direction: column;
261
+ }
262
+
263
+ .modalControls {
264
+ display: flex;
265
+ position: absolute;
266
+ right: 0px;
267
+ left: 0px;
268
+ gap: 1em;
269
+ padding: 1em;
270
+ background-color:rgba(0,0,0,0);
271
+ z-index: 1;
272
+ transition: 0.2s ease background-color;
273
+ }
274
+ .modalControls:hover {
275
+ background-color:rgba(0,0,0,0.9);
276
+ }
277
+ .modalClose {
278
+ margin-left: auto;
279
+ }
280
+ .modalControls span{
281
+ color: white;
282
+ text-shadow: 0px 0px 0.25em black;
283
+ font-size: 35px;
284
+ font-weight: bold;
285
+ cursor: pointer;
286
+ width: 1em;
287
+ }
288
+
289
+ .modalControls span:hover, .modalControls span:focus{
290
+ color: #999;
291
+ text-decoration: none;
292
+ }
293
+
294
+ #lightboxModal > img {
295
+ display: block;
296
+ margin: auto;
297
+ width: auto;
298
+ }
299
+
300
+ #lightboxModal > img.modalImageFullscreen{
301
+ object-fit: contain;
302
+ height: 100%;
303
+ width: 100%;
304
+ min-height: 0;
305
+ }
306
+
307
+ .modalPrev,
308
+ .modalNext {
309
+ cursor: pointer;
310
+ position: absolute;
311
+ top: 50%;
312
+ width: auto;
313
+ padding: 16px;
314
+ margin-top: -50px;
315
+ color: white;
316
+ font-weight: bold;
317
+ font-size: 20px;
318
+ transition: 0.6s ease;
319
+ border-radius: 0 3px 3px 0;
320
+ user-select: none;
321
+ -webkit-user-select: none;
322
+ }
323
+
324
+ .modalNext {
325
+ right: 0;
326
+ border-radius: 3px 0 0 3px;
327
+ }
328
+
329
+ .modalPrev:hover,
330
+ .modalNext:hover {
331
+ background-color: rgba(0, 0, 0, 0.8);
332
+ }
333
+
334
+ #imageARPreview {
335
+ position: absolute;
336
+ top: 0px;
337
+ left: 0px;
338
+ border: 2px solid red;
339
+ background: rgba(255, 0, 0, 0.3);
340
+ z-index: 900;
341
+ pointer-events: none;
342
+ display: none;
343
+ }
344
+
345
+ #stylePreviewOverlay {
346
+ opacity: 0;
347
+ pointer-events: none;
348
+ width: 128px;
349
+ height: 128px;
350
+ position: fixed;
351
+ top: 0px;
352
+ left: 0px;
353
+ border: solid 1px lightgrey;
354
+ transform: translate(-140px, 20px);
355
+ background-size: cover;
356
+ background-position: center;
357
+ background-color: rgba(0, 0, 0, 0.3);
358
+ border-radius: 5px;
359
+ z-index: 100;
360
+ transition: transform 0.1s ease, opacity 0.3s ease;
361
+ }
362
+
363
+ #stylePreviewOverlay.lower-half {
364
+ transform: translate(-140px, -140px);
365
+ }
366
+
367
+ /* scrollable box for style selections */
368
+ .contain .tabs {
369
+ height: 100%;
370
+ }
371
+
372
+ .contain .tabs .tabitem.style_selections_tab {
373
+ height: 100%;
374
+ }
375
+
376
+ .contain .tabs .tabitem.style_selections_tab > div:first-child {
377
+ height: 100%;
378
+ }
379
+
380
+ .contain .tabs .tabitem.style_selections_tab .style_selections {
381
+ min-height: 200px;
382
+ height: 100%;
383
+ }
384
+
385
+ .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] {
386
+ position: absolute; /* remove this to disable scrolling within the checkbox-group */
387
+ overflow: auto;
388
+ padding-right: 2px;
389
+ max-height: 100%;
390
+ }
391
+
392
+ .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] label {
393
+ /* max-width: calc(35% - 15px) !important; */ /* add this to enable 3 columns layout */
394
+ flex: calc(50% - 5px) !important;
395
+ }
396
+
397
+ .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] label span {
398
+ /* white-space:nowrap; */ /* add this to disable text wrapping (better choice for 3 columns layout) */
399
+ overflow: hidden;
400
+ text-overflow: ellipsis;
401
+ }
402
+
403
+ /* styles preview tooltip */
404
+ .preview-tooltip {
405
+ background-color: #fff8;
406
+ font-family: monospace;
407
+ text-align: center;
408
+ border-radius: 5px 5px 0px 0px;
409
+ display: none; /* remove this to enable tooltip in preview image */
410
+ }
411
+
412
+ #inpaint_canvas .canvas-tooltip-info {
413
+ top: 2px;
414
+ }
415
+
416
+ #inpaint_brush_color input[type=color]{
417
+ background: none;
418
+ }
modules/webui/gradio_extensions.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/modules/ui_gradio_extensions.py
2
+
3
+ import os
4
+ from pathlib import Path
5
+ import gradio as gr
6
+
7
+ from modules import config
8
+ from .localization import localization_js
9
+
10
+
11
+ GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
12
+
13
+ WEBUI_DIR_PATH = Path(os.path.dirname(os.path.realpath(__file__)))
14
+
15
+
16
def read_file(fp):
    """Return the text of a file located relative to the webui package directory.

    Args:
        fp: Path relative to ``WEBUI_DIR_PATH`` (for example ``"js/index.js"``).

    Returns:
        The file's contents decoded as UTF-8.
    """
    # Decode explicitly as UTF-8: the content read here is injected into a page
    # that is encoded with UTF-8, and the default encoding used by ``open`` when
    # none is given depends on the platform locale.
    with open(WEBUI_DIR_PATH / fp, "r", encoding="utf-8") as f:
        return f.read()
19
+
20
+
21
def javascript_html():
    """Build the string of ``<script>`` tags to be injected into the page head.

    The result contains, in order: an external script tag for the ``marked``
    library, an inline script with the output of ``localization_js`` for the
    configured language, the inlined contents of ``js/index.js`` and
    ``js/localization.js``, and (only when a theme is configured) an inline
    ``set_theme(...)`` call.

    Returns:
        The concatenated HTML as a single string.
    """

    def inline_script(code: str):
        # Wrap raw JavaScript source in an inline script tag.
        return f'<script type="text/javascript">{code}</script>\n'

    def external_script(url: str):
        # Reference a script by URL instead of inlining it.
        return f"<script src='{url}'></script>\n"

    def inline_script_from_file(fp: str):
        # Inline the contents of a file that lives next to this module.
        return inline_script(read_file(fp))

    pieces = [
        external_script("https://jsd.onmicrosoft.cn/npm/marked@12.0.2"),
        inline_script(localization_js(config.runtime_env_vars.language)),
        inline_script_from_file("js/index.js"),
        inline_script_from_file("js/localization.js"),
    ]

    if config.runtime_env_vars.theme:
        pieces.append(
            inline_script(f"set_theme('{config.runtime_env_vars.theme}');")
        )

    return "".join(pieces)
41
+
42
+
43
def css_html():
    """Return the contents of ``css/style.css`` wrapped in a ``<style>`` element."""
    stylesheet = read_file("css/style.css")
    return "<style>" + stylesheet + "</style>"
46
+
47
+
48
def reload_javascript():
    """Patch Gradio's template response so pages carry the custom JS and CSS.

    The script and style snippets are built once, when this function is
    called. Afterwards every response produced through
    ``gr.routes.templates.TemplateResponse`` has the scripts inserted right
    before ``</head>`` and the styles inserted right before ``</body>``.
    """
    # Build both injections up front; they do not change between responses.
    head_injection = f"{javascript_html()}</head>".encode("utf8")
    body_injection = f"{css_html()}</body>".encode("utf8")

    def template_response(*args, **kwargs):
        # Always delegate to the original factory captured at import time, so
        # calling reload_javascript() again does not stack wrappers.
        response = GradioTemplateResponseOriginal(*args, **kwargs)
        with_scripts = response.body.replace(b"</head>", head_injection)
        response.body = with_scripts.replace(b"</body>", body_injection)
        # Re-run header initialisation after the body was modified
        # (presumably to refresh length-dependent headers -- TODO confirm).
        response.init_headers()
        return response

    gr.routes.templates.TemplateResponse = template_response
modules/webui/gradio_hijack.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio.components.base import Block
2
+
3
# Every Block instance constructed after this module has been imported is
# appended here by the patched constructor below.
all_components = []

# Save the unpatched constructor exactly once. The guard has to test the same
# attribute name that is assigned here: if it tested a different name it would
# always be true, and a second execution of this module would store the
# already-patched ``blk_ini`` as the "original", making every Block
# construction recurse without end.
if not hasattr(Block, "original_init"):
    Block.original_init = Block.__init__


def blk_ini(self, *args, **kwargs):
    """Replacement for ``Block.__init__`` that records the instance first.

    The instance is appended to ``all_components`` and then the saved original
    constructor is invoked with the unchanged arguments; its return value is
    passed through.
    """
    all_components.append(self)
    return Block.original_init(self, *args, **kwargs)


Block.__init__ = blk_ini
modules/webui/js/index.js ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/script.js
2
/**
 * Return the root node to run DOM queries against: the first <gradio-app>
 * element (or its shadow root when it has one), or the document itself when
 * no <gradio-app> element exists.
 */
function gradioApp() {
  const hosts = document.getElementsByTagName("gradio-app");
  const root = hosts.length > 0 ? hosts[0] : document;

  if (root !== document) {
    // Forward id lookups on the host element to the document.
    root.getElementById = (id) => document.getElementById(id);
  }
  return root.shadowRoot || root;
}
13
+
14
/**
 * Return the selected button of the top-level tab bar (the `.selected`
 * button below `#tabs > .tab-nav`), or null when there is none.
 */
function get_uiCurrentTab() {
  const selectedTabButton = "#tabs > .tab-nav > button.selected";
  return gradioApp().querySelector(selectedTabButton);
}
20
+
21
/**
 * Return the first top-level tab content element (id starting with "tab_")
 * whose inline style does not hide it, or null when there is none.
 */
function get_uiCurrentTabContent() {
  const visibleTabContent =
    '#tabs > .tabitem[id^=tab_]:not([style*="display: none"])';
  return gradioApp().querySelector(visibleTabContent);
}
29
+
30
// Callback queues, one per UI event, filled by the on*() functions below.
var uiUpdateCallbacks = [],
  uiAfterUpdateCallbacks = [],
  uiLoadedCallbacks = [],
  uiTabChangeCallbacks = [],
  optionsChangedCallbacks = [];
// Handle of the pending "after update" timer, and the last seen tab button.
var uiAfterUpdateTimeout = null,
  uiCurrentTab = null;

/**
 * Register a handler that runs on every UI update.
 * The handler receives the array of MutationRecords of that update.
 */
function onUiUpdate(handler) {
  uiUpdateCallbacks.push(handler);
}

/**
 * Register a handler that runs shortly after UI updates, without arguments.
 *
 * Prefer this over `onUiUpdate` when the MutationRecords are not needed:
 * a burst of updates results in a single call.
 */
function onAfterUiUpdate(handler) {
  uiAfterUpdateCallbacks.push(handler);
}

/**
 * Register a handler that runs when the UI has loaded.
 * The handler receives no arguments.
 */
function onUiLoaded(handler) {
  uiLoadedCallbacks.push(handler);
}

/**
 * Register a handler that runs when the selected UI tab changes.
 * The handler receives no arguments.
 */
function onUiTabChange(handler) {
  uiTabChangeCallbacks.push(handler);
}

/**
 * Register a handler meant to run when the options change.
 * The handler receives no arguments.
 * @param handler function to add to the queue
 */
function onOptionsChanged(handler) {
  optionsChangedCallbacks.push(handler);
}
82
+
83
/**
 * Call every function in `queue` with `arg`. An exception thrown by one
 * callback is logged and does not stop the remaining callbacks.
 */
function executeCallbacks(queue, arg) {
  // The length is re-read on every pass so callbacks that are registered
  // while the queue is running are still visited.
  for (let i = 0; i < queue.length; i++) {
    const callback = queue[i];
    try {
      callback(arg);
    } catch (e) {
      console.error("error running callback", callback, ":", e);
    }
  }
}
92
+
93
/**
 * (Re)start the timer that runs the callbacks registered with onAfterUiUpdate.
 * Each call cancels the pending timer, so a burst of mutations leads to one
 * run, 200 ms after the last call.
 */
function scheduleAfterUiUpdateCallbacks() {
  const delayMs = 200;
  const runAfterUpdateCallbacks = () => executeCallbacks(uiAfterUpdateCallbacks);

  clearTimeout(uiAfterUpdateTimeout);
  uiAfterUpdateTimeout = setTimeout(runAfterUpdateCallbacks, delayMs);
}
105
+
106
// Set once the "UI loaded" callbacks have been run.
var executedOnLoaded = false;

document.addEventListener("DOMContentLoaded", function () {
  // Runs for every batch of DOM mutations below the app root.
  function handleMutations(records) {
    const justLoaded =
      !executedOnLoaded && gradioApp().querySelector("#generate_button");
    if (justLoaded) {
      executedOnLoaded = true;
      executeCallbacks(uiLoadedCallbacks);
    }

    executeCallbacks(uiUpdateCallbacks, records);
    scheduleAfterUiUpdateCallbacks();

    const selectedTab = get_uiCurrentTab();
    if (!selectedTab || selectedTab === uiCurrentTab) {
      return;
    }
    uiCurrentTab = selectedTab;
    executeCallbacks(uiTabChangeCallbacks);
  }

  const observerOptions = { childList: true, subtree: true };
  new MutationObserver(handleMutations).observe(gradioApp(), observerOptions);
});
125
+
126
/**
 * Call `f` with the added NodeList each time direct children are appended
 * to `elem`.
 */
var onAppend = function (elem, f) {
  function reportAddedNodes(mutations) {
    for (const record of mutations) {
      if (record.addedNodes.length) {
        f(record.addedNodes);
      }
    }
  }

  new MutationObserver(reportAddedNodes).observe(elem, { childList: true });
};
136
+
137
/**
 * Attach `callback` (via onAppend) to the first element matching
 * `querySelector`. While no such element exists, retry once per second.
 */
function addObserverIfDesiredNodeAvailable(querySelector, callback) {
  const target = document.querySelector(querySelector);
  if (target) {
    onAppend(target, callback);
    return;
  }

  const retry = () => addObserverIfDesiredNodeAvailable(querySelector, callback);
  window.setTimeout(retry, 1000);
}
149
+
150
/**
 * Show the reset button (and hide generate/skip/stop) when a toast saying
 * "Connection errored out." is appended to `.toast-wrap`.
 */
addObserverIfDesiredNodeAvailable(".toast-wrap", function (added) {
  // Add or remove the "hidden" class; a button that is not on the page is
  // skipped instead of raising a TypeError.
  function setHidden(id, hidden) {
    const button = document.getElementById(id);
    if (button) {
      button.classList.toggle("hidden", hidden);
    }
  }

  added.forEach(function (element) {
    // Added nodes may be text or comment nodes, which have no innerText.
    const toastText = element.innerText || "";
    if (!toastText.includes("Connection errored out.")) {
      return;
    }
    window.setTimeout(function () {
      setHidden("reset_button", false);
      setHidden("generate_button", true);
      setHidden("skip_button", true);
      setHidden("stop_button", true);
    });
  });
});
165
+
166
/**
 * Keyboard shortcut: Enter together with Ctrl, Meta or Alt clicks the visible
 * generate button, or otherwise the visible stop button.
 */
document.addEventListener("keydown", function (e) {
  const withModifier = e.metaKey || e.ctrlKey || e.altKey;
  const enterPressed = e.key == "Enter" || e.keyCode == 13;
  if (!(withModifier && enterPressed)) {
    return;
  }

  // Checked in order; the first button that exists handles the shortcut.
  const candidates = [
    "button:not(.hidden)[id=generate_button]",
    "button:not(.hidden)[id=stop_button]",
  ];
  for (const selector of candidates) {
    const button = gradioApp().querySelector(selector);
    if (button) {
      button.click();
      e.preventDefault();
      return;
    }
  }
});
193
+
194
/**
 * Check that a UI element is not inside a hidden element or tab content.
 *
 * Walks from `el` up to the document and returns false as soon as an ancestor
 * (or `el` itself) has a computed `display` of "none". Shadow roots are
 * crossed through their host element. A node that is not attached to the
 * document is reported as not visible.
 */
function uiElementIsVisible(el) {
  let current = el;
  while (current !== document) {
    if (!current) {
      // Ran off the top of a detached subtree.
      return false;
    }
    if (current instanceof ShadowRoot) {
      // getComputedStyle only accepts elements; continue from the host.
      current = current.host;
      continue;
    }
    if (current instanceof Element) {
      if (getComputedStyle(current).display === "none") {
        return false;
      }
    }
    current = current.parentNode;
  }
  return true;
}
208
+
209
/**
 * Return true when the bounding box of `el` overlaps the viewport vertically.
 */
function uiElementInSight(el) {
  const { top, bottom } = el.getBoundingClientRect();
  return bottom > 0 && top < window.innerHeight;
}
216
+
217
/**
 * Play the <audio> element inside #audio_notification, if it exists.
 */
function playNotification() {
  const player = gradioApp().querySelector("#audio_notification audio");
  player?.play();
}
220
+
221
/**
 * Reload the page with `__theme=<theme>` in the query string, unless the URL
 * already carries a `__theme` parameter.
 *
 * The URL is edited through the URL API so that an existing query string or
 * hash stays intact and the theme value is percent-encoded.
 */
function set_theme(theme) {
  const url = new URL(window.location.href);
  if (url.searchParams.has("__theme")) {
    return;
  }
  url.searchParams.set("__theme", theme);
  window.location.replace(url.toString());
}
227
+
228
/**
 * Parse `input` as HTML and return the text content of the resulting document.
 */
function htmlDecode(input) {
  const parser = new DOMParser();
  const parsed = parser.parseFromString(input, "text/html");
  return parsed.documentElement.textContent;
}
modules/webui/js/localization.js ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Texts made only of digits and dots are never translated.
var re_num = /^[.\d]+$/;

// Bookkeeping filled by getTranslation(): texts that were looked up, and
// translations that were handed out.
var original_lines = {};
var translated_lines = {};

/**
 * Tell whether `window.localization` exists and has at least one entry.
 */
function hasLocalization() {
  const table = window.localization;
  return table && Object.keys(table).length > 0;
}
9
+
10
/**
 * Collect all text nodes below `el`, in document order.
 */
function textNodesUnder(el) {
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null, false);
  const found = [];
  for (let current = walker.nextNode(); current; current = walker.nextNode()) {
    found.push(current);
  }
  return found;
}
17
+
18
/**
 * Decide whether the text node `node` with trimmed content `text` may be
 * translated: the text must be non-empty and not purely numeric, and the node
 * must have a parent element other than SCRIPT, STYLE or TEXTAREA.
 */
function canBeTranslated(node, text) {
  if (!text || !node.parentElement) {
    return false;
  }

  const untranslatableParents = ["SCRIPT", "STYLE", "TEXTAREA"];
  if (untranslatableParents.includes(node.parentElement.nodeName)) {
    return false;
  }

  return !re_num.test(text);
}
31
+
32
/**
 * Look up the translation of `text` in the global `localization` table.
 *
 * Only the table's own entries count. A plain property read would also find
 * members inherited from Object.prototype, so a text such as "constructor"
 * or "toString" would be "translated" into a function.
 *
 * As bookkeeping, texts that are not known translations are recorded in
 * `original_lines`, and translations handed out in `translated_lines`.
 *
 * @param {string} text text to translate
 * @returns the translation, or undefined when `text` is empty or has none
 */
function getTranslation(text) {
  if (!text) return undefined;

  const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  if (!owns(translated_lines, text)) {
    original_lines[text] = 1;
  }

  const tl = owns(localization, text) ? localization[text] : undefined;
  if (tl !== undefined) {
    translated_lines[tl] = 1;
  }

  return tl;
}
46
+
47
/**
 * Replace the content of a text node with its translation, when it has one,
 * and store the original text on the parent element as `data-original-text`.
 */
function processTextNode(node) {
  const original = node.textContent.trim();
  if (!canBeTranslated(node, original)) {
    return;
  }

  const translated = getTranslation(original);
  if (translated === undefined) {
    return;
  }

  node.textContent = translated;
  const parent = node.parentElement;
  if (original && parent) {
    parent.setAttribute("data-original-text", original);
  }
}
60
+
61
/**
 * Translate a rendered markdown container (an element with class "md").
 *
 * The trimmed text of the first child element is used as the lookup key.
 * A translation given as an array is joined with newlines, rendered with
 * `marked` and written into the node; the key is kept on the node as
 * `data-original-text`.
 *
 * @param {HTMLElement} node
 * @returns
 */
function processMDNode(node) {
  const firstChild = node.children[0];
  // A markdown container without child elements has no key to look up.
  if (!firstChild) return;

  const text = firstChild.textContent.trim();
  let tl = getTranslation(text);

  if (!tl) return;
  if (Array.isArray(tl)) {
    tl = tl.join("\n");
  }

  // `marked` is loaded from a CDN; when that script is unavailable, show the
  // translation as plain text instead of throwing a ReferenceError.
  if (typeof marked === "undefined" || typeof marked.marked !== "function") {
    node.textContent = tl;
  } else {
    node.innerHTML = marked.marked(tl);
  }

  node.setAttribute("data-original-text", text);
}
79
+
80
/**
 * Tell whether `node`, or one of its ancestors, carries the class "md".
 * The walk stops, without a match, at the node whose parent element is
 * document.body, or when there is no parent element left.
 */
function is_md_child(node) {
  let current = node;
  do {
    if (current.parentElement === document.body) {
      return false;
    }
    if (current.classList?.contains("md")) {
      return true;
    }
    current = current.parentElement;
  } while (current);
  return false;
}
90
+
91
/**
 * Translate `node` and everything below it.
 *
 * - A text node is translated directly.
 * - An element with class "md" is handled as a whole by processMDNode.
 * - Nodes inside an "md" element are left alone.
 * - Otherwise the node's `title` and `placeholder` are translated, followed
 *   by all text nodes below it that are not inside an "md" element.
 *
 * A missing node is ignored, and nodes without a classList (comment nodes,
 * the document) no longer raise a TypeError.
 */
function processNode(node) {
  if (!node) return;

  if (node.nodeType == 3) {
    processTextNode(node);
    return;
  }
  if (node.classList?.contains("md")) {
    processMDNode(node);
    return;
  }
  if (is_md_child(node)) return;

  if (node.title) {
    let tl = getTranslation(node.title);
    if (tl !== undefined) {
      node.title = tl;
    }
  }

  if (node.placeholder) {
    let tl = getTranslation(node.placeholder);
    if (tl !== undefined) {
      node.placeholder = tl;
    }
  }

  textNodesUnder(node).forEach(function (textNode) {
    if (is_md_child(textNode)) return;
    processTextNode(textNode);
  });
}
121
+
122
/**
 * Re-run the translation on the `.style_selections` element.
 * Does nothing when the page has no such element.
 */
function refresh_style_localization() {
  const styleSelections = document.querySelector(".style_selections");
  if (styleSelections) {
    processNode(styleSelections);
  }
}
125
+
126
/**
 * Set the label of the aspect-ratios accordion to the (translated) text
 * "Aspect Ratios" followed by the HTML-decoded `value`.
 * Does nothing when the label element is not on the page.
 */
function refresh_aspect_ratios_label(value) {
  // Declared locally: the names used to leak into the global scope.
  const label = document.querySelector("#aspect_ratios_accordion div span");
  if (!label) {
    return;
  }

  let translation = getTranslation("Aspect Ratios");
  if (typeof translation == "undefined") {
    translation = "Aspect Ratios";
  }
  label.textContent = translation + " " + htmlDecode(value);
}
134
+
135
/**
 * Translate the whole app: first every node below the app root, then the
 * tooltips and placeholders of the components listed in
 * `window.gradio_config.components`.
 */
function localizeWholePage() {
  processNode(gradioApp());

  // DOM element of a component: its elem_id, or "component-<id>" without one.
  const elementOf = (comp) =>
    gradioApp().getElementById(comp.props.elem_id || "component-" + comp.id);

  for (const comp of window.gradio_config.components) {
    if (comp.props.webui_tooltip) {
      const host = elementOf(comp);
      const tooltip = host ? getTranslation(host.title) : undefined;
      if (tooltip !== undefined) {
        host.title = tooltip;
      }
    }

    if (comp.props.placeholder) {
      const host = elementOf(comp);
      const textbox = host ? host.querySelector("[placeholder]") : null;
      const hint = textbox ? getTranslation(textbox.placeholder) : undefined;
      if (hint !== undefined) {
        textbox.placeholder = hint;
      }
    }
  }
}
165
+
166
// Start the localization once the DOM is ready. Nothing happens when no
// translation table was provided.
document.addEventListener("DOMContentLoaded", function () {
  if (!hasLocalization()) {
    return;
  }

  // Translate every node that is added to the page later on.
  onUiUpdate(function (records) {
    for (const record of records) {
      record.addedNodes.forEach(function (added) {
        processNode(added);
      });
    }
  });

  localizeWholePage();

  if (!localization.rtl) {
    return;
  }

  // Right-to-left language: wait for the style element to be added, then
  // enable every media rule tagged "rtl" by adding the "all" medium to it.
  const enableRtlRules = (mutations, observer) => {
    mutations.forEach((mutation) => {
      mutation.addedNodes.forEach((node) => {
        if (node.tagName !== "STYLE") {
          return;
        }
        observer.disconnect();

        for (const rule of node.sheet.rules) {
          if (Array.from(rule.media || []).includes("rtl")) {
            rule.media.appendMedium("all");
          }
        }
      });
    });
  };
  new MutationObserver(enableRtlRules).observe(gradioApp(), { childList: true });
});
modules/webui/localization.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import gradio as gr
4
+
5
+
6
# Translation table most recently loaded by localization_js(): UI text -> translation.
current_translation = {}
# Directory with the "<language>.json" files: "language", three levels above this file.
localization_root = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "language"
)


def _validate_translation(data):
    """Raise ValueError unless *data* is a dict mapping str to str or list."""
    if not isinstance(data, dict):
        raise ValueError("Localization file must contain a JSON object")
    for k, v in data.items():
        if not isinstance(k, str):
            raise ValueError(f"Key is not a string, got {k}")
        if not isinstance(v, (str, list)):
            raise ValueError(f"Value for key {k} is not a string or list")


def localization_js(filename):
    """Return the JavaScript statement that defines ``window.localization``.

    When *filename* is a string and ``<localization_root>/<filename>.json``
    exists, the file is loaded and, if valid, becomes the current translation
    table. A file that cannot be read, parsed or validated is reported on
    stdout and the previous table is kept, so invalid data never reaches the
    page. Validation raises explicitly instead of relying on ``assert``, which
    is removed when Python runs with ``-O``.

    Args:
        filename: Language name without the ``.json`` suffix; a non-string
            value skips loading.

    Returns:
        ``"window.localization = <json>"`` for the current translation table.
    """
    global current_translation

    if isinstance(filename, str):
        full_name = os.path.abspath(os.path.join(localization_root, filename + ".json"))
        if os.path.exists(full_name):
            try:
                with open(full_name, encoding="utf-8") as f:
                    loaded = json.load(f)
                _validate_translation(loaded)
            except (OSError, ValueError) as e:
                # JSON syntax errors and decoding errors are ValueError subclasses.
                print(str(e))
                print(f"Failed to load localization file {full_name}")
            else:
                current_translation = loaded

    # Debug aid: map every value to 'XXX' here to see whether all texts are covered.

    return f"window.localization = {json.dumps(current_translation)}"
34
+
35
+
36
def dump_english_config(components):
    """Write the texts of *components* to ``en.json`` as an identity mapping.

    Collected per component: ``label``, ``value`` and ``info`` when they are
    strings, plus every string in ``choices`` (directly, or inside a tuple
    choice). Dropdown, HTML and Textbox components are skipped, as are
    Markdown components that carry the ``no-translate`` class. Empty texts and
    texts containing ``progress-container`` are left out.

    Args:
        components: Iterable of Gradio components to scan.
    """
    skipped_types = (gr.Dropdown, gr.HTML, gr.Textbox)

    all_texts = []
    for c in components:
        if isinstance(c, gr.Markdown):
            # elem_classes may be None when the component was created without
            # any; treat that as "no classes" instead of raising a TypeError.
            if "no-translate" in (getattr(c, "elem_classes", None) or ()):
                continue
        if isinstance(c, skipped_types):
            continue

        label = getattr(c, "label", None)
        value = getattr(c, "value", None)
        choices = getattr(c, "choices", None)
        info = getattr(c, "info", None)

        all_texts.extend(t for t in (label, value, info) if isinstance(t, str))

        if isinstance(choices, list):
            for x in choices:
                if isinstance(x, str):
                    all_texts.append(x)
                if isinstance(x, tuple):
                    all_texts.extend(y for y in x if isinstance(y, str))

    config_dict = {k: k for k in all_texts if k != "" and "progress-container" not in k}
    full_name = os.path.abspath(os.path.join(localization_root, "en.json"))

    with open(full_name, "w", encoding="utf-8") as json_file:
        json.dump(config_dict, json_file, indent=4, ensure_ascii=False)
modules/webui/localization_runtime.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class LocalizationVars:
    """Language-dependent default texts and examples for the web UI.

    This base class holds empty values only.
    """

    def __init__(self):
        # Default texts; all start out as the empty string.
        self.DEFAULT_TTS_TEXT = self.DEFAULT_SPEAKER_TEST_TEXT = ""
        self.DEFAULT_SPEAKER_MERAGE_TEXT = self.DEFAULT_SSML_TEXT = ""

        # Example collections; each instance gets its own two lists.
        self.ssml_examples = list()
        self.tts_examples = list()
10
+
11
+
12
+ class ZHLocalizationVars(LocalizationVars):
13
+ def __init__(self):
14
+ super().__init__()
15
+ self.DEFAULT_TTS_TEXT = "chat T T S 是一款强大的对话式文本转语音模型。它有中英混读和多说话人的能力。"
16
+ self.DEFAULT_SPEAKER_TEST_TEXT = (
17
+ "说话人测试 123456789 [uv_break] ok, test done [lbreak]"
18
+ )
19
+ self.DEFAULT_SPEAKER_MERAGE_TEXT = (
20
+ "说话人合并测试 123456789 [uv_break] ok, test done [lbreak]"
21
+ )
22
+ self.DEFAULT_SSML_TEXT = """
23
+ <speak version="0.1">
24
+ <voice spk="Bob" seed="42" style="narration-relaxed">
25
+ 这里是一个简单的 SSML 示例 [lbreak]
26
+ </voice>
27
+ </speak>
28
+ """.strip()
29
+
30
+ self.ssml_examples = [
31
+ """
32
+ <speak version="0.1">
33
+ <voice spk="Bob" seed="42" style="narration-relaxed">
34
+ 下面是一个 ChatTTS 用于合成多角色多情感的有声书示例[lbreak]
35
+ </voice>
36
+ <voice spk="Bob" seed="42" style="narration-relaxed">
37
+ 黛玉冷笑道:[lbreak]
38
+ </voice>
39
+ <voice spk="female2" seed="42" style="angry">
40
+ 我说呢 [uv_break] ,亏了绊住,不然,早就飞起来了[lbreak]
41
+ </voice>
42
+ <voice spk="Bob" seed="42" style="narration-relaxed">
43
+ 宝玉道:[lbreak]
44
+ </voice>
45
+ <voice spk="Alice" seed="42" style="unfriendly">
46
+ “只许和你玩 [uv_break] ,替你解闷。不过偶然到他那里,就说这些闲话。”[lbreak]
47
+ </voice>
48
+ <voice spk="female2" seed="42" style="angry">
49
+ “好没意思的话![uv_break] 去不去,关我什么事儿? 又没叫你替我解闷儿 [uv_break],还许你不理我呢” [lbreak]
50
+ </voice>
51
+ <voice spk="Bob" seed="42" style="narration-relaxed">
52
+ 说着,便赌气回房去了 [lbreak]
53
+ </voice>
54
+ </speak>
55
+ """,
56
+ """
57
+ <speak version="0.1">
58
+ <voice spk="Bob" seed="42" style="narration-relaxed">
59
+ 使用 prosody 控制生成文本的语速语调和音量,示例如下 [lbreak]
60
+
61
+ <prosody>
62
+ 无任何限制将会继承父级voice配置进行生成 [lbreak]
63
+ </prosody>
64
+ <prosody rate="1.5">
65
+ 设置 rate 大于1表示加速,小于1为减速 [lbreak]
66
+ </prosody>
67
+ <prosody pitch="6">
68
+ 设置 pitch 调整音调,设置为6表示提高6个半音 [lbreak]
69
+ </prosody>
70
+ <prosody volume="2">
71
+ 设置 volume 调整音量,设置为2表示提高2个分贝 [lbreak]
72
+ </prosody>
73
+
74
+ 在 voice 中无prosody包裹的文本即为默认生成状态下的语音 [lbreak]
75
+ </voice>
76
+ </speak>
77
+ """,
78
+ """
79
+ <speak version="0.1">
80
+ <voice spk="Bob" seed="42" style="narration-relaxed">
81
+ 使用 break 标签将会简单的 [lbreak]
82
+
83
+ <break time="500" />
84
+
85
+ 插入一段空白到生成结果中 [lbreak]
86
+ </voice>
87
+ </speak>
88
+ """,
89
+ ]
90
+
91
+ self.tts_examples = [
92
+ {
93
+ "text": "大🍌,一条大🍌,嘿,你的感觉真的很奇妙 [lbreak]",
94
+ },
95
+ {
96
+ "text": "Big 🍌, a big 🍌, hey, your feeling is really wonderful [lbreak]"
97
+ },
98
+ {
99
+ "text": """
100
+ # 这是 markdown 标题
101
+
102
+ ```
103
+ 代码块将跳过
104
+ ```
105
+
106
+ - **文本标准化**:
107
+ - **Markdown**: 自动检测处理 markdown 格式文本。
108
+ - **数字转写**: 自动将数字转为模型可识别的文本。
109
+ - **Emoji 适配**: 自动翻译 emoji 为可读文本。
110
+ - **基于分词器**: 基于 tokenizer 预处理文本,覆盖模型所有不支持字符范围。
111
+ - **中英文识别**: 适配英文环境。
112
+ """
113
+ },
114
+ {
115
+ "text": "天气预报显示,今天会有小雨,请大家出门时记得带伞。降温的天气也提醒我们要适时添衣保暖 [lbreak]",
116
+ },
117
+ {
118
+ "text": "公司的年度总结会议将在下周三举行,请各部门提前准备好相关材料,确保会议顺利进行 [lbreak]",
119
+ },
120
+ {
121
+ "text": "今天的午餐菜单包括烤鸡、沙拉和蔬菜汤,大家可以根据自己的口味选择适合的菜品 [lbreak]",
122
+ },
123
+ {
124
+ "text": "请注意,电梯将在下午两点进行例行维护,预计需要一个小时的时间,请大家在此期间使用楼梯 [lbreak]",
125
+ },
126
+ {
127
+ "text": "图书馆新到了一批书籍,涵盖了文学、科学和历史等多个领域,欢迎大家前来借阅 [lbreak]",
128
+ },
129
+ {
130
+ "text": "电影中梁朝伟扮演的陈永仁的编号27149 [lbreak]",
131
+ },
132
+ {
133
+ "text": "这块黄金重达324.75克 [lbreak]",
134
+ },
135
+ {
136
+ "text": "我们班的最高总分为583分 [lbreak]",
137
+ },
138
+ {
139
+ "text": "12~23 [lbreak]",
140
+ },
141
+ {
142
+ "text": "-1.5~2 [lbreak]",
143
+ },
144
+ {
145
+ "text": "她出生于86年8月18日,她弟弟出生于1995年3月1日 [lbreak]",
146
+ },
147
+ {
148
+ "text": "等会请在12:05请通知我 [lbreak]",
149
+ },
150
+ {
151
+ "text": "今天的最低气温达到-10°C [lbreak]",
152
+ },
153
+ {
154
+ "text": "现场有7/12的观众投出了赞成票 [lbreak]",
155
+ },
156
+ {
157
+ "text": "明天有62%的概率降雨 [lbreak]",
158
+ },
159
+ {
160
+ "text": "随便来几个价格12块5,34.5元,20.1万 [lbreak]",
161
+ },
162
+ {
163
+ "text": "这是固话0421-33441122 [lbreak]",
164
+ },
165
+ {
166
+ "text": "这是手机+86 18544139121 [lbreak]",
167
+ },
168
+ ]
169
+
170
+
171
+ class ENLocalizationVars(LocalizationVars):
172
+ def __init__(self):
173
+ super().__init__()
174
+ self.DEFAULT_TTS_TEXT = "Chat T T S is a powerful conversational text-to-speech model. It has the ability to mix Chinese and English and multiple speakers."
175
+ self.DEFAULT_SPEAKER_TEST_TEXT = (
176
+ "Speaker test 123456789 [uv_break] ok, test done [lbreak]"
177
+ )
178
+ self.DEFAULT_SPEAKER_MERAGE_TEXT = (
179
+ "Speaker merge test 123456789 [uv_break] ok, test done [lbreak]"
180
+ )
181
+ self.DEFAULT_SSML_TEXT = """
182
+ <speak version="0.1">
183
+ <voice spk="Bob" seed="42" style="narration-relaxed">
184
+ Here is a simple SSML example [lbreak]
185
+ </voice>
186
+ </speak>
187
+ """.strip()
188
+
189
+ self.ssml_examples = [
190
+ """
191
+ <speak version="0.1">
192
+ <voice spk="Bob" seed="42" style="narration-relaxed">
193
+ Below is an example of ChatTTS synthesizing an audiobook with multiple roles and emotions [lbreak]
194
+ </voice>
195
+ <voice spk="Bob" seed="42" style="narration-relaxed">
196
+ Daiyu sneered: [lbreak]
197
+ </voice>
198
+ <voice spk="female2" seed="42" style="angry">
199
+ I said [uv_break], it's a loss to trip, otherwise, I would have flown up long ago [lbreak]
200
+ </voice>
201
+ <voice spk="Bob" seed="42" style="narration-relaxed">
202
+ Bao Yu said: [lbreak]
203
+ </voice>
204
+ <voice spk="Alice" seed="42" style="unfriendly">
205
+ "Only play with you [uv_break], to relieve your boredom. But occasionally go to his place, just say these idle words." [lbreak]
206
+ </voice>
207
+ <voice spk="female2" seed="42" style="angry">
208
+ "What a boring thing! [uv_break] Go or not, it's none of my business? I didn't ask you to relieve my boredom [uv_break], and you don't even care about me." [lbreak]
209
+ </voice>
210
+ <voice spk="Bob" seed="42" style="narration-relaxed">
211
+ Saying that, he went back to the room in anger [lbreak]
212
+ </voice>
213
+ </speak>""",
214
+ ]
215
+
216
+ self.tts_examples = [
217
+ {
218
+ "text": "I guess it comes down a simple choice. Get busy living or get busy dying.",
219
+ },
220
+ {
221
+ "text": "You got a dream, you gotta protect it. People can't do something themselves, they wanna tell you you can't do it. If you want something, go get it.",
222
+ },
223
+ {
224
+ "text": "Don't ever let somebody tell you you can't do something. Not even me. Alright? You got a dream, you gotta protect it. When people can't do something themselves, they're gonna tell you that you can't do it. You want something, go get it. Period.",
225
+ },
226
+ ]
modules/webui/readme_tab.py CHANGED
@@ -10,4 +10,4 @@ def read_local_readme():
10
 
11
  def create_readme_tab():
12
  readme_content = read_local_readme()
13
- gr.Markdown(readme_content)
 
10
 
11
  def create_readme_tab():
12
  readme_content = read_local_readme()
13
+ gr.Markdown(readme_content, elem_classes=["no-translate"])
modules/webui/speaker/speaker_creator.py CHANGED
@@ -5,6 +5,7 @@ from modules.utils.SeedContext import SeedContext
5
  from modules.hf import spaces
6
  from modules.models import load_chat_tts
7
  from modules.utils.rng import np_rng
 
8
  from modules.webui.webui_utils import get_speakers, tts_generate
9
 
10
  import tempfile
@@ -88,16 +89,6 @@ def random_speaker():
88
  return seed, name
89
 
90
 
91
- creator_ui_desc = """
92
- ## Speaker Creator
93
- 使用本面板快捷抽卡生成 speaker.pt 文件。
94
-
95
- 1. **生成说话人**:输入种子、名字、性别和描述。点击 "Generate speaker.pt" 按钮,生成的说话人配置会保存为.pt文件。
96
- 2. **测试说话人声音**:输入测试文本。点击 "Test Voice" 按钮,生成的音频会在 "Output Audio" 中播放。
97
- 3. **随机生成说话人**:点击 "Random Speaker" 按钮,随机生成一个种子和名字,可以进一步编辑其他信息并测试。
98
- """
99
-
100
-
101
  def speaker_creator_ui():
102
  def on_generate(seed, name, gender, desc):
103
  file_path = create_spk_from_seed(seed, name, gender, desc)
@@ -113,7 +104,7 @@ def speaker_creator_ui():
113
  test_text = gr.Textbox(
114
  label="Test Text",
115
  placeholder="Please input test text",
116
- value="说话人测试 123456789 [uv_break] ok, test done [lbreak]",
117
  )
118
  with gr.Row():
119
  current_seed = gr.Label(label="Current Seed", value=-1)
@@ -131,7 +122,7 @@ def speaker_creator_ui():
131
  outputs=[current_seed],
132
  )
133
 
134
- gr.Markdown(creator_ui_desc)
135
 
136
  with gr.Row():
137
  with gr.Column(scale=2):
 
5
  from modules.hf import spaces
6
  from modules.models import load_chat_tts
7
  from modules.utils.rng import np_rng
8
+ from modules.webui import webui_config
9
  from modules.webui.webui_utils import get_speakers, tts_generate
10
 
11
  import tempfile
 
89
  return seed, name
90
 
91
 
 
 
 
 
 
 
 
 
 
 
92
  def speaker_creator_ui():
93
  def on_generate(seed, name, gender, desc):
94
  file_path = create_spk_from_seed(seed, name, gender, desc)
 
104
  test_text = gr.Textbox(
105
  label="Test Text",
106
  placeholder="Please input test text",
107
+ value=webui_config.localization.DEFAULT_SPEAKER_TEST_TEXT,
108
  )
109
  with gr.Row():
110
  current_seed = gr.Label(label="Current Seed", value=-1)
 
122
  outputs=[current_seed],
123
  )
124
 
125
+ gr.Markdown("SPEAKER_CREATOR_GUIDE")
126
 
127
  with gr.Row():
128
  with gr.Column(scale=2):
modules/webui/speaker/speaker_merger.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import torch
4
 
5
  from modules.hf import spaces
6
- from modules.webui import webui_utils
7
  from modules.webui.webui_utils import get_speakers, tts_generate
8
  from modules.speaker import speaker_mgr, Speaker
9
 
@@ -128,17 +128,6 @@ def merge_spk_to_file(
128
  return tmp_file_path
129
 
130
 
131
- merge_desc = """
132
- ## Speaker Merger
133
-
134
- 在本面板中,您可以选择多个说话人并指定他们的权重,合成新的语音并进行测试。以下是各个功能的详细说明:
135
-
136
- 1. 选择说话人: 您可以从下拉菜单中选择最多四个说话人(A、B、C、D),每个说话人都有一个对应的权重滑块,范围从0到10。权重决定了每个说话人在合成语音中的影响程度。
137
- 2. 合成语音: 在选择好说话人和设置好权重后,您可以在“Test Text”框中输入要测试的文本,然后点击“测试语音”按钮来生成并播放合成的语音。
138
- 3. 保存说话人: 您还可以在右侧的“说话人信息”部分填写新的说话人的名称、性别和描述,并点击“Save Speaker”按钮来保存合成的说话人。保存后的说话人文件将显示在“Merged Speaker”栏中,供下载使用。
139
- """
140
-
141
-
142
  # 显示 a b c d 四个选择框,选择一个或多个,然后可以试音,并导出
143
  def create_speaker_merger():
144
  def get_spk_choices():
@@ -146,7 +135,7 @@ def create_speaker_merger():
146
  speaker_names = ["None"] + speaker_names
147
  return speaker_names
148
 
149
- gr.Markdown(merge_desc)
150
 
151
  def spk_picker(label_tail: str):
152
  with gr.Row():
@@ -198,7 +187,7 @@ def create_speaker_merger():
198
  test_text = gr.Textbox(
199
  label="Test Text",
200
  placeholder="Please input test text",
201
- value="说话人合并测试 123456789 [uv_break] ok, test done [lbreak]",
202
  )
203
 
204
  output_audio = gr.Audio(
 
3
  import torch
4
 
5
  from modules.hf import spaces
6
+ from modules.webui import webui_config, webui_utils
7
  from modules.webui.webui_utils import get_speakers, tts_generate
8
  from modules.speaker import speaker_mgr, Speaker
9
 
 
128
  return tmp_file_path
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
131
  # 显示 a b c d 四个选择框,选择一个或多个,然后可以试音,并导出
132
  def create_speaker_merger():
133
  def get_spk_choices():
 
135
  speaker_names = ["None"] + speaker_names
136
  return speaker_names
137
 
138
+ gr.Markdown("SPEAKER_MERGER_GUIDE")
139
 
140
  def spk_picker(label_tail: str):
141
  with gr.Row():
 
187
  test_text = gr.Textbox(
188
  label="Test Text",
189
  placeholder="Please input test text",
190
+ value=webui_config.localization.DEFAULT_SPEAKER_MERAGE_TEXT,
191
  )
192
 
193
  output_audio = gr.Audio(
modules/webui/ssml/spliter_tab.py CHANGED
@@ -95,8 +95,7 @@ def create_spliter_tab(ssml_input, tabs1, tabs2):
95
  with gr.Column(scale=3):
96
  with gr.Group():
97
  gr.Markdown("📝Long Text Input")
98
- gr.Markdown("- 此页面用于处理超长文本")
99
- gr.Markdown("- 切割后,可以选择说话人、风格、seed,然后发送到SSML")
100
  long_text_input = gr.Textbox(
101
  label="Long Text Input",
102
  lines=10,
 
95
  with gr.Column(scale=3):
96
  with gr.Group():
97
  gr.Markdown("📝Long Text Input")
98
+ gr.Markdown("SSML_SPLITER_GUIDE")
 
99
  long_text_input = gr.Textbox(
100
  label="Long Text Input",
101
  lines=10,
modules/webui/ssml/ssml_tab.py CHANGED
@@ -3,7 +3,6 @@ from modules.webui.webui_utils import (
3
  synthesize_ssml,
4
  )
5
  from modules.webui import webui_config
6
- from modules.webui.examples import ssml_examples, default_ssml
7
 
8
 
9
  def create_ssml_interface():
@@ -11,15 +10,11 @@ def create_ssml_interface():
11
  with gr.Column(scale=3):
12
  with gr.Group():
13
  gr.Markdown("📝SSML Input")
14
- gr.Markdown(f"- 最长{webui_config.ssml_max:,}字符,超过会被截断")
15
- gr.Markdown("- 尽量保证使用相同的 seed")
16
- gr.Markdown(
17
- "- 关于SSML可以看这个 [文档](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
18
- )
19
  ssml_input = gr.Textbox(
20
  label="SSML Input",
21
  lines=10,
22
- value=default_ssml,
23
  placeholder="输入 SSML 或选择示例",
24
  elem_id="ssml_input",
25
  show_label=False,
@@ -46,7 +41,7 @@ def create_ssml_interface():
46
  with gr.Group():
47
  gr.Markdown("🎄Examples")
48
  gr.Examples(
49
- examples=ssml_examples,
50
  inputs=[ssml_input],
51
  )
52
 
 
3
  synthesize_ssml,
4
  )
5
  from modules.webui import webui_config
 
6
 
7
 
8
  def create_ssml_interface():
 
10
  with gr.Column(scale=3):
11
  with gr.Group():
12
  gr.Markdown("📝SSML Input")
13
+ gr.Markdown("SSML_TEXT_GUIDE")
 
 
 
 
14
  ssml_input = gr.Textbox(
15
  label="SSML Input",
16
  lines=10,
17
+ value=webui_config.localization.DEFAULT_SSML_TEXT,
18
  placeholder="输入 SSML 或选择示例",
19
  elem_id="ssml_input",
20
  show_label=False,
 
41
  with gr.Group():
42
  gr.Markdown("🎄Examples")
43
  gr.Examples(
44
+ examples=webui_config.localization.ssml_examples,
45
  inputs=[ssml_input],
46
  )
47
 
modules/webui/tts_tab.py CHANGED
@@ -8,12 +8,6 @@ from modules.webui.webui_utils import (
8
  tts_generate,
9
  )
10
  from modules.webui import webui_config
11
- from modules.webui.examples import example_texts
12
- from modules import config
13
-
14
- default_text_content = """
15
- chat T T S 是一款强大的对话式文本转语音模型。它有中英混读和多说话人的能力。
16
- """.strip()
17
 
18
 
19
  def create_tts_interface():
@@ -53,7 +47,7 @@ def create_tts_interface():
53
  with gr.Row():
54
  with gr.Group():
55
  gr.Markdown("🎭Style")
56
- gr.Markdown("- 后缀为 `_p` 表示带prompt,效果更强但是影响质量")
57
  style_input_dropdown = gr.Dropdown(
58
  choices=styles,
59
  # label="Choose Style",
@@ -138,18 +132,14 @@ def create_tts_interface():
138
  "📝Text Input",
139
  elem_id="input-title",
140
  )
141
- gr.Markdown(f"- 字数限制{webui_config.tts_max:,}字,超过部分截断")
142
- gr.Markdown("- 如果尾字吞字不读,可以试试结尾加上 `[lbreak]`")
143
- gr.Markdown(
144
- "- If the input text is all in English, it is recommended to check disable_normalize"
145
- )
146
  text_input = gr.Textbox(
147
  show_label=False,
148
  label="Text to Speech",
149
  lines=10,
150
  placeholder="输入文本或选择示例",
151
  elem_id="text-input",
152
- value=default_text_content,
153
  )
154
  # TODO 字数统计,其实实现很好写,但是就是会触发loading...并且还要和后端交互...
155
  # text_input.change(
@@ -184,7 +174,10 @@ def create_tts_interface():
184
  with gr.Group():
185
  gr.Markdown("🎄Examples")
186
  sample_dropdown = gr.Dropdown(
187
- choices=[sample["text"] for sample in example_texts],
 
 
 
188
  show_label=False,
189
  value=None,
190
  interactive=True,
 
8
  tts_generate,
9
  )
10
  from modules.webui import webui_config
 
 
 
 
 
 
11
 
12
 
13
  def create_tts_interface():
 
47
  with gr.Row():
48
  with gr.Group():
49
  gr.Markdown("🎭Style")
50
+ gr.Markdown("TTS_STYLE_GUIDE")
51
  style_input_dropdown = gr.Dropdown(
52
  choices=styles,
53
  # label="Choose Style",
 
132
  "📝Text Input",
133
  elem_id="input-title",
134
  )
135
+ gr.Markdown(f"TTS_TEXT_GUIDE")
 
 
 
 
136
  text_input = gr.Textbox(
137
  show_label=False,
138
  label="Text to Speech",
139
  lines=10,
140
  placeholder="输入文本或选择示例",
141
  elem_id="text-input",
142
+ value=webui_config.localization.DEFAULT_TTS_TEXT,
143
  )
144
  # TODO 字数统计,其实实现很好写,但是就是会触发loading...并且还要和后端交互...
145
  # text_input.change(
 
174
  with gr.Group():
175
  gr.Markdown("🎄Examples")
176
  sample_dropdown = gr.Dropdown(
177
+ choices=[
178
+ sample["text"]
179
+ for sample in webui_config.localization.tts_examples
180
+ ],
181
  show_label=False,
182
  value=None,
183
  interactive=True,
modules/webui/webui_config.py CHANGED
@@ -1,8 +1,12 @@
1
  from typing import Literal
2
 
 
 
3
 
4
  tts_max = 1000
5
  ssml_max = 1000
6
  spliter_threshold = 100
7
  max_batch_size = 8
8
  experimental = False
 
 
 
1
  from typing import Literal
2
 
3
+ from modules.webui.localization_runtime import LocalizationVars
4
+
5
 
6
  tts_max = 1000
7
  ssml_max = 1000
8
  spliter_threshold = 100
9
  max_batch_size = 8
10
  experimental = False
11
+
12
+ localization: LocalizationVars = None
webui.py CHANGED
@@ -81,6 +81,12 @@ if __name__ == "__main__":
81
  help="Enable webui_experimental features",
82
  )
83
 
 
 
 
 
 
 
84
  args = parser.parse_args()
85
 
86
  def get_and_update_env(*args):
@@ -100,6 +106,7 @@ if __name__ == "__main__":
100
  device_id = get_and_update_env(args, "device_id", None, str)
101
  use_cpu = get_and_update_env(args, "use_cpu", [], list)
102
  compile = get_and_update_env(args, "compile", False, bool)
 
103
 
104
  webui_config.experimental = get_and_update_env(
105
  args, "webui_experimental", False, bool
@@ -108,6 +115,7 @@ if __name__ == "__main__":
108
  webui_config.ssml_max = get_and_update_env(args, "ssml_max_len", 5000, int)
109
  webui_config.max_batch_size = get_and_update_env(args, "max_batch_size", 8, int)
110
 
 
111
  demo = create_interface()
112
 
113
  if auth:
@@ -117,8 +125,6 @@ if __name__ == "__main__":
117
  devices.reset_device()
118
  devices.first_time_calculation()
119
 
120
- webui_init()
121
-
122
  demo.queue().launch(
123
  server_name=server_name,
124
  server_port=server_port,
 
81
  help="Enable webui_experimental features",
82
  )
83
 
84
+ parser.add_argument(
85
+ "--language",
86
+ type=str,
87
+ default="zh-CN",
88
+ help="Set the default language for the webui",
89
+ )
90
  args = parser.parse_args()
91
 
92
  def get_and_update_env(*args):
 
106
  device_id = get_and_update_env(args, "device_id", None, str)
107
  use_cpu = get_and_update_env(args, "use_cpu", [], list)
108
  compile = get_and_update_env(args, "compile", False, bool)
109
+ language = get_and_update_env(args, "language", False, bool)
110
 
111
  webui_config.experimental = get_and_update_env(
112
  args, "webui_experimental", False, bool
 
115
  webui_config.ssml_max = get_and_update_env(args, "ssml_max_len", 5000, int)
116
  webui_config.max_batch_size = get_and_update_env(args, "max_batch_size", 8, int)
117
 
118
+ webui_init()
119
  demo = create_interface()
120
 
121
  if auth:
 
125
  devices.reset_device()
126
  devices.first_time_calculation()
127
 
 
 
128
  demo.queue().launch(
129
  server_name=server_name,
130
  server_port=server_port,