txya900619 committed on
Commit
a523a5e
1 Parent(s): 576392b

feat: update model and use zero

Browse files
Files changed (2) hide show
  1. app.py +48 -18
  2. configs/models.yaml +2 -20
app.py CHANGED
@@ -6,6 +6,8 @@ from TTS.utils.synthesizer import Synthesizer
6
  import numpy as np
7
  from huggingface_hub import snapshot_download
8
  from omegaconf import OmegaConf
 
 
9
 
10
  from ipa.ipa import get_ipa, parse_ipa
11
  from replace.tts import ChangedVitsConfig
@@ -31,13 +33,34 @@ def load_model(model_id):
31
  with open(temp_config_path, "w") as f:
32
  f.write(content)
33
  f.close()
34
- return Synthesizer(tts_checkpoint=model_ckpt_path, tts_config_path=temp_config_path)
 
 
 
 
 
35
 
36
 
37
  OmegaConf.register_new_resolver("load_model", load_model)
38
 
39
  models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def text_to_speech(
43
  model_id: str,
@@ -55,19 +78,23 @@ def text_to_speech(
55
  raise gr.Error(
56
  f"句子中的[{','.join(missing_words)}]目前無法轉成 ipa。請嘗試其他句子。"
57
  )
 
58
  if use_default_emb_or_custom == "default":
59
- wav = model.tts(
60
- parse_ipa(ipa),
61
- speaker_name=speaker if len(models_config[model_id]["speaker_mapping"]) > 1 else None,
 
 
 
62
  language_name=dialect,
63
- split_sentences=False,
64
  )
65
  else:
66
- wav = model.tts(
67
- parse_ipa(ipa),
 
68
  speaker_wav=speaker_wav,
69
  language_name=dialect,
70
- split_sentences=False,
71
  )
72
 
73
  return (
@@ -83,7 +110,7 @@ def when_model_selected(model_id):
83
  speaker_drop_down_choices = [
84
  (k, v) for k, v in model_config["speaker_mapping"].items()
85
  ]
86
-
87
  dialect_drop_down_choices = [
88
  (k, v) for k, v in model_config["dialect_mapping"].items()
89
  ]
@@ -91,11 +118,13 @@ def when_model_selected(model_id):
91
  use_default_emb_or_ref_radio_visible = False
92
  if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
93
  use_default_emb_or_ref_radio_visible = True
94
-
95
  return (
96
  gr.update(
97
  choices=speaker_drop_down_choices,
98
- value=speaker_drop_down_choices[0][1] if len(speaker_drop_down_choices) > 0 else None,
 
 
99
  interactive=len(speaker_drop_down_choices) > 1,
100
  ),
101
  gr.update(
@@ -138,9 +167,9 @@ with demo:
138
  label="use default speaker embedding or custom speaker embedding",
139
  choices=["default", "custom"],
140
  value="default",
141
- visible=False,
142
  )
143
- speaker_wav = gr.Microphone(
144
  label="speaker wav",
145
  visible=False,
146
  editable=False,
@@ -185,11 +214,9 @@ with demo:
185
  """
186
  # 臺灣客語語音合成系統
187
  ### Taiwanese Hakka Text-to-Speech System
188
- ### 模型
189
- - **sixian-1f-240417**(四縣腔,單一語者)
190
  ### 研發
191
- - **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)(諾思資訊 North Co., Ltd.)**
192
- - **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)(諾思資訊 North Co., Ltd.)**
193
  """
194
  )
195
  gr.Interface(
@@ -200,7 +227,10 @@ with demo:
200
  speaker_wav,
201
  speaker_drop_down,
202
  dialect_drop_down,
203
- gr.Textbox(label="輸入文字", value="客家族群个六堆運動會會一直延續下去,為臺灣个體育史寫下特別个一頁。"),
 
 
 
204
  ],
205
  outputs=[
206
  gr.Textbox(interactive=False, label="斷詞"),
 
6
  import numpy as np
7
  from huggingface_hub import snapshot_download
8
  from omegaconf import OmegaConf
9
+ import spaces
10
+ import torch
11
 
12
  from ipa.ipa import get_ipa, parse_ipa
13
  from replace.tts import ChangedVitsConfig
 
33
  with open(temp_config_path, "w") as f:
34
  f.write(content)
35
  f.close()
36
+
37
+ return Synthesizer(
38
+ tts_checkpoint=model_ckpt_path,
39
+ tts_config_path=temp_config_path,
40
+ use_cuda=torch.cuda.is_available(),
41
+ )
42
 
43
 
44
  OmegaConf.register_new_resolver("load_model", load_model)
45
 
46
  models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
47
 
48
+ @spaces.GPU
49
+ def _do_tts(model, ipa, language_name,speaker_name=None, speaker_wav=None):
50
+ if speaker_wav is not None:
51
+ return model.tts(
52
+ ipa,
53
+ speaker_wav=speaker_wav,
54
+ language_name=language_name,
55
+ split_sentences=False,
56
+ )
57
+ return model.tts(
58
+ ipa,
59
+ speaker_name=speaker_name,
60
+ language_name=language_name,
61
+ split_sentences=False,
62
+ )
63
+
64
 
65
  def text_to_speech(
66
  model_id: str,
 
78
  raise gr.Error(
79
  f"句子中的[{','.join(missing_words)}]目前無法轉成 ipa。請嘗試其他句子。"
80
  )
81
+ parsed_ipa = parse_ipa(ipa)
82
  if use_default_emb_or_custom == "default":
83
+ wav = _do_tts(
84
+ model,
85
+ parsed_ipa,
86
+ speaker_name=speaker
87
+ if len(models_config[model_id]["speaker_mapping"]) > 1
88
+ else None,
89
  language_name=dialect,
90
+ speaker_wav=speaker_wav,
91
  )
92
  else:
93
+ wav = _do_tts(
94
+ model,
95
+ parsed_ipa,
96
  speaker_wav=speaker_wav,
97
  language_name=dialect,
 
98
  )
99
 
100
  return (
 
110
  speaker_drop_down_choices = [
111
  (k, v) for k, v in model_config["speaker_mapping"].items()
112
  ]
113
+
114
  dialect_drop_down_choices = [
115
  (k, v) for k, v in model_config["dialect_mapping"].items()
116
  ]
 
118
  use_default_emb_or_ref_radio_visible = False
119
  if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
120
  use_default_emb_or_ref_radio_visible = True
121
+
122
  return (
123
  gr.update(
124
  choices=speaker_drop_down_choices,
125
+ value=speaker_drop_down_choices[0][1]
126
+ if len(speaker_drop_down_choices) > 0
127
+ else None,
128
  interactive=len(speaker_drop_down_choices) > 1,
129
  ),
130
  gr.update(
 
167
  label="use default speaker embedding or custom speaker embedding",
168
  choices=["default", "custom"],
169
  value="default",
170
+ visible=True,
171
  )
172
+ speaker_wav = gr.Audio(
173
  label="speaker wav",
174
  visible=False,
175
  editable=False,
 
214
  """
215
  # 臺灣客語語音合成系統
216
  ### Taiwanese Hakka Text-to-Speech System
 
 
217
  ### 研發
218
+ - **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
219
+ - **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
220
  """
221
  )
222
  gr.Interface(
 
227
  speaker_wav,
228
  speaker_drop_down,
229
  dialect_drop_down,
230
+ gr.Textbox(
231
+ label="輸入文字",
232
+ value="客家族群个六堆運動會會一直延續下去,為臺灣个體育史寫下特別个一頁。",
233
+ ),
234
  ],
235
  outputs=[
236
  gr.Textbox(interactive=False, label="斷詞"),
configs/models.yaml CHANGED
@@ -1,23 +1,5 @@
1
- sixian-1f-240417:
2
- model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-1f-240417}
3
- dialect_mapping:
4
- 四縣: sixian
5
- speaker_mapping: # display_name: id
6
- 女(64)/苗栗: XF
7
-
8
- sixian-hailu-mix:
9
- model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-hailu-mix}
10
- dialect_mapping:
11
- 四縣: sixian
12
- 海陸: hailu
13
- speaker_mapping: # display_name: id
14
- 女(64)/苗栗: XF
15
- 男(53)/苗栗: XM
16
- 女(54)/新竹: HF
17
- 男(56)/新竹: HM
18
-
19
- sixian-hailu-mix-se:
20
- model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-hailu-mix-se}
21
  dialect_mapping:
22
  四縣: sixian
23
  海陸: hailu
 
1
+ yourtts-htia-240515:
2
+ model: ${load_model:formospeech/yourtts-htia-240515}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  dialect_mapping:
4
  四縣: sixian
5
  海陸: hailu