CrawfordZhou committed on
Commit
49abf7f
1 Parent(s): ba592d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -47
app.py CHANGED
@@ -18,8 +18,10 @@ logger = logging.getLogger(__name__)
18
 
19
  import torch
20
  import ssl
 
21
  ssl._create_default_https_context = ssl._create_unverified_context
22
  import nltk
 
23
  nltk.download('cmudict')
24
  import utils
25
  from infer import infer, latest_version, get_net_g
@@ -29,6 +31,19 @@ import numpy as np
29
  from config import config
30
 
31
  net_g = None
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  device = config.webui_config.device
34
  if device == "mps":
@@ -36,13 +51,13 @@ if device == "mps":
36
 
37
 
38
  def generate_audio(
39
- slices,
40
- sdp_ratio,
41
- noise_scale,
42
- noise_scale_w,
43
- length_scale,
44
- speaker,
45
- language,
46
  ):
47
  audio_list = []
48
  silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
@@ -67,22 +82,25 @@ def generate_audio(
67
 
68
 
69
  def tts_split(
70
- text: str,
71
- speaker,
72
- sdp_ratio,
73
- noise_scale,
74
- noise_scale_w,
75
- length_scale,
76
- language,
77
- cut_by_sent,
78
- interval_between_para,
79
- interval_between_sent,
 
 
80
  ):
81
  if language == "mix":
82
  return ("invalid", None)
83
  while text.find("\n\n") != -1:
84
  text = text.replace("\n\n", "\n")
85
- para_list = re_matching.cut_para(text)
 
86
  audio_list = []
87
  if not cut_by_sent:
88
  for p in para_list:
@@ -132,27 +150,30 @@ def tts_split(
132
  ) # 对完整句子做音量归一
133
  audio_list.append(audio16bit)
134
  audio_concat = np.concatenate(audio_list)
135
- return ("Success", (44100, audio_concat))
136
 
137
 
138
  def tts_fn(
139
- text: str,
140
- speaker,
141
- sdp_ratio,
142
- noise_scale,
143
- noise_scale_w,
144
- length_scale,
145
- language,
 
 
146
  ):
147
  audio_list = []
 
148
  if language == "mix":
149
- bool_valid, str_valid = re_matching.validate_text(text)
150
  if not bool_valid:
151
  return str_valid, (
152
  hps.data.sampling_rate,
153
  np.concatenate([np.zeros(hps.data.sampling_rate // 2)]),
154
  )
155
- result = re_matching.text_matching(text)
156
  for one in result:
157
  _speaker = one.pop()
158
  for lang, content in one:
@@ -168,7 +189,7 @@ def tts_fn(
168
  )
169
  )
170
  elif language.lower() == "auto":
171
- sentences_list = split_by_language(text, target_languages=["zh", "ja", "en"])
172
  for sentences, lang in sentences_list:
173
  lang = lang.upper()
174
  if lang == "JA":
@@ -189,7 +210,7 @@ def tts_fn(
189
  else:
190
  audio_list.extend(
191
  generate_audio(
192
- text.split("|"),
193
  sdp_ratio,
194
  noise_scale,
195
  noise_scale_w,
@@ -200,7 +221,7 @@ def tts_fn(
200
  )
201
 
202
  audio_concat = np.concatenate(audio_list)
203
- return "Success", (hps.data.sampling_rate, audio_concat)
204
 
205
 
206
  if __name__ == "__main__":
@@ -220,27 +241,26 @@ if __name__ == "__main__":
220
  with gr.Row():
221
  with gr.Column():
222
  gr.Markdown(value="""
223
- AI星瞳①】在线语音合成(Bert-Vits2 2.0中日英)\n
224
- 作者:Xz乔希 https://space.bilibili.com/5859321\n
225
- 声音归属:星瞳_Official https://space.bilibili.com/401315430\n
226
- 【AI星瞳②】https://huggingface.co/spaces/XzJosh/Star-Bert-VITS2\n
227
- 【AI合集】https://www.modelscope.cn/studios/xzjosh/Bert-VITS2\n
228
  Bert-VITS2项目:https://github.com/Stardust-minus/Bert-VITS2\n
 
 
 
229
  使用本模型请严格遵守法律法规!\n
230
- 发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!\n
231
- 【提示】手机端容易误触调节,请刷新恢复默认!每次生成的结果都不一样,效果不好请尝试多次生成与调节,选择最佳结果!\n
232
  """)
233
  text = gr.TextArea(
234
- label="输入文本内容",
235
  placeholder="""
236
- 推荐不同语言分开推理,因为无法连贯且可能影响最终效果!
237
- 如果选择语言为\'auto\',有概率无法识别。
238
- 如果选择语言为\'mix\',必须按照格式输入,否则报错:
239
- 格式举例(zh是中文,jp是日语,en是英语;不区分大小写):
240
- [说话人]<zh>你好 <jp>こんにちは <en>Hello
241
- 另外,所有的语言选项都可以用'|'分割长段实现分句生成。
242
  """,
243
  )
 
 
 
 
244
  speaker = gr.Dropdown(
245
  choices=speakers, value=speakers[0], label="选择说话人"
246
  )
@@ -282,6 +302,7 @@ if __name__ == "__main__":
282
  )
283
  slicer = gr.Button("切分生成", variant="primary")
284
  text_output = gr.Textbox(label="状态信息")
 
285
  audio_output = gr.Audio(label="输出音频")
286
  # explain_image = gr.Image(
287
  # label="参数解释信息",
@@ -294,6 +315,8 @@ if __name__ == "__main__":
294
  tts_fn,
295
  inputs=[
296
  text,
 
 
297
  speaker,
298
  sdp_ratio,
299
  noise_scale,
@@ -301,13 +324,15 @@ if __name__ == "__main__":
301
  length_scale,
302
  language,
303
  ],
304
- outputs=[text_output, audio_output],
305
  )
306
 
307
  slicer.click(
308
  tts_split,
309
  inputs=[
310
  text,
 
 
311
  speaker,
312
  sdp_ratio,
313
  noise_scale,
@@ -318,7 +343,7 @@ if __name__ == "__main__":
318
  interval_between_para,
319
  interval_between_sent,
320
  ],
321
- outputs=[text_output, audio_output],
322
  )
323
 
324
  print("推理页面已开启!")
 
18
 
19
  import torch
20
  import ssl
21
+
22
  ssl._create_default_https_context = ssl._create_unverified_context
23
  import nltk
24
+
25
  nltk.download('cmudict')
26
  import utils
27
  from infer import infer, latest_version, get_net_g
 
31
  from config import config
32
 
33
  net_g = None
34
+ import openai
35
+
36
+ # openai.log = "debug"
37
+ openai.api_base = "https://api.chatanywhere.com.cn/v1"
38
+
39
+
40
+ # 非流式响应
41
+
42
def gpt_35_api(gptkey, message):
    """Send a single-turn, non-streaming request to gpt-3.5-turbo.

    gptkey: key suffix; the "sk-" prefix is prepended before use.
    message: the user prompt text.
    Returns the assistant reply content as a string.
    """
    openai.api_key = "sk-" + gptkey
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": message}],
    )
    return response.choices[0].message.content
46
+
47
 
48
  device = config.webui_config.device
49
  if device == "mps":
 
51
 
52
 
53
  def generate_audio(
54
+ slices,
55
+ sdp_ratio,
56
+ noise_scale,
57
+ noise_scale_w,
58
+ length_scale,
59
+ speaker,
60
+ language,
61
  ):
62
  audio_list = []
63
  silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
 
82
 
83
 
84
  def tts_split(
85
+ text: str,
86
+ font,
87
+ key,
88
+ speaker,
89
+ sdp_ratio,
90
+ noise_scale,
91
+ noise_scale_w,
92
+ length_scale,
93
+ language,
94
+ cut_by_sent,
95
+ interval_between_para,
96
+ interval_between_sent,
97
  ):
98
  if language == "mix":
99
  return ("invalid", None)
100
  while text.find("\n\n") != -1:
101
  text = text.replace("\n\n", "\n")
102
+ transfer_text = gpt_35_api(key, font + text)
103
+ para_list = re_matching.cut_para(transfer_text)
104
  audio_list = []
105
  if not cut_by_sent:
106
  for p in para_list:
 
150
  ) # 对完整句子做音量归一
151
  audio_list.append(audio16bit)
152
  audio_concat = np.concatenate(audio_list)
153
 + return ("Success", transfer_text, (44100, audio_concat))
154
 
155
 
156
  def tts_fn(
157
+ text: str,
158
+ font,
159
+ key,
160
+ speaker,
161
+ sdp_ratio,
162
+ noise_scale,
163
+ noise_scale_w,
164
+ length_scale,
165
+ language,
166
  ):
167
  audio_list = []
168
+ transfer_text = gpt_35_api(key, font + text)
169
  if language == "mix":
170
+ bool_valid, str_valid = re_matching.validate_text(transfer_text)
171
  if not bool_valid:
172
  return str_valid, (
173
  hps.data.sampling_rate,
174
  np.concatenate([np.zeros(hps.data.sampling_rate // 2)]),
175
  )
176
+ result = re_matching.text_matching(transfer_text)
177
  for one in result:
178
  _speaker = one.pop()
179
  for lang, content in one:
 
189
  )
190
  )
191
  elif language.lower() == "auto":
192
+ sentences_list = split_by_language(transfer_text, target_languages=["zh", "ja", "en"])
193
  for sentences, lang in sentences_list:
194
  lang = lang.upper()
195
  if lang == "JA":
 
210
  else:
211
  audio_list.extend(
212
  generate_audio(
213
+ transfer_text.split("|"),
214
  sdp_ratio,
215
  noise_scale,
216
  noise_scale_w,
 
221
  )
222
 
223
  audio_concat = np.concatenate(audio_list)
224
 + return "Success", transfer_text, (hps.data.sampling_rate, audio_concat)
225
 
226
 
227
  if __name__ == "__main__":
 
241
  with gr.Row():
242
  with gr.Column():
243
  gr.Markdown(value="""
244
+ #【AI星瞳——gpt对话版】在线语音合成(Bert-Vits2 2.0中日英)\n
245
+ ![avatar](https://img1.baidu.com/it/u=381691319,2894195285&fm=253&fmt=auto&app=138&f=JPEG?w=400&h=300)\n
246
+ 作者:[Xz乔希](https://space.bilibili.com/5859321) 集成作者:[碎语碎念](https://space.bilibili.com/4269384) 声音归属:[星瞳_Official](https://space.bilibili.com/401315430) \n
 
 
247
  Bert-VITS2项目:https://github.com/Stardust-minus/Bert-VITS2\n
248
+ GPT_API_free项目:https://github.com/chatanywhere/GPT_API_free\n
249
+ 本项目中的apiKey可以从https://github.com/chatanywhere/GPT_API_free\n
250
+ 免费获取(本项目默认提供了一个,如果没法用了去仓库申请替换就好啦)!\n
251
  使用本模型请严格遵守法律法规!\n
252
+ 发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!\n
 
253
  """)
254
  text = gr.TextArea(
255
+ label="请输入要向星瞳老师提问的问题",
256
  placeholder="""
257
+ 虚拟主播是什么?
 
 
 
 
 
258
  """,
259
  )
260
+ front_text = gr.Text(label="请输入情景语言", placeholder="请输入情景语言",
261
+ value="你是一个叫星瞳的虚拟主播,")
262
+ key = gr.Text(label="GPT Key", placeholder="请输入上面提示中获取的gpt key",
263
+ value="izlrijShDu7tp2rIgvYfibcC2J0Eh3uWfdm9ndrxN5nWrL96")
264
  speaker = gr.Dropdown(
265
  choices=speakers, value=speakers[0], label="选择说话人"
266
  )
 
302
  )
303
  slicer = gr.Button("切分生成", variant="primary")
304
  text_output = gr.Textbox(label="状态信息")
305
+ gpt_output = gr.TextArea(label="星瞳老师的答案")
306
  audio_output = gr.Audio(label="输出音频")
307
  # explain_image = gr.Image(
308
  # label="参数解释信息",
 
315
  tts_fn,
316
  inputs=[
317
  text,
318
+ front_text,
319
+ key,
320
  speaker,
321
  sdp_ratio,
322
  noise_scale,
 
324
  length_scale,
325
  language,
326
  ],
327
+ outputs=[text_output, gpt_output, audio_output],
328
  )
329
 
330
  slicer.click(
331
  tts_split,
332
  inputs=[
333
  text,
334
 + front_text,
335
+ key,
336
  speaker,
337
  sdp_ratio,
338
  noise_scale,
 
343
  interval_between_para,
344
  interval_between_sent,
345
  ],
346
+ outputs=[text_output, gpt_output, audio_output],
347
  )
348
 
349
  print("推理页面已开启!")