Spaces:
Sleeping
Sleeping
CrawfordZhou
committed on
Commit
•
49abf7f
1
Parent(s):
ba592d2
Update app.py
Browse files
app.py
CHANGED
@@ -18,8 +18,10 @@ logger = logging.getLogger(__name__)
|
|
18 |
|
19 |
import torch
|
20 |
import ssl
|
|
|
21 |
ssl._create_default_https_context = ssl._create_unverified_context
|
22 |
import nltk
|
|
|
23 |
nltk.download('cmudict')
|
24 |
import utils
|
25 |
from infer import infer, latest_version, get_net_g
|
@@ -29,6 +31,19 @@ import numpy as np
|
|
29 |
from config import config
|
30 |
|
31 |
net_g = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
device = config.webui_config.device
|
34 |
if device == "mps":
|
@@ -36,13 +51,13 @@ if device == "mps":
|
|
36 |
|
37 |
|
38 |
def generate_audio(
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
):
|
47 |
audio_list = []
|
48 |
silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
|
@@ -67,22 +82,25 @@ def generate_audio(
|
|
67 |
|
68 |
|
69 |
def tts_split(
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
80 |
):
|
81 |
if language == "mix":
|
82 |
return ("invalid", None)
|
83 |
while text.find("\n\n") != -1:
|
84 |
text = text.replace("\n\n", "\n")
|
85 |
-
|
|
|
86 |
audio_list = []
|
87 |
if not cut_by_sent:
|
88 |
for p in para_list:
|
@@ -132,27 +150,30 @@ def tts_split(
|
|
132 |
) # 对完整句子做音量归一
|
133 |
audio_list.append(audio16bit)
|
134 |
audio_concat = np.concatenate(audio_list)
|
135 |
-
return ("Success", (44100, audio_concat))
|
136 |
|
137 |
|
138 |
def tts_fn(
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
146 |
):
|
147 |
audio_list = []
|
|
|
148 |
if language == "mix":
|
149 |
-
bool_valid, str_valid = re_matching.validate_text(
|
150 |
if not bool_valid:
|
151 |
return str_valid, (
|
152 |
hps.data.sampling_rate,
|
153 |
np.concatenate([np.zeros(hps.data.sampling_rate // 2)]),
|
154 |
)
|
155 |
-
result = re_matching.text_matching(
|
156 |
for one in result:
|
157 |
_speaker = one.pop()
|
158 |
for lang, content in one:
|
@@ -168,7 +189,7 @@ def tts_fn(
|
|
168 |
)
|
169 |
)
|
170 |
elif language.lower() == "auto":
|
171 |
-
sentences_list = split_by_language(
|
172 |
for sentences, lang in sentences_list:
|
173 |
lang = lang.upper()
|
174 |
if lang == "JA":
|
@@ -189,7 +210,7 @@ def tts_fn(
|
|
189 |
else:
|
190 |
audio_list.extend(
|
191 |
generate_audio(
|
192 |
-
|
193 |
sdp_ratio,
|
194 |
noise_scale,
|
195 |
noise_scale_w,
|
@@ -200,7 +221,7 @@ def tts_fn(
|
|
200 |
)
|
201 |
|
202 |
audio_concat = np.concatenate(audio_list)
|
203 |
-
return "Success", (hps.data.sampling_rate, audio_concat)
|
204 |
|
205 |
|
206 |
if __name__ == "__main__":
|
@@ -220,27 +241,26 @@ if __name__ == "__main__":
|
|
220 |
with gr.Row():
|
221 |
with gr.Column():
|
222 |
gr.Markdown(value="""
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
【AI星瞳②】https://huggingface.co/spaces/XzJosh/Star-Bert-VITS2\n
|
227 |
-
【AI合集】https://www.modelscope.cn/studios/xzjosh/Bert-VITS2\n
|
228 |
Bert-VITS2项目:https://github.com/Stardust-minus/Bert-VITS2\n
|
|
|
|
|
|
|
229 |
使用本模型请严格遵守法律法规!\n
|
230 |
-
发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!\n
|
231 |
-
【提示】手机端容易误触调节,请刷新恢复默认!每次生成的结果都不一样,效果不好请尝试多次生成与调节,选择最佳结果!\n
|
232 |
""")
|
233 |
text = gr.TextArea(
|
234 |
-
label="
|
235 |
placeholder="""
|
236 |
-
|
237 |
-
如果选择语言为\'auto\',有概率无法识别。
|
238 |
-
如果选择语言为\'mix\',必须按照格式输入,否则报错:
|
239 |
-
格式举例(zh是中文,jp是日语,en是英语;不区分大小写):
|
240 |
-
[说话人]<zh>你好 <jp>こんにちは <en>Hello
|
241 |
-
另外,所有的语言选项都可以用'|'分割长段实现分句生成。
|
242 |
""",
|
243 |
)
|
|
|
|
|
|
|
|
|
244 |
speaker = gr.Dropdown(
|
245 |
choices=speakers, value=speakers[0], label="选择说话人"
|
246 |
)
|
@@ -282,6 +302,7 @@ if __name__ == "__main__":
|
|
282 |
)
|
283 |
slicer = gr.Button("切分生成", variant="primary")
|
284 |
text_output = gr.Textbox(label="状态信息")
|
|
|
285 |
audio_output = gr.Audio(label="输出音频")
|
286 |
# explain_image = gr.Image(
|
287 |
# label="参数解释信息",
|
@@ -294,6 +315,8 @@ if __name__ == "__main__":
|
|
294 |
tts_fn,
|
295 |
inputs=[
|
296 |
text,
|
|
|
|
|
297 |
speaker,
|
298 |
sdp_ratio,
|
299 |
noise_scale,
|
@@ -301,13 +324,15 @@ if __name__ == "__main__":
|
|
301 |
length_scale,
|
302 |
language,
|
303 |
],
|
304 |
-
outputs=[text_output, audio_output],
|
305 |
)
|
306 |
|
307 |
slicer.click(
|
308 |
tts_split,
|
309 |
inputs=[
|
310 |
text,
|
|
|
|
|
311 |
speaker,
|
312 |
sdp_ratio,
|
313 |
noise_scale,
|
@@ -318,7 +343,7 @@ if __name__ == "__main__":
|
|
318 |
interval_between_para,
|
319 |
interval_between_sent,
|
320 |
],
|
321 |
-
outputs=[text_output, audio_output],
|
322 |
)
|
323 |
|
324 |
print("推理页面已开启!")
|
|
|
18 |
|
19 |
import torch
|
20 |
import ssl
|
21 |
+
|
22 |
ssl._create_default_https_context = ssl._create_unverified_context
|
23 |
import nltk
|
24 |
+
|
25 |
nltk.download('cmudict')
|
26 |
import utils
|
27 |
from infer import infer, latest_version, get_net_g
|
|
|
31 |
from config import config
|
32 |
|
33 |
net_g = None
|
34 |
+
import openai
|
35 |
+
|
36 |
+
# openai.log = "debug"
|
37 |
+
openai.api_base = "https://api.chatanywhere.com.cn/v1"
|
38 |
+
|
39 |
+
|
40 |
+
# 非流式响应
|
41 |
+
|
42 |
+
def gpt_35_api(gptkey, message):
    """Send ``message`` as one user turn to gpt-3.5-turbo and return the reply.

    This is a non-streaming request made through the module-level ``openai``
    client.

    Args:
        gptkey: API key, given with or without its leading ``"sk-"``.
            Surrounding whitespace is ignored.
        message: Prompt text, sent as the only message with role ``"user"``.

    Returns:
        The ``content`` of the first choice in the completion.
    """
    # Accept keys pasted together with their "sk-" prefix; prepending it
    # blindly would produce an invalid "sk-sk-..." key.
    api_key = gptkey.strip()
    if not api_key.startswith("sk-"):
        api_key = "sk-" + api_key

    # NOTE: the key is assigned on the openai module itself, so it is shared
    # by every caller in the process rather than scoped to this request.
    openai.api_key = api_key

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": message}],
    )
    return completion.choices[0].message.content
|
46 |
+
|
47 |
|
48 |
device = config.webui_config.device
|
49 |
if device == "mps":
|
|
|
51 |
|
52 |
|
53 |
def generate_audio(
|
54 |
+
slices,
|
55 |
+
sdp_ratio,
|
56 |
+
noise_scale,
|
57 |
+
noise_scale_w,
|
58 |
+
length_scale,
|
59 |
+
speaker,
|
60 |
+
language,
|
61 |
):
|
62 |
audio_list = []
|
63 |
silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
|
|
|
82 |
|
83 |
|
84 |
def tts_split(
|
85 |
+
text: str,
|
86 |
+
font,
|
87 |
+
key,
|
88 |
+
speaker,
|
89 |
+
sdp_ratio,
|
90 |
+
noise_scale,
|
91 |
+
noise_scale_w,
|
92 |
+
length_scale,
|
93 |
+
language,
|
94 |
+
cut_by_sent,
|
95 |
+
interval_between_para,
|
96 |
+
interval_between_sent,
|
97 |
):
|
98 |
if language == "mix":
|
99 |
return ("invalid", None, None)
|
100 |
while text.find("\n\n") != -1:
|
101 |
text = text.replace("\n\n", "\n")
|
102 |
+
transfer_text = gpt_35_api(key, font + text)
|
103 |
+
para_list = re_matching.cut_para(transfer_text)
|
104 |
audio_list = []
|
105 |
if not cut_by_sent:
|
106 |
for p in para_list:
|
|
|
150 |
) # 对完整句子做音量归一
|
151 |
audio_list.append(audio16bit)
|
152 |
audio_concat = np.concatenate(audio_list)
|
153 |
+
return ("Success", transfer_text, (44100, audio_concat))
|
154 |
|
155 |
|
156 |
def tts_fn(
|
157 |
+
text: str,
|
158 |
+
font,
|
159 |
+
key,
|
160 |
+
speaker,
|
161 |
+
sdp_ratio,
|
162 |
+
noise_scale,
|
163 |
+
noise_scale_w,
|
164 |
+
length_scale,
|
165 |
+
language,
|
166 |
):
|
167 |
audio_list = []
|
168 |
+
transfer_text = gpt_35_api(key, font + text)
|
169 |
if language == "mix":
|
170 |
+
bool_valid, str_valid = re_matching.validate_text(transfer_text)
|
171 |
if not bool_valid:
|
172 |
return str_valid, transfer_text, (
|
173 |
hps.data.sampling_rate,
|
174 |
np.concatenate([np.zeros(hps.data.sampling_rate // 2)]),
|
175 |
)
|
176 |
+
result = re_matching.text_matching(transfer_text)
|
177 |
for one in result:
|
178 |
_speaker = one.pop()
|
179 |
for lang, content in one:
|
|
|
189 |
)
|
190 |
)
|
191 |
elif language.lower() == "auto":
|
192 |
+
sentences_list = split_by_language(transfer_text, target_languages=["zh", "ja", "en"])
|
193 |
for sentences, lang in sentences_list:
|
194 |
lang = lang.upper()
|
195 |
if lang == "JA":
|
|
|
210 |
else:
|
211 |
audio_list.extend(
|
212 |
generate_audio(
|
213 |
+
transfer_text.split("|"),
|
214 |
sdp_ratio,
|
215 |
noise_scale,
|
216 |
noise_scale_w,
|
|
|
221 |
)
|
222 |
|
223 |
audio_concat = np.concatenate(audio_list)
|
224 |
+
return "Success", transfer_text, (hps.data.sampling_rate, audio_concat)
|
225 |
|
226 |
|
227 |
if __name__ == "__main__":
|
|
|
241 |
with gr.Row():
|
242 |
with gr.Column():
|
243 |
gr.Markdown(value="""
|
244 |
+
#【AI星瞳——gpt对话版】在线语音合成(Bert-Vits2 2.0中日英)\n
|
245 |
+
![avatar](https://img1.baidu.com/it/u=381691319,2894195285&fm=253&fmt=auto&app=138&f=JPEG?w=400&h=300)\n
|
246 |
+
作者:[Xz乔希](https://space.bilibili.com/5859321) 集成作者:[碎语碎念](https://space.bilibili.com/4269384) 声音归属:[星瞳_Official](https://space.bilibili.com/401315430) \n
|
|
|
|
|
247 |
Bert-VITS2项目:https://github.com/Stardust-minus/Bert-VITS2\n
|
248 |
+
GPT_API_free项目:https://github.com/chatanywhere/GPT_API_free\n
|
249 |
+
本项目中的apiKey可以从https://github.com/chatanywhere/GPT_API_free\n
|
250 |
+
免费获取(本项目默认提供了一个,如果没法用了去仓库申请替换就好啦)!\n
|
251 |
使用本模型请严格遵守法律法规!\n
|
252 |
+
发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!\n
|
|
|
253 |
""")
|
254 |
text = gr.TextArea(
|
255 |
+
label="请输入要向星瞳老师提问的问题",
|
256 |
placeholder="""
|
257 |
+
虚拟主播是什么?
|
|
|
|
|
|
|
|
|
|
|
258 |
""",
|
259 |
)
|
260 |
+
front_text = gr.Text(label="请输入情景语言", placeholder="请输入情景语言",
|
261 |
+
value="你是一个叫星瞳的虚拟主播,")
|
262 |
+
key = gr.Text(label="GPT Key", placeholder="请输入上面提示中获取的gpt key",
|
263 |
+
value="izlrijShDu7tp2rIgvYfibcC2J0Eh3uWfdm9ndrxN5nWrL96")
|
264 |
speaker = gr.Dropdown(
|
265 |
choices=speakers, value=speakers[0], label="选择说话人"
|
266 |
)
|
|
|
302 |
)
|
303 |
slicer = gr.Button("切分生成", variant="primary")
|
304 |
text_output = gr.Textbox(label="状态信息")
|
305 |
+
gpt_output = gr.TextArea(label="星瞳老师的答案")
|
306 |
audio_output = gr.Audio(label="输出音频")
|
307 |
# explain_image = gr.Image(
|
308 |
# label="参数解释信息",
|
|
|
315 |
tts_fn,
|
316 |
inputs=[
|
317 |
text,
|
318 |
+
front_text,
|
319 |
+
key,
|
320 |
speaker,
|
321 |
sdp_ratio,
|
322 |
noise_scale,
|
|
|
324 |
length_scale,
|
325 |
language,
|
326 |
],
|
327 |
+
outputs=[text_output, gpt_output, audio_output],
|
328 |
)
|
329 |
|
330 |
slicer.click(
|
331 |
tts_split,
|
332 |
inputs=[
|
333 |
text,
|
334 |
+
front_text,
|
335 |
+
key,
|
336 |
speaker,
|
337 |
sdp_ratio,
|
338 |
noise_scale,
|
|
|
343 |
interval_between_para,
|
344 |
interval_between_sent,
|
345 |
],
|
346 |
+
outputs=[text_output, gpt_output, audio_output],
|
347 |
)
|
348 |
|
349 |
print("推理页面已开启!")
|