Mahiruoshi committed
Commit d1871c9
1 Parent(s): 44988be

Update app.py

Files changed (1)
  1. app.py +17 -263
app.py CHANGED
@@ -1,270 +1,24 @@
-import logging
-logging.getLogger('numba').setLevel(logging.WARNING)
-logging.getLogger('matplotlib').setLevel(logging.WARNING)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-import json
-import re
-import numpy as np
-import IPython.display as ipd
-import torch
-import commons
-import utils
-from models import SynthesizerTrn
-from text.symbols import symbols
-from text import text_to_sequence
 import gradio as gr
+import random
 import time
-import datetime
-import os
-import pickle
-import openai
-from scipy.io.wavfile import write
-def is_japanese(string):
-    for ch in string:
-        if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
-            return True
-    return False
-
-def is_english(string):
-    import re
-    pattern = re.compile('^[A-Za-z0-9.,:;!?()_*"\' ]+$')
-    if pattern.fullmatch(string):
-        return True
-    else:
-        return False
-
-def extrac(text):
-    text = re.sub("<[^>]*>","",text)
-    result_list = re.split(r'\n', text)
-    final_list = []
-    for i in result_list:
-        if is_english(i):
-            i = romajitable.to_kana(i).katakana
-        i = i.replace('\n','').replace(' ','')
-        #Current length of single sentence: 20
-        if len(i)>1:
-            if len(i) > 20:
-                try:
-                    cur_list = re.split(r'。|!', i)
-                    for i in cur_list:
-                        if len(i)>1:
-                            final_list.append(i+'。')
-                except:
-                    pass
-            else:
-                final_list.append(i)
-    final_list = [x for x in final_list if x != '']
-    print(final_list)
-    return final_list
-
-def to_numpy(tensor: torch.Tensor):
-    return tensor.detach().cpu().numpy() if tensor.requires_grad \
-        else tensor.detach().numpy()
-
-def chatgpt(text):
-    messages = []
-    try:
-        with open('log.pickle', 'rb') as f:
-            messages = pickle.load(f)
-        messages.append({"role": "user", "content": text},)
-        chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
-        reply = chat.choices[0].message.content
-        messages.append({"role": "assistant", "content": reply})
-        print(messages[-1])
-        if len(messages) == 12:
-            messages[6:10] = messages[8:]
-            del messages[-2:]
-        with open('log.pickle', 'wb') as f:
-            pickle.dump(messages, f)
-        return reply
-    except:
-        messages.append({"role": "user", "content": text},)
-        chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
-        reply = chat.choices[0].message.content
-        messages.append({"role": "assistant", "content": reply})
-        print(messages[-1])
-        if len(messages) == 12:
-            messages[6:10] = messages[8:]
-            del messages[-2:]
-        with open('log.pickle', 'wb') as f:
-            pickle.dump(messages, f)
-        return reply
-
-def get_symbols_from_json(path):
-    assert os.path.isfile(path)
-    with open(path, 'r') as f:
-        data = json.load(f)
-    return data['symbols']
-
-def sle(language,text):
-    text = text.replace('\n', ' ').replace('\r', '').replace(" ", "")
-    if language == "中文":
-        tts_input1 = "[ZH]" + text + "[ZH]"
-        return tts_input1
-    elif language == "自动":
-        tts_input1 = f"[JA]{text}[JA]" if is_japanese(text) else f"[ZH]{text}[ZH]"
-        return tts_input1
-    elif language == "日文":
-        tts_input1 = "[JA]" + text + "[JA]"
-        return tts_input1
-    elif language == "英文":
-        tts_input1 = "[EN]" + text + "[EN]"
-        return tts_input1
-    elif language == "手动":
-        return text
-
-def get_text(text,hps_ms):
-    text_norm = text_to_sequence(text,hps_ms.data.text_cleaners)
-    if hps_ms.data.add_blank:
-        text_norm = commons.intersperse(text_norm, 0)
-    text_norm = torch.LongTensor(text_norm)
-    return text_norm
-
-def create_tts_fn(net_g,hps,speaker_id):
-    speaker_id = int(speaker_id)
-    def tts_fn(history,is_gpt,api_key,is_audio,audiopath,repeat_time,text, language, extract, n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
-        repeat_time = int(repeat_time)
-        if is_gpt:
-            openai.api_key = api_key
-            text = chatgpt(text)
-            history[-1][1] = text
-        if not extract:
-            print(text)
-            t1 = time.time()
-            stn_tst = get_text(sle(language,text),hps)
-            with torch.no_grad():
-                x_tst = stn_tst.unsqueeze(0).to(dev)
-                x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
-                sid = torch.LongTensor([speaker_id]).to(dev)
-                audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
-            t2 = time.time()
-            spending_time = "推理时间为:"+str(t2-t1)+"s"
-            print(spending_time)
-            file_path = "subtitles.srt"
-            write('moe/temp.wav',22050,audio)
-            try:
-                write(audiopath + '.wav',22050,audio)
-                if is_audio:
-                    for i in range(repeat_time):
-                        cmd = 'ffmpeg -y -i ' + audiopath + '.wav' + ' -ar 44100 '+ audiopath.replace('temp','temp'+str(i))
-                        os.system(cmd)
-            except:
-                pass
-            return history,file_path,(hps.data.sampling_rate,audio)
-        else:
-            a = ['【','[','(','(']
-            b = ['】',']',')',')']
-            for i in a:
-                text = text.replace(i,'<')
-            for i in b:
-                text = text.replace(i,'>')
-            final_list = extrac(text.replace('“','').replace('”',''))
-            audio_fin = []
-            c = 0
-            t = datetime.timedelta(seconds=0)
-            f1 = open("subtitles.srt",'w',encoding='utf-8')
-            for sentence in final_list:
-                c +=1
-                stn_tst = get_text(sle(language,sentence),hps)
-                with torch.no_grad():
-                    x_tst = stn_tst.unsqueeze(0).to(dev)
-                    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
-                    sid = torch.LongTensor([speaker_id]).to(dev)
-                    t1 = time.time()
-                    audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
-                    t2 = time.time()
-                    spending_time = "第"+str(c)+"句的推理时间为:"+str(t2-t1)+"s"
-                    print(spending_time)
-                time_start = str(t).split(".")[0] + "," + str(t.microseconds)[:3]
-                last_time = datetime.timedelta(seconds=len(audio)/float(22050))
-                t+=last_time
-                time_end = str(t).split(".")[0] + "," + str(t.microseconds)[:3]
-                print(time_end)
-                f1.write(str(c-1)+'\n'+time_start+' --> '+time_end+'\n'+sentence+'\n\n')
-                audio_fin.append(audio)
-            try:
-                write(audiopath + '.wav',22050,np.concatenate(audio_fin))
-                if is_audio:
-                    for i in range(repeat_time):
-                        cmd = 'ffmpeg -y -i ' + audiopath + '.wav' + ' -ar 44100 '+ audiopath.replace('temp','temp'+str(i))
-                        os.system(cmd)
-
-            except:
-                pass
-
-            file_path = "subtitles.srt"
-            return history,file_path,(hps.data.sampling_rate, np.concatenate(audio_fin))
-    return tts_fn
-
-def bot(history,user_message):
-    return history + [[user_message, None]]
-
-if __name__ == '__main__':
-    hps = utils.get_hparams_from_file('checkpoints/tmp/config.json')
-    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-    models = []
-    schools = ["Nijigasaki High School","Seisho-Nijigasaki(Recommend)","Seisho Music Academy","Rinmeikan Girls School","Frontier School of Arts","Siegfeld Institute of Music"]
-    lan = ["中文","日文","自动","手动"]
-    with open("checkpoints/info.json", "r", encoding="utf-8") as f:
-        models_info = json.load(f)
-    for i in models_info:
-        school = models_info[i]
-        speakers = school["speakers"]
-        checkpoint = school["checkpoint"]
-        phone_dict = {
-            symbol: i for i, symbol in enumerate(symbols)
-        }
-        net_g = SynthesizerTrn(
-            len(symbols),
-            hps.data.filter_length // 2 + 1,
-            hps.train.segment_size // hps.data.hop_length,
-            n_speakers=hps.data.n_speakers,
-            **hps.model).to(dev)
-        _ = net_g.eval()
-        _ = utils.load_checkpoint(checkpoint, net_g)
-        content = []
-        for j in speakers:
-            sid = int(speakers[j]['sid'])
-            title = school
-            example = speakers[j]['speech']
-            name = speakers[j]["name"]
-            content.append((sid, name, title, example, create_tts_fn(net_g,hps,sid)))
-        models.append(content)
-
-    with gr.Blocks() as app:
-        with gr.Tabs():
-            for i in schools:
-                with gr.TabItem(i):
-                    for (sid, name, title, example, tts_fn) in models[schools.index(i)]:
-                        with gr.TabItem(name):
-                            with gr.Column():
-                                with gr.Row():
-                                    with gr.Row():
-                                        gr.Markdown(
-                                            '<div align="center">'
-                                            f'<img style="width:auto;height:400px;" src="file/image/{name}.png">'
-                                            '</div>'
-                                        )
-                                    chatbot = gr.Chatbot(elem_id="History")
-                                with gr.Row():
-                                    input1 = gr.TextArea(label="Enter text and press enter", value=example,lines = 1)
-                                    output1 = gr.Audio(label="采样率22050")
-                                with gr.Accordion(label="Setting", open=False):
-                                    input2 = gr.Dropdown(label="Language", choices=lan, value="自动", interactive=True)
-                                    input3 = gr.Checkbox(value=False, label="长句切割(小说合成)")
-                                    input4 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声比例(noise scale),以控制情感", value=0.267)
-                                    input5 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声偏差(noise scale w),以控制音素长短", value=0.7)
-                                    input6 = gr.Slider(minimum=0.1, maximum=10, label="duration", value=1)
-                                with gr.Accordion(label="Advanced Setting", open=False):
-                                    audio_input3 = gr.Dropdown(label="重复次数", choices=list(range(101)), value='0', interactive=True)
-                                    api_input1 = gr.Checkbox(value=False, label="接入chatgpt")
-                                    api_input2 = gr.TextArea(label="api-key",lines=1,value = '见 https://openai.com/blog/openai-api')
-                                    output2 = gr.outputs.File(label="字幕文件:subtitles.srt")
-                                    audio_input1 = gr.Checkbox(value=False, label="修改音频路径(live2d)")
-                                    audio_input2 = gr.TextArea(label="音频路径",lines=1,value = '#参考 D:/app_develop/live2d_whole/2010002/sounds/temp.wav')
-
-                            input1.submit(bot, inputs = [chatbot,input1], outputs = [chatbot]).then(
-                                tts_fn, inputs=[chatbot,api_input1,api_input2,audio_input1,audio_input2,audio_input3,input1,input2,input3,input4,input5,input6], outputs=[chatbot,output2,output1]
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def user(user_message, history):
+        return "", history + [[user_message, None]]
+
+    def bot(history):
+        bot_message = random.choice(["Yes", "No"])
+        history[-1][1] = bot_message
+        time.sleep(1)
+        return history
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
     )
-
-    app.launch()
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+demo.launch()