Mahiruoshi committed on
Commit
9c49b65
1 Parent(s): 37eb9e1

Upload 90 files

Files changed (46)
  1. app.py +282 -160
  2. checkpoints/Nijigaku/config.json +70 -0
  3. checkpoints/Nijigaku/model.pth +3 -0
  4. checkpoints/info.json +72 -0
  5. image/あるる.png +0 -0
  6. image/いちえ.png +0 -0
  7. image/かすみ.png +0 -0
  8. image/しずく.png +0 -0
  9. image/せつ菜.png +0 -0
  10. image/なな.png +0 -0
  11. image/ひかり.png +0 -0
  12. image/まひる.png +0 -0
  13. image/やちよ.png +0 -0
  14. image/ゆゆ子.png +0 -0
  15. image/エマ.png +0 -0
  16. image/クロディーヌ.png +0 -0
  17. image/ミア.png +0 -0
  18. image/ミチル.png +0 -0
  19. image/メイファン.png +0 -0
  20. image/ララフィン.png +0 -0
  21. image/ランジュ.png +0 -0
  22. image/双葉.png +0 -0
  23. image/塁.png +0 -0
  24. image/彼方.png +0 -0
  25. image/愛.png +0 -0
  26. image/晶.png +0 -0
  27. image/果林.png +0 -0
  28. image/栞子.png +0 -0
  29. image/歩夢.png +0 -0
  30. image/珠緒.png +0 -0
  31. image/璃奈.png +0 -0
  32. image/真矢.png +0 -0
  33. image/純那.png +0 -0
  34. image/美空.png +0 -0
  35. image/華恋.png +0 -0
  36. image/静羽.png +0 -0
  37. image/香子.png +0 -0
  38. image/高咲侑.png +0 -0
  39. monotonic_align/__pycache__/__init__.cpython-39.pyc +0 -0
  40. monotonic_align/__pycache__/core.cpython-39.pyc +0 -0
  41. subtitles.srt +4 -0
  42. text/__pycache__/__init__.cpython-39.pyc +0 -0
  43. text/__pycache__/cleaners.cpython-39.pyc +0 -0
  44. text/__pycache__/japanese.cpython-39.pyc +0 -0
  45. text/__pycache__/mandarin.cpython-39.pyc +0 -0
  46. text/__pycache__/symbols.cpython-39.pyc +0 -0
app.py CHANGED
@@ -1,178 +1,300 @@
- import time
- import matplotlib.pyplot as plt
- import IPython.display as ipd
- import re
- import os
  import json
- import math
  import torch
- from torch import nn
- from torch.nn import functional as F
- from torch.utils.data import DataLoader
- import gradio as gr
  import commons
  import utils
- from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
  from models import SynthesizerTrn
  from text.symbols import symbols
  from text import text_to_sequence
- import unicodedata
- from scipy.io.wavfile import write
  import openai
 
- def get_text(text, hps):
-     text_norm = text_to_sequence(text, hps.data.text_cleaners)
-     if hps.data.add_blank:
-         text_norm = commons.intersperse(text_norm, 0)
-     text_norm = torch.LongTensor(text_norm)
-     return text_norm
-
- def get_label(text, label):
-     if f'[{label}]' in text:
-         return True, text.replace(f'[{label}]', '')
-     else:
-         return False, text
-
- def selection(speaker):
-     if speaker == "高咲侑(误)":
-         spk = 0
-         return spk
-     elif speaker == "歩夢":
-         spk = 1
-         return spk
-     elif speaker == "かすみ":
-         spk = 2
-         return spk
-     elif speaker == "しずく":
-         spk = 3
-         return spk
-     elif speaker == "果林":
-         spk = 4
-         return spk
-     elif speaker == "愛":
-         spk = 5
-         return spk
-     elif speaker == "彼方":
-         spk = 6
-         return spk
-     elif speaker == "せつ菜":
-         spk = 7
-         return spk
-     elif speaker == "エマ":
-         spk = 8
-         return spk
-     elif speaker == "璃奈":
-         spk = 9
-         return spk
-     elif speaker == "栞子":
-         spk = 10
-         return spk
-     elif speaker == "ランジュ":
-         spk = 11
-         return spk
-     elif speaker == "ミア":
-         spk = 12
-         return spk
-     elif speaker == "三色绘恋1":
-         spk = 13
-         return spk
-     elif speaker == "三色绘恋2":
-         spk = 15
-         return spk
-     elif speaker == "派蒙":
-         spk = 16
-         return spk
-
- def friend_chat(text, key, call_name, tts_input3):
-     call_name = call_name
-     openai.api_key = key
-     identity = tts_input3
-     start_sequence = '\n' + str(call_name) + ':'
-     restart_sequence = "\nYou: "
-     if 1 == 1:
-         prompt0 = text  # current prompt
-         if text == 'quit':
-             return prompt0
-         prompt = identity + prompt0 + start_sequence
-         response = openai.Completion.create(
-             model="text-davinci-003",
-             prompt=prompt,
-             temperature=0.5,
-             max_tokens=1000,
-             top_p=1.0,
-             frequency_penalty=0.5,
-             presence_penalty=0.0,
-             stop=["\nYou:"]
-         )
-     return response['choices'][0]['text'].strip()
-
- def is_japanese(string):
-     for ch in string:
-         if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
-             return True
-     return False
-
- def sle(language, text, tts_input2, call_name, tts_input3):
-     if language == "中文":
-         tts_input1 = "[ZH]" + text.replace('\n', '。').replace(' ', ',') + "[ZH]"
-         return tts_input1
-     if language == "对话":
-         text = friend_chat(text, tts_input2, call_name, tts_input3).replace('\n', '。').replace(' ', ',')
-         text = f"[JA]{text}[JA]" if is_japanese(text) else f"[ZH]{text}[ZH]"
-         return text
-     elif language == "日文":
-         tts_input1 = "[JA]" + text.replace('\n', '。').replace(' ', ',') + "[JA]"
-         return tts_input1
-
- def infer(text, tts_input2, tts_input3, language, speaker_id, n_scale=0.667, n_scale_w=0.8, l_scale=1):
-     speaker_name = speaker_id
-     speaker_id = int(selection(speaker_id))
-     text = sle(language, text, tts_input2, speaker_name, tts_input3)
-     response = text.replace("[ZH]", "").replace("[JA]", "")
-     stn_tst = get_text(text, hps_ms)
-     with torch.no_grad():
-         x_tst = stn_tst.unsqueeze(0).to(dev)
-         x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
-         sid = torch.LongTensor([speaker_id]).to(dev)
-         t1 = time.time()
-         audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0, 0].data.cpu().float().numpy()
-         t2 = time.time()
-         spending_time = "推理时间:" + str(t2 - t1) + "s"
-         print(spending_time)
-     return response, (hps_ms.data.sampling_rate, audio)
-
- lan = ["中文", "日文", "对话"]
- idols = ["高咲侑(误)", "歩夢", "かすみ", "しずく", "果林", "愛", "彼方", "せつ菜", "璃奈", "栞子", "エマ", "ランジュ", "ミア", "派蒙"]
-
- dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- hps_ms = utils.get_hparams_from_file("config.json")
- net_g_ms = SynthesizerTrn(
-     len(symbols),
-     hps_ms.data.filter_length // 2 + 1,
-     hps_ms.train.segment_size // hps_ms.data.hop_length,
-     n_speakers=hps_ms.data.n_speakers,
-     **hps_ms.model).to(dev)
- _ = net_g_ms.eval()
-
- _ = utils.load_checkpoint("G_1415000.pth", net_g_ms, None)
-
- inputs = [gr.TextArea(label="输入你的文本,支持vits版在另一个仓库", value="一次審査、二次審査、それぞれの欄に記入をお願いします。"),
-           gr.TextArea(label="如需使用openai,输入你的openai-key,合成方式选择对话", value="官网"),
-           gr.TextArea(label="写上你给她的设定", value=""),
-           gr.Dropdown(label="选择合成方式", choices=lan, value="日文", interactive=True),
-           gr.Dropdown(label="选择说话人", choices=idols, value="かすみ", interactive=True),
-           gr.Slider(minimum=0.01, maximum=1.0, label="更改噪声比例", value=0.127),
-           gr.Slider(minimum=0.01, maximum=1.0, label="更改噪声偏差", value=0.8),
-           gr.Slider(minimum=0.1, maximum=10, label="更改时间比例", value=1)]
- outputs = [gr.Textbox(label="回复"), gr.Audio(type="numpy", label="Output audio")]
- iface = gr.Interface(
-     fn=infer,
-     inputs=inputs,
-     outputs=outputs,
-     title="轻量化vits",
-     description="虹团12人模型",
- )
- iface.launch()
+ import logging
+ logging.getLogger('numba').setLevel(logging.WARNING)
+ logging.getLogger('matplotlib').setLevel(logging.WARNING)
+ logging.getLogger('urllib3').setLevel(logging.WARNING)
  import json
+ import re
+ import numpy as np
+ import IPython.display as ipd
  import torch
  import commons
  import utils
  from models import SynthesizerTrn
  from text.symbols import symbols
  from text import text_to_sequence
+ import gradio as gr
+ import time
+ import datetime
+ import os
+ import pickle
  import openai
+ import romajitable  # used by extrac() below; this import was missing in the upload
+ from scipy.io.wavfile import write
+
+ def is_japanese(string):
+     # rough kana check: any code point in the hiragana/katakana range
+     for ch in string:
+         if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+             return True
+     return False
+
+ def is_english(string):
+     pattern = re.compile('^[A-Za-z0-9.,:;!?()_*"\' ]+$')
+     return bool(pattern.fullmatch(string))
+
+ def to_html(chat_history):
+     # render the chat history as right-aligned user bubbles and left-aligned replies
+     chat_html = ""
+     for item in chat_history:
+         if item['role'] == 'user':
+             chat_html += f"""
+                 <div style="margin-bottom: 20px;">
+                     <div style="text-align: right; margin-right: 20px;">
+                         <span style="background-color: #4CAF50; color: black; padding: 10px; border-radius: 10px; display: inline-block; max-width: 80%; word-wrap: break-word;">
+                             {item['content']}
+                         </span>
+                     </div>
+                 </div>
+             """
+         else:
+             chat_html += f"""
+                 <div style="margin-bottom: 20px;">
+                     <div style="text-align: left; margin-left: 20px;">
+                         <span style="background-color: white; color: black; padding: 10px; border-radius: 10px; display: inline-block; max-width: 80%; word-wrap: break-word;">
+                             {item['content']}
+                         </span>
+                     </div>
+                 </div>
+             """
+     output_html = f"""
+         <div style="height: 400px; overflow-y: scroll; padding: 10px;">
+             {chat_html}
+         </div>
+     """
+     return output_html
+
+ def extrac(text):
+     # split the input into short sentences for sentence-by-sentence synthesis
+     text = re.sub("<[^>]*>", "", text)
+     result_list = re.split(r'\n', text)
+     final_list = []
+     for i in result_list:
+         if is_english(i):
+             i = romajitable.to_kana(i).katakana
+         i = i.replace('\n', '').replace(' ', '')
+         # current length limit for a single sentence: 20
+         if len(i) > 1:
+             if len(i) > 20:
+                 try:
+                     cur_list = re.split(r'。|!', i)
+                     for sentence in cur_list:
+                         if len(sentence) > 1:
+                             final_list.append(sentence + '。')
+                 except:
+                     pass
+             else:
+                 final_list.append(i)
+     final_list = [x for x in final_list if x != '']
+     print(final_list)
+     return final_list
+
+ def to_numpy(tensor: torch.Tensor):
+     return tensor.detach().cpu().numpy() if tensor.requires_grad \
+         else tensor.detach().numpy()
+
+ def chatgpt(text):
+     # resume the conversation from log.pickle if it exists, else start fresh
+     try:
+         with open('log.pickle', 'rb') as f:
+             messages = pickle.load(f)
+     except Exception:
+         messages = []
+     messages.append({"role": "user", "content": text})
+     chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
+     reply = chat.choices[0].message.content
+     messages.append({"role": "assistant", "content": reply})
+     print(messages[-1])
+     # keep the history bounded: once 12 messages accumulate, drop an old exchange
+     if len(messages) == 12:
+         messages[6:10] = messages[8:]
+         del messages[-2:]
+     with open('log.pickle', 'wb') as f:
+         pickle.dump(messages, f)
+     return reply, messages
+
+ def get_symbols_from_json(path):
+     assert os.path.isfile(path)
+     with open(path, 'r') as f:
+         data = json.load(f)
+     return data['symbols']
+
+ def sle(language, text):
+     # wrap the text in the language tag expected by the cjke cleaners
+     text = text.replace('\n', ' ').replace('\r', '').replace(" ", "")
+     if language == "中文":
+         return "[ZH]" + text + "[ZH]"
+     elif language == "自动":
+         return f"[JA]{text}[JA]" if is_japanese(text) else f"[ZH]{text}[ZH]"
+     elif language == "日文":
+         return "[JA]" + text + "[JA]"
+     elif language == "英文":
+         return "[EN]" + text + "[EN]"
+     elif language == "手动":
+         return text
+
+ def get_text(text, hps_ms):
+     text_norm = text_to_sequence(text, hps_ms.data.text_cleaners)
+     if hps_ms.data.add_blank:
+         text_norm = commons.intersperse(text_norm, 0)
+     text_norm = torch.LongTensor(text_norm)
+     return text_norm
+
+ def create_tts_fn(net_g, hps, speaker_id):
+     speaker_id = int(speaker_id)
+     def tts_fn(is_gpt, api_key, is_audio, audiopath, repeat_time, text, language, extract, n_scale=0.667, n_scale_w=0.8, l_scale=1):
+         repeat_time = int(repeat_time)  # the dropdown delivers its value as a string
+         if is_gpt:
+             openai.api_key = api_key
+             text, messages = chatgpt(text)
+             htm = to_html(messages)
+         else:
+             htm = ''
+         if not extract:
+             t1 = time.time()
+             stn_tst = get_text(sle(language, text), hps)
+             with torch.no_grad():
+                 x_tst = stn_tst.unsqueeze(0).to(dev)
+                 x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
+                 sid = torch.LongTensor([speaker_id]).to(dev)
+                 audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0, 0].data.cpu().float().numpy()
+             t2 = time.time()
+             spending_time = "推理时间为:" + str(t2 - t1) + "s"
+             print(spending_time)
+             file_path = "subtitles.srt"
+             try:
+                 write(audiopath + '.wav', 22050, audio)
+                 if is_audio:
+                     for i in range(repeat_time):
+                         cmd = 'ffmpeg -y -i ' + audiopath + '.wav' + ' -ar 44100 ' + audiopath.replace('temp', 'temp' + str(i))
+                         os.system(cmd)
+             except:
+                 pass
+             return (hps.data.sampling_rate, audio), file_path, htm
+         else:
+             # normalize every bracket style to <> so extrac() can strip them
+             a = ['【', '[', '(', '(']
+             b = ['】', ']', ')', ')']
+             for i in a:
+                 text = text.replace(i, '<')
+             for i in b:
+                 text = text.replace(i, '>')
+             final_list = extrac(text.replace('“', '').replace('”', ''))
+             audio_fin = []
+             c = 0
+             t = datetime.timedelta(seconds=0)
+             # open the subtitle file once, outside the loop; reopening it with 'w'
+             # per sentence would truncate it down to the last cue
+             f1 = open("subtitles.srt", 'w', encoding='utf-8')
+             for sentence in final_list:
+                 try:
+                     c += 1
+                     stn_tst = get_text(sle(language, sentence), hps)
+                     with torch.no_grad():
+                         x_tst = stn_tst.unsqueeze(0).to(dev)
+                         x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
+                         sid = torch.LongTensor([speaker_id]).to(dev)
+                         t1 = time.time()
+                         audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0, 0].data.cpu().float().numpy()
+                         t2 = time.time()
+                         spending_time = "第" + str(c) + "句的推理时间为:" + str(t2 - t1) + "s"
+                         print(spending_time)
+                     time_start = str(t).split(".")[0] + "," + str(t.microseconds)[:3]
+                     last_time = datetime.timedelta(seconds=len(audio) / float(22050))
+                     t += last_time
+                     time_end = str(t).split(".")[0] + "," + str(t.microseconds)[:3]
+                     print(time_end)
+                     f1.write(str(c - 1) + '\n' + time_start + ' --> ' + time_end + '\n' + sentence + '\n\n')
+                     audio_fin.append(audio)
+                 except:
+                     pass
+             f1.close()
+             try:
+                 write(audiopath + '.wav', 22050, np.concatenate(audio_fin))
+                 if is_audio:
+                     for i in range(repeat_time):
+                         cmd = 'ffmpeg -y -i ' + audiopath + '.wav' + ' -ar 44100 ' + audiopath.replace('temp', 'temp' + str(i))
+                         os.system(cmd)
+             except:
+                 pass
+             file_path = "subtitles.srt"
+             return (hps.data.sampling_rate, np.concatenate(audio_fin)), file_path, htm
+     return tts_fn
+
+ if __name__ == '__main__':
+     hps = utils.get_hparams_from_file('checkpoints/Nijigaku/config.json')
+     dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+     models = []
+     schools = ["Nijigasaki High School"]
+     lan = ["中文", "日文", "自动", "手动"]
+     with open("checkpoints/info.json", "r", encoding="utf-8") as f:
+         models_info = json.load(f)
+     net_g = SynthesizerTrn(
+         len(symbols),
+         hps.data.filter_length // 2 + 1,
+         hps.train.segment_size // hps.data.hop_length,
+         n_speakers=hps.data.n_speakers,
+         **hps.model).to(dev)
+     _ = net_g.eval()
+     _ = utils.load_checkpoint("checkpoints/Nijigaku/model.pth", net_g)
+     # build one (sid, name, title, example, tts_fn) tuple per speaker in the registry
+     for i in models_info:
+         school = models_info[i]
+         speakers = school["speakers"]
+         phone_dict = {symbol: i for i, symbol in enumerate(symbols)}
+         content = []
+         for j in speakers:
+             sid = int(speakers[j]['sid'])
+             title = school
+             example = speakers[j]['speech']
+             name = speakers[j]["name"]
+             content.append((sid, name, title, example, create_tts_fn(net_g, hps, sid)))
+         models.append(content)
+
+     with gr.Blocks() as app:
+         with gr.Tabs():
+             for i in schools:
+                 with gr.TabItem(i):
+                     for (sid, name, title, example, tts_fn) in models[schools.index(i)]:
+                         with gr.TabItem(name):
+                             with gr.Column():
+                                 with gr.Row():
+                                     with gr.Row():
+                                         gr.Markdown(
+                                             '<div align="center">'
+                                             f'<img style="width:auto;height:400px;" src="file/image/{name}.png">'
+                                             '</div>'
+                                         )
+                                         output_UI = gr.outputs.HTML()
+                                 with gr.Row():
+                                     with gr.Column(scale=0.85):
+                                         input1 = gr.TextArea(label="Text", value=example, lines=1)
+                                     with gr.Column(scale=0.15, min_width=0):
+                                         btnVC = gr.Button("Send")
+                                 output1 = gr.Audio(label="采样率22050")
+                                 with gr.Accordion(label="Setting(TTS)", open=False):
+                                     input2 = gr.Dropdown(label="Language", choices=lan, value="自动", interactive=True)
+                                     input4 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声比例(noise scale),以控制情感", value=0.6)
+                                     input5 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声偏差(noise scale w),以控制音素长短", value=0.668)
+                                     input6 = gr.Slider(minimum=0.1, maximum=10, label="duration", value=1)
+                                 with gr.Accordion(label="Advanced Setting(GPT3.5接口+长句子合成,建议克隆本仓库后运行main.py)", open=False):
+                                     input3 = gr.Checkbox(value=False, label="长句切割(小说合成)")
+                                     output2 = gr.outputs.File(label="字幕文件:subtitles.srt")
+                                     api_input1 = gr.Checkbox(value=False, label="接入chatgpt")
+                                     api_input2 = gr.TextArea(label="api-key", lines=1, value='见 https://openai.com/blog/openai-api')
+                                     audio_input1 = gr.Checkbox(value=False, label="修改音频路径(live2d)")
+                                     audio_input2 = gr.TextArea(label="音频路径", lines=1, value='#参考 D:/app_develop/live2d_whole/2010002/sounds/temp.wav')
+                                     audio_input3 = gr.Dropdown(label="重复生成次数", choices=list(range(101)), value='0', interactive=True)
+                                     btnVC.click(tts_fn, inputs=[api_input1, api_input2, audio_input1, audio_input2, audio_input3, input1, input2, input3, input4, input5, input6], outputs=[output1, output2, output_UI])
+
+     app.launch()
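
The long-text branch of tts_fn above derives each SRT cue from a running datetime.timedelta that grows by the length of every synthesized clip. A minimal standalone sketch of that timecode arithmetic (the sample count is hypothetical, and the microseconds are zero-padded here so sub-100 ms offsets render correctly, which the str(t.microseconds)[:3] form above does not guarantee):

import datetime

def srt_timecode(t: datetime.timedelta) -> str:
    # str(timedelta) prints H:MM:SS[.ffffff]; keep the part before the dot,
    # then take the first three digits of the zero-padded microseconds as ms.
    return str(t).split(".")[0] + "," + f"{t.microseconds:06d}"[:3]

t = datetime.timedelta(seconds=0)
samples = 51450                        # one synthesized clip at 22050 Hz (hypothetical)
start = srt_timecode(t)
t += datetime.timedelta(seconds=samples / 22050.0)
print(start, "-->", srt_timecode(t))   # 0:00:00,000 --> 0:00:02,333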
checkpoints/Nijigaku/config.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "train": {
+     "log_interval": 200,
+     "eval_interval": 5000,
+     "seed": 1234,
+     "epochs": 20000,
+     "learning_rate": 2e-4,
+     "betas": [ 0.8, 0.99 ],
+     "eps": 1e-9,
+     "batch_size": 12,
+     "fp16_run": false,
+     "lr_decay": 0.999875,
+     "segment_size": 8192,
+     "init_lr_ratio": 1,
+     "warmup_epochs": 0,
+     "c_mel": 45,
+     "c_kl": 1.0,
+     "fft_sizes": [ 384, 683, 171 ],
+     "hop_sizes": [ 30, 60, 10 ],
+     "win_lengths": [ 150, 300, 60 ],
+     "window": "hann_window"
+   },
+   "data": {
+     "training_files": "E:/filelist/train_with_paimeng.txt",
+     "validation_files": "E:/filelist/val_filelist.txt",
+     "text_cleaners": [ "cjke_cleaners" ],
+     "max_wav_value": 32768.0,
+     "sampling_rate": 22050,
+     "filter_length": 1024,
+     "hop_length": 256,
+     "win_length": 1024,
+     "n_mel_channels": 80,
+     "mel_fmin": 0.0,
+     "mel_fmax": null,
+     "add_blank": true,
+     "n_speakers": 18,
+     "cleaned_text": true
+   },
+   "model": {
+     "ms_istft_vits": true,
+     "mb_istft_vits": false,
+     "istft_vits": false,
+     "subbands": 4,
+     "gen_istft_n_fft": 16,
+     "gen_istft_hop_size": 4,
+     "inter_channels": 192,
+     "hidden_channels": 192,
+     "filter_channels": 768,
+     "n_heads": 2,
+     "n_layers": 6,
+     "kernel_size": 3,
+     "p_dropout": 0.1,
+     "resblock": "1",
+     "resblock_kernel_sizes": [ 3, 7, 11 ],
+     "resblock_dilation_sizes": [
+       [ 1, 3, 5 ],
+       [ 1, 3, 5 ],
+       [ 1, 3, 5 ]
+     ],
+     "upsample_rates": [ 4, 4 ],
+     "upsample_initial_channel": 512,
+     "upsample_kernel_sizes": [ 16, 16 ],
+     "n_layers_q": 3,
+     "use_spectral_norm": false,
+     "gin_channels": 256,
+     "use_sdp": false
+   }
+ }
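
A quick consistency check on the decoder fields above: in my reading of the MS-iSTFT-VITS generator, the upsample stack, the iSTFT hop, and the sub-band streams together have to reproduce exactly one hop_length of waveform per spectrogram frame. With the values from this config:

# Sanity-check sketch; values are copied from the JSON above, and the invariant
# is my reading of MS-iSTFT-VITS, not something this repository asserts itself.
import math
hop_length = 256
upsample_rates = [4, 4]
gen_istft_hop_size = 4
subbands = 4
assert math.prod(upsample_rates) * gen_istft_hop_size * subbands == hop_length  # 16 * 4 * 4 == 256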
checkpoints/Nijigaku/model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62dae224dce4ae492158dc675b96d3aaa45335a69f4118870d645c3fb718df63
+ size 455712599
checkpoints/info.json ADDED
@@ -0,0 +1,72 @@
+ {
+   "Nijigasaki High School": {
+     "speakers": {
+       "歩夢": {
+         "sid": 1,
+         "speech": "みなさん、はじめまして。上原歩夢です。",
+         "name": "歩夢"
+       },
+       "かすみ": {
+         "sid": 2,
+         "speech": "みんなのアイドルかすみんだよー。",
+         "name": "かすみ"
+       },
+       "しずく": {
+         "sid": 3,
+         "speech": "みなさん、こんにちは。しずくです。",
+         "name": "しずく"
+       },
+       "果林": {
+         "sid": 4,
+         "speech": "ハーイ。 朝香果林よ。よろしくね",
+         "name": "果林"
+       },
+       "愛": {
+         "sid": 5,
+         "speech": "ちっすー。アタシは愛。",
+         "name": "愛"
+       },
+       "彼方": {
+         "sid": 6,
+         "speech": "ちっすー。アタシは愛。",
+         "name": "彼方"
+       },
+       "せつ菜": {
+         "sid": 7,
+         "speech": "絶えぬ命は,常世に在らず。終わらぬ芝居も,夢幻のごとく。儚く燃えゆく,さだめであれば。舞台に刻まん,刹那の瞬き。",
+         "name": "せつ菜"
+       },
+       "エマ": {
+         "sid": 8,
+         "speech": "こんにちは、エマです。自然溢れるスイスからやってきましたっ。",
+         "name": "エマ"
+       },
+       "璃奈": {
+         "sid": 9,
+         "speech": "私、天王寺璃奈。とってもきゅーとな女の子。ホントだよ?",
+         "name": "璃奈"
+       },
+       "栞子": {
+         "sid": 10,
+         "speech": "みなさん、初めまして。三船栞子と申します。",
+         "name": "栞子"
+       },
+       "ランジュ": {
+         "sid": 11,
+         "speech": "你好啊,我是钟岚珠。",
+         "name": "ランジュ"
+       },
+       "ミア": {
+         "sid": 12,
+         "speech": "ボクはミア・テイラー。",
+         "name": "ミア"
+       },
+       "高咲侑": {
+         "sid": 0,
+         "speech": "只选一个做不到啊",
+         "name": "高咲侑"
+       }
+     },
+     "checkpoint": "checkpoints/Nijigaku/model.pth"
+   }
+ }
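
app.py consumes this registry at startup, building one Gradio tab per speaker and wiring each sid to the checkpoint named in "checkpoint". A minimal sketch of reading it (paths as committed above):

import json

with open("checkpoints/info.json", "r", encoding="utf-8") as f:
    models_info = json.load(f)

for school, info in models_info.items():
    for spk in info["speakers"].values():
        # "sid" indexes the speaker embedding inside the checkpoint file
        print(school, int(spk["sid"]), spk["name"], spk["speech"])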
image/あるる.png ADDED
image/いちえ.png ADDED
image/かすみ.png ADDED
image/しずく.png ADDED
image/せつ菜.png ADDED
image/なな.png ADDED
image/ひかり.png ADDED
image/まひる.png ADDED
image/やちよ.png ADDED
image/ゆゆ子.png ADDED
image/エマ.png ADDED
image/クロディーヌ.png ADDED
image/ミア.png ADDED
image/ミチル.png ADDED
image/メイファン.png ADDED
image/ララフィン.png ADDED
image/ランジュ.png ADDED
image/双葉.png ADDED
image/塁.png ADDED
image/彼方.png ADDED
image/愛.png ADDED
image/晶.png ADDED
image/果林.png ADDED
image/栞子.png ADDED
image/歩夢.png ADDED
image/珠緒.png ADDED
image/璃奈.png ADDED
image/真矢.png ADDED
image/純那.png ADDED
image/美空.png ADDED
image/華恋.png ADDED
image/静羽.png ADDED
image/香子.png ADDED
image/高咲侑.png ADDED
monotonic_align/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (842 Bytes)
monotonic_align/__pycache__/core.cpython-39.pyc ADDED
Binary file (993 Bytes)
subtitles.srt ADDED
@@ -0,0 +1,4 @@
+ 0
+ 0:00:00,0 --> 0:00:02,333
+ ちっすー。アタシは愛。
+
text/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (2.14 kB)
text/__pycache__/cleaners.cpython-39.pyc ADDED
Binary file (6.31 kB)
text/__pycache__/japanese.cpython-39.pyc ADDED
Binary file (4.44 kB)
text/__pycache__/mandarin.cpython-39.pyc ADDED
Binary file (6.41 kB)
text/__pycache__/symbols.cpython-39.pyc ADDED
Binary file (438 Bytes)